In [None]:
import os
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib

# -------- 1. Load Dataset --------
def load_dataset(image_folder, image_size=(128, 128)):
    """Load a folder-per-class image dataset as flattened grayscale vectors.

    Each immediate subdirectory of ``image_folder`` is treated as one class,
    and the subdirectory name is used as the label of every image inside it.
    Non-directory entries at the top level and files whose extension is not
    one of ``.png``, ``.jpg``, ``.jpeg``, ``.tif`` or ``.tiff`` (compared
    case-insensitively) are ignored. Images that cannot be read or resized
    are reported on stdout and skipped.

    Args:
        image_folder: Path of the directory that contains one subdirectory
            per class.
        image_size: ``(rows, cols)`` every image is resized to before being
            flattened. Defaults to ``(128, 128)``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` has one row
        per loaded image and ``rows * cols`` columns; ``labels`` holds the
        matching class-folder names in the same order. When nothing could be
        loaded, ``images`` has shape ``(0, rows * cols)`` and ``labels`` has
        shape ``(0,)``.

    Raises:
        OSError: If ``image_folder`` (or a class folder) cannot be listed.
    """
    extensions = (".png", ".jpg", ".jpeg", ".tif", ".tiff")
    images = []
    labels = []

    # os.listdir() returns entries in arbitrary order. Sorting keeps the row
    # order stable across runs and machines, so a seeded train/test split
    # performed on the result is actually reproducible.
    for label in sorted(os.listdir(image_folder)):
        label_path = os.path.join(image_folder, label)
        if not os.path.isdir(label_path):
            continue

        for file in sorted(os.listdir(label_path)):
            if not file.lower().endswith(extensions):
                continue

            file_path = os.path.join(label_path, file)
            try:
                img = imread(file_path, as_gray=True)
                img_resized = resize(img, image_size)  # Resize to fixed shape
            except Exception as e:
                # Best effort: one unreadable file must not abort the load.
                print(f"Skipping {file}: {e}")
                continue

            images.append(img_resized.flatten())  # Flatten to 1D
            labels.append(label)

    if not images:
        # Keep the feature matrix 2-D even when empty so callers can rely on
        # a consistent (n_samples, n_features) layout.
        n_features = int(image_size[0]) * int(image_size[1])
        return np.empty((0, n_features)), np.array([], dtype=str)

    return np.array(images), np.array(labels)

# -------- 2. Load & Preprocess --------
# Expected layout: one subfolder per class, each holding that class's images.
dataset_path = "C:\\Users\\admin\\Downloads\\Project\\Brain\\brain-tumor-dataset"
X, y = load_dataset(dataset_path)
if len(X) == 0:
    # Fail here with a clear message instead of letting the split/scaler
    # raise a less obvious error further down.
    raise ValueError(f"No images could be loaded from {dataset_path!r}")
print("✅ Dataset Loaded")

# -------- 3. Encode Labels --------
# Map the class-folder names to integer ids; the fitted encoder is saved so
# predictions can be mapped back to names later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
joblib.dump(label_encoder, "label_encoder.pkl")
print("✅ Labels Encoded")

# -------- 4. Split Dataset --------
# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
print("✅ Dataset Split")

# -------- 5. Scale Features --------
# Fit the scaler on the training rows only, then apply the same statistics
# to the held-out rows so no test information leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
joblib.dump(scaler, "scaler.pkl")
print("✅ Features Scaled")

# -------- 6. Train Model --------
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)
joblib.dump(model, "brain_tumor_model.pkl")
print("✅ Model Trained and Saved")

# -------- 7. Evaluate Model --------
# The held-out split was previously computed but never used; report its mean
# accuracy so the saved model comes with a basic quality check.
test_accuracy = model.score(X_test_scaled, y_test)
print(f"✅ Test Accuracy: {test_accuracy:.4f}")


✅ Dataset Loaded
✅ Labels Encoded
✅ Dataset Split
✅ Features Scaled
