In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

In [None]:
# Attach the user's Google Drive to the Colab VM filesystem so the dataset
# folders under /content/gdrive become readable (triggers Colab's auth prompt).
from google.colab import drive

drive.mount(mountpoint='/content/gdrive')

In [6]:
# Make the dataset folder on the mounted Drive the working directory; files
# opened later with relative names (e.g. the *.pickle outputs) resolve here.
%cd /content/gdrive/My Drive/dataset
# Show what is currently stored in the dataset folder.
!ls

/content/gdrive/My Drive/dataset
test_data.npy  Testing_Augmented  Training	 X_train.pickle  Y_train.pickle
Testing        train_data.npy	  X_test.pickle  Y_test.pickle


In [4]:
# Edge length (in pixels) that every image is resized to
IMG_SIZE = 150

# Class folder names; a category's position in this list is its class index
CATEGORIES = [
    "glioma_tumor",
    "meningioma_tumor",
    "no_tumor",
    "pituitary_tumor",
]

# Dataset folders on the mounted Google Drive
TRAIN_DIR = "/content/gdrive/My Drive/dataset/Training"  # train data folder
TEST_DIR = "/content/gdrive/My Drive/dataset/Testing"  # test data folder

In [7]:
import pickle
# Module-level accumulator of [image_array, class_index] pairs,
# populated by create_training_data().
training_data = list()

def create_training_data(seed=None):
    """Load, resize and label every training image into ``training_data``.

    For each name in ``CATEGORIES`` the folder ``TRAIN_DIR/<category>`` is
    listed; every entry is read with OpenCV as a 3-channel image, resized to
    ``IMG_SIZE x IMG_SIZE`` and stored in the module-level ``training_data``
    list as ``[image_array, class_index]``. Entries OpenCV cannot decode are
    reported and skipped, as are images that raise while being processed.
    The collected samples are shuffled in place at the end.

    Args:
        seed: Optional seed for the final shuffle. ``None`` (the default)
            shuffles with the global ``random`` state, exactly as before;
            any other value yields a reproducible sample order.

    Returns:
        None. The result is left in the global ``training_data`` list.
    """
    # Rebuild from scratch: without this, calling the function a second time
    # would append a duplicate copy of every sample to the existing list.
    training_data.clear()

    for class_num, category in enumerate(CATEGORIES):  # class index = position
        path = os.path.join(TRAIN_DIR, category)  # path to category folder

        # os.listdir order is arbitrary; sorting makes the pre-shuffle order
        # stable so that a fixed `seed` really gives a reproducible dataset.
        for img in tqdm(sorted(os.listdir(path)), desc=f"Processing {category}"):
            try:
                img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_COLOR)  # always 3 channels
                if img_array is None:  # skip corrupt or unreadable images
                    print(f"Skipped broken image: {img}")
                    continue

                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize
                training_data.append([new_array, class_num])

            except Exception as e:
                # Best effort: one bad file must not abort the whole import.
                print(f"Error with image {img}: {e}")
                continue

    if seed is None:
        random.shuffle(training_data)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(training_data)

# Populate the global `training_data` list from the images on disk
create_training_data()

print(f"✅ Total training samples: {len(training_data)}")

# Split the [image, label] pairs into an image tensor and a label vector
X_train = np.array(
    [image for image, _ in training_data],
    dtype=np.uint8,
).reshape((-1, IMG_SIZE, IMG_SIZE, 3))
Y_train = np.array([label for _, label in training_data])

# Persist both arrays with pickle (written to the current working directory)
with open("X_train.pickle", mode="wb") as f:
    pickle.dump(X_train, f)

with open("Y_train.pickle", mode="wb") as f:
    pickle.dump(Y_train, f)

print("✅ X_train and Y_train saved successfully")

Processing glioma_tumor: 100%|██████████| 826/826 [00:42<00:00, 19.31it/s] 
Processing meningioma_tumor: 100%|██████████| 822/822 [00:18<00:00, 43.37it/s] 
Processing no_tumor: 100%|██████████| 395/395 [00:06<00:00, 56.92it/s] 
Processing pituitary_tumor: 100%|██████████| 827/827 [00:26<00:00, 31.76it/s] 


✅ Total training samples: 2870
✅ X_train and Y_train saved successfully
