In [7]:
import os
import shutil
import numpy as np
from sklearn.model_selection import train_test_split

In [8]:
# Define dataset directory
dataset_dir = "C:/Users/samik/Documents/GitHub/MS-disease/Originalpatches/classification"  # Update this to your actual dataset path

In [11]:
# New dataset path with separate train/val/test
split_dataset_dir = "C:/Users/samik/Documents/GitHub/MS-disease/SplitDataset"

In [12]:
import concurrent.futures

def copy_image(src, dst):
    shutil.copy2(src, dst)

def split_and_copy_parallel(category):
    src_folder = os.path.join(dataset_dir, category)
    
    all_images = [entry.name for entry in os.scandir(src_folder) if entry.is_file()]
    all_images = np.array(all_images)

    # Train (80%), Val (10%), Test (10%) Split
    train_imgs, temp_imgs = train_test_split(all_images, test_size=0.2, random_state=42)
    val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)

    for img_set, split in zip([train_imgs, val_imgs, test_imgs], ["train", "val", "test"]):
        os.makedirs(os.path.join(split_dataset_dir, split, category), exist_ok=True)

        # Use parallel processing for faster copying
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for img in img_set:
                src = os.path.join(src_folder, img)
                dst = os.path.join(split_dataset_dir, split, category, img)
                executor.submit(copy_image, src, dst)

# Process both categories (0 & 1)
for category in ["0", "1"]:
    split_and_copy_parallel(category)

print("✅ Dataset successfully split much faster using parallel processing!")


✅ Dataset successfully split much faster using parallel processing!


In [16]:
import tensorflow as tf

# Define dataset path
dataset_dir = "C:/Users/samik/Documents/GitHub/MS-disease/SplitDataset"

# Define batch size and image size
img_size = (128, 128)
batch_size = 128  # ✅ Larger batch for faster processing

# Use TensorFlow's built-in function to load data
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(dataset_dir, "train"),
    image_size=img_size,
    batch_size=batch_size,
    color_mode="grayscale",  # Load images as grayscale
    label_mode="binary"
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(dataset_dir, "val"),
    image_size=img_size,
    batch_size=batch_size,
    color_mode="grayscale",  # Load images as grayscale
    label_mode="binary"
)

test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(dataset_dir, "test"),
    image_size=img_size,
    batch_size=batch_size,
    color_mode="grayscale",  # Load images as grayscale
    label_mode="binary"
)

# ✅ Optimize dataset for performance
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)



Found 198798 files belonging to 2 classes.
Found 24849 files belonging to 2 classes.
Found 24851 files belonging to 2 classes.


In [17]:
from tensorflow import keras
from tensorflow.keras import layers

# Define CNN model
model = keras.Sequential([
    keras.Input(shape=(128, 128, 1)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()


In [18]:
# Train the CNN model
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,  # Start with 10 and increase if needed
    batch_size=batch_size
)


Epoch 1/10
[1m1554/1554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4939s[0m 3s/step - accuracy: 0.8282 - loss: 0.8399 - val_accuracy: 0.8690 - val_loss: 0.2720
Epoch 2/10
[1m1554/1554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1919s[0m 1s/step - accuracy: 0.8673 - loss: 0.2787 - val_accuracy: 0.8757 - val_loss: 0.2656
Epoch 3/10
[1m1554/1554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1884s[0m 1s/step - accuracy: 0.8730 - loss: 0.2659 - val_accuracy: 0.8817 - val_loss: 0.2459
Epoch 4/10
[1m1554/1554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1853s[0m 1s/step - accuracy: 0.8808 - loss: 0.2526 - val_accuracy: 0.8822 - val_loss: 0.2509
Epoch 5/10
[1m1554/1554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2208s[0m 1s/step - accuracy: 0.8868 - loss: 0.2421 - val_accuracy: 0.8890 - val_loss: 0.2430
Epoch 6/10
[1m1554/1554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2733s[0m 2s/step - accuracy: 0.8948 - loss: 0.2283 - val_accuracy: 0.8936 - val_loss: 0.2315
Epoc

In [19]:
# Evaluate the model on test dataset
test_loss, test_acc = model.evaluate(test_ds)
print(f"✅ Test Accuracy: {test_acc:.4f}")

[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 479ms/step - accuracy: 0.9096 - loss: 0.2426
✅ Test Accuracy: 0.9090


In [20]:
model.save("ms_detection_cnn.h5")

