In [2]:
import os
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
! kaggle datasets download ninadaithal/imagesoasis

Dataset URL: https://www.kaggle.com/datasets/ninadaithal/imagesoasis
License(s): apache-2.0


In [4]:
! unzip imagesoasis.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_102.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_103.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_104.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_105.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_106.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_107.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_108.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_109.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_110.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_111.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_112.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_113.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_114.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_115.jpg

In [5]:
import os
import shutil
import random

# Define paths
original_dataset_dir = "/content/Data"
new_dataset_dir = "/content/New_Data"

# Ensure new dataset directory exists
os.makedirs(new_dataset_dir, exist_ok=True)

# Define number of samples per class (35k total, distributed across classes)
target_samples = 35000
num_classes = 4  # Update if needed
samples_per_class = target_samples // num_classes

# Sample files from each class
for class_name in os.listdir(original_dataset_dir):
    class_path = os.path.join(original_dataset_dir, class_name)
    new_class_path = os.path.join(new_dataset_dir, class_name)

    if os.path.isdir(class_path):
        os.makedirs(new_class_path, exist_ok=True)

        # Get all images in the class
        all_images = os.listdir(class_path)
        selected_images = random.sample(all_images, min(samples_per_class, len(all_images)))

        # Copy selected images
        for img in selected_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(new_class_path, img))

print("Subset dataset created with 35k images.")

Subset dataset created with 35k images.


In [6]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

In [7]:
# Set paths
DATASET_PATH = "/content/New_Data"
IMAGE_SIZE = (48, 48)
BATCH_SIZE = 32
TEST_SPLIT = 0.1
VAL_SPLIT = 0.1

# Class names
CLASSES = ["Mild Dementia", "Moderate Dementia", "Non Demented", "Very mild Dementia"]

# Created a directory for organized dataset (Train, Val, Test)
OUTPUT_DIR = "processed_dataset"
os.makedirs(OUTPUT_DIR, exist_ok=True)

for split in ["train", "val", "test"]:
    for cls in CLASSES:
        os.makedirs(os.path.join(OUTPUT_DIR, split, cls), exist_ok=True)

In [8]:
# Function to split dataset into train/val/test
def split_and_copy_images():
    for cls in CLASSES:
        cls_path = os.path.join(DATASET_PATH, cls)
        images = os.listdir(cls_path)

        # Shuffle images
        np.random.shuffle(images)

        # Train-Val-Test Split
        train_imgs, temp_imgs = train_test_split(images, test_size=(VAL_SPLIT + TEST_SPLIT))
        val_imgs, test_imgs = train_test_split(temp_imgs, test_size=(TEST_SPLIT / (VAL_SPLIT + TEST_SPLIT)))

        # Copy images to new directory
        for img in train_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "train", cls, img))
        for img in val_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "val", cls, img))
        for img in test_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "test", cls, img))

In [9]:
from sklearn.model_selection import train_test_split
split_and_copy_images()

# Data augmentation for training set to handle class imbalance
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,  # Normalize pixel values
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# No augmentation for validation and test sets
test_val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [10]:
# Load data using ImageDataGenerator
train_generator = train_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, "train"),
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

val_generator = test_val_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, "val"),
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

test_generator = test_val_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, "test"),
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)
print("Class indices:", train_generator.class_indices)

Found 18391 images belonging to 4 classes.
Found 2299 images belonging to 4 classes.
Found 2300 images belonging to 4 classes.
Class indices: {'Mild Dementia': 0, 'Moderate Dementia': 1, 'Non Demented': 2, 'Very mild Dementia': 3}


In [11]:
# Compute class weights
from sklearn.utils.class_weight import compute_class_weight
labels = train_generator.classes
class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
class_weights = dict(enumerate(class_weights))

In [13]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load InceptionV3 (pretrained on ImageNet) with a minimum required input size
base_model = InceptionV3(weights="imagenet", include_top=False, input_shape=(75, 75, 3))

# Freeze base model layers
base_model.trainable = False

# Build the model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax")  # Output layer
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [25]:
print("Classes in train generator:", train_generator.class_indices)
print("Number of classes:", len(train_generator.class_indices))


Classes in train generator: {'Mild Dementia': 0, 'Moderate Dementia': 1, 'Non Demented': 2, 'Very mild Dementia': 3}
Number of classes: 4


In [26]:

NUM_CLASSES = len(train_generator.class_indices)  # Get the actual number of classes

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation="softmax")  # Update output layer
])


In [27]:
model.compile(optimizer=Adam(learning_rate=0.001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])


In [28]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    class_weight=class_weights,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator)
)


Epoch 1/15
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m298s[0m 393ms/step - accuracy: 0.3531 - loss: 1.6242 - val_accuracy: 0.4389 - val_loss: 1.1642
Epoch 2/15
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 378ms/step - accuracy: 0.3872 - loss: 1.1659 - val_accuracy: 0.4798 - val_loss: 1.0868
Epoch 3/15
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 377ms/step - accuracy: 0.4235 - loss: 1.1141 - val_accuracy: 0.4732 - val_loss: 1.1184
Epoch 4/15
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 377ms/step - accuracy: 0.4427 - loss: 1.0421 - val_accuracy: 0.5481 - val_loss: 1.0206
Epoch 5/15
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 373ms/step - accuracy: 0.4734 - loss: 0.9912 - val_accuracy: 0.5924 - val_loss: 0.9182
Epoch 6/15
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m339s[0m 397ms/step - accuracy: 0.4868 - loss: 1.0009 - val_accuracy: 0.5924 - val_loss: 0.9158
Epoc

In [29]:
 # Save model
model.save("InceptionV3_oasis_15_epochs.keras")

In [30]:
# Get train,val and test accuracies
train_loss, train_acc = model.evaluate(train_generator)
val_loss, val_acc = model.evaluate(val_generator)
test_loss, test_acc = model.evaluate(test_generator)

print(f"Train Accuracy: {train_acc * 100:.2f}%")
print(f"Val Accuracy: {val_acc * 100:.2f}%")
print(f"Test Accuracy: {test_acc * 100:.2f}%")

[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 339ms/step - accuracy: 0.6489 - loss: 0.7932
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 342ms/step - accuracy: 0.6381 - loss: 0.8044
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 118ms/step - accuracy: 0.5393 - loss: nan
Train Accuracy: 64.77%
Val Accuracy: 65.46%
Test Accuracy: 21.78%


In [31]:
from sklearn.metrics import classification_report

# Assuming you have the true labels and predicted labels for both train and test datasets
val_predictions = model.predict(val_generator)
test_predictions = model.predict(test_generator)

# Convert predictions to class labels (if necessary, depends on your model output)
val_predictions_labels = np.argmax(val_predictions, axis=1)
test_predictions_labels = np.argmax(test_predictions, axis=1)

# Assuming your labels are categorical and represented as integers
print("Train Classification Report:")
print(classification_report(val_generator.classes, val_predictions_labels))

print("Test Classification Report:")
print(classification_report(test_generator.classes, test_predictions_labels))

# You can also extract individual metrics:
# For train
val_report = classification_report(val_generator.classes, val_predictions_labels, output_dict=True)
print(f"Val Precision: {val_report['weighted avg']['precision']:.2f}")
print(f"Val Recall: {val_report['weighted avg']['recall']:.2f}")
print(f"Val F1-Score: {val_report['weighted avg']['f1-score']:.2f}")

# For test
test_report = classification_report(test_generator.classes, test_predictions_labels, output_dict=True)
print(f"Test Precision: {test_report['weighted avg']['precision']:.2f}")
print(f"Test Recall: {test_report['weighted avg']['recall']:.2f}")
print(f"Test F1-Score: {test_report['weighted avg']['f1-score']:.2f}")

[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 452ms/step
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 112ms/step
Train Classification Report:
              precision    recall  f1-score   support

           0       0.22      0.40      0.28       500
           1       0.02      0.02      0.02        49
           2       0.36      0.24      0.29       875
           3       0.37      0.32      0.34       875

    accuracy                           0.30      2299
   macro avg       0.24      0.24      0.23      2299
weighted avg       0.32      0.30      0.30      2299

Test Classification Report:
              precision    recall  f1-score   support

           0       0.22      1.00      0.36       501
           1       0.00      0.00      0.00        49
           2       0.00      0.00      0.00       875
           3       0.00      0.00      0.00       875

    accuracy                           0.22      2300
   macro avg       0.05      0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
