In [1]:
import os
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [2]:
! kaggle datasets download ninadaithal/imagesoasis

Dataset URL: https://www.kaggle.com/datasets/ninadaithal/imagesoasis
License(s): apache-2.0


In [3]:
! unzip imagesoasis.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_102.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_103.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_104.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_105.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_106.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_107.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_108.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_109.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_110.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_111.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_112.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_113.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_114.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_115.jpg

In [4]:
import os
import shutil
import random

# Define paths
original_dataset_dir = "/content/Data"
new_dataset_dir = "/content/New_Data"

# Ensure new dataset directory exists
os.makedirs(new_dataset_dir, exist_ok=True)

# Define number of samples per class (35k total, distributed across classes)
target_samples = 35000
num_classes = 4  # Update if needed
samples_per_class = target_samples // num_classes

# Sample files from each class
for class_name in os.listdir(original_dataset_dir):
    class_path = os.path.join(original_dataset_dir, class_name)
    new_class_path = os.path.join(new_dataset_dir, class_name)

    if os.path.isdir(class_path):
        os.makedirs(new_class_path, exist_ok=True)

        # Get all images in the class
        all_images = os.listdir(class_path)
        selected_images = random.sample(all_images, min(samples_per_class, len(all_images)))

        # Copy selected images
        for img in selected_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(new_class_path, img))

print("Subset dataset created with 35k images.")

Subset dataset created with 35k images.


In [20]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import GlobalAveragePooling2D,Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential,Model
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from tensorflow.keras.optimizers import Adam

In [6]:
# Directories for dataset
dataset_dir = "/content/New_Data"  # Change this to your dataset path

# Define image parameters
IMG_SIZE = (48, 48)
BATCH_SIZE = 32
EPOCHS = 15
TEST_SPLIT = 0.1
VAL_SPLIT = 0.1

# Class names
CLASSES = ["Mild Dementia", "Moderate Dementia", "Non Demented", "Very mild Dementia"]

# Created a directory for organized dataset (Train, Val, Test)
OUTPUT_DIR = "processed_dataset"
os.makedirs(OUTPUT_DIR, exist_ok=True)

for split in ["train", "val", "test"]:
    for cls in CLASSES:
        os.makedirs(os.path.join(OUTPUT_DIR, split, cls), exist_ok=True)

In [8]:
# Function to split dataset into train/val/test
def split_and_copy_images():
    for cls in CLASSES:
        cls_path = os.path.join(dataset_dir, cls)
        images = os.listdir(cls_path)

        # Shuffle images
        np.random.shuffle(images)

        # Train-Val-Test Split
        train_imgs, temp_imgs = train_test_split(images, test_size=(VAL_SPLIT + TEST_SPLIT))
        val_imgs, test_imgs = train_test_split(temp_imgs, test_size=(TEST_SPLIT / (VAL_SPLIT + TEST_SPLIT)))

        # Copy images to new directory
        for img in train_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "train", cls, img))
        for img in val_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "val", cls, img))
        for img in test_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "test", cls, img))

from sklearn.model_selection import train_test_split
split_and_copy_images()

In [9]:
# Image Augmentation
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2,
    shear_range=0.2
)
# Load Data
train_generator = datagen.flow_from_directory(
    dataset_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_generator = datagen.flow_from_directory(
    dataset_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

Found 18393 images belonging to 4 classes.
Found 4597 images belonging to 4 classes.


In [21]:
# Load VGG16 (pretrained on ImageNet)
base_model = VGG19(weights="imagenet", include_top=False, input_shape=(48, 48, 3))

# Freeze base model layers
base_model.trainable = False

# Build the model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax")  # Output layer
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

In [22]:
# Train Model
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator
)

  self._warn_if_super_not_called()


Epoch 1/15
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 90ms/step - accuracy: 0.4103 - loss: 1.2517 - val_accuracy: 0.4821 - val_loss: 1.0681
Epoch 2/15
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 79ms/step - accuracy: 0.4690 - loss: 1.0896 - val_accuracy: 0.5193 - val_loss: 1.0595
Epoch 3/15
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 80ms/step - accuracy: 0.4896 - loss: 1.0714 - val_accuracy: 0.5275 - val_loss: 1.0437
Epoch 4/15
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 78ms/step - accuracy: 0.4965 - loss: 1.0598 - val_accuracy: 0.5219 - val_loss: 1.0603
Epoch 5/15
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 78ms/step - accuracy: 0.4968 - loss: 1.0467 - val_accuracy: 0.5251 - val_loss: 1.0485
Epoch 6/15
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 80ms/step - accuracy: 0.4947 - loss: 1.0496 - val_accuracy: 0.5175 - val_loss: 1.0299
Epoch 7/15
[1m5

In [23]:
# Save model
model.save("vgg19_oasis_15_epochs.keras")

In [24]:
# Evaluate on Validation Set
y_val_true = val_generator.classes
y_val_pred = model.predict(val_generator)
y_val_pred_classes = np.argmax(y_val_pred, axis=1)
val_report = classification_report(y_val_true, y_val_pred_classes, target_names=val_generator.class_indices.keys(), output_dict=True)

# Evaluate on Test Set
test_generator = datagen.flow_from_directory(
    dataset_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

y_true = test_generator.classes
y_pred = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred, axis=1)

test_report = classification_report(y_true, y_pred_classes, target_names=test_generator.class_indices.keys(), output_dict=True)

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 68ms/step
Found 22990 images belonging to 4 classes.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  self._warn_if_super_not_called()


[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 72ms/step


In [25]:
# Classification Report
report = classification_report(y_true, y_pred_classes, target_names=test_generator.class_indices.keys(), output_dict=True)
print("Validation Accuracy:", history.history['val_accuracy'][-1])
print("Test Accuracy:", report['accuracy'])
print("Test Precision:", report['weighted avg']['precision'])
print("Test Recall:", report['weighted avg']['recall'])

Validation Accuracy: 0.5264303088188171
Test Accuracy: 0.5548064375815572
Test Precision: 0.5715162057584778
Test Recall: 0.5548064375815572


In [26]:
# Print Results
print("Validation Accuracy:", history.history['val_accuracy'][-1])
print("Validation Precision:", val_report['weighted avg']['precision'])
print("Validation Recall:", val_report['weighted avg']['recall'])
print("Test Accuracy:", test_report['accuracy'])
print("Test Precision:", test_report['weighted avg']['precision'])
print("Test Recall:", test_report['weighted avg']['recall'])

Validation Accuracy: 0.5264303088188171
Validation Precision: 0.3421298330595227
Validation Recall: 0.3582771372634327
Test Accuracy: 0.5548064375815572
Test Precision: 0.5715162057584778
Test Recall: 0.5548064375815572
