In [1]:
import os
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [2]:
! kaggle datasets download ninadaithal/imagesoasis

Dataset URL: https://www.kaggle.com/datasets/ninadaithal/imagesoasis
License(s): apache-2.0


In [3]:
! unzip imagesoasis.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_102.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_103.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_104.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_105.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_106.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_107.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_108.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_109.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_110.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_111.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_112.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_113.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_114.jpg  
  inflating: Data/Very mild Dementia/OAS1_0247_MR1_mpr-4_115.jpg

In [4]:
import os
import shutil
import random

# Define paths
original_dataset_dir = "/content/Data"
new_dataset_dir = "/content/New_Data"

# Ensure new dataset directory exists
os.makedirs(new_dataset_dir, exist_ok=True)

# Define number of samples per class (35k total, distributed across classes)
target_samples = 35000
num_classes = 4  # Update if needed
samples_per_class = target_samples // num_classes

# Sample files from each class
for class_name in os.listdir(original_dataset_dir):
    class_path = os.path.join(original_dataset_dir, class_name)
    new_class_path = os.path.join(new_dataset_dir, class_name)

    if os.path.isdir(class_path):
        os.makedirs(new_class_path, exist_ok=True)

        # Get all images in the class
        all_images = os.listdir(class_path)
        selected_images = random.sample(all_images, min(samples_per_class, len(all_images)))

        # Copy selected images
        for img in selected_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(new_class_path, img))

print("Subset dataset created with 35k images.")

Subset dataset created with 35k images.


In [5]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ConvNeXtBase
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

In [7]:
# Set paths
DATASET_PATH = "/content/New_Data"
IMAGE_SIZE = (48, 48)
BATCH_SIZE = 32
TEST_SPLIT = 0.1
VAL_SPLIT = 0.1

# Class names
CLASSES = ["Mild Dementia", "Moderate Dementia", "Non Demented", "Very mild Dementia"]

# Created a directory for organized dataset (Train, Val, Test)
OUTPUT_DIR = "processed_dataset"
os.makedirs(OUTPUT_DIR, exist_ok=True)

for split in ["train", "val", "test"]:
    for cls in CLASSES:
        os.makedirs(os.path.join(OUTPUT_DIR, split, cls), exist_ok=True)

In [14]:
# Function to split dataset into train/val/test
def split_and_copy_images():
    for cls in CLASSES:
        cls_path = os.path.join(dataset_dir, cls)
        images = os.listdir(cls_path)

        # Shuffle images
        np.random.shuffle(images)

        # Train-Val-Test Split
        train_imgs, temp_imgs = train_test_split(images, test_size=(VAL_SPLIT + TEST_SPLIT))
        val_imgs, test_imgs = train_test_split(temp_imgs, test_size=(TEST_SPLIT / (VAL_SPLIT + TEST_SPLIT)))

        # Copy images to new directory
        for img in train_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "train", cls, img))
        for img in val_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "val", cls, img))
        for img in test_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(OUTPUT_DIR, "test", cls, img))




In [15]:
from sklearn.model_selection import train_test_split
split_and_copy_images()

# Data augmentation for training set to handle class imbalance
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,  # Normalize pixel values
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# No augmentation for validation and test sets
test_val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [16]:
# Load data using ImageDataGenerator
train_generator = train_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, "train"),
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

val_generator = test_val_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, "val"),
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

test_generator = test_val_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, "test"),
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)
print("Class indices:", train_generator.class_indices)


Found 22070 images belonging to 4 classes.
Found 4370 images belonging to 4 classes.
Found 4358 images belonging to 4 classes.
Class indices: {'Mild Dementia': 0, 'Moderate Dementia': 1, 'Non Demented': 2, 'Very mild Dementia': 3}


In [17]:
# Compute class weights
from sklearn.utils.class_weight import compute_class_weight
labels = train_generator.classes
class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
class_weights = dict(enumerate(class_weights))

In [18]:
from tensorflow.keras.applications import ConvNeXtBase  # Import ConvNeXt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.optimizers import Adam

# Load ConvNeXtBase (pretrained on ImageNet)
base_model = ConvNeXtBase(weights="imagenet", include_top=False, input_shape=(48, 48, 3))

# Freeze base model layers
base_model.trainable = False

# Build the model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax")  # Output layer
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()


In [19]:
# Train Model
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator
)

  self._warn_if_super_not_called()


Epoch 1/15
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 120ms/step - accuracy: 0.3914 - loss: 1.3303 - val_accuracy: 0.4432 - val_loss: 1.1085
Epoch 2/15
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 85ms/step - accuracy: 0.4205 - loss: 1.1389 - val_accuracy: 0.4307 - val_loss: 1.0992
Epoch 3/15
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 82ms/step - accuracy: 0.4304 - loss: 1.1258 - val_accuracy: 0.4469 - val_loss: 1.0902
Epoch 4/15
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 91ms/step - accuracy: 0.4457 - loss: 1.1111 - val_accuracy: 0.4636 - val_loss: 1.0810
Epoch 5/15
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 91ms/step - accuracy: 0.4357 - loss: 1.1153 - val_accuracy: 0.4577 - val_loss: 1.0856
Epoch 6/15
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 83ms/step - accuracy: 0.4376 - loss: 1.1095 - val_accuracy: 0.4394 - val_loss: 1.0854
Epoch 7/15
[1

In [20]:
 # Save model
model.save("convnext_oasis_15_epochs.keras")

In [21]:
# Get train,val and test accuracies
train_loss, train_acc = model.evaluate(train_generator)
val_loss, val_acc = model.evaluate(val_generator)
test_loss, test_acc = model.evaluate(test_generator)

print(f"Train Accuracy: {train_acc * 100:.2f}%")
print(f"Val Accuracy: {val_acc * 100:.2f}%")
print(f"Test Accuracy: {test_acc * 100:.2f}%")

[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 79ms/step - accuracy: 0.4581 - loss: 1.0830
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 42ms/step - accuracy: 0.4572 - loss: 1.0788
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 65ms/step - accuracy: 0.1968 - loss: 1.2514
Train Accuracy: 45.98%
Val Accuracy: 45.97%
Test Accuracy: 46.31%


In [22]:
from sklearn.metrics import classification_report

# Assuming you have the true labels and predicted labels for both train and test datasets
val_predictions = model.predict(val_generator)
test_predictions = model.predict(test_generator)

# Convert predictions to class labels (if necessary, depends on your model output)
val_predictions_labels = np.argmax(val_predictions, axis=1)
test_predictions_labels = np.argmax(test_predictions, axis=1)

# Assuming your labels are categorical and represented as integers
print("Train Classification Report:")
print(classification_report(val_generator.classes, val_predictions_labels))

print("Test Classification Report:")
print(classification_report(test_generator.classes, test_predictions_labels))

# You can also extract individual metrics:
# For train
val_report = classification_report(val_generator.classes, val_predictions_labels, output_dict=True)
print(f"Val Precision: {val_report['weighted avg']['precision']:.2f}")
print(f"Val Recall: {val_report['weighted avg']['recall']:.2f}")
print(f"Val F1-Score: {val_report['weighted avg']['f1-score']:.2f}")

# For test
test_report = classification_report(test_generator.classes, test_predictions_labels, output_dict=True)
print(f"Test Precision: {test_report['weighted avg']['precision']:.2f}")
print(f"Test Recall: {test_report['weighted avg']['recall']:.2f}")
print(f"Test F1-Score: {test_report['weighted avg']['f1-score']:.2f}")

[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 84ms/step
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 60ms/step
Train Classification Report:
              precision    recall  f1-score   support

           0       0.11      0.00      0.00       946
           1       0.00      0.00      0.00        94
           2       0.38      0.19      0.25      1662
           3       0.38      0.80      0.52      1668

    accuracy                           0.38      4370
   macro avg       0.22      0.25      0.19      4370
weighted avg       0.31      0.38      0.29      4370

Test Classification Report:
              precision    recall  f1-score   support

           0       0.44      0.01      0.02       954
           1       0.00      0.00      0.00        93
           2       0.70      0.34      0.46      1659
           3       0.41      0.88      0.56      1652

    accuracy                           0.46      4358
   macro avg       0.39      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
