In [33]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [34]:
df = pd.read_csv("C:\\SEM-4\\ML-Datasets\\oasis_longitudinal.csv")

In [35]:
df.head(10)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034
5,OAS2_0004,OAS2_0004_MR1,Nondemented,1,0,F,R,88,18,3.0,28.0,0.0,1215,0.71,1.444
6,OAS2_0004,OAS2_0004_MR2,Nondemented,2,538,F,R,90,18,3.0,27.0,0.0,1200,0.718,1.462
7,OAS2_0005,OAS2_0005_MR1,Nondemented,1,0,M,R,80,12,4.0,28.0,0.0,1689,0.712,1.039
8,OAS2_0005,OAS2_0005_MR2,Nondemented,2,1010,M,R,83,12,4.0,29.0,0.5,1701,0.711,1.032
9,OAS2_0005,OAS2_0005_MR3,Nondemented,3,1603,M,R,85,12,4.0,30.0,0.0,1699,0.705,1.033


In [36]:
df.isnull().sum()

Subject ID     0
MRI ID         0
Group          0
Visit          0
MR Delay       0
M/F            0
Hand           0
Age            0
EDUC           0
SES           19
MMSE           2
CDR            0
eTIV           0
nWBV           0
ASF            0
dtype: int64

In [37]:
df['SES'] = df['SES'].fillna(df['SES'].median())
df['MMSE'] = df['MMSE'].fillna(df['MMSE'].median())

In [38]:
df['M/F'] = df['M/F'].map({'M': 0, 'F': 1})

In [39]:
df.isnull().sum()

Subject ID    0
MRI ID        0
Group         0
Visit         0
MR Delay      0
M/F           0
Hand          0
Age           0
EDUC          0
SES           0
MMSE          0
CDR           0
eTIV          0
nWBV          0
ASF           0
dtype: int64

In [40]:
# Convert Group (Converted → 0, Demented → 1, Nondemented → 2)
df['Group'] = df['Group'].map({'Converted': 0, 'Demented': 1, 'Nondemented': 2})

In [62]:
#count the new classes of our Groups now
df['Group'].value_counts()

Group
2    190
1    146
0     37
Name: count, dtype: int64

In [43]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report


In [44]:
# Sort by Subject ID and Visit number
df.sort_values(by=['Subject ID', 'Visit'], inplace=True)

# Create new feature: Change in MMSE score over time
df['MMSE_Change'] = df.groupby('Subject ID')['MMSE'].diff()

# Create new feature: Change in Whole Brain Volume
df['nWBV_Change'] = df.groupby('Subject ID')['nWBV'].diff()

# Create new feature: Change in Clinical Dementia Rating (CDR)
df['CDR_Change'] = df.groupby('Subject ID')['CDR'].diff()


In [45]:
# Compute the rate of MMSE decline for each patient
df['MMSE_Decline_Rate'] = df.groupby('Subject ID')['MMSE_Change'].transform(lambda x: x.mean())

# Compute the rate of Brain Volume decline for each patient
df['nWBV_Decline_Rate'] = df.groupby('Subject ID')['nWBV_Change'].transform(lambda x: x.mean())

In [46]:
df.isnull().sum()

Subject ID             0
MRI ID                 0
Group                  0
Visit                  0
MR Delay               0
M/F                    0
Hand                   0
Age                    0
EDUC                   0
SES                    0
MMSE                   0
CDR                    0
eTIV                   0
nWBV                   0
ASF                    0
MMSE_Change          150
nWBV_Change          150
CDR_Change           150
MMSE_Decline_Rate      0
nWBV_Decline_Rate      0
dtype: int64

In [47]:
df['MMSE_Change'] = df['MMSE_Change'].fillna(df['MMSE_Change'].mean())
df['nWBV_Change'] = df['nWBV_Change'].fillna(df['nWBV_Change'].mean())
df['CDR_Change'] = df['CDR_Change'].fillna(df['CDR_Change'].mean())

In [48]:
df.isnull().sum()

Subject ID           0
MRI ID               0
Group                0
Visit                0
MR Delay             0
M/F                  0
Hand                 0
Age                  0
EDUC                 0
SES                  0
MMSE                 0
CDR                  0
eTIV                 0
nWBV                 0
ASF                  0
MMSE_Change          0
nWBV_Change          0
CDR_Change           0
MMSE_Decline_Rate    0
nWBV_Decline_Rate    0
dtype: int64

In [49]:
X = df.drop(columns=['Group', 'Subject ID', 'MRI ID','Visit','Hand'])
y = df['Group']

In [58]:
X.columns

Index(['MR Delay', 'M/F', 'Age', 'EDUC', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV',
       'ASF', 'MMSE_Change', 'nWBV_Change', 'CDR_Change', 'MMSE_Decline_Rate',
       'nWBV_Decline_Rate'],
      dtype='object')

In [50]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# First, split into train (85%) and test (15%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

# Now, split train into train (70%) and validation (15%)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.176, random_state=42, stratify=y_train)

# Check sizes
print("Training Data:", X_train.shape)
print("Validation Data:", X_val.shape)
print("Test Data:", X_test.shape)

Training Data: (261, 15)
Validation Data: (56, 15)
Test Data: (56, 15)


In [51]:
# Apply SMOTE to balance the dataset
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Check new class distribution
print("Balanced class distribution:", y_train_balanced.value_counts())

Balanced class distribution: Group
2    133
0    133
1    133
Name: count, dtype: int64


In [52]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced)
X_val_scaled = scaler.transform(X_val)  
X_test_scaled = scaler.transform(X_test)  

In [53]:
y_test.value_counts()

Group
2    28
1    22
0     6
Name: count, dtype: int64

In [54]:
from tensorflow.keras.optimizers import Adam


# model = Sequential([
#     Dense(256, activation='relu', input_shape=(X_train_scaled.shape[1],)),  
#     Dropout(0.4),  # Increase dropout to prevent overfitting
#     Dense(128, activation='relu'),
#     Dropout(0.4),  
#     Dense(64, activation='relu'),
#     Dropout(0.4),
#     Dense(3, activation='softmax')  
# ])
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2

model = Sequential([
    Dense(256, activation='relu', kernel_regularizer=l2(0.01), input_shape=(X_train_scaled.shape[1],)),  
    BatchNormalization(),
    Dropout(0.3),  
    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.3),  
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(3, activation='softmax')  
])

# Compile the model
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer=Adam(learning_rate=0.0003), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Model Summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [55]:
# history = model.fit(X_train_scaled, y_train_balanced, 
#                     epochs=50, 
#                     batch_size=32, 
#                     validation_data=(X_val, y_val),verbose=1)  # Use validation set

from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train_scaled, y_train_balanced,
    validation_data=(X_val_scaled, y_val),
    epochs=100,
    batch_size=16,
    callbacks=[early_stop],  # Stop training when validation loss stops improving
    verbose=1
)


Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.4067 - loss: 4.4908 - val_accuracy: 0.7857 - val_loss: 3.8122
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5530 - loss: 3.9304 - val_accuracy: 0.8393 - val_loss: 3.6783
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6144 - loss: 3.7155 - val_accuracy: 0.8571 - val_loss: 3.5522
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7217 - loss: 3.4608 - val_accuracy: 0.8571 - val_loss: 3.4436
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7397 - loss: 3.4149 - val_accuracy: 0.8929 - val_loss: 3.3427
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7780 - loss: 3.2683 - val_accuracy: 0.8929 - val_loss: 3.2552
Epoch 7/100
[1m25/25[0m [32m━━

In [56]:

# Get training accuracy
train_accuracy = history.history['accuracy'][-1]  # Last epoch training accuracy
val_accuracy = history.history['val_accuracy'][-1]  # Last epoch validation accuracy

print(f"Final Training Accuracy: {train_accuracy:.4f}")
print(f"Final Validation Accuracy: {val_accuracy:.4f}")

Final Training Accuracy: 0.9749
Final Validation Accuracy: 0.9464


In [63]:
# Predict on test set
y_pred_prob = model.predict(X_test_scaled)
y_pred = y_pred_prob.argmax(axis=1)  # Convert probabilities to class labels

# Evaluate accuracy
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Test Accuracy: 0.9642857142857143
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         6
           1       1.00      1.00      1.00        22
           2       0.93      1.00      0.97        28

    accuracy                           0.96        56
   macro avg       0.98      0.89      0.92        56
weighted avg       0.97      0.96      0.96        56



In [68]:
X_test_scaled

array([[ 0.4385987 , -0.98260737, -1.84059039, -0.98501669,  1.66243084,
        -2.96886013,  0.55560119,  0.26221202, -0.73324731, -0.36894114,
         0.46235078, -2.58670263, -0.5624773 ,  0.44313263, -2.21385796],
       [-0.16892966,  1.01770049, -0.72236341, -0.98501669,  1.66243084,
        -0.4084232 ,  0.55560119, -0.15586634,  0.99508301,  0.04872074,
         0.46235078, -1.07632229, -0.5624773 ,  0.44313263, -0.93986047],
       [-0.93379229, -0.98260737, -1.14169853,  0.51649872,  1.66243084,
        -0.4084232 ,  0.55560119,  1.93452548,  0.54810103, -1.72064683,
         0.13701747,  0.19856585, -0.21804548,  1.06781293,  0.48113672],
       [ 2.06802093, -0.98260737, -0.02347156,  2.01801414, -0.23920857,
         0.44505578, -0.8927998 ,  2.11629868,  0.30971064, -1.84214846,
        -0.15772895,  0.66642426, -0.5624773 ,  0.23490586,  0.23613721],
       [-0.93379229, -0.98260737, -1.00192016,  0.51649872, -0.23920857,
         0.16056279,  0.55560119, -0.72542238, 

In [61]:
# model.save('AlzheimerPredictionWithANN.h5')
model.save('AlzheimerPredictionWithANN.keras')

In [69]:
import joblib

# Save the fitted scaler
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']