In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Loading dataset
df = pd.read_excel('/Users/sukanya/Documents/Alziemers Multi Modal/Dataset/FINAL.xlsx')

X = df.drop(columns=['ID', 'Dementia_Category'])
y = df['Dementia_Category']

# Spliting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Random Forest Classifier

In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

rf = RandomForestClassifier(n_estimators=50)

# Training
rf.fit(X_train_scaled, y_train)

y_pred_rf = rf.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
report_rf = classification_report(y_test, y_pred_rf)

print("Random Forest")
print(f"Accuracy: {accuracy_rf}")
print(report_rf)

Random Forest
Accuracy: 0.9900497512437811
              precision    recall  f1-score   support

           0       1.00      0.91      0.95        23
           1       0.83      1.00      0.91        10
           2       1.00      1.00      1.00       102
           3       1.00      1.00      1.00        66

    accuracy                           0.99       201
   macro avg       0.96      0.98      0.97       201
weighted avg       0.99      0.99      0.99       201



# Random Forest Classifier + Boosting Algorithms

## AdaBoost

In [24]:
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, classification_report

# StackAdaBoosting and Gradient Boosting
stacked_model = StackingClassifier(estimators=[
    ('ada', AdaBoostClassifier())
], final_estimator=RandomForestClassifier())

stacked_model.fit(X_train_scaled, y_train)
y_pred_stacked = stacked_model.predict(X_test_scaled)

print("Stacked Random Forest, Gradient Boosting, and AdaBoost Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_stacked)}")
print(classification_report(y_test, y_pred_stacked))



Stacked Random Forest, Gradient Boosting, and AdaBoost Results:
Accuracy: 0.9502487562189055
              precision    recall  f1-score   support

           0       0.70      1.00      0.82        23
           1       0.00      0.00      0.00        10
           2       1.00      1.00      1.00       102
           3       1.00      1.00      1.00        66

    accuracy                           0.95       201
   macro avg       0.67      0.75      0.71       201
weighted avg       0.92      0.95      0.93       201



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Gradient Boosting

In [25]:
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier

# Stacking Gradient Boosting and Random Forest 
stacked_model = StackingClassifier(estimators=[
    ('gb', GradientBoostingClassifier())
], final_estimator=RandomForestClassifier())

stacked_model.fit(X_train_scaled, y_train)
y_pred_stacked = stacked_model.predict(X_test_scaled)

print("Stacked Random Forest and Gradient Boosting Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_stacked)}")
print(classification_report(y_test, y_pred_stacked))

Stacked Random Forest and Gradient Boosting Results:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00       102
           3       1.00      1.00      1.00        66

    accuracy                           1.00       201
   macro avg       1.00      1.00      1.00       201
weighted avg       1.00      1.00      1.00       201



## XGboost

In [26]:
from sklearn.ensemble import VotingClassifier
import xgboost as xgb

# Stacking Random Forest and XGBoost
xgb_clf = xgb.XGBClassifier()
voting_clf = VotingClassifier(estimators=[
    ('rf', RandomForestClassifier()),
    ('xgb', xgb_clf)
])

voting_clf.fit(X_train_scaled, y_train)
y_pred_voting = voting_clf.predict(X_test_scaled)

print("Voting Classifier with Random Forest and XGBoost Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_voting)}")
print(classification_report(y_test, y_pred_voting))


Voting Classifier with Random Forest and XGBoost Results:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00       102
           3       1.00      1.00      1.00        66

    accuracy                           1.00       201
   macro avg       1.00      1.00      1.00       201
weighted avg       1.00      1.00      1.00       201



## LightGBM Boost

In [27]:
from lightgbm import LGBMClassifier

# Stacking Random Forest and LightGBM
lgbm_clf = LGBMClassifier()
voting_clf_lgbm = VotingClassifier(estimators=[
    ('rf', RandomForestClassifier()),
    ('lgbm', lgbm_clf)
])

voting_clf_lgbm.fit(X_train_scaled, y_train)
y_pred_voting_lgbm = voting_clf_lgbm.predict(X_test_scaled)

print("Voting Classifier with Random Forest and LightGBM Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_voting_lgbm)}")
print(classification_report(y_test, y_pred_voting_lgbm))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1869
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 10
[LightGBM] [Info] Start training from score -1.948413
[LightGBM] [Info] Start training from score -2.677279
[LightGBM] [Info] Start training from score -0.715904
[LightGBM] [Info] Start training from score -1.203973
Voting Classifier with Random Forest and LightGBM Results:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00       102
           3       1.00      1.00      1.00        66

    accuracy                           1.00       201
   macro avg       1.00      1.00      1.00       201
weighted avg       1.00      1.00      1.00 

## Cat Boost

In [28]:
from catboost import CatBoostClassifier

# Voting ensemble of Random Forest and CatBoost
catboost_clf = CatBoostClassifier(verbose=0)
voting_clf_catboost = VotingClassifier(estimators=[
    ('rf', RandomForestClassifier()),
    ('catboost', catboost_clf)
])

voting_clf_catboost.fit(X_train_scaled, y_train)
y_pred_voting_catboost = voting_clf_catboost.predict(X_test_scaled)

print("Voting Classifier with Random Forest and CatBoost Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_voting_catboost)}")
print(classification_report(y_test, y_pred_voting_catboost))

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (2, 201) + inhomogeneous part.