In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df=pd.read_csv('alzheimer.csv')

In [3]:
df.head()

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,Nondemented,M,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,Nondemented,M,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,Demented,M,75,12,,23.0,0.5,1678,0.736,1.046
3,Demented,M,76,12,,28.0,0.5,1738,0.713,1.01
4,Demented,M,80,12,,22.0,0.5,1698,0.701,1.034


In [4]:
#the dimensions of the data set
print("Diabetes data set dimensions : {}".format(df.shape))

Diabetes data set dimensions : (373, 10)


In [5]:
df.groupby('Group').size()

Group
Converted       37
Demented       146
Nondemented    190
dtype: int64

In [6]:
#checking for duplication
print(df.duplicated().sum())

0


In [7]:
df.isnull().sum()

Group     0
M/F       0
Age       0
EDUC      0
SES      19
MMSE      2
CDR       0
eTIV      0
nWBV      0
ASF       0
dtype: int64

In [8]:
df=df.dropna()
df.head()

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,Nondemented,M,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,Nondemented,M,88,14,2.0,30.0,0.0,2004,0.681,0.876
5,Nondemented,F,88,18,3.0,28.0,0.0,1215,0.71,1.444
6,Nondemented,F,90,18,3.0,27.0,0.0,1200,0.718,1.462
7,Nondemented,M,80,12,4.0,28.0,0.0,1689,0.712,1.039


In [9]:
df.shape

(354, 10)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 354 entries, 0 to 372
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Group   354 non-null    object 
 1   M/F     354 non-null    object 
 2   Age     354 non-null    int64  
 3   EDUC    354 non-null    int64  
 4   SES     354 non-null    float64
 5   MMSE    354 non-null    float64
 6   CDR     354 non-null    float64
 7   eTIV    354 non-null    int64  
 8   nWBV    354 non-null    float64
 9   ASF     354 non-null    float64
dtypes: float64(5), int64(3), object(2)
memory usage: 30.4+ KB


In [11]:
df.rename(columns={"M/F" : "Gender"}, inplace=True)

In [12]:
df["Gender"] = np.where(df["Gender"]=="F", 1, 0)
df["Group"] = np.where(df["Group"] == 'Demented', 1, 0)
df.head()


Unnamed: 0,Group,Gender,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,0,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,0,0,88,14,2.0,30.0,0.0,2004,0.681,0.876
5,0,1,88,18,3.0,28.0,0.0,1215,0.71,1.444
6,0,1,90,18,3.0,27.0,0.0,1200,0.718,1.462
7,0,0,80,12,4.0,28.0,0.0,1689,0.712,1.039


In [13]:
#Split the dataset into training and testing data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
X = df.drop(columns=['Group'])
y = df['Group']
sc = StandardScaler()
X_transformed = sc.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)

In [14]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB  

#model objects
rf_model = RandomForestClassifier()
lr_model = LogisticRegression()
gb_model = GradientBoostingClassifier()
nb_model = GaussianNB()
svc_model = SVC()

#model training
rf_model.fit(X_train, y_train)
lr_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)
nb_model.fit(X_train, y_train)
svc_model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


SVC()

In [15]:
print("Random Forest Classifier : {:.2%}".format(rf_model.score(X_test, y_test)))
print("Logistic Regression : {:.2%}".format(lr_model.score(X_test, y_test)))
print("Gradient Boosting Classifier : {:.2%}".format(gb_model.score(X_test, y_test)))
print("Naive Bayes Classifier : {:.2%}".format(nb_model.score(X_test, y_test)))
print("SVC Classifier : {:.2%}".format(svc_model.score(X_test, y_test)))

Random Forest Classifier : 95.77%
Logistic Regression : 95.77%
Gradient Boosting Classifier : 95.77%
Naive Bayes Classifier : 92.96%
SVC Classifier : 63.38%


In [16]:
#print(predictions)
rf_predictions = rf_model.predict(X_test)
lr_predictions = lr_model.predict(X_test)
gb_predictions = gb_model.predict(X_test)
nb_predictions = nb_model.predict(X_test)
svc_predictions = svc_model.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
print(classification_report(y_test,rf_predictions))
print("Random Forest Classifier : {:.2%}".format(accuracy_score(y_test, rf_predictions)))

              precision    recall  f1-score   support

           0       1.00      0.93      0.97        45
           1       0.90      1.00      0.95        26

    accuracy                           0.96        71
   macro avg       0.95      0.97      0.96        71
weighted avg       0.96      0.96      0.96        71

Random Forest Classifier : 95.77%


In [18]:
print(classification_report(y_test,lr_predictions))
print("Logistic Regression : {:.2%}".format(accuracy_score(y_test, lr_predictions)))

              precision    recall  f1-score   support

           0       0.98      0.96      0.97        45
           1       0.93      0.96      0.94        26

    accuracy                           0.96        71
   macro avg       0.95      0.96      0.95        71
weighted avg       0.96      0.96      0.96        71

Logistic Regression : 95.77%


In [19]:
print(classification_report(y_test,gb_predictions))
print("Gradient Boosting Classifier : {:.2%}".format(accuracy_score(y_test, gb_predictions)))

              precision    recall  f1-score   support

           0       1.00      0.93      0.97        45
           1       0.90      1.00      0.95        26

    accuracy                           0.96        71
   macro avg       0.95      0.97      0.96        71
weighted avg       0.96      0.96      0.96        71

Gradient Boosting Classifier : 95.77%


In [20]:
print(classification_report(y_test,nb_predictions))
print("Naive Bayes Classifier : {:.2%}".format(accuracy_score(y_test, nb_predictions)))

              precision    recall  f1-score   support

           0       1.00      0.89      0.94        45
           1       0.84      1.00      0.91        26

    accuracy                           0.93        71
   macro avg       0.92      0.94      0.93        71
weighted avg       0.94      0.93      0.93        71

Naive Bayes Classifier : 92.96%


In [21]:
print(classification_report(y_test,svc_predictions))
print("SVC Classifier : {:.2%}".format(accuracy_score(y_test, svc_predictions)))

              precision    recall  f1-score   support

           0       0.63      1.00      0.78        45
           1       0.00      0.00      0.00        26

    accuracy                           0.63        71
   macro avg       0.32      0.50      0.39        71
weighted avg       0.40      0.63      0.49        71

SVC Classifier : 63.38%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
