In [None]:
# from google.colab import drive
# drive.mount('/content/drive',force_remount=True)

In [None]:
import numpy as np
import pandas as pd

import seaborn as sns

In [None]:
# Load the dataset from a CSV file located in the current working directory.
df = pd.read_csv('Alzheimer.csv')
# Show the loaded frame as the cell output.
df

In [None]:
# Summarize the frame: column names, non-null counts and dtypes.
df.info()

# Preprocessing Data

Label Encoder

In [None]:
# Integer-encode each listed column with its own freshly fitted LabelEncoder.
from sklearn.preprocessing import LabelEncoder

cols = ['Education', 'family history', 'Occupation', 'Triglycerides', 'smoking_status']

for col in cols:
    # fit_transform learns the column's distinct values and returns their integer codes
    df[col] = LabelEncoder().fit_transform(df[col])
df.head()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

# Handle missing values in the BMI column

In [None]:
# Class balance among the rows that do have a BMI value.
has_bmi = df['BMI'].notna()
df_filter = df[has_bmi]
df_filter['alzheimer'].value_counts()

In [None]:
# Drop the rows whose BMI is missing (rows are removed here, not mean-imputed).
df = df.dropna(subset=['BMI'])

In [None]:
# Re-count missing values per column after the rows without BMI were dropped.
df.isna().sum()

In [None]:
# Number of samples per target class.
df['alzheimer'].value_counts()

In [None]:
# Bar chart of the number of samples in each target class.
target_count = df['alzheimer'].value_counts()
target_count.plot.bar(title='Count (target)');

In [None]:
# Pairwise scatter plots of the numeric columns, with KDE curves on the diagonal.
sns.pairplot(df, diag_kind="kde")

# Create feature and label data

In [None]:
# Feature matrix: every column except the patient ID (HN) and the label (alzheimer).
excluded = df.columns.isin(['alzheimer', 'HN'])
X = df.loc[:, ~excluded]
X

In [None]:
# Label vector: the alzheimer column.
y = df.loc[:, 'alzheimer']
y

In [None]:
# Class distribution of the label vector.
y.value_counts()

# Split Train-Test Data

In [None]:
# Hold out 30% of the samples for testing; random_state=1234 keeps the split reproducible.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, test_size=0.3, random_state=1234)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# (rows, columns) of the training feature matrix.
X_train.shape

In [None]:
# Class counts in the training labels.
y_train.value_counts()

In [None]:
# (rows, columns) of the test feature matrix.
X_test.shape

In [None]:
# Class counts in the test labels.
y_test.value_counts()

In [None]:
# Replace the shuffled row labels left by the split with a fresh 0..n-1 index.
X_train, X_test, y_train, y_test = (
    part.reset_index(drop=True)
    for part in (X_train, X_test, y_train, y_test)
)

# Feature Data Scaling

In [None]:
# Standard deviation of each numeric column of the unscaled data.
df.std(numeric_only=True)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only.
scaler = StandardScaler().fit(X_train)
scaler

In [None]:
# Scale the training features and restore the column names, since transform returns a bare array.
X_train_sc = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)

In [None]:
# Scale the test features with the scaler fitted on the training set, keeping the column names.
X_test_sc = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
# Standard deviation of each scaled training feature (the scaler was fitted on X_train).
X_train_sc.std(numeric_only=True)

In [None]:
# Standard deviation of each scaled test feature (scaled with the scaler fitted on X_train).
X_test_sc.std(numeric_only=True)

# Classification with ANN model

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
import sklearn.metrics as metrics

In [None]:
# Base MLP estimator for the grid search; hidden layers and batch size are left
# to the search, and the seed fixes weight and bias initialization.
ann = MLPClassifier(
    solver='adam',
    activation='relu',
    learning_rate='adaptive',
    max_iter=700,
    random_state=1234,
)

In [None]:
# 10-fold stratified cross-validation, shuffled with a fixed seed.
skf = StratifiedKFold(10, shuffle=True, random_state=1234)

In [None]:
# Hyperparameter grid for the search: mini-batch sizes and hidden-layer layouts.
parameters = dict(
    batch_size=[16, 32, 64, 128],
    hidden_layer_sizes=[
        (24, 12, 6),
        (64, 24, 12, 6),
        (200, 50, 50, 25),
    ],
)

In [None]:
# Exhaustive search over `parameters`, scored with the stratified folds in `skf`.
grid_search = GridSearchCV(ann, parameters, cv=skf)

In [None]:
# Run the search on the standardized features. The final model is trained on
# X_train_sc and evaluated on X_test_sc, and MLPs are sensitive to feature scale,
# so searching on the raw X_train would tune hyperparameters for a different
# input distribution than the one the model is actually used with.
grid_result = grid_search.fit(X_train_sc, y_train)

In [None]:
# Report the best mean cross-validated score and the parameters that achieved it.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print("Best: %f using %s" % (best_score, best_params))

Train the model with the best hyperparameters found by the grid search (hidden_layer_sizes and batch_size)

In [None]:
# Build the final classifier from the hyperparameters the grid search selected,
# instead of hard-coded copies that can silently drift from the search result.
# The fixed settings mirror the `ann` base estimator used during the search.
ann_optimal = MLPClassifier(activation='relu',
                    solver='adam',
                    random_state=1234, # random number generation for weights and bias initialization
                    learning_rate='adaptive',
                    max_iter=700,
                    **grid_result.best_params_ # supplies hidden_layer_sizes and batch_size
                   )

In [None]:
# Train the final network on the standardized training features.
ann_optimal.fit(X_train_sc, y_train)

In [None]:
# Predict class labels for the standardized test features.
y_pred_test = ann_optimal.predict(X_test_sc)
y_pred_test

In [None]:
# Wrap the predictions in a DataFrame to count how many test samples fell into each class.
result_df = pd.DataFrame(y_pred_test)
result_df.value_counts()

# Performance of ANN model

In [None]:
# Confusion matrix on the test set, with rows and columns ordered as classes 0 then 1.
conf_mat = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_test, labels=[0, 1])

In [None]:
# Plot the confusion matrix with the class labels on both axes.
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=[0, 1])
cm_display.plot()

In [None]:
# Per-class precision, recall, F1 and support on the test set.
report = metrics.classification_report(y_test, y_pred_test)
print(report)