In [1]:
import pandas as pd

from joblib import dump

from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import ElasticNet
from sklearn import tree
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import mean_absolute_error, r2_score, accuracy_score, recall_score, confusion_matrix
from sklearn.svm import SVC

# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('../Dataset/alzheimers_prediction_dataset.csv')

# Discard every row that contains at least one missing value.
# The cell used to follow this with df.fillna('Dado Ausente'); that call could
# never change anything because no NaN survives dropna(), so it was removed.
df = df.dropna()

# Columns that are converted to integer codes below (the target column,
# 'Alzheimer’s Diagnosis', is part of this list).
colList = [
    'Gender',
    'Alcohol Consumption',
    'Smoking Status',
    'Cholesterol Level',
    'Sleep Quality',
    'Dietary Habits',
    'Physical Activity Level',
    'Diabetes',
    'Depression Level',
    'Air Pollution Exposure',
    'Social Engagement Level',
    'Income Level',
    'Stress Levels',
    'Hypertension',
    'Family History of Alzheimer’s',
    'Genetic Risk Factor (APOE-ε4 allele)',
    'Alzheimer’s Diagnosis',
    'Marital Status',
    'Employment Status',
    'Urban vs Rural Living',
    'Country',
]

# Keep each fitted encoder, keyed by column name, so the integer codes can be
# mapped back to the original labels (inverse_transform) or re-applied to new
# data (transform). Previously every encoder was overwritten by the next
# iteration and the mapping was lost.
encoders = {}
for col in colList:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Target is the encoded diagnosis column; every remaining column is a feature.
Y = df['Alzheimer’s Diagnosis']
X = df.drop('Alzheimer’s Diagnosis', axis = 1)

# Hold out 15% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.15, random_state = 42)


In [4]:
# Decision tree using the entropy split criterion. random_state is fixed so the
# fitted tree is reproducible; max_depth and min_samples_split limit its growth.
model = tree.DecisionTreeClassifier(criterion = 'entropy', random_state = 0, max_depth=50, min_samples_split=1280)
model.fit(X_train, Y_train)

# Predict once per split and reuse the result for every metric instead of
# calling model.predict() again inside each print. Names are specific to this
# cell so they do not overwrite the prediction variables of other cells.
treePredTrain = model.predict(X_train)
treePredTest = model.predict(X_test)

print(f"Accuracy treino: {accuracy_score(Y_train, treePredTrain)}")
print(f"Accuracy teste: {accuracy_score(Y_test, treePredTest)}\n")

print(f"Recall treino: {recall_score(Y_train, treePredTrain)}")
print(f"Recall teste: {recall_score(Y_test, treePredTest)}\n")

print(f"Matrix treino: \n{confusion_matrix(Y_train, treePredTrain)}")
print(f"Matrix teste: \n{confusion_matrix(Y_test, treePredTest)}\n")

Accuracy treino: 0.7294583465315173
Accuracy teste: 0.7259265906847349

Recall treino: 0.6728031230863442
Recall teste: 0.6604143947655398

Matrix treino: 
[[28479  8533]
 [ 8549 17579]]
Matrix teste: 
[[5061 1497]
 [1557 3028]]



In [3]:
# Support-vector classifier fitted on the training split.
# NOTE(review): the features reach the SVM without any scaling step in this
# cell; scikit-learn's SVM guide recommends scaling inputs - consider a
# StandardScaler pipeline and re-run to compare.
model = SVC(gamma = 'scale')
model.fit(X_train, Y_train)

# One prediction pass per split, shared by all metrics below.
predictXTrain = model.predict(X_train)
predictXTest = model.predict(X_test)

# Compute every metric first, then report them together.
svcAccTrain = accuracy_score(Y_train, predictXTrain)
svcAccTest = accuracy_score(Y_test, predictXTest)
svcRecallTrain = recall_score(Y_train, predictXTrain)
svcRecallTest = recall_score(Y_test, predictXTest)
svcMatrixTrain = confusion_matrix(Y_train, predictXTrain)
svcMatrixTest = confusion_matrix(Y_test, predictXTest)

print(f"Accuracy treino: {svcAccTrain}")
print(f"Accuracy teste: {svcAccTest}\n")

print(f"Recall treino: {svcRecallTrain}")
print(f"Recall teste: {svcRecallTest}\n")

print(f"Matrix treino: \n{svcMatrixTrain}")
print(f"Matrix teste: \n{svcMatrixTest}\n")

Accuracy treino: 0.7126385809312639
Accuracy teste: 0.7115678004128152

Recall treino: 0.5969458052663809
Recall teste: 0.5906215921483097

Matrix treino: 
[[29399  7613]
 [10531 15597]]
Matrix teste: 
[[5221 1337]
 [1877 2708]]

