In [4]:
# Bibliotecas
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [5]:
# Load the dataset and discard the columns the analysis does not use
dados_alzheimer = (
    pd.read_csv("alzheimers_disease_data.csv")
    .drop(columns=['PatientID', 'DoctorInCharge'])
)

# Preview the first rows (rendered as the cell output)
dados_alzheimer.head()

Unnamed: 0,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,SleepQuality,...,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
0,73,0,0,2,22.927749,0,13.297218,6.327112,1.347214,9.025679,...,6.518877,0,0,1.725883,0,0,0,1,0,0
1,89,0,0,0,26.827681,0,4.542524,7.619885,0.518767,7.151293,...,7.118696,0,0,2.592424,0,0,0,0,1,0
2,73,0,3,1,17.795882,0,19.555085,7.844988,1.826335,9.673574,...,5.895077,0,0,7.119548,0,1,0,1,0,0
3,74,1,0,1,33.800817,1,12.209266,8.428001,7.435604,8.392554,...,8.965106,0,1,6.481226,0,0,0,0,0,0
4,89,0,0,0,20.716974,0,18.454356,6.310461,0.795498,5.597238,...,6.045039,0,0,0.014691,0,0,1,1,0,0


In [6]:
# Prepare the data for the learning algorithms

# Separate the feature columns from the target column
X = dados_alzheimer.drop(columns='Diagnosis')
y = dados_alzheimer["Diagnosis"]

# Split the rows in half without shuffling: the first half of the file is used
# for training and the second half for testing.
# NOTE(review): with shuffle=False the split follows the row order of the CSV and
# is not stratified; if the file is ordered in a way related to Diagnosis the two
# halves may have different class balances. Consider shuffle=True with a fixed
# random_state and stratify=y -- confirm before changing, it alters the results.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, shuffle=False)

# Take explicit copies before assigning columns: the frames returned by the split
# may be tracked by pandas as derived from X, in which case the assignments below
# can emit SettingWithCopyWarning. Copies make the ownership unambiguous.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize the columns listed in `colunas`. The scaler is fitted on the training
# half only and then applied to the test half, so no test statistics are used
# during fitting.
scaler = StandardScaler()
colunas = ['Age', 'BMI', 'AlcoholConsumption', 'PhysicalActivity', 'DietQuality', 'SleepQuality', 'SystolicBP', 'DiastolicBP', 'CholesterolTotal', 'CholesterolLDL', 'CholesterolHDL', 'CholesterolTriglycerides', 'MMSE', 'FunctionalAssessment', 'ADL']
X_train[colunas] = scaler.fit_transform(X_train[colunas])
X_test[colunas] = scaler.transform(X_test[colunas])

In [11]:
# Hyperparameters consumed by the model-fitting cell

# --- k-nearest neighbours ---
k = 9                            # number of neighbours (n_neighbors)
metrica_distancia = 'manhattan'  # distance metric: 'euclidean' or 'manhattan'

# --- Multilayer perceptron ---
camadas = (3, 2)           # passed as hidden_layer_sizes
taxa_aprendizado = 0.1     # passed as learning_rate_init
maximo_iteracoes = 1_000   # passed as max_iter

# --- Decision tree ---
criterio = 'gini'   # split criterion
profundidade = 3    # passed as max_depth

In [21]:
# Fit every classifier on the training split and report its accuracy on the test split

# Seed shared by the stochastic estimators so that re-running the cell
# reproduces the same scores.
semente = 42


def avaliar_modelo(modelo, X_treino, y_treino, X_teste, y_teste):
    """Fit ``modelo`` on the training data and score it on the test data.

    Parameters
    ----------
    modelo : estimator exposing ``fit`` and ``predict``; it is fitted in place.
    X_treino, y_treino : training features and labels.
    X_teste, y_teste : test features and labels.

    Returns
    -------
    tuple
        ``(y_pred, acuracia)`` -- the predictions for ``X_teste`` and the
        accuracy expressed as a percentage (0-100).
    """
    modelo.fit(X_treino, y_treino)
    y_pred = modelo.predict(X_teste)
    return y_pred, accuracy_score(y_teste, y_pred) * 100


# DummyClassifier: naive baseline that ignores the features. A real model is
# only worthwhile if it scores better than this.
dummy_clf = DummyClassifier()
y_pred_dummy, dummy_acc = avaliar_modelo(dummy_clf, X_train, y_train, X_test, y_test)

# k-nearest neighbours
knn_clf = KNeighborsClassifier(n_neighbors=k, metric=metrica_distancia)
y_pred_knn, knn_acc = avaliar_modelo(knn_clf, X_train, y_train, X_test, y_test)

# Multilayer perceptron trained with plain SGD (momentum disabled)
mlp_clf = MLPClassifier(
    hidden_layer_sizes=camadas,
    learning_rate_init=taxa_aprendizado,
    max_iter=maximo_iteracoes,
    activation='relu',
    solver='sgd',
    random_state=semente,
    momentum=0,
)
y_pred_mlp, mlp_acc = avaliar_modelo(mlp_clf, X_train, y_train, X_test, y_test)

# Decision tree. random_state is fixed because scikit-learn permutes the
# candidate features at every split, so an unseeded tree may differ between runs.
tree_clf = DecisionTreeClassifier(
    criterion=criterio,
    max_depth=profundidade,
    random_state=semente,
)
y_pred_tree, tree_acc = avaliar_modelo(tree_clf, X_train, y_train, X_test, y_test)


# Results. The values come from accuracy_score, so the header says "Acurácia"
# (accuracy) rather than "Precisão" (precision), which is a different metric.
print(f"""Acurácia:

Dummy ( mais frequente ): {dummy_acc:.2f}%

k-vizinhos: {knn_acc:.2f}%

Rede Perceptron: {mlp_acc:.2f}%

Árvore de decisão: {tree_acc:.2f}%
      """)

Precisão: 

Dummy ( mais frequente ): 63.26%
      
k-vizinhos: 74.88%

Rede Perceptron: 81.86%
      
Árvore de decisão: 84.56%
      
