In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    make_scorer,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split

In [None]:
# Read the raw dataset from the working directory and preview it.
DATA_PATH = "alzheimers_disease_data.csv"
df = pd.read_csv(DATA_PATH)
df

In [None]:
# Print a summary of the raw frame (columns, non-null counts, dtypes)
# to check for missing values before modelling.
df.info()

In [None]:
# Working copy of the data without the "DoctorInCharge" column;
# `df` itself is left untouched because drop() returns a new frame.
alzheimer = df.drop(columns=["DoctorInCharge"])
alzheimer

In [None]:
# Bar chart with the number of rows per Diagnosis value (class balance of the target).
plt.figure(figsize=(6, 4))
sns.countplot(data=alzheimer, x="Diagnosis").set_title("Count Plot of Diagnosis")
plt.show()

In [None]:
# Baseline model: logistic regression on every column except the target
# ("Diagnosis") and the row identifier ("PatientID").
var = [c for c in alzheimer.columns if c not in ["Diagnosis", "PatientID"]]

target_variable = "Diagnosis"

X = alzheimer[var]
y = alzheimer[target_variable]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# One coefficient per feature, in the same order as `var`.
# NOTE(review): reading only coef_[0] assumes a binary target - confirm.
print("Coeficients:")
for feature_name, coefficient in zip(var, model.coef_[0]):
    print(f"\t{feature_name} --> {coefficient}")

y_pred = model.predict(X_test)

# Evaluation metrics on the held-out test set.
# accuracy_score must be imported from sklearn.metrics (it was missing before).
f1 = f1_score(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)

print("\nMètriques del model:")
print(f"Accuracy:   {acc:.3f}")
print(f"Precision:  {prec:.3f}")
print(f"Recall:     {rec:.3f}")
print(f"F1-score:   {f1:.3f}")

# Confusion matrix of true vs. predicted labels on the test set.
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()


In [None]:
#BMI
    #Underweight: BMI < 18.5 - 0
    #Normal weight: 18.5 <= BMI < 25 - 1
    #Overweight: 25 <= BMI < 30 - 2
    #Obesity: BMI >= 30 - 3

In [None]:
# Bucket BMI into four ordinal categories. Intervals are left-closed (right=False):
#   0: [0, 18.5)  underweight
#   1: [18.5, 25) normal weight
#   2: [25, 30)   overweight
#   3: [30, inf)  obesity
bins = [0, 18.5, 25, 30, float('inf')]
labels = [0, 1, 2, 3]

# Cut from the untouched raw column in `df` instead of from alzheimer["BMI"]:
# the cell overwrites alzheimer["BMI"], so reading from it would re-bin the
# category codes (not the real BMI values) whenever the cell is run again.
alzheimer["BMI"] = pd.cut(df["BMI"], bins=bins, labels=labels, right=False)

# Diagnosis counts split by BMI category.
sns.countplot(x="Diagnosis", hue="BMI", data=alzheimer)
plt.title("Count Plot of BMI")
plt.show()


In [None]:
#AlcoholConsumption
    #Abstainer: AlcoholConsumption < 1 - 0
    #Very low or occasional consumption: 1 <= AlcoholConsumption < 8 - 1
    #Moderate / low-risk consumption: 8 <= AlcoholConsumption < 15 - 2
    #High / risky consumption: 15 <= AlcoholConsumption < 20 - 3

In [None]:
# Bucket AlcoholConsumption into four ordinal categories.
# Intervals are left-closed (right=False):
#   0: [0, 1)   1: [1, 8)   2: [8, 15)   3: [15, inf)
bins = [0, 1, 8, 15, float('inf')]
labels = [0, 1, 2, 3]

# Cut from the untouched raw column in `df` instead of from
# alzheimer["AlcoholConsumption"]: the cell overwrites that column, so reading
# from it would re-bin the category codes whenever the cell is run again.
alzheimer["AlcoholConsumption"] = pd.cut(
    df["AlcoholConsumption"], bins=bins, labels=labels, right=False
)

# Diagnosis counts split by alcohol-consumption category.
sns.countplot(x="Diagnosis", hue="AlcoholConsumption", data=alzheimer)
plt.title("Count Plot of AlcoholConsumption")
plt.show()


In [None]:
# Bucket PhysicalActivity into four ordinal categories.
# Intervals are left-closed (right=False):
#   0: [0, 2)   1: [2, 4)   2: [4, 7)   3: [7, inf)
bins = [0, 2, 4, 7, float('inf')]
labels = [0, 1, 2, 3]

# Cut from the untouched raw column in `df` instead of from
# alzheimer["PhysicalActivity"]: the cell overwrites that column, so reading
# from it would re-bin the category codes whenever the cell is run again.
alzheimer["PhysicalActivity"] = pd.cut(
    df["PhysicalActivity"], bins=bins, labels=labels, right=False
)

# Diagnosis counts split by physical-activity category.
sns.countplot(x="Diagnosis", hue="PhysicalActivity", data=alzheimer)
plt.title("Count Plot of PhysicalActivity")
plt.show()

In [None]:
# Features to plot: every column except the target and the row identifier.
# The previous test `c != "Diagnosis" or "PatientID"` was always true (a
# non-empty string is truthy), so both excluded columns were plotted as hues.
var = [c for c in alzheimer.columns if c not in ("Diagnosis", "PatientID")]

# One count plot per feature: Diagnosis on the x axis, bars split by the feature's values.
for v in var:
    plt.figure(figsize=(6,4))
    sns.countplot(x="Diagnosis", hue=v, data=alzheimer)
    plt.title(f"Count Plot of {v}")
    plt.show()
