In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate, train_test_split

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, roc_curve, roc_auc_score, classification_report, make_scorer

In [None]:
# Load the raw dataset from a CSV file resolved relative to the working directory.
df = pd.read_csv("alzheimers_disease_data.csv")
# Bare last expression: the notebook renders the frame as this cell's output.
df

In [None]:
# Print a summary of the raw frame (columns, dtypes, non-null counts) to spot
# missing values and non-numeric columns before modelling.
df.info()

In [None]:
# Build the working frame without the "DoctorInCharge" column. `drop` returns a
# new frame, so `df` keeps the raw data.
# NOTE(review): presumably the column carries no predictive information - confirm.
alzheimer = df.drop(columns="DoctorInCharge")
# Display the resulting frame as the cell output.
alzheimer

In [None]:
# Class balance of the target: one bar per distinct "Diagnosis" value.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=alzheimer, x="Diagnosis", ax=ax)
ax.set_title("Count Plot of Diagnosis")
plt.show()

In [None]:
# Baseline classifier: logistic regression on every column except the target
# ("Diagnosis") and the row identifier ("PatientID").
var = [c for c in alzheimer.columns if c not in ["Diagnosis", "PatientID"]]

target_variable = "Diagnosis"

X = alzheimer[var]
y = alzheimer[target_variable]

# Stratify on the target so the train and test partitions keep the same class
# proportions as the full data (the cross-validation further down is stratified
# as well).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y
)

# Standardise inside a pipeline so the scaler statistics are learned from the
# training partition only. LogisticRegression is penalised by default, so its
# fit depends on feature scale; standardising also makes the coefficient
# magnitudes printed below comparable across features. This matches how the
# same estimator is built in the model-comparison cells.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)).fit(
    X_train, y_train
)

# Coefficients refer to the standardised features; `coef_` has shape
# (1, n_features) for a binary target, hence the [0].
print("Coeficients:")
for feature, coef in zip(var, model.named_steps["logisticregression"].coef_[0]):
    print(f"\t{feature} --> {coef}")

y_pred = model.predict(X_test)

# Hold-out metrics; precision/recall/F1 use scikit-learn's default binary
# setting (positive label 1).
f1 = f1_score(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)

print("\nMètriques del model:")
print(f"Accuracy:   {acc:.3f}")
print(f"Precision:  {prec:.3f}")
print(f"Recall:     {rec:.3f}")
print(f"F1-score:   {f1:.3f}")

cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()


In [None]:
# Lasso (L1-penalised linear regression) fitted on the 0/1 target and turned
# into a classifier by thresholding its continuous prediction.
var = [c for c in alzheimer.columns if c not in ["Diagnosis", "PatientID"]]

target_variable = "Diagnosis"

X = alzheimer[var]
y = alzheimer[target_variable]

# Stratified hold-out split: both partitions keep the class proportions of `y`.
# The model-comparison cell below reuses X_train / X_test / y_train / y_test
# from this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y
)

# The L1 penalty acts on raw coefficient size, so without standardisation the
# amount of shrinkage each feature receives depends on its units. Scaling in a
# pipeline puts every feature on the same footing and fits the scaler on the
# training partition only.
# NOTE(review): Lasso is a regressor; if a proper L1-regularised classifier is
# wanted, LogisticRegression(penalty="l1", solver="liblinear") is the usual
# alternative - left unchanged here to keep the intent of this cell.
model = make_pipeline(StandardScaler(), Lasso(alpha=0.001)).fit(X_train, y_train)

# Coefficients refer to the standardised features; exact zeros are features
# the L1 penalty removed.
print("Coeficients:")
for feature, coef in zip(var, model.named_steps["lasso"].coef_):
    print(f"\t{feature} --> {coef}")

# Continuous regression output -> class label with a 0.5 cut-off
# (assumes "Diagnosis" is encoded as 0/1 - TODO confirm).
y_pred = (model.predict(X_test) >= 0.5).astype(int)

f1 = f1_score(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)

print("\nMètriques del model:")
print(f"Accuracy:   {acc:.3f}")
print(f"Precision:  {prec:.3f}")
print(f"Recall:     {rec:.3f}")
print(f"F1-score:   {f1:.3f}")

cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()


In [None]:
# Candidate classifiers compared on the hold-out split created earlier in the
# notebook. SVM, logistic regression and k-NN are wrapped in a pipeline with a
# StandardScaler; the two tree ensembles are used on the raw features.
models = {
    "SVM": make_pipeline(StandardScaler(), SVC(probability=True, random_state=1)),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=1),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=1),
    "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(random_state=1)),
    "K-Nearest Neighbors": make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5)),
}

# Per-model hold-out scores, keyed by model name and then by metric label.
metric_names = ("Accuracy", "Precision", "Recall", "F1-score")
results = {}

for name, model in models.items():
    # fit() returns the fitted estimator, so fitting and predicting can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    acc, prec, rec, f1 = (
        scorer(y_test, y_pred)
        for scorer in (accuracy_score, precision_score, recall_score, f1_score)
    )
    results[name] = dict(zip(metric_names, (acc, prec, rec, f1)))

    print(
        f"\n{name} metrics:",
        f"Accuracy:   {acc:.3f}",
        f"Precision:  {prec:.3f}",
        f"Recall:     {rec:.3f}",
        f"F1-score:   {f1:.3f}",
        sep="\n",
    )

    # One confusion-matrix figure per model, titled with the model name.
    cm = confusion_matrix(y_test, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm).plot()
    plt.title(name)
    plt.show()

In [None]:
# 5-fold stratified cross-validation of the same candidate classifiers on the
# full feature matrix. The cross-validation helpers are imported in the
# notebook's top import cell. Scaling lives inside the pipelines, so it is
# re-fitted on the training folds of every split.
models = {
    "SVM": make_pipeline(StandardScaler(), SVC(probability=True, random_state=1)),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=1),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=1),
    "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(random_state=1)),
    "K-Nearest Neighbors": make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
}

# Shuffled, seeded folds: reproducible, and each fold keeps the class ratio.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

scoring = ["accuracy", "precision", "recall", "f1"]

# Display label -> key under which cross_validate stores that metric's
# per-fold test scores ("test_<scorer name>").
score_keys = {
    "Accuracy": "test_accuracy",
    "Precision": "test_precision",
    "Recall": "test_recall",
    "F1-score": "test_f1",
}

# Mean test score across folds, keyed by model name and then by metric label.
cv_results = {}

for name, model in models.items():
    scores = cross_validate(model, X, y, cv=cv, scoring=scoring)
    cv_results[name] = {label: np.mean(scores[key]) for label, key in score_keys.items()}

for model_name, metrics in cv_results.items():
    print(f"\n{model_name} Cross-Validation Metrics:")
    print(f"Accuracy:  {metrics['Accuracy']:.3f}")
    print(f"Precision: {metrics['Precision']:.3f}")
    print(f"Recall:    {metrics['Recall']:.3f}")
    print(f"F1-score:  {metrics['F1-score']:.3f}")
