In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [10]:
import kagglehub

import os

path = kagglehub.dataset_download("rabieelkharoua/alzheimers-disease-dataset")

In [11]:
files = os.listdir(path)
print("Content of", files)

csv_file = files[0]
csv_path = os.path.join(path, csv_file)

# Load DataFrame
df = pd.read_csv(csv_path)
df = df.drop(columns=["DoctorInCharge"])  # Drop useless column


# Display the content of DataFrame
df.head().T

Content of ['alzheimers_disease_data.csv']


Unnamed: 0,0,1,2,3,4
PatientID,4751.0,4752.0,4753.0,4754.0,4755.0
Age,73.0,89.0,73.0,74.0,89.0
Gender,0.0,0.0,0.0,1.0,0.0
Ethnicity,0.0,0.0,3.0,0.0,0.0
EducationLevel,2.0,0.0,1.0,1.0,0.0
BMI,22.927749,26.827681,17.795882,33.800817,20.716974
Smoking,0.0,0.0,0.0,1.0,0.0
AlcoholConsumption,13.297218,4.542524,19.555085,12.209266,18.454356
PhysicalActivity,6.327112,7.619885,7.844988,8.428001,6.310461
DietQuality,1.347214,0.518767,1.826335,7.435604,0.795498


<!-- @format -->

# First evaluation


In [12]:
from sklearn.model_selection import train_test_split
from functions.data_prep import data_preprocessing

X = np.array(df.drop(columns=["Diagnosis"]))
y = np.array(df["Diagnosis"])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train_scaled, X_test_scaled = data_preprocessing(X_train, X_test)

<!-- @format -->

## Logisitc Regression


In [13]:
from models.logistic_regression import Logistic_Regression_

clf = Logistic_Regression_()
print(clf.bayesian_opt(X_train_scaled, y_train))

[I 2025-01-27 23:37:44,066] A new study created in memory with name: no-name-b2eb649e-90ed-468f-b408-6d9ede672f0e
[I 2025-01-27 23:37:44,126] Trial 0 finished with value: 0.8306986914367076 and parameters: {'solver': 'sag', 'penalty': None, 'C': 0.01}. Best is trial 0 with value: 0.8306986914367076.
[I 2025-01-27 23:37:44,185] Trial 1 finished with value: 0.8359329446064139 and parameters: {'solver': 'newton-cg', 'penalty': 'l1', 'C': 1}. Best is trial 1 with value: 0.8359329446064139.
[I 2025-01-27 23:37:44,262] Trial 2 finished with value: 0.8359329446064139 and parameters: {'solver': 'saga', 'penalty': None, 'C': 10}. Best is trial 1 with value: 0.8359329446064139.
[I 2025-01-27 23:37:44,299] Trial 3 finished with value: 0.8359329446064139 and parameters: {'solver': 'lbfgs', 'penalty': 'l1', 'C': 1}. Best is trial 1 with value: 0.8359329446064139.
[I 2025-01-27 23:37:44,332] Trial 4 finished with value: 0.8359329446064139 and parameters: {'solver': 'lbfgs', 'penalty': None, 'C': 1}.

0.8423333785341379


<!-- @format -->

Do zrobienia

- Dodanie opcji sprawdzenia wszystkich wyników


<!-- @format -->


<!-- @format -->

## Suppor Vector Machines


In [14]:
from models.svm import SVM

svm_model = SVM()
print(svm_model.bayesian_opt(X_train_scaled, y_train))

[I 2025-01-27 23:37:49,041] A new study created in memory with name: no-name-63394489-2501-40b8-878e-2f4c6101986f
[I 2025-01-27 23:37:49,269] Trial 0 finished with value: 0.658522396742292 and parameters: {'C': 0.0712117145561969, 'gamma': 'auto', 'kernel': 'sigmoid', 'degree': 3, 'coef0': 0.9246011595989638}. Best is trial 0 with value: 0.658522396742292.
[I 2025-01-27 23:37:49,426] Trial 1 finished with value: 0.6468877254217569 and parameters: {'C': 0.00231469059285281, 'gamma': 'scale', 'kernel': 'sigmoid', 'degree': 2, 'coef0': 0.8308447853764571}. Best is trial 0 with value: 0.658522396742292.
[I 2025-01-27 23:37:49,698] Trial 2 finished with value: 0.6468877254217569 and parameters: {'C': 0.044817443465624905, 'gamma': 'scale', 'kernel': 'rbf', 'degree': 5, 'coef0': 0.48925226600873273}. Best is trial 0 with value: 0.658522396742292.
[I 2025-01-27 23:37:49,961] Trial 3 finished with value: 0.6468877254217569 and parameters: {'C': 0.0010286695891799879, 'gamma': 'scale', 'kernel'

0.8470040721349621


<!-- @format -->

## XGBoost


In [15]:
# from models.xgboost import XGBoost

# xgboost_model = XGBoost()
# print(xgboost_model.bayesian_opt(X_train_scaled, y_train))

<!-- @format -->

## Random Forest


In [16]:
from models.random_forest import Random_Forest_

random_forest_model = Random_Forest_()
print(random_forest_model.bayesian_opt(X_train_scaled, y_train))

[I 2025-01-27 23:38:05,496] A new study created in memory with name: no-name-004b6112-3f11-45d4-abd0-f300a58f0ca4
[I 2025-01-27 23:38:07,810] Trial 0 finished with value: 0.8720186154741129 and parameters: {'n_estimators': 309, 'max_depth': 11, 'min_samples_split': 13, 'min_samples_leaf': 19, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.8720186154741129.
[I 2025-01-27 23:38:11,028] Trial 1 finished with value: 0.8510762070971495 and parameters: {'n_estimators': 260, 'max_depth': 2, 'min_samples_split': 17, 'min_samples_leaf': 6, 'max_features': None}. Best is trial 0 with value: 0.8720186154741129.
[I 2025-01-27 23:38:11,935] Trial 2 finished with value: 0.9168121000581734 and parameters: {'n_estimators': 93, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 2 with value: 0.9168121000581734.
[I 2025-01-27 23:38:15,536] Trial 3 finished with value: 0.9278650378126819 and parameters: {'n_estimators': 158, 'max_depth': 29, 'mi

0.941826643397324
