In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [2]:
import kagglehub

import os

path = kagglehub.dataset_download("rabieelkharoua/alzheimers-disease-dataset")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
files = os.listdir(path)
print("Content of", files)

csv_file = files[0]
csv_path = os.path.join(path, csv_file)

# Load DataFrame
df = pd.read_csv(csv_path)
df = df.drop(columns=["DoctorInCharge"])  # Drop useless column


# Display the content of DataFrame
df.head().T

Content of ['alzheimers_disease_data.csv']


Unnamed: 0,0,1,2,3,4
PatientID,4751.0,4752.0,4753.0,4754.0,4755.0
Age,73.0,89.0,73.0,74.0,89.0
Gender,0.0,0.0,0.0,1.0,0.0
Ethnicity,0.0,0.0,3.0,0.0,0.0
EducationLevel,2.0,0.0,1.0,1.0,0.0
BMI,22.927749,26.827681,17.795882,33.800817,20.716974
Smoking,0.0,0.0,0.0,1.0,0.0
AlcoholConsumption,13.297218,4.542524,19.555085,12.209266,18.454356
PhysicalActivity,6.327112,7.619885,7.844988,8.428001,6.310461
DietQuality,1.347214,0.518767,1.826335,7.435604,0.795498


<!-- @format -->

# First evaluation


In [4]:
from sklearn.model_selection import train_test_split
from functions.data_prep import data_preprocessing

X = np.array(df.drop(columns=["Diagnosis"]))
y = np.array(df["Diagnosis"])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train_scaled, X_test_scaled = data_preprocessing(X_train, X_test)

<!-- @format -->

## Logisitc Regression


In [5]:
from models.logistic_regression import Logistic_Regression_

clf = Logistic_Regression_()
print(clf.bayesian_opt(X_train_scaled, y_train))

[I 2025-01-27 23:29:19,550] A new study created in memory with name: no-name-c909a2af-38dd-4044-8feb-b388b71386b3
[I 2025-01-27 23:29:19,585] Trial 0 finished with value: 0.8359329446064139 and parameters: {'solver': 'liblinear', 'penalty': 'l1', 'C': 100}. Best is trial 0 with value: 0.8359329446064139.
[I 2025-01-27 23:29:19,619] Trial 1 finished with value: 0.8359329446064139 and parameters: {'solver': 'lbfgs', 'penalty': None, 'C': 1}. Best is trial 0 with value: 0.8359329446064139.
[I 2025-01-27 23:29:19,652] Trial 2 finished with value: 0.8359329446064139 and parameters: {'solver': 'lbfgs', 'penalty': None, 'C': 10}. Best is trial 0 with value: 0.8359329446064139.
[I 2025-01-27 23:29:19,713] Trial 3 finished with value: 0.8306986914367076 and parameters: {'solver': 'saga', 'penalty': None, 'C': 0.01}. Best is trial 0 with value: 0.8359329446064139.
[I 2025-01-27 23:29:19,815] Trial 4 finished with value: 0.8359329446064139 and parameters: {'solver': 'sag', 'penalty': 'l1', 'C': 1

0.8423333785341379


<!-- @format -->

Do zrobienia

- Dodanie opcji sprawdzenia wszystkich wyników


<!-- @format -->


<!-- @format -->

## Suppor Vector Machines


In [6]:
from models.svm import SVM

svm_model = SVM()
print(svm_model.bayesian_opt(X_train_scaled, y_train))

[I 2025-01-27 23:29:25,599] A new study created in memory with name: no-name-8e28bf10-f839-4978-b13b-0ca0fe5fdc2c
[I 2025-01-27 23:29:25,830] Trial 0 finished with value: 0.6468877254217569 and parameters: {'C': 0.0032778376240530827, 'gamma': 'scale', 'kernel': 'poly', 'degree': 5, 'coef0': 0.10764576297248962}. Best is trial 0 with value: 0.6468877254217569.
[I 2025-01-27 23:29:25,988] Trial 1 finished with value: 0.8406050029086677 and parameters: {'C': 0.47243172330128863, 'gamma': 'auto', 'kernel': 'linear', 'degree': 3, 'coef0': 0.6442361086461184}. Best is trial 1 with value: 0.8406050029086677.
[I 2025-01-27 23:29:26,929] Trial 2 finished with value: 0.8406050029086679 and parameters: {'C': 5.662989986902583, 'gamma': 'auto', 'kernel': 'linear', 'degree': 3, 'coef0': 0.3457724415790534}. Best is trial 2 with value: 0.8406050029086679.
[I 2025-01-27 23:29:27,253] Trial 3 finished with value: 0.8202443280977313 and parameters: {'C': 6.2348257338532935, 'gamma': 'auto', 'kernel': 

0.8464223385689355


<!-- @format -->

## XGBoost


In [7]:
# from models.xgboost import XGBoost

# xgboost_model = XGBoost()
# print(xgboost_model.bayesian_opt(X_train_scaled, y_train))

<!-- @format -->

## Random Forest


In [8]:
from models.random_forest import Random_Forest_

random_forest_model = Random_Forest_()
print(random_forest_model.bayesian_opt(X_train_scaled, y_train))

[I 2025-01-27 23:29:42,361] A new study created in memory with name: no-name-ec4d12a6-c343-4511-b4cd-c1400ca19c19
[I 2025-01-27 23:29:42,833] Trial 0 finished with value: 0.8720186154741129 and parameters: {'n_estimators': 57, 'max_depth': 15, 'min_samples_split': 15, 'min_samples_leaf': 19, 'max_features': 'log2'}. Best is trial 0 with value: 0.8720186154741129.
[I 2025-01-27 23:29:49,384] Trial 1 finished with value: 0.9412449098312973 and parameters: {'n_estimators': 249, 'max_depth': 20, 'min_samples_split': 13, 'min_samples_leaf': 4, 'max_features': None}. Best is trial 1 with value: 0.9412449098312973.
[I 2025-01-27 23:29:52,589] Trial 2 finished with value: 0.8737638161721932 and parameters: {'n_estimators': 402, 'max_depth': 19, 'min_samples_split': 8, 'min_samples_leaf': 17, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9412449098312973.
[I 2025-01-27 23:29:53,137] Trial 3 finished with value: 0.7248400232693427 and parameters: {'n_estimators': 86, 'max_depth': 3, 'mi

KeyboardInterrupt: 