In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Load the dataset
file_path = "C:/Users/SANEABHUTTO/Desktop/archive/archive (1)/alzheimers_disease_data.csv"
df = pd.read_csv(file_path)

# Fill missing values with mean
df = df.fillna(df.mean())

# Separate features and target
X = df.drop(columns=['PatientID', 'Diagnosis'])
y = df['Diagnosis']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the pipeline
pipeline_gbc = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('classifier', GradientBoostingClassifier(random_state=42))
])

# Define hyperparameters grid
param_grid_gbc = {
    'classifier__n_estimators': [100, 200, 300],
    'classifier__max_depth': [3, 4, 5],
    'classifier__learning_rate': [0.01, 0.1, 0.2],
    'classifier__subsample': [0.8, 1.0],
}

# Grid search for Gradient Boosting
grid_search_gbc = GridSearchCV(estimator=pipeline_gbc, param_grid=param_grid_gbc, cv=5, n_jobs=-1, verbose=2)
grid_search_gbc.fit(X_train, y_train)

# Best parameters and score for Gradient Boosting
best_params_gbc = grid_search_gbc.best_params_
print(f'Best Parameters: {best_params_gbc}')
print(f'Best Score: {grid_search_gbc.best_score_:.2f}')

# Evaluate the tuned model
best_gbc_model = grid_search_gbc.best_estimator_
y_pred_test_best = best_gbc_model.predict(X_test)

print('Classification Report (Gradient Boosting):')
print(classification_report(y_test, y_pred_test_best))

# Save predictions to a new CSV file
df_test = X_test.copy()
df_test['True_Diagnosis'] = y_test
df_test['Predicted_Diagnosis'] = y_pred_test_best
df_test.to_csv('C:/Users/SANEABHUTTO/Desktop/archive/archive (1)/predicted_alzheimers_diagnosis.csv', index=False)


Fitting 5 folds for each of 54 candidates, totalling 270 fits
Best Parameters: {'classifier__learning_rate': 0.01, 'classifier__max_depth': 4, 'classifier__n_estimators': 300, 'classifier__subsample': 1.0}
Best Score: 0.95
Classification Report (Gradient Boosting):
              precision    recall  f1-score   support

           0       0.95      0.98      0.97       277
           1       0.96      0.92      0.94       153

    accuracy                           0.96       430
   macro avg       0.96      0.95      0.95       430
weighted avg       0.96      0.96      0.96       430

