In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.metrics import accuracy_score

In [3]:
# Read the train and test tables from CSV files in the working directory.
modified_train_df = pd.read_csv(filepath_or_buffer='modified_train.csv')
modified_test_df = pd.read_csv(filepath_or_buffer='modified_test.csv')

In [4]:
# Separate the predictors from the 'NObeyesdad' target column.
X_train = modified_train_df.drop(columns=['NObeyesdad'])
y_train = modified_train_df['NObeyesdad']
# Copy instead of aliasing so that any later mutation of X_test leaves the
# loaded modified_test_df untouched.
X_test = modified_test_df.copy()

In [5]:
# Keep the row identifiers. They are read from the loaded frames (which this
# cell never mutates) so the cell can be re-run without a KeyError.
train_id = modified_train_df['id']
test_id = modified_test_df['id']

# 'id' is removed from the feature matrices. Rebinding to the result of a
# non-mutating drop avoids changing any frame that X_train / X_test may share
# data with; errors='ignore' makes a second execution a no-op.
X_train = X_train.drop(columns=['id'], errors='ignore')
X_test = X_test.drop(columns=['id'], errors='ignore')

In [6]:
# Learn the per-feature scaling statistics from the training features only,
# then apply the same fitted transform to both train and test features.
# NOTE(review): the scaler is fit before the hold-out split made further down,
# so the validation rows contribute to the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Hyperparameter grid for the GradientBoostingClassifier search.
#
# Fixes relative to the previous version of this grid:
#   * 'subsample' was listed twice; a dict literal keeps only the last value,
#     so the earlier [0.5, 1] entry was silently discarded. Only the effective
#     entry ([0.8, 1.0]) is kept.
#   * 'auto' was dropped from 'max_features': scikit-learn removed it in 1.3,
#     and for the classifier it was an alias of 'sqrt', so it only duplicated
#     work on versions that still accepted it.
#
# NOTE(review): scikit-learn supports loss='exponential' for binary targets
# only; if 'NObeyesdad' has more than two classes every candidate using it
# will fail to fit — confirm and drop it if so.
#
# Grid size: 2 * 2 * 2 * 3 * 3 * 3 * 2 * 1 = 432 candidates.
param_grid_gb = {
    'loss': ['log_loss', 'exponential'],
    'n_estimators': [100, 300],
    'learning_rate': [0.01, 0.05],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'subsample': [0.8, 1.0],
    'max_features': ['sqrt'],
}

In [12]:
# Exhaustive search over param_grid_gb, scored by accuracy with 5-fold
# cross-validation.
# random_state pins the estimator's internal randomness (row subsampling and
# feature subsampling), so repeated runs select the same best parameters.
# n_jobs=-1 spreads the independent fits over all available cores; it does not
# change the results.
grid_search_gb = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid_gb,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)


In [14]:
# Hold out 20% of the scaled training rows as a validation set; the fixed
# random_state keeps the split identical between runs.
(
    X_train_ensemble,
    X_val_ensemble,
    y_train_ensemble,
    y_val_ensemble,
) = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=42)


In [None]:
# Run the cross-validated grid search on the training part of the hold-out split.
grid_search_gb.fit(X=X_train_ensemble, y=y_train_ensemble)

In [None]:
# Pull the winning configuration and the corresponding fitted model out of
# the completed search.
best_params_gb = grid_search_gb.best_params_
best_gb = grid_search_gb.best_estimator_


In [None]:
# Predict labels for the held-out validation rows with the selected model.
y_pred_val_gb = best_gb.predict(X=X_val_ensemble)

In [None]:
# Share of validation rows whose predicted label equals the true label.
accuracy_val_gb = accuracy_score(y_true=y_val_ensemble, y_pred=y_pred_val_gb)
print(
    f"Accuracy of Gradient Boosting after Hyperparameter Optimization: {accuracy_val_gb:.4f}"
)

In [None]:
# Show the selected hyperparameters: heading on one line, the dict on the next.
print("Best Parameters:", best_params_gb, sep="\n")