In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier


In [2]:
# Weak learner that AdaBoost boosts: a decision tree with library defaults.
# Its depth and split threshold are tuned later through the
# `base_estimator__*` keys of the parameter grid.
base_estimator = DecisionTreeClassifier()

# NOTE(review): newer scikit-learn releases rename the `base_estimator`
# argument to `estimator`; confirm the installed version before upgrading,
# and rename the `base_estimator__*` grid keys at the same time.
model = AdaBoostClassifier(
    base_estimator=base_estimator,
    random_state=42,  # fixed seed so repeated runs build the same ensemble
)


In [3]:
# Load the pre-processed train and test splits.
train_df = pd.read_csv('../train_test_files/processed_train.csv')
test_df = pd.read_csv('../train_test_files/processed_test.csv')

# `pop` removes the identifier column from test_df in place and returns it,
# so the ids are kept in `x` for the output file while test_df is left with
# the remaining columns only.
x = test_df.pop('response_id')

In [4]:
# Hyper-parameter search space: 3 * 4 * 3 * 3 = 108 combinations.
# Keys prefixed with `base_estimator__` are routed to the decision tree
# wrapped by AdaBoost; the others configure AdaBoost itself.
param_grid = dict(
    n_estimators=[50, 100, 200],                    # number of weak learners
    learning_rate=[0.01, 0.1, 1, 2],                # weight applied to each learner
    base_estimator__max_depth=[1, 2, 3],            # depth of each weak tree
    base_estimator__min_samples_split=[2, 5, 10],   # min samples to split a node
)


In [5]:
# Exhaustive search over `param_grid`, scoring each candidate by F1.
search_options = dict(
    scoring='f1',   # metric used to rank candidates
    cv=5,           # number of cross-validation folds
    verbose=2,      # progress output level during fitting
    n_jobs=-1,      # use all available cores
)
grid_search = GridSearchCV(model, param_grid, **search_options)


In [6]:
# Separate the target column from the predictors, then run the search
# (every grid combination is fitted once per cross-validation fold).
X_train = train_df.drop(columns=['exit_status'])
y_train = train_df['exit_status']
grid_search.fit(X_train, y_train)


Fitting 5 folds for each of 108 candidates, totalling 540 fits




In [7]:
# Predict on the test features (the `response_id` column was removed from
# test_df when the data was loaded).
final = grid_search.predict(X=test_df)

In [8]:
# Translate numeric predictions into labels: a prediction equal to 1 becomes
# "Stayed", anything else becomes "Left".
# NOTE(review): assumes `exit_status` encodes "Stayed" as 1 — confirm against
# the preprocessing step.
label_by_is_one = {True: "Stayed", False: "Left"}
predictions = [label_by_is_one[bool(pred == 1)] for pred in final]

In [9]:
# Pair every saved response id with its predicted label.
output_columns = {'response_id': x, 'Predictions': predictions}
output_df = pd.DataFrame(output_columns)

In [10]:
# Write the predictions to disk without the DataFrame index.
# The parent directory is created first: `to_csv` does not create missing
# folders, and failing here would throw away the result of the whole search.
output_path = Path('../output/adaboost_output.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
output_df.to_csv(output_path, index=False)

In [11]:
# Show the hyper-parameter combination selected by the grid search.
best_params = grid_search.best_params_
print(best_params)

{'base_estimator__max_depth': 1, 'base_estimator__min_samples_split': 2, 'learning_rate': 1, 'n_estimators': 50}


In [12]:
# Predict on the training rows themselves (target column removed) so the
# fitted model's in-sample performance can be inspected.
train_features = train_df.drop(columns=['exit_status'])
y_predict = grid_search.predict(train_features)

In [13]:
from sklearn.metrics import f1_score

In [14]:
# In-sample F1: `y_predict` was produced on the same rows the model was
# fitted on, so this number is an optimistic estimate.
print("the f1 score is :" , f1_score(train_df['exit_status'],y_predict))

# Mean F1 of the selected parameters across the held-out cross-validation
# folds; this is the fairer estimate of performance on unseen data.
print("the cross-validated f1 score is :", grid_search.best_score_)

the f1 score is : 0.769493798189742
