In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Read the processed training CSV into a DataFrame.
train_df = pd.read_csv(
    filepath_or_buffer='../train_test_files/processed_train.csv',
)

In [3]:
# Read the processed test CSV into a DataFrame.
test_df = pd.read_csv(
    filepath_or_buffer='../train_test_files/processed_test.csv',
)

In [4]:
# Set the identifier column aside (it is written out next to the predictions
# further down) and remove it from the frame that is passed to the model.
# The membership guard plus a non-mutating drop keep this cell safe to re-run:
# the original in-place drop raised KeyError on a second execution because
# 'response_id' had already been removed from test_df.
if 'response_id' in test_df.columns:
    x = test_df['response_id']
    test_df = test_df.drop(columns=['response_id'])

In [5]:
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

In [6]:
# Base estimator handed to the grid search; the seed is pinned so repeated
# runs build the same forest.
model = RandomForestClassifier(
    random_state=42,
)


In [7]:
# Hyper-parameter search space: 2 * 2 * 3 * 2 * 2 = 48 combinations.
param_grid = dict(
    n_estimators=[100, 200],        # how many trees to grow
    max_depth=[5, 10],              # depth cap for each tree
    min_samples_split=[2, 4, 8],    # samples a node needs before it may split
    min_samples_leaf=[2, 4],        # samples that must remain in every leaf
    max_features=['sqrt', 'log2'],  # feature-subset rule tried at each split
)

In [8]:
# Exhaustive search over param_grid, ranking candidates by F1 under 5-fold
# cross-validation.
# NOTE(review): 'f1' presumably relies on a binary target whose positive
# class is 1 — confirm against how 'exit_status' was encoded.
grid_search = GridSearchCV(
    model,
    param_grid,
    scoring='f1',
    cv=5,
    verbose=2,   # emit progress messages while fitting
    n_jobs=-1,   # -1 = run on all available cores
)


In [9]:
# Run the search: every column except 'exit_status' is used as a feature and
# 'exit_status' itself is the target.
grid_search.fit(
    train_df.drop(columns=['exit_status']),
    train_df['exit_status'],
)


Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [10]:
# Predict a label for every test row with the fitted search object.
final = grid_search.predict(X=test_df)

In [11]:
# Turn the numeric predictions into the text labels written to the output:
# anything other than 1 becomes "Left".
# NOTE(review): assumes 1 encodes "Stayed" in the processed data — confirm.
predictions = [
    "Left" if label != 1 else "Stayed"
    for label in final
]

In [12]:
# Pair each saved response_id with its predicted label.
output_df = pd.DataFrame({'response_id': x}).assign(Predictions=predictions)

In [13]:
# Write the predictions to disk without the DataFrame index.
# to_csv does not create missing directories, so make sure the target folder
# exists first instead of failing on a fresh checkout.
output_path = Path('../output/randomforest_output.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
output_df.to_csv(output_path, index=False)

In [14]:
# Show the hyper-parameter combination the search selected.
print(
    grid_search.best_params_,
)

{'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}


In [15]:
# Predict on the same rows the search was fitted on (features only), for the
# in-sample score computed below.
y_predict = grid_search.predict(
    train_df.drop(columns=['exit_status'])
)


In [16]:
from sklearn.metrics import f1_score

In [17]:
# In-sample F1: the model is scored on the very rows it was fitted on, so this
# figure is optimistic and should not be read as performance on unseen data.
print("the f1_score of training is : ",f1_score(train_df['exit_status'],y_predict))
# Mean cross-validated F1 of the selected parameters, measured on held-out
# folds during the search — the more honest estimate of generalisation.
print("the cross-validated f1_score is : ", grid_search.best_score_)

the f1_score of training is :  0.7800261753750126
