<a href="https://colab.research.google.com/github/AsminiKOD/ClientPredicting/blob/RandomForest/Random_Forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Random Forest Model

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import pandas as pd

# Load the dataset. Per the original note, this CSV has already been
# resampled with SMOTE.
# NOTE(review): if SMOTE was applied to the full dataset before the
# train/test split performed later in this cell, synthetic samples can end up
# in the test set and the reported test metrics may be optimistic -- confirm
# how this file was produced.
file_path = 're-duplicates-handled.csv'

# Drop rows whose target is missing *before* separating features and target,
# so X and y are built exactly once from the cleaned frame. dropna() returns
# a new frame, which avoids mutating the freshly loaded data in place.
data = pd.read_csv(file_path).dropna(subset=['y'])

# Separate features and target variable
X = data.drop(columns=['y'])
y = data['y']

# Hold out 20% of the rows for testing; stratify on y so both partitions are
# split in a class-aware way, and fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Hyperparameter search space for the Random Forest
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[10, 20, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2'],
    class_weight=['balanced'],
)

# Grid search over every combination above: 3-fold cross-validation, ranked
# by F1 score, with n_jobs=-1 to parallelise the fits.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=3,
    scoring='f1',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Pull the best-scoring estimator out of the finished grid search
best_rf_model = grid_search.best_estimator_

# Training partition: predictions and per-class metrics
y_train_pred = best_rf_model.predict(X_train)
train_report = classification_report(y_train, y_train_pred)

# Testing partition: predictions and per-class metrics
y_test_pred = best_rf_model.predict(X_test)
test_report = classification_report(y_test, y_test_pred)

# Print each report under its heading (heading and report on separate lines)
print("Training Set Classification Report:", train_report, sep="\n")
print("Testing Set Classification Report:", test_report, sep="\n")


Training Set Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     27437
           1       1.00      1.00      1.00     26763

    accuracy                           1.00     54200
   macro avg       1.00      1.00      1.00     54200
weighted avg       1.00      1.00      1.00     54200

Testing Set Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.93      0.93      6860
           1       0.93      0.92      0.92      6691

    accuracy                           0.92     13551
   macro avg       0.92      0.92      0.92     13551
weighted avg       0.92      0.92      0.92     13551

