### Application of Random Forest as a predictive model 

#### Random Forest without hyperparameters tuning

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Data Preprocessing
data = pd.read_csv('Customer_Churn.csv')

# Handling missing values
data.fillna(0, inplace=True)

# Convert categorical variables into numerical representations
label_encoders = {}
for column in data.columns:
    if data[column].dtype == 'object':
        label_encoders[column] = LabelEncoder()
        data[column] = label_encoders[column].fit_transform(data[column])

# Feature Engineering
X_numerical = data[['MonthlyCharges', 'TotalCharges']] # Concatenate numerical features (MonthlyCharges and TotalCharges) with existing features
X_categorical = data.drop(['Churn', 'MonthlyCharges', 'TotalCharges'], axis=1)
X = pd.concat([X_categorical, X_numerical], axis=1)

# Split the Data
y = data['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Selection and Training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.7970191625266146
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1036
           1       0.66      0.48      0.56       373

    accuracy                           0.80      1409
   macro avg       0.74      0.70      0.71      1409
weighted avg       0.78      0.80      0.79      1409



#### Random Forest with hyperparameters tuning

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Data Preprocessing
data = pd.read_csv('Customer_Churn.csv')

# Handling missing values
data.fillna(0, inplace=True)

# Convert categorical variables into numerical representations
label_encoders = {}
for column in data.columns:
    if data[column].dtype == 'object':
        label_encoders[column] = LabelEncoder()
        data[column] = label_encoders[column].fit_transform(data[column])

# Step 2: Feature Engineering
# Concatenate numerical features (MonthlyCharges and TotalCharges) with existing features
X_numerical = data[['MonthlyCharges', 'TotalCharges']]
X_categorical = data.drop(['Churn', 'MonthlyCharges', 'TotalCharges'], axis=1)
X = pd.concat([X_categorical, X_numerical], axis=1)

# Step 3: Split the Data
y = data['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Model Selection and Hyperparameter Tuning
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Step 5: Model Evaluation with Best Hyperparameters
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Best Hyperparameters: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 150}
Accuracy: 0.8119233498935415
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.92      0.88      1036
           1       0.69      0.52      0.59       373

    accuracy                           0.81      1409
   macro avg       0.77      0.72      0.74      1409
weighted avg       0.80      0.81      0.80      1409

