# CUSTOMER CHURN PREDICTION

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report

In [None]:
# Load the raw telecom customer table from the working directory.
df = pd.read_csv(filepath_or_buffer='telecom_dataset.csv')

In [None]:
# Quick look at the loaded frame. The row preview is left disabled:
# print(df.head())
# Show the available column names; the feature/target split below expects a
# 'Churn' column to be among them.
print(df.columns)

In [None]:
# Separate the target ('Churn') from the predictors; `df` itself is not
# modified because `drop` returns a new frame.
y = df['Churn']
X = df.drop(columns='Churn')

In [None]:
# Split the data into training and testing sets (80% train, 20% test).
# stratify=y keeps the proportion of each 'Churn' class the same in both
# splits, so the test metrics are computed on a representative class mix.
# This matters because the target is later treated as imbalanced (SMOTE).
# random_state=42 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Oversample the training split only; X_test / y_test are left untouched so
# evaluation happens on the original class distribution.
# The sampler was previously referenced without ever being created, which
# raised NameError. Build it from the SMOTE class imported at the top of the
# file, seeded like the other steps for reproducibility.
# NOTE(review): SMOTE needs purely numeric features — confirm X_train has no
# string/categorical columns, or encode them before this cell.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Perform feature scaling: the scaler is fitted on the resampled training data
# only, and the same learned parameters are then applied to the test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Base random-forest estimator handed to the hyper-parameter search; the fixed
# seed keeps tree construction reproducible between runs.
rf_clf = RandomForestClassifier(
    random_state=42,
)

In [None]:
# Hyper-parameter grid for the random forest: 3 * 4 * 3 * 3 * 2 = 216
# combinations, each cross-validated on 5 folds (1080 fits before the refit).
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

# Exhaustive search scored by F1, with fits spread over all available cores.
# NOTE(review): the folds are cut from already-resampled data, so the
# cross-validated F1 may be optimistic — confirm against the test-set score.
grid_search = GridSearchCV(
    rf_clf,
    param_grid,
    scoring='f1',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train_scaled, y_train_resampled)

In [None]:
# Report the winning hyper-parameter combination and its mean cross-validated
# F1. The two spaces after each colon reproduce print()'s default separator.
print(f"Best parameters found:  {grid_search.best_params_!s}")
print(f"Best F1-score found:  {grid_search.best_score_!s}")

In [None]:
# Predict on the scaled test features with the best estimator from the search.
y_pred = grid_search.best_estimator_.predict(X_test_scaled)

# Evaluating the model: overall accuracy, F1, then the per-class report.
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy * 100:.2f}%")
print(f"F1-score: {f1:.4f}")
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)
