In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Train a random-forest churn classifier and persist it with its preprocessors.
# Steps: read CSV -> integer-code categoricals -> stratified 70/30 split
#        -> standardize -> fit forest -> report metrics -> dump artifacts.

# Read the churn dataset from the current working directory
data = pd.read_csv('telecom_churn_data_reduced.csv')

# Columns that get replaced in-place by integer codes
categorical_columns = [
    'contract',
    'gender',
    'partner',
    'dependents',
    'internet_service',
    'paperless_billing',
    'payment_method',
]

# One fitted LabelEncoder per categorical column, kept so the same
# mapping can be saved next to the model further down
encoders = {}
for col in categorical_columns:
    le = encoders[col] = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Everything except 'churn' is a feature; 'churn' is the target
y = data['churn']
X = data.drop(columns='churn')

# Hold out 30% for testing; stratify on the target and fix the seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 100-tree random forest with a fixed seed
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out split
y_pred = model.predict(X_test)

# Score the predictions
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

for label, value in (
    ("Accuracy:", accuracy),
    ("Confusion Matrix:\n", conf_matrix),
    ("Classification Report:\n", class_report),
):
    print(label, value)

# Persist the model, scaler, and encoders.
# The final dump is left as a bare trailing expression so a notebook
# cell still echoes its return value.
joblib.dump(model, 'random_forest_churn_model_reduced.pkl')
joblib.dump(scaler, 'scaler_reduced.pkl')
joblib.dump(encoders, 'encoders.pkl')


Accuracy: 0.49666666666666665
Confusion Matrix:
 [[68 82]
 [69 81]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.50      0.45      0.47       150
         1.0       0.50      0.54      0.52       150

    accuracy                           0.50       300
   macro avg       0.50      0.50      0.50       300
weighted avg       0.50      0.50      0.50       300



['encoders.pkl']