# CLASSIFICATION MODEL FOR PREDICTING CUSTOMER CHURN


In [1]:
# Classification model of random forest algo for predicting customer churn using files uploaded

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the uploaded CSV file

# Make sure the file name matches the one you uploaded
try:
    df = pd.read_csv('/content/data_for_predictions.csv') # Replace 'uploaded_file.csv' with your actual filename
except FileNotFoundError:
    print("Error: uploaded_file.csv not found. Please upload the data file.")
    exit()

# Assuming 'Churn' is the target variable and other columns are features
# You might need to adjust the feature and target columns based on your dataset
if 'churn' not in df.columns:
    print("Error: 'Churn' column not found in the dataset.")
    exit()

X = df.drop('churn', axis=1)
y = df['churn']

# Handle categorical features: Convert categorical variables to numerical using one-hot encoding
X = pd.get_dummies(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
report


Accuracy: 0.8986995208761123
Classification Report:


'              precision    recall  f1-score   support\n\n           0       0.90      1.00      0.95      2617\n           1       1.00      0.03      0.06       305\n\n    accuracy                           0.90      2922\n   macro avg       0.95      0.51      0.50      2922\nweighted avg       0.91      0.90      0.85      2922\n'