# Import Libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Load the dataset

In [4]:
  data = pd.read_csv('telco_customer_churn.csv')

# List few rows

In [5]:
print(data.head())

   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4                No     Fiber optic             No  ...               No   

  TechSupport StreamingTV StreamingMovies        Contract Pape

# Drop unnecessary columns

In [6]:
data = data.drop(['customerID'], axis=1)

# Handle missing values

In [7]:
data['TotalCharges'] = pd.to_numeric(data['TotalCharges'], errors='coerce')
data = data.dropna()

# Encode categorical variables

In [8]:
label_encoder = LabelEncoder()

for column in data.columns:
    if data[column].dtype == object:
        data[column] = label_encoder.fit_transform(data[column])

# Split Dataset to feature and target variables

In [9]:
X = data.drop('Churn', axis=1)
y = data['Churn']

# Train Test Split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Random Forest Model

In [11]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Prediction

In [12]:
y_pred = rf_model.predict(X_test)
y_pred_df = pd.DataFrame(y_pred, columns=['Predicted Churn'])
pd.set_option('display.max_rows', None)
print(y_pred_df)

      Predicted Churn
0                   0
1                   0
2                   1
3                   0
4                   0
5                   1
6                   0
7                   1
8                   0
9                   0
10                  1
11                  0
12                  0
13                  0
14                  0
15                  0
16                  1
17                  0
18                  0
19                  0
20                  0
21                  0
22                  0
23                  1
24                  0
25                  0
26                  0
27                  0
28                  1
29                  0
30                  0
31                  1
32                  0
33                  0
34                  0
35                  0
36                  0
37                  0
38                  0
39                  0
40                  0
41                  0
42                  0
43                  0
44        

# Summary

In [13]:
# Print a summary
print(f"Total Predictions: {len(y_pred)}")
print(f"Churn Predictions (1): {np.sum(y_pred == 1)}")
print(f"No Churn Predictions (0): {np.sum(y_pred == 0)}")

Total Predictions: 2110
Churn Predictions (1): 437
No Churn Predictions (0): 1673


# New Instance

In [14]:
# Example new customer data (without 'Churn' column)
new_customer = {
    'gender': 'Male',
    'SeniorCitizen': 0,
    'Partner': 'Yes',
    'Dependents': 'No',
    'tenure': 5,
    'PhoneService': 'Yes',
    'MultipleLines': 'No',
    'InternetService': 'Fiber optic',
    'OnlineSecurity': 'No',
    'OnlineBackup': 'Yes',
    'DeviceProtection': 'No',
    'TechSupport': 'No',
    'StreamingTV': 'Yes',
    'StreamingMovies': 'No',
    'Contract': 'Month-to-month',
    'PaperlessBilling': 'Yes',
    'PaymentMethod': 'Electronic check',
    'MonthlyCharges': 70.35,
    'TotalCharges': 350.5
}

# Convert to DataFrame for consistency with the model input
new_customer_df = pd.DataFrame([new_customer])

# Preprocess

In [15]:
# Assuming you have label encoders and preprocessing steps used during training
from sklearn.preprocessing import LabelEncoder

# Example of preprocessing (handle categorical variables)
categorical_columns = ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                       'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                       'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod']

# Apply label encoding (or similar transformations as in training)
for column in categorical_columns:
    label_encoder = LabelEncoder()
    new_customer_df[column] = label_encoder.fit_transform(new_customer_df[column])

# Ensure the numerical data is properly handled (TotalCharges, MonthlyCharges)
new_customer_df['TotalCharges'] = pd.to_numeric(new_customer_df['TotalCharges'], errors='coerce')

# Check the preprocessed new customer data
print(new_customer_df)


   gender  SeniorCitizen  Partner  Dependents  tenure  PhoneService  \
0       0              0        0           0       5             0   

   MultipleLines  InternetService  OnlineSecurity  OnlineBackup  \
0              0                0               0             0   

   DeviceProtection  TechSupport  StreamingTV  StreamingMovies  Contract  \
0                 0            0            0                0         0   

   PaperlessBilling  PaymentMethod  MonthlyCharges  TotalCharges  
0                 0              0           70.35         350.5  


# Predicting on New Data

In [16]:
# Assuming the model has already been trained
# Make a prediction for the new customer
new_customer_prediction = rf_model.predict(new_customer_df)

# Output the prediction
if new_customer_prediction[0] == 1:
    print("The new customer is likely to churn.")
else:
    print("The new customer is likely to stay.")


The new customer is likely to churn.
