In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report

from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier


In [None]:
# Load the raw churn table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="Telco-Customer-Churn.csv")

In [None]:
# (row count, column count) of the loaded frame.
df.shape

In [None]:
# Preview the first rows to eyeball column names and value formats.
df.head()

In [None]:
# Preview the last rows of the frame.
df.tail()

In [None]:
# Summary statistics for the columns pandas currently treats as numeric.
df.describe()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values per column.
df.isnull().sum()

Drop the customer ID column, since it is not useful for prediction

In [None]:
# Row count per Churn value, i.e. the class balance of the target.
df['Churn'].value_counts()

In [None]:
# Remove the identifier column when present; a missing column is silently skipped.
# NOTE(review): this looks for the label 'customerId' exactly. If the CSV spells it
# differently (e.g. 'customerID') nothing is dropped and the ID stays in the feature
# set - confirm against df.columns, and keep the prediction input row in step with
# whatever columns remain.
df.drop(columns=['customerId'], errors='ignore', inplace=True)

Convert `TotalCharges` to numeric (values that cannot be parsed become NaN), then fill missing values with 0

In [None]:
# Parse TotalCharges as numbers; entries that cannot be parsed become NaN.
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')
df["TotalCharges"] = total_charges_numeric

# Replace every missing value in the frame (including the NaNs created above) with 0.
df.fillna(value=0, inplace=True)

Encoding the Categorical Columns

In [None]:
# Integer-encode every object-typed column in place and keep each fitted encoder,
# keyed by column name, so the code <-> label mapping can be looked up later.
lable = {}
object_columns = df.select_dtypes(include=['object']).columns
for column_name in object_columns:
    encoder = LabelEncoder()
    encoder.fit(df[column_name])
    df[column_name] = encoder.transform(df[column_name])
    lable[column_name] = encoder

In [None]:
# Target vector is the Churn column; the feature matrix is everything else.
y = df['Churn']
X = df.drop(columns=['Churn'])

Splitting the training and testing data

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Oversample the minority class with SMOTE.
# Only the training split is resampled: fitting SMOTE on the full X/y would put the
# held-out test rows (and synthetic points interpolated from them) into the training
# set, so every score computed on X_test afterwards would be optimistically biased.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

In [None]:
# Class counts of the target after resampling.
y_train_res.value_counts()

Training the model using Random Forest Classifier

In [None]:
# Random forest with 200 trees and a fixed seed, trained on the resampled data.
model = RandomForestClassifier(random_state=42, n_estimators=200)
model.fit(X_train_res, y_train_res)

In [None]:
# Random-forest class predictions for the held-out test features.
y_predictions_rfc=model.predict(X_test)


In [None]:
# Score the random-forest predictions against the held-out labels.
model_accuracy, model_precision, model_recall, model_f1score = (
    scorer(y_test, y_predictions_rfc)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

for caption, score in (
    ("Accuracy : ", model_accuracy),
    ("Precision: ", model_precision),
    ("Model Recall Score: ", model_recall),
    ("Model F1 Score: ", model_f1score),
):
    print(caption, score)

Training the model using XGBoost

In [None]:
# Gradient-boosted trees (200 rounds, log-loss evaluation metric, fixed seed),
# trained on the same resampled data as the other models.
xgb_model = XGBClassifier(
    eval_metric='logloss',
    use_label_encoder=False,
    random_state=42,
    n_estimators=200,
)
xgb_model.fit(X_train_res, y_train_res)

In [None]:
# XGBoost class predictions for the held-out test features.
y_pred_xgb = xgb_model.predict(X_test)

In [None]:
# Score the XGBoost predictions against the held-out labels.
xgb_model_f1score = f1_score(y_test, y_pred_xgb)
xgb_model_recall = recall_score(y_test, y_pred_xgb)
xgb_model_precision = precision_score(y_test, y_pred_xgb)
xgb_model_accuracy = accuracy_score(y_test, y_pred_xgb)

xgb_report_lines = [
    ("Accuracy : ", xgb_model_accuracy),
    ("Precision: ", xgb_model_precision),
    ("Model Recall Score: ", xgb_model_recall),
    ("Model F1 Score: ", xgb_model_f1score),
]
for heading, metric_value in xgb_report_lines:
    print(heading, metric_value)

In [None]:
# Logistic regression baseline: balanced class weights and a generous iteration cap,
# trained on the resampled data.
log_model = LogisticRegression(class_weight='balanced', max_iter=10000)
log_model.fit(X_train_res, y_train_res)

In [None]:
# Logistic-regression class predictions for the held-out test features.
y_pred_log = log_model.predict(X_test)

In [None]:
# Score the logistic-regression predictions against the held-out labels.
log_model_accuracy = accuracy_score(y_test, y_pred_log)
log_model_precision = precision_score(y_test, y_pred_log)
log_model_recall = recall_score(y_test, y_pred_log)
log_model_f1score = f1_score(y_test, y_pred_log)
log_classification_report = classification_report(y_test, y_pred_log)

print("Accuracy : ", log_model_accuracy)
print("Precision: ", log_model_precision)
print("Model Recall Score: ", log_model_recall)
print("Model F1 Score: ", log_model_f1score)

# The per-class report was previously computed but never displayed.
print("Classification report:")
print(log_classification_report)

In [None]:
# Re-evaluate the XGBoost predictions on the held-out labels and print the scores again.
xgb_scores = {
    "accuracy": accuracy_score(y_test, y_pred_xgb),
    "precision": precision_score(y_test, y_pred_xgb),
    "recall": recall_score(y_test, y_pred_xgb),
    "f1": f1_score(y_test, y_pred_xgb),
}
xgb_model_accuracy = xgb_scores["accuracy"]
xgb_model_precision = xgb_scores["precision"]
xgb_model_recall = xgb_scores["recall"]
xgb_model_f1score = xgb_scores["f1"]

print("Accuracy : ", xgb_model_accuracy)
print("Precision: ", xgb_model_precision)
print("Model Recall Score: ", xgb_model_recall)
print("Model F1 Score: ", xgb_model_f1score)

In [None]:
def _ask_choice(prompt, mapping, field):
    """Ask one multiple-choice question and translate the reply to its integer code.

    The reply is stripped of surrounding whitespace and lower-cased before lookup.

    Args:
        prompt: Text shown to the user.
        mapping: Dict from accepted (lower-case) answers to integer codes.
        field: Human-readable field name used in the error message.

    Returns:
        Tuple ``(answer, code)``: the normalised answer text and its code.

    Raises:
        ValueError: If the normalised reply is not a key of ``mapping``.
    """
    answer = input(prompt).strip().lower()
    if answer not in mapping:
        raise ValueError(f"Invalid input for {field}.")
    return answer, mapping[answer]


# --- Free-form and simple answers (binary answers are encoded in a later cell) ---
cus_id = input("Enter the Customer Id: ")
gender = input("Enter the Gender (Female/Male): ")
senior = int(input("Enter 1 for senior citizen and 0 for non senior citizens: "))
partner = input("Do you have a partner? (yes/no): ")
dependent = input("Do you have any dependents? (yes/no) : ")
tenure = int(input("Tenure: "))
phone_service = input("Do you have a phone service:(yes/no) ")

# --- Multiple-choice answers ---
# The training columns were encoded with LabelEncoder, which numbers classes in
# sorted order, so the codes below follow the alphabetical order of the category
# labels rather than an arbitrary ranking.
# NOTE(review): this assumes the CSV categories sort the same way as the labels
# offered in the prompts (e.g. 'No' < 'No phone ...' < 'Yes'); confirm against
# lable[<column>].classes_ for each column.
multiple_mapping = {'no': 0, 'no phone': 1, 'yes': 2}
multiple_input, multiple = _ask_choice(
    "Do you have multiple lines? (Yes/ No/ No phone): ",
    multiple_mapping,
    "multiple connections",
)

internet_service_mapping = {'dsl': 0, 'fiber optic': 1, 'no': 2}
internet_service_input, internet_service = _ask_choice(
    "Do you have internet service? (DSL/ Fiber Optic/ No)",
    internet_service_mapping,
    "internet service",
)

# All internet add-on questions share the same three answers.
_ADDON_CODES = {'no': 0, 'no internet': 1, 'yes': 2}

online_security_mapping = dict(_ADDON_CODES)
online_security_input, online_security = _ask_choice(
    "Do you have online security? (Yes/ No/ No internet)",
    online_security_mapping,
    "online security",
)

online_backup_mapping = dict(_ADDON_CODES)
online_backup_input, online_backup = _ask_choice(
    "Do you have online backup? (Yes/ No/ No internet)",
    online_backup_mapping,
    "online backup",
)

device_protection_mapping = dict(_ADDON_CODES)
device_protection_input, device_protection = _ask_choice(
    "Do you have device protection? (Yes/ No/ No internet)",
    device_protection_mapping,
    "device protection",
)

tech_sup_mapping = dict(_ADDON_CODES)
tech_sup_input, tech_sup = _ask_choice(
    "Do you have technical support? (Yes/ No/ No internet)",
    tech_sup_mapping,
    "technical support",
)

streaming_tv_mapping = dict(_ADDON_CODES)
streaming_tv_input, streaming_tv = _ask_choice(
    "Do you have TV Streaming? (Yes/ No/ No internet)",
    streaming_tv_mapping,
    "TV Streaming",
)

streaming_movies_mapping = dict(_ADDON_CODES)
streaming_movies_input, streaming_movies = _ask_choice(
    "Do you have Movie Streaming? (Yes/ No/ No internet)",
    streaming_movies_mapping,
    "Movie Streaming",  # previously reported as "TV Streaming" (copy-paste slip)
)

# 'month-to-month' is the spelling shown in the prompt; the older key
# 'month-to month' is still accepted so previously valid input keeps working.
contract_mapping = {
    'month-to-month': 0,
    'month-to month': 0,
    'one year': 1,
    'two years': 2,
}
contract_input, contract = _ask_choice(
    "How long is your contract (month-to-month / One year / Two years): ",
    contract_mapping,
    "contract period",
)

paper = input("Paperless? (Yes / No): ")

payment_mapping = {
    'bank transaction': 0,
    'credit card': 1,
    'electronic': 2,
    'mailed check': 3,
}
payment_input, payment = _ask_choice(
    "Payment Type: (Electronic / Mailed Check / Bank transaction/ Credit Card): ",
    payment_mapping,
    "Payment",
)

# --- Charges ---
monthly_charge = float(input("Enter your monthly charge: "))
tot_charge = float(input("Enter the Total Charge: "))





In [None]:
# Turn the yes/no style answers into 0/1 flags.
# str.lower must be *called*: comparing the bound method `x.lower` with a string is
# always False, which previously forced every flag to 0 regardless of the answer.
# Surrounding whitespace is stripped so " Yes " is accepted as well.
gender_low = 1 if gender.strip().lower() == 'male' else 0
partner_low = 1 if partner.strip().lower() == 'yes' else 0
dependent_low = 1 if dependent.strip().lower() == 'yes' else 0
phone_service_low = 1 if phone_service.strip().lower() == 'yes' else 0
paper_low = 1 if paper.strip().lower() == 'yes' else 0


In [None]:
# One-row, float-typed feature matrix for the customer entered above.
# The first entry is a constant 0 placeholder.
# NOTE(review): presumably it stands in for an encoded customer-ID feature and the
# remaining values follow the column order of X - confirm against X.columns.
feature_row = [
    0,
    gender_low,
    senior,
    partner_low,
    dependent_low,
    tenure,
    phone_service_low,
    multiple,
    internet_service,
    online_security,
    online_backup,
    device_protection,
    tech_sup,
    streaming_tv,
    streaming_movies,
    contract,
    paper_low,
    payment,
    monthly_charge,
    tot_charge,
]
user_input = np.array([feature_row], dtype=float)

In [None]:
# Predicted class for the single entered customer, using the XGBoost model.
predictions=xgb_model.predict(user_input)

In [None]:
# Echo the entered details, then the model's verdict.
print("----------------Churning Details------------------ ","\n")

# Same wording for both branches (the "yes" branch used to read "Senior Citize: yes").
senior_line = "Senior citizen: Yes" if senior == 1 else "Senior citizen: No"

# Each tuple holds the arguments for one print call, in display order.
report_rows = [
    ("Customer Id: ", cus_id),
    ("Gender: ", gender),
    (senior_line,),
    ("Partner: ", partner),
    ("Dependent: ", dependent),
    ("Tenure: ", tenure),
    ("Phone Service: ", phone_service),
    ("Multiple lines: ", multiple_input),
    ("Internet services: ", internet_service_input),
    ("Online Security: ", online_security_input),
    ("Online Backup: ", online_backup_input),
    ("Device Protection: ", device_protection_input),
    ("Technical Support:  ", tech_sup_input),
    ("Tv Streaming: ", streaming_tv_input),
    ("Movie Streaming: ", streaming_movies_input),
    ("Contract period: ", contract_input),
    ("Paperless: ", paper),
    ("Payment Method: ", payment_input),
    ("Monthly Charge: ", monthly_charge),
    ("Total Charge: ", tot_charge),
]
for row in report_rows:
    print(*row, "\n")

print("Possibility of churning: ","\n")
if predictions[0] == 1:
    print("The customer is likely to churn the service")
else:
    print("The customer is unlikely to churn the service.")

Visualizing the predicted churn probability

In [None]:
# Class probabilities for the entered customer: column 0 is read as "no churn",
# column 1 as "churn".
probability = xgb_model.predict_proba(user_input)
first_row = probability[0]
no_churn_prob, churn_prob = first_row[0], first_row[1]

In [None]:
# Pie chart of the two predicted probabilities, with the "unlikely" wedge pulled out.
labels = ['Unlikely to churn', 'Likely to churn']
sizes = [no_churn_prob, churn_prob]
colors = ["#ffacee", "#A6B7FF"]
explode = [0.1, 0]

wedge_outline = {'edgecolor': 'black', 'linewidth': 1.5}

plt.figure(figsize=(6, 6))
plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
    wedgeprops=wedge_outline,
)
plt.title('Probability of Churning a telephone service')
plt.show()