# IMPORTING REQUIRED LIBRARIES

In [101]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.simplefilter("ignore")

# LOADING THE TELECOM DATASET FOR CHURN PREDICTION

In [102]:
dt = pd.read_csv(r"C:\Users\HP\Downloads\archive (8)\WA_Fn-UseC_-Telco-Customer-Churn.csv")
print(dt.head())
print(dt.describe())

   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4                No     Fiber optic             No  ...               No   

  TechSupport StreamingTV StreamingMovies        Contract Pape

# DATA PREPROCESSING

In [103]:
def remove_indices(col):
    return dt[dt[col]=="No phone service"].index
def remove_indices_another(col):
    return dt[dt[col]=="No internet service"].index

#dropping unncessary columns
print(dt.shape)
re_cols = ["customerID","gender","Partner","Dependents"]
for i in re_cols:
    dt = dt.drop(i,axis=1)
print(dt.shape)


print(dt.shape)
column_name = ["SeniorCitizen","tenure","PhoneService","MultipleLines","InternetService","OnlineSecurity","OnlineBackup","DeviceProtection","TechSupport","StreamingTV","StreamingMovies","Contract","PaperlessBilling","PaymentMethod","MonthlyCharges","TotalCharges","Churn"]
for i in column_name:
    re_in = remove_indices(i)
    re_in_an = remove_indices_another(i)
    dt.drop(re_in, inplace=True)
    dt.drop(re_in_an, inplace=True)
print(dt.shape)
print(dt)

#encoding categorical data into numerical data
print(dt.head())
encoder = LabelEncoder()
for i in column_name:
    dt[i] = encoder.fit_transform(dt[i])
print(dt.head())

(7043, 21)
(7043, 17)
(7043, 17)
(4835, 17)
      SeniorCitizen  tenure PhoneService MultipleLines InternetService  \
1                 0      34          Yes            No             DSL   
2                 0       2          Yes            No             DSL   
4                 0       2          Yes            No     Fiber optic   
5                 0       8          Yes           Yes     Fiber optic   
6                 0      22          Yes           Yes     Fiber optic   
...             ...     ...          ...           ...             ...   
7035              0      19          Yes            No     Fiber optic   
7038              0      24          Yes           Yes             DSL   
7039              0      72          Yes           Yes     Fiber optic   
7041              1       4          Yes           Yes     Fiber optic   
7042              0      66          Yes            No     Fiber optic   

     OnlineSecurity OnlineBackup DeviceProtection TechSupport Strea

# SPLITTING INTO DEPENDENT AND INDEPENDENT ATTRIBUTES

In [104]:
x = dt.drop("Churn",axis=1)
y = dt["Churn"]

# DEFINING TRAINING AND TESTING DATA

In [105]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.4,random_state=42)

# CREATING OBJECT FOR LOGISTIC REGRESSION

In [106]:
lo_rg = LogisticRegression()
lo_rg.fit(x_train, y_train)

# TEST DATA PREDICTION

In [107]:
y_pred = lo_rg.predict(x_test)
print(x_test)
print(y_pred)

      SeniorCitizen  tenure  PhoneService  MultipleLines  InternetService  \
4813              0      46             0              0                1   
3665              0      71             0              1                1   
1475              1      69             0              1                1   
4800              1       1             0              1                1   
6411              1       2             0              0                1   
...             ...     ...           ...            ...              ...   
4929              1      63             0              0                0   
1346              0      14             0              1                1   
2343              0       3             0              0                0   
6506              0      13             0              1                1   
2902              1       6             0              0                1   

      OnlineSecurity  OnlineBackup  DeviceProtection  TechSupport  \
4813  

# ACCURACY SCORE

In [108]:
accr = accuracy_score(y_test,y_pred)
print("Accuracy Score: ", accr)

Accuracy Score:  0.7538779731127198


In [109]:
cr = classification_report(y_test,y_pred)
print(cr)

              precision    recall  f1-score   support

           0       0.80      0.84      0.82      1281
           1       0.65      0.58      0.61       653

    accuracy                           0.75      1934
   macro avg       0.73      0.71      0.72      1934
weighted avg       0.75      0.75      0.75      1934



# COLLECTING USER DATA FOR PREDICTION

In [112]:
# Initialize an empty list to store user input
user_data = []

# Ask user for input
senior_citizen = int(input("Is the customer a Senior Citizen? (0 for No, 1 for Yes): "))
user_data.append(senior_citizen)

tenure = int(input("Enter tenure (in months): "))
user_data.append(tenure)

phone_service = int(input("Does the customer have Phone Service? (0 for No, 1 for Yes): "))
user_data.append(phone_service)

multiple_lines = int(input("Does the customer have Multiple Lines? (0 for No, 1 for Yes): "))
user_data.append(multiple_lines)

internet_service = int(input("Enter Internet Service ( 0 for DSL or 1 for Fiber optic): "))
user_data.append(internet_service)

online_security = int(input("Does the customer have Online Security? (0 for No, 1 for Yes): "))
user_data.append(online_security)

online_backup = int(input("Does the customer have Online Backup? (0 for No, 1 for Yes): "))
user_data.append(online_backup)

device_protection = int(input("Does the customer have Device Protection? (0 for No, 1 for Yes): "))
user_data.append(device_protection)

tech_support = int(input("Does the customer have Tech Support? (0 for No, 1 for Yes): "))
user_data.append(tech_support)

streaming_tv = int(input("Does the customer have Streaming TV? (0 for No, 1 for Yes): "))
user_data.append(streaming_tv)

streaming_movies = int(input("Does the customer have Streaming Movies? (0 for No, 1 for Yes): "))
user_data.append(streaming_movies)

contract = int(input("Enter Contract Type (0 for Month-to-month, 1 for One year, 2 for Two years): "))
user_data.append(contract)

paperless_billing = int(input("Is Paperless Billing enabled? (0 for No, 1 for Yes): "))
user_data.append(paperless_billing)

payment_method = int(input("Enter Payment Method (0 for Electronic Check, 1 for Mailed Check, 2 for Bank Transfer, 3 for Credit Card): "))
user_data.append(payment_method)

monthly_charges = float(input("Enter Monthly Charges: "))
user_data.append(monthly_charges)

total_charges = float(input("Enter Total Charges: "))
user_data.append(total_charges)

print("User data collected successfully:", user_data)


Is the customer a Senior Citizen? (0 for No, 1 for Yes):  0
Enter tenure (in months):  2
Does the customer have Phone Service? (0 for No, 1 for Yes):  1
Does the customer have Multiple Lines? (0 for No, 1 for Yes):  1
Enter Internet Service ( 0 for DSL or 1 for Fiber optic):  1
Does the customer have Online Security? (0 for No, 1 for Yes):  1
Does the customer have Online Backup? (0 for No, 1 for Yes):  0
Does the customer have Device Protection? (0 for No, 1 for Yes):  1
Does the customer have Tech Support? (0 for No, 1 for Yes):  1
Does the customer have Streaming TV? (0 for No, 1 for Yes):  0
Does the customer have Streaming Movies? (0 for No, 1 for Yes):  1
Enter Contract Type (0 for Month-to-month, 1 for One year, 2 for Two years):  0
Is Paperless Billing enabled? (0 for No, 1 for Yes):  1
Enter Payment Method (0 for Electronic Check, 1 for Mailed Check, 2 for Bank Transfer, 3 for Credit Card):  2
Enter Monthly Charges:  58
Enter Total Charges:  240


User data collected successfully: [0, 2, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 2, 58.0, 240.0]


# Predicting with the Collected data

In [113]:
input_array = np.array(user_data).reshape(1, -1)

# Get probability estimates for each class
probabilities = lo_rg.predict_proba(input_array)

# Print the probability values
print("Probability of NO (class 0):", probabilities[0, 0])
print("Probability of YES (class 1):", probabilities[0, 1])

y_pred = lo_rg.predict(input_array)
if y_pred==0:
    print("Churn Prediction result is: NO")
elif y_pred==1:
    print("Churn Prediction result is: YES")

Probability of NO (class 0): 0.7712553689161483
Probability of YES (class 1): 0.22874463108385165
Churn Prediction result is: NO
