In [1]:
#Import the libraries 
import pandas as pd
import numpy as np
import seaborn as sns # For creating plots
import matplotlib.ticker as mtick # For specifying the axes tick format 
import matplotlib.pyplot as plt

sns.set(style = 'white')

In [2]:
# Read the data 
df = pd.read_csv("telecom_dummies.csv")
df.head()

Unnamed: 0,tenure,monthlycharges,totalcharges,gender_Male,seniorcitizen_1,partner_Yes,dependents_Yes,phoneservice_Yes,multiplelines_Yes,onlinesecurity_Yes,...,internetservice_DSL,internetservice_Fiber optic,internetservice_No,contract_Month-to-month,contract_One year,contract_Two year,paymentmethod_Bank transfer (automatic),paymentmethod_Credit card (automatic),paymentmethod_Electronic check,paymentmethod_Mailed check
0,1,29.85,29.85,0,0,1,0,0,0,0,...,1,0,0,1,0,0,0,0,1,0
1,34,56.95,1889.5,1,0,0,0,1,0,1,...,1,0,0,0,1,0,0,0,0,1
2,2,53.85,108.15,1,0,0,0,1,0,1,...,1,0,0,1,0,0,0,0,0,1
3,45,42.3,1840.75,1,0,0,0,0,0,1,...,1,0,0,0,1,0,1,0,0,0
4,2,70.7,151.65,0,0,0,0,1,0,0,...,0,1,0,1,0,0,0,0,1,0


In [3]:
numerical = df.iloc[:,:3]
numerical.head()

Unnamed: 0,tenure,monthlycharges,totalcharges
0,1,29.85,29.85
1,34,56.95,1889.5
2,2,53.85,108.15
3,45,42.3,1840.75
4,2,70.7,151.65


In [4]:
from sklearn.preprocessing import StandardScaler

# Initialize StandardScaler instance
scaler = StandardScaler()

# Fit and transform the scaler on numerical columns
scaled_numerical = scaler.fit_transform(numerical)

In [5]:
scaled_numerical = pd.DataFrame(scaled_numerical, columns=numerical.columns)

In [6]:
df= df.drop(columns=numerical.columns, axis = 1)

In [7]:
telco = df.merge(right=scaled_numerical, how ="left", left_index = True, right_index = True)

In [8]:
telco = telco.rename(columns={"seniorcitizen_1":"seniorcitizen_Yes"})

In [9]:
telco.head()

Unnamed: 0,gender_Male,seniorcitizen_Yes,partner_Yes,dependents_Yes,phoneservice_Yes,multiplelines_Yes,onlinesecurity_Yes,onlinebackup_Yes,deviceprotection_Yes,techsupport_Yes,...,contract_Month-to-month,contract_One year,contract_Two year,paymentmethod_Bank transfer (automatic),paymentmethod_Credit card (automatic),paymentmethod_Electronic check,paymentmethod_Mailed check,tenure,monthlycharges,totalcharges
0,0,0,1,0,0,0,0,1,0,0,...,1,0,0,0,0,1,0,-1.277445,-1.160323,-0.992611
1,1,0,0,0,1,0,1,0,1,0,...,0,1,0,0,0,0,1,0.066327,-0.259629,-0.172165
2,1,0,0,0,1,0,1,1,0,0,...,1,0,0,0,0,0,1,-1.236724,-0.36266,-0.958066
3,1,0,0,0,0,0,1,0,1,1,...,0,1,0,1,0,0,0,0.514251,-0.746535,-0.193672
4,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,1,0,-1.236724,0.197365,-0.938874


In [10]:
# Selecting the features for our X
X = telco.drop(["churn_Yes"],axis = 1)
X.shape

(7043, 26)

In [11]:
# Selecting our target
y = telco[["churn_Yes"]]
y.head()

Unnamed: 0,churn_Yes
0,0
1,0
2,1
3,0
4,1


In [12]:
#Importing the libraries 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Spliting the data for training and testing 
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Using the Logistic Regression and fitting the model
# y_train.values.ravel because the shape needs to be (rows, )
# parameters solver='lbfgs', max_iter=1000 for number of inerations not to reach the limit
model = LogisticRegression(solver='lbfgs', max_iter=1000)
result = model.fit(X_train, y_train.values.ravel())

In [13]:
from sklearn.metrics import accuracy_score

# Predict the values on the testing data
pred_y = model.predict(X_test)

# Measure model performance on testing data
print(accuracy_score(y_test,pred_y))

0.8225691980127751


In [104]:
import math

def logit2prob(coef):
    exp_coef = np.exp(coef)
    odds = (exp_coef - 1)*100
    return(round(odds,2))
    
logit2prob(0.97)

163.79

In [107]:
logit2prob(float_coef)

TypeError: type numpy.ndarray doesn't define __round__ method

In [106]:
float_coef = model.coef_.astype(float)
float_coef = np.transpose(float_coef)

prob_num =(1/(1+np.exp(-float_coef))*100)
prob_num

array([[48.72681134],
       [53.99743068],
       [51.3658482 ],
       [46.02130352],
       [37.02225719],
       [56.59721423],
       [39.60295232],
       [45.85285125],
       [49.64220299],
       [41.55573264],
       [55.46993747],
       [57.94135308],
       [58.28226604],
       [50.23071752],
       [70.75462162],
       [29.13911889],
       [66.69542899],
       [51.03840193],
       [32.47788629],
       [48.73811233],
       [46.62225739],
       [56.78223813],
       [47.92414696],
       [20.43479396],
       [45.52729492],
       [65.75573074]])

In [114]:

# Combine feature names and coefficients into pandas DataFrame
feature_names = pd.DataFrame(X_train.columns, columns = ['Feature'])
log_coef = pd.DataFrame(np.transpose(model.coef_), columns = ['Coefficient'])
probability = pd.DataFrame((prob_num),columns = ['Probability'])
odds = pd.DataFrame((prob_num),columns = ['Probability'])
coefficients = pd.concat([feature_names,log_coef,probability], axis = 1)

# Calculate exponent of the logistic regression coefficients
coefficients['Exp_Coefficient'] = np.exp(coefficients['Coefficient'])

coefficients['Odds'] = (coefficients['Exp_Coefficient'] - 1)*100

#coefficients['odds'] = logit2prob(coefficients['Coefficient'])

# Remove coefficients that are equal to zero
coefficients = coefficients[coefficients['Coefficient']!=0]

coefficients= coefficients[['Feature', 'Coefficient','Exp_Coefficient','Odds','Probability']]


# Print the values sorted by the exponent coefficient
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  
    display(coefficients.sort_values(by=['Coefficient']))

Unnamed: 0,Feature,Coefficient,Exp_Coefficient,Odds,Probability
23,tenure,-1.359338,0.256831,-74.316922,20.434794
15,internetservice_No,-0.888637,0.411216,-58.878413,29.139119
18,contract_Two year,-0.731896,0.480996,-51.900371,32.477886
4,phoneservice_Yes,-0.531262,0.587863,-41.213744,37.022257
6,onlinesecurity_Yes,-0.422037,0.65571,-34.428993,39.602952
9,techsupport_Yes,-0.341038,0.711032,-28.89682,41.555733
24,monthlycharges,-0.179388,0.835782,-16.421821,45.527295
7,onlinebackup_Yes,-0.166268,0.846819,-15.318069,45.852851
3,dependents_Yes,-0.159485,0.852583,-14.741729,46.021304
20,paymentmethod_Credit card (automatic),-0.135316,0.87344,-12.655996,46.622257


In [115]:
display(coefficients.sort_values(by=['Coefficient']))

Unnamed: 0,Feature,Coefficient,Exp_Coefficient,Odds,Probability
23,tenure,-1.359338,0.256831,-74.316922,20.434794
15,internetservice_No,-0.888637,0.411216,-58.878413,29.139119
18,contract_Two year,-0.731896,0.480996,-51.900371,32.477886
4,phoneservice_Yes,-0.531262,0.587863,-41.213744,37.022257
6,onlinesecurity_Yes,-0.422037,0.65571,-34.428993,39.602952
9,techsupport_Yes,-0.341038,0.711032,-28.89682,41.555733
24,monthlycharges,-0.179388,0.835782,-16.421821,45.527295
7,onlinebackup_Yes,-0.166268,0.846819,-15.318069,45.852851
3,dependents_Yes,-0.159485,0.852583,-14.741729,46.021304
20,paymentmethod_Credit card (automatic),-0.135316,0.87344,-12.655996,46.622257
