In [1]:
# Import Necessary Libraries
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
# Load the churn dataset from the CSV file and preview the first rows.
# Only a preview is shown instead of the whole frame; the next cell reports the size.
df = pd.read_csv("churn.csv")
df.head()

In [None]:
# Dimensions of the loaded frame as (number of rows, number of columns)
df.shape

In [None]:
# List every column name in the frame (returned as a NumPy array)
df.columns.values

In [None]:
# Count the NA / missing values in each column
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
# Number of customers in each Churn class
df.Churn.value_counts()

In [None]:
#Visualize the count of customer churn
# Name the column through x= and pass the frame as data=: the first positional
# parameter of countplot is `data` in current seaborn, so a positionally passed
# Series is not interpreted as the x variable.
sns.countplot(x='Churn', data=df)

In [None]:
#To see the percentage of customers that stayed and that left
numRetained = df[df.Churn == 'No'].shape[0]
numChurned = df[df.Churn == 'Yes'].shape[0]
# Only rows labelled 'No' or 'Yes' count towards the total
numTotal = numRetained + numChurned

# print the percentage of customers that stayed
print(numRetained/numTotal * 100,'% of customers stayed in the company')
# print the percentage of customers that left
print(numChurned/numTotal * 100, '% of customers left the company')

In [None]:
# Churn counts broken down by customer gender
sns.countplot(data=df, x='gender', hue='Churn')

In [None]:
# Churn counts broken down by type of internet service
sns.countplot(data=df, x='InternetService', hue='Churn')

In [None]:
#To Visualize Numeric data
# One panel per numeric feature; the retained and churned customers are drawn
# as two semi-transparent histograms on the same axes so they can be compared.
numericFeatures = ['tenure', 'MonthlyCharges']
fig, ax = plt.subplots(1,2, figsize=(28, 8))
for churnValue, barColor in (("No", 'blue'), ("Yes", 'orange')):
  # label= is forwarded to matplotlib's hist so each group can appear in the legend
  df[df.Churn == churnValue][numericFeatures].hist(
      bins=20, color=barColor, alpha=0.5, ax=ax, label='Churn = ' + churnValue)
# Without a legend the two colours cannot be told apart by a reader
for axis in ax:
  axis.legend()

In [None]:
# Build the modelling frame: a copy of df without the customerID column
cleanDF = df.drop(columns=['customerID'])

In [None]:
#Convert all the non-numeric columns to numeric
# Columns that are already numeric are left untouched; every other column is
# replaced by integer codes from a fresh LabelEncoder.
# is_numeric_dtype is used because comparing a dtype with the abstract np.number
# is not a reliable numeric test: integer dtypes never compare equal to it, so
# genuinely numeric columns would be label-encoded into ordinal codes.
# NOTE(review): a column that holds numbers stored as text would still be
# label-encoded here - confirm no such column exists in churn.csv.
for column in cleanDF.columns:
  if pd.api.types.is_numeric_dtype(cleanDF[column]):
    continue
  cleanDF[column] = LabelEncoder().fit_transform(cleanDF[column])

In [None]:
# Show the dtype of every column of the encoded frame
cleanDF.dtypes

In [None]:
# Preview the first five rows of the encoded frame
cleanDF.head()

In [None]:
#Scale the data
x = cleanDF.drop('Churn', axis=1)
y = cleanDF['Churn']
x = StandardScaler().fit_transform(x)

In [None]:
#Split the data into 70% training and 30% testing
xtrain, xtest, ytrain, ytest = train_test_split(x,y, test_size=0.2, random_state=42)

In [None]:
# Build a logistic regression classifier with default settings
model = LogisticRegression()
# Fit it on the training split
model.fit(X=xtrain, y=ytrain)

In [None]:
# Predict the class of every row in the test split
predictions = model.predict(X=xtest)
# Show the predicted labels
print(predictions)

In [None]:
# Precision, recall and f1-score of the predictions against the true test labels
print(classification_report(y_true=ytest, y_pred=predictions))