# Churn Analysis Project

## A. Import libraries required and Data processing

In [2]:
# for dataframe computaion
import pandas as pd
# for vector computation
import numpy as np
# for plot
import matplotlib.pyplot as plt
import seaborn as sns
# for Statistical functions
import scipy.stats as stats  
# for data preprocessing
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
# for logistic regression
from sklearn.linear_model import LogisticRegression
# for splitting and train and test datastet randomly
from sklearn.model_selection import train_test_split
# for metrics and model evaluation
from sklearn.metrics import classification_report 

In [3]:
#bring back the dataset
url='Dataset/mycustomerproject.csv'
mai_data_churn=pd.read_csv(url)
mai_data_churn.head(10)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes
5,9305-CDSKC,Female,0,No,No,8,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,99.65,820.5,Yes
6,1452-KIOVK,Male,0,No,Yes,22,Yes,Yes,Fiber optic,No,...,No,No,Yes,No,Month-to-month,Yes,Credit card (automatic),89.1,1949.4,No
7,6713-OKOMC,Female,0,No,No,10,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,No,Mailed check,29.75,301.9,No
8,7892-POOKP,Female,0,Yes,No,28,Yes,Yes,Fiber optic,No,...,Yes,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,104.8,3046.05,Yes
9,6388-TABGU,Male,0,No,Yes,62,Yes,No,DSL,Yes,...,No,No,No,No,One year,No,Bank transfer (automatic),56.15,3487.95,No


In [None]:
#determinate the area of the database
mai_data_churn.shape

**As can be seen in the cell above, the dataframe have 7043 rows and 21 columns, detailed as follows**

In [None]:
#Show informations about the database
mai_data_churn.info()

**And there are no null values in our dataset**

In [None]:
#Show where churn is 'Yes'
data_churning= mai_data_churn.loc[mai_data_churn['Churn'] == 'Yes']
data_churning

**Over a total of 7043 clients, 1869 are churn , any category combined**

In [None]:
fig, ax = plt.subplots(figsize=(10,6))
sns.heatmap(data_churning.corr(), center=0, cmap='Reds')
ax.set_title('Multi-Collinearity of Churning')

In [None]:
#Annotate each cell with the numeric value
fig, ax = plt.subplots(figsize=(10,6))
sns.heatmap(data_churning.corr(), center=0, cmap='BrBG', annot=True)

In [None]:
#to find every categorical values
col_cat = ['gender','SeniorCitizen','Partner','Dependents','tenure','PhoneService','MultipleLines','InternetService','OnlineSecurity','DeviceProtection','TechSupport','StreamingTV','StreamingMovies','Contract','PaperlessBilling','PaymentMethod','MonthlyCharges','TotalCharges','Churn']
for col in col_cat:
    print(col, mai_data_churn[col].unique())

In [None]:
churning= mai_data_churn['Churn'].value_counts().to_frame().reset_index()
churning
churning2= churning.rename(columns={'index':'Cat_Val'})
churning2

In [None]:
#number of churn in general
churning2.plot(kind='bar', x='Cat_Val',y='Churn' )

In [None]:
#see the impact of the churn
churn_impact = mai_data_churn[['Churn', 'MonthlyCharges']].groupby(['Churn']).sum()
churn_impact
churn_impact['Churn_impact'] = round((churn_impact/churn_impact.sum())*100,2)
churn_impact[:2]

In [None]:
Churn = 'No', 'Yes'
Churning = [69.5,30.5]
colors = ['#8B5A8C','#3594FF']
plt.pie(Churning, labels=Churn, colors=colors, autopct='%1.1f%%')    
plt.title('Impact of churn')
plt.show()

**With the Churn, the company lost 30.5% of the revenue that it makes every month.**

## B. Analysis based on the Demographics

In [None]:
demographics_churn=(pd.concat([mai_data_churn[mai_data_churn.columns[1:5].to_list()],mai_data_churn['Churn']],axis=1))
demographics_churn

In [None]:
#to find categorical values
col_cat = ['gender','SeniorCitizen','Partner','Dependents','Churn']
for col in col_cat:
    print(col, mai_data_churn[col].unique())

### 1. Status

In [None]:
status_churn=demographics_churn.groupby(['Partner','Churn']).size().unstack().reset_index()
status_churn

In [None]:
status_churn.plot(kind='bar',x='Partner',y='Yes')

**Clients who don't have partner churn most than clients who have**

In [None]:
status_churn['Proportion']= round ((status_churn['Yes']/1869)*100,2)
status_churn[:2]

In [None]:
Partner = 'No', 'Yes'
Churning = [64.21,35.79]
colors = ['#F0DA32','#3594FF']
plt.pie(Churning, labels=Partner, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by status')
plt.show()

**Of 1869 customers who are churn, 35.79% have a partner and 64.21% do not have**

### 2, Gender

In [None]:
#number of churn by gender
gender_churn= demographics_churn.groupby(['gender','Churn']).size().unstack().reset_index()
gender_churn

In [None]:
#percent of gender one the t0tal of clients
gender_churn['Total_gender'] = (gender_churn['No'] + gender_churn['Yes'])
gender_churn[:2]

In [None]:
gender_churn['Proportion_gender'] = round((gender_churn['Yes'] / 1869)*100,2)
gender_churn[:2]

In [None]:
gender = 'Female', 'Male'
Churning = [50.24,49.76]
colors = ['#F0DA32','#3594FF']
plt.pie(Churning, labels=gender, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by gender')
plt.show()

**Of the 3,488 women, 939 are churned, and of the 3,555 men, 930 are. In other words 26.92% of the women are churn and 26.16% of the men too.   
So 50.24% of churning's population is female and 49.75% is male**

### 3. Age

In [None]:
#number of churn by age
age_churn= demographics_churn.groupby(['SeniorCitizen','Churn']).size().unstack().reset_index()
age_churn['Total'] = (age_churn['No'] + age_churn['Yes'])
age_churn[:2]
age_churn['Proportion_age'] = round((age_churn['Yes'] / 1869)*100,2)
age_churn[:2]

In [None]:
age_percent = age_churn.append(age_churn.sum(numeric_only= True),ignore_index=True)
age_percent

In [None]:
SeniorCitizen = '0.0', '1.0'
Churning = [74.53,25.47]
colors = ['#F0DA32','#3594FF']
plt.pie(Churning, labels=SeniorCitizen, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by age')
plt.show()

**Of the 1869 churns, 1393 are young and 476 are old. This represents 23.61% of young people and 41.68% of old people, respectively.**   
**But the churning by group of age is 74.5% for youngh people and 25.5% for old**

### 4, Dependance

In [None]:
#number of churn by dependance
dependance_churn= demographics_churn.groupby(['Dependents','Churn']).size().unstack().reset_index()
dependance_churn['Total'] = (dependance_churn['No'] + dependance_churn['Yes'])
dependance_churn[:2]
dependance_churn['Proportion'] = round((dependance_churn['Yes'] / 1869)*100,2)
dependance_churn[:2]

In [None]:
Dependents = 'No', 'Yes'
Churning = [82.56,17.44]
colors = ['#F0DA32','#3594FF']
plt.pie(Churning, labels=Dependents, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by dependance')
plt.show()

**82.56% of clients who are churn, aren't dependents , and the 17.44% are**

## C. Analysis on the services used

In [None]:
services_churn=(pd.concat([mai_data_churn[mai_data_churn.columns[6:15].to_list()],mai_data_churn['Churn']],axis=1))
services_churn

**There are 8 categories services**

In [None]:
#to find categorical values
col_cat = ['PhoneService','MultipleLines','InternetService','OnlineSecurity','DeviceProtection','TechSupport','StreamingTV','StreamingMovies','Churn']
for col in col_cat:
    print(col, mai_data_churn[col].unique())

### 1. PhoneService

In [None]:
#number of churn by PhoneService
PhoneService_churn= services_churn.groupby(['PhoneService','Churn']).size().unstack().reset_index()
PhoneService_churn['Total'] = (PhoneService_churn['No'] + PhoneService_churn['Yes'])
PhoneService_churn[:2]
PhoneService_churn['Proportion01'] = round((PhoneService_churn['Yes'] / 1869)*100,2)
PhoneService_churn[:2]

In [None]:
PhoneService_percent = PhoneService_churn.append(PhoneService_churn.sum(numeric_only= True),ignore_index=True)
PhoneService_percent

In [None]:
PhoneService = 'No', 'Yes'
Churning = [9.1, 90.9]
colors = ['#3594FF','#8B5A8C']
plt.pie(Churning, labels=PhoneService, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by Phone services')
plt.show()

***Clients who use phone services are the least likely to leave***

### 2. MultipleLines

In [None]:
#number of churn by MultipleLines
MultipleLines_churn= services_churn.groupby(['MultipleLines','Churn']).size().unstack().reset_index()
MultipleLines_churn['Total'] = (MultipleLines_churn['No'] + MultipleLines_churn['Yes'])
MultipleLines_churn[:3]
MultipleLines_churn['Proportion02'] = round((MultipleLines_churn['Yes'] / 1869)*100,2)
MultipleLines_churn[:3]

In [None]:
MultipleLines_percent = MultipleLines_churn.append(MultipleLines_churn.sum(numeric_only= True),ignore_index=True)
MultipleLines_percent

In [None]:
MultipleLines = 'No', 'No phone service', 'Yes'
Churning = [45.43, 9.10, 45.48]
colors = ['#F0DA32','#3594FF','#8B5A8C']
plt.pie(Churning, labels=MultipleLines, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by MultipleLines services')
plt.show()

***The lowest churning percentage is for customers who no have phone service***

### 3. InternetService

In [None]:
#number of churn by InternetService
InternetService_churn= services_churn.groupby(['InternetService','Churn']).size().unstack().reset_index()
InternetService_churn['Total'] = (InternetService_churn['No'] + InternetService_churn['Yes'])
InternetService_churn[:3]
InternetService_churn['Proportion03'] = round((InternetService_churn['Yes'] / 1869)*100,2)
InternetService_churn[:3]

In [None]:
InternetService_percent = InternetService_churn.append(InternetService_churn.sum(numeric_only= True),ignore_index=True)
InternetService_percent

In [None]:
InternetService = 'DLS', 'Fiber optic', 'No'
Churning = [24.56, 69.40, 6.05]
colors = ['#F0DA32','#3594FF','#8B5A8C']
plt.pie(Churning, labels=InternetService, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by Internet services')
plt.show()

***The highest churning percentage is for customers who use fiber optic for th internet services***

### 4. OnlineSecurity

In [None]:
#number of churn by OnlineSecurity
OnlineSecurity_churn= services_churn.groupby(['OnlineSecurity','Churn']).size().unstack().reset_index()
OnlineSecurity_churn['Total'] = (OnlineSecurity_churn['No'] + OnlineSecurity_churn['Yes'])
OnlineSecurity_churn[:3]
OnlineSecurity_churn['Proportion04'] = round((OnlineSecurity_churn['Yes'] / 1869)*100,2)
OnlineSecurity_churn[:3]

In [None]:
OnlineSecurity_percent = OnlineSecurity_churn.append(OnlineSecurity_churn.sum(numeric_only= True),ignore_index=True)
OnlineSecurity_percent

In [None]:
OnlineSecurity = 'No', 'No internet service', 'Yes'
Churning = [78.17, 6.05, 15.78]
colors = ['#F0DA32','#3594FF','#8B5A8C']
plt.pie(Churning, labels=OnlineSecurity, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by OnlineSecurity services')
plt.show()

***Clients who have no internet service stay loyal***

### 5. DeviceProtection

In [None]:
#number of churn by DeviceProtection
DeviceProtection_churn= services_churn.groupby(['DeviceProtection','Churn']).size().unstack().reset_index()
DeviceProtection_churn['Total'] = (DeviceProtection_churn['No'] + DeviceProtection_churn['Yes'])
DeviceProtection_churn[:3]
DeviceProtection_churn['Proportion05'] = round((DeviceProtection_churn['Yes'] / 1869)*100,2)
DeviceProtection_churn[:3]

In [None]:
DeviceProtection_percent = DeviceProtection_churn.append(DeviceProtection_churn.sum(numeric_only= True),ignore_index=True)
DeviceProtection_percent

In [None]:
DeviceProtection = 'No', 'No internet service', 'Yes'
Churning = [64.79, 6.05, 29.16]
colors = ['#F0DA32','#3594FF','#8B5A8C']
plt.pie(Churning, labels=DeviceProtection, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by DeviceProtection services')
plt.show()

***Clients who have no internet service are the least likely to stay***

### 6. TechSupport

In [None]:
#number of churn by TechSupport
TechSupport_churn= services_churn.groupby(['TechSupport','Churn']).size().unstack().reset_index()
TechSupport_churn['Total'] = (TechSupport_churn['No'] + TechSupport_churn['Yes'])
TechSupport_churn[:3]
TechSupport_churn['Proportion06'] = round((TechSupport_churn['Yes'] / 1869)*100,2)
TechSupport_churn[:3]

In [None]:
TechSupport_percent = TechSupport_churn.append(TechSupport_churn.sum(numeric_only= True),ignore_index=True)
TechSupport_percent

In [None]:
TechSupport = 'No', 'No internet service', 'Yes'
Churning = [77.37, 6.05, 16.59]
colors = ['#F0DA32','#3594FF','#8B5A8C']
plt.pie(Churning, labels=TechSupport, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by TechSupport services')
plt.show()

***Clients who don't use TechSupport services are the least likely to leave
(churning=77.4%)***

### 7. StreamingTV

In [None]:
#number of churn by StreamingTV
StreamingTV_churn= services_churn.groupby(['StreamingTV','Churn']).size().unstack().reset_index()
StreamingTV_churn['Total'] = (StreamingTV_churn['No'] + StreamingTV_churn['Yes'])
StreamingTV_churn[:3]
StreamingTV_churn['Proportion07'] = round((StreamingTV_churn['Yes'] / 1869)*100,2)
StreamingTV_churn[:3]

In [None]:
StreamingTV_percent = StreamingTV_churn.append(StreamingTV_churn.sum(numeric_only= True),ignore_index=True)
StreamingTV_percent

In [None]:
StreamingTV = 'No', 'No internet service', 'Yes'
Churning = [50.40, 6.05, 43.55]
colors = ['#F0DA32','#3594FF','#8B5A8C']
plt.pie(Churning, labels=StreamingTV, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by StreamingTV services')
plt.show()

***Clients who have no internet service are the least likely to stay***

### 8. StreamingMovies

In [None]:
#number of churn by StreamingMovies
StreamingMovies_churn= services_churn.groupby(['StreamingMovies','Churn']).size().unstack().reset_index()
StreamingMovies_churn['Total'] = (StreamingMovies_churn['No'] + StreamingMovies_churn['Yes'])
StreamingMovies_churn[:3]
StreamingMovies_churn['Proportion08'] = round((StreamingMovies_churn['Yes'] / 1869)*100,2)
StreamingMovies_churn[:3]

In [None]:
StreamingMovies_percent = StreamingMovies_churn.append(StreamingMovies_churn.sum(numeric_only= True),ignore_index=True)
StreamingMovies_percent

In [None]:
StreamingMovies = 'No', 'No internet service', 'Yes'
Churning = [50.19, 6.05, 43.77]
colors = ['#FA8F0A','#3594FF','#8B5A8C']
plt.pie(Churning, labels=StreamingMovies, colors=colors, autopct='%1.1f%%')    
plt.title('Churn by StreamingMovies services')
plt.show()

***Clients who have no internet service are the least likely to stay***

## D. Analysis on the Billing information

In [None]:
BillingInformation_churn=(pd.concat([mai_data_churn[mai_data_churn.columns[15:20].to_list()],mai_data_churn['Churn']],axis=1))
BillingInformation_churn

In [None]:
#to find categorical values
col_cat = ['Contract','PaperlessBilling','PaymentMethod','MonthlyCharges','TotalCharges','Churn']
for col in col_cat:
    print(col, mai_data_churn[col].unique())

### 1. Contract

In [None]:
#number of churn by kind of contract
contract_churn= BillingInformation_churn.groupby(['Contract','Churn']).size().unstack().reset_index()
contract_churn['Total'] = (contract_churn['No'] + contract_churn['Yes'])
contract_churn[:3]
contract_churn['Proportion'] = round((contract_churn['Yes'] / 1869)*100,2)
contract_churn[:3]

In [None]:
Contract = 'Month-to-month', 'One year','Two year'
Churning = [88.55, 8.88,2.57]
colors = ['#FA8F0A','#18DE9D','#61F527']
plt.pie(Churning, labels=Contract, colors=colors)
plt.show

***Customers with a monthly contract are those with the highest churn percentage(88.55%). Those with a two-year contract are the most loyal(2.57% of churning)***

***The annual contract policy is clearly better for the company***

### 2.PaperlessBilling

In [None]:
#number of churn by kind of PaperlessBilling
PaperlessBilling_churn= BillingInformation_churn.groupby(['PaperlessBilling','Churn']).size().unstack().reset_index()
PaperlessBilling_churn['Total'] = (PaperlessBilling_churn['No'] + PaperlessBilling_churn['Yes'])
PaperlessBilling_churn[:2]
PaperlessBilling_churn['Proportion'] = round((PaperlessBilling_churn['Yes'] / 1869)*100,2)
PaperlessBilling_churn[:2]

In [None]:
PaperlessBilling = 'No', 'Yes'
Churning = [25.09, 74.91]
colors = ['#FA8F0A','#18DE9D']
plt.pie(Churning, labels=PaperlessBilling, colors=colors)    
plt.title('churn by kind of PaperlessBilling')
plt.show()

***Customers who do not use the Paperless Billing policy are the least likely to stay(469 churn:25.09% pf churning).***

### 3. PaymentMethod

In [None]:
#number of churn by kind of Payment Method
PaymentMethod_churn= BillingInformation_churn.groupby(['PaymentMethod','Churn']).size().unstack().reset_index()
PaymentMethod_churn['Total'] = (PaymentMethod_churn['No'] + PaymentMethod_churn['Yes'])
PaymentMethod_churn[:4]
PaymentMethod_churn['Proportion'] = round((PaymentMethod_churn['Yes'] / 1869)*100,2)
PaymentMethod_churn[:4]

In [None]:
PaymentMethod = 'Bank transfer (automatic)', 'Credit card (automatic)', 'Electronic check', 'Mailed check'
Churning = [13.80, 12.41, 57.30, 16.48]
colors = ['#FA8F0A','#F0DA32','#61F527','#18DE9D']
plt.pie(Churning, labels=PaymentMethod, colors=colors)    
plt.title('Churn by kind of Payment Method')
plt.show()

***Clients who use electronic check for payment method are the least likely to leave
(churning=57.30%)***

# Bonus
***How long will it take for the company to lose all its customers?  Which demographics will they lose first?***

In [None]:
#calculate how soon the company will lose all these customers
quantity = 7043
Churn_percent = 0.2654
day = 0

while quantity >=1:
    quantity = quantity -(quantity * (Churn_percent))
    day +=1
    print(day)

***In the hypothesis where the percentage of loss of customers is fixed and that there is no new customer,the company will lose all these customers nearly in 29 month***

In [None]:
#See which demographics they will lose first

In [None]:
#Sexe
##Female
quantity1 = 3488
Churn_percent1 = 0.2692
day1 = 0

while quantity1 >=1:
    quantity1 = quantity1 -(quantity1 * (Churn_percent1))
    day1 +=1
    print(day1)   
    
##Male
quantity = 3555
Churn_percent = 0.2641
day = 0

while quantity >=1:
    quantity = quantity -(quantity * (Churn_percent))
    day +=1
    print(day)

***In 27 month they will lost all demographics based on gender***

In [None]:
#Age
##Youngh
quantity1 = 5901
Churn_percent1 = 0.7453
day1 = 0

while quantity1 >=1:
    quantity1 = quantity1 -(quantity1 * (Churn_percent1))
    day1 +=1
    print(day1)   
    
#Old
quantity = 1142
Churn_percent = 0.2547
day = 0

while quantity >=1:
    quantity = quantity -(quantity * (Churn_percent))
    day +=1
    print(day)

**They will lost all younght clients(7 months) before the old (24 months)**

In [None]:
#Partner
##no
quantity1 = 3641
Churn_percent1 = 0.4916
day1 = 0

while quantity1 >=1:
    quantity1 = quantity1 -(quantity1 * (Churn_percent1))
    day1 +=1
    print(day1)   
    
#Yes
quantity = 3402
Churn_percent = 0.2447
day = 0

while quantity >=1:
    quantity = quantity -(quantity * (Churn_percent))
    day +=1
    print(day)

**They will lost all client who don't have partner in 13 months before they lost whom have partner(in 29 months)**

In [None]:
#Dependents
##No
quantity1 = 4933
Churn_percent1 = 0.8256
day1 = 0

while quantity1 >=1:
    quantity1 = quantity1 -(quantity1 * (Churn_percent1))
    day1 +=1
    print(day1)   
    
#Yes
quantity = 2110
Churn_percent = 0.1744
day = 0

while quantity >=1:
    quantity = quantity -(quantity * (Churn_percent))
    day +=1
    print(day)

**They will lost all clients who doesn't dependent in 5 months**

### * Abstract

***The company will lost all clients whose dependency demographic is negative (no dependency) first (in 5 months)***

# Part 2

## D. Customer profile

In [5]:
#Change categorical value on numerical value
for col in mai_data_churn.select_dtypes('object'):
    mai_data_churn=pd.get_dummies(mai_data_churn, columns=[col])

In [6]:
mai_data_churn.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,customerID_0002-ORFBO,customerID_0003-MKNFE,customerID_0004-TLHLJ,customerID_0011-IGKFF,customerID_0013-EXCHZ,customerID_0013-MHZWF,customerID_0013-SMEOE,...,TotalCharges_996.85,TotalCharges_996.95,TotalCharges_997.65,TotalCharges_997.75,TotalCharges_998.1,TotalCharges_999.45,TotalCharges_999.8,TotalCharges_999.9,Churn_No,Churn_Yes
0,0,1,29.85,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0,34,56.95,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,2,53.85,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,45,42.3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,2,70.7,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [7]:
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=.25, ramdom_state=1)

NameError: name 'x' is not defined

### 1. Age

In [None]:
#The company has more young than old and they are more inclined to churn, age can use to determinate a profil.


In [4]:
profil= mai_data_churn.loc[mai_data_churn['Churn'] == 'Yes']
profil
age_profil = mai_data_churn[['Churn', 'SeniorCitizen']].groupby(['Churn']).sum()
age_profil

Unnamed: 0_level_0,SeniorCitizen
Churn,Unnamed: 1_level_1
No,666
Yes,476


### 2. Services used

In [None]:
#Given the variety of services offered, the type of services can be used to profile