In [1]:
import pandas as pd
import seaborn as sns
from pydataset import data
import numpy as np
import os
import scipy.stats as stats
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier


#turn off pink warning boxes
import warnings
warnings.filterwarnings("ignore")

import wrangle as w
from env import get_db_url

alpha = 0.5

In [2]:
df = pd.read_sql('SELECT * FROM customers', get_db_url('telco_churn'))

In [3]:
raw_data = get_telco_data()

Unnamed: 0,customer_id,gender,senior_citizen,partner,dependents,tenure,phone_service,multiple_lines,internet,online_security,...,family_house,single_head_house,gender_Male,partner_Yes,dependents_Yes,phone_service_Yes,paperless_billing_Yes,churn_Yes,ihs,ooss
0,0002-ORFBO,Female,0,Yes,Yes,9,Yes,0,1,0,...,True,False,0,1,1,1,1,0,3,2
1,0003-MKNFE,Male,0,No,No,9,Yes,1,1,0,...,False,False,1,0,0,1,0,0,4,0
2,0004-TLHLJ,Male,0,No,No,4,Yes,0,1,0,...,False,False,1,0,0,1,1,1,2,1
3,0011-IGKFF,Male,1,Yes,No,13,Yes,0,1,0,...,False,False,1,1,0,1,1,1,4,2
4,0013-EXCHZ,Female,1,Yes,No,3,Yes,0,1,0,...,False,False,0,1,0,1,1,1,3,1


##########################################################################################################

In [None]:
single_df = prepare.single_house(train_df)

In [None]:
single_df.head()

In [None]:
dual_df = w.dual_house(train_df)

In [None]:
dual_df.head()

In [None]:
family_df = w.family_house(train_df)

In [None]:
family_df.head()

In [None]:
single_head_house_df = w.single_head_house(train_df)

In [None]:
train_df.head()

### Single exploration

exploring the relationship between in-hand-services and churn among single households

- H_0: ihs of single households who churn >= ihs of single households who do not churn
- H_a: ihs of single households who churn < ihs of single households who do not churn

In [None]:
single_df.ihs.hist()

In [None]:
churn_sample_single = single_df[single_df.churn == 'Yes'].ihs

churn_sample_single.hist()

In [None]:
alpha = 0.5

In [None]:
single_df.churn.value_counts(normalize = True)

In [None]:

churn_sample_single = single_df[single_df.churn == 'Yes'].ihs
overall_mean_single = single_df.ihs.mean()

t, p = stats.ttest_1samp(churn_sample_single, overall_mean_single)

print(t, p/2, alpha)

In [None]:
if p/2 > alpha:
    print("We fail to reject $H_{0}$")
elif t < 0:
    print("We fail to reject $H_{0}$")
else:
    print("We reject $H_{0}$")

by rejecting the null hypothesis, we affirm that among single households, those who churn have less in-hand-services than those who do not churn.

#### here we are comparing out of sight services of churn and not churn among single households

- H_0: ooss of those who churn >= ooss of those who do not churn among single households
- H_a: ooss of those who churn < ooss of those who do not churn among single households

In [None]:
churn_sample_single = single_df[single_df.churn == 'Yes'].ooss
overall_mean_single = single_df.ooss.mean()

t, p = stats.ttest_1samp(churn_sample_single, overall_mean_single)

print(t, p/2, alpha)

In [None]:
if p/2 > alpha:
    print("We fail to reject $H_{0}$")
elif t < 0:
    print("We fail to reject $H_{0}$")
else:
    print("We reject $H_{0}$")

In [None]:
churn_sample_single = single_df[single_df.churn == 'Yes'].ooss

churn_sample_single.hist()

From this we can infer that people are more satisfied the more ooss they have

### Family Exploration

exploring the relationship between in-hand-services and churn among family households

- H_0 : ihs of family households who churn >= ihs of family households who do not churn
- H_a: ihs of family households who churn < ihs of family households who do not churn

In [None]:
single_df.churn.value_counts()


In [None]:
family_df.ooss.hist()

In [None]:
single_df.ihs.hist()

In [None]:
single_df.ooss.hist()

In [None]:
churn_sample_family = family_df[family_df.churn == 'Yes'].ihs

churn_sample_family.hist()

In [None]:
churn_sample_family = family_df[family_df.churn == 'Yes'].ihs
overall_mean_family = family_df.ihs.mean()

t, p = stats.ttest_1samp(churn_sample_family, overall_mean_family)

print(t, p/2, alpha)

In [None]:
if p/2 > alpha:
    print("We fail to reject $H_{0}$")
elif t < 0:
    print("We fail to reject $H_{0}$")
else:
    print("We reject $H_{0}$")

by failing to reject the null hypothesis, we recognize there is no relationship between churn and ihs among families

In [None]:
churn_sample_family = family_df[family_df.churn == 'Yes'].ooss

churn_sample_family.hist()

people tend to be more satisfied with more ooss

###############################################################################################################


In [None]:
train_df.head()

In [None]:
train_df.ihs.hist()

In [None]:
train_df.ooss.hist()

In [None]:
train_df[train_df.churn == 'Yes'].ihs.hist()

In [None]:
train_df[train_df.churn == 'Yes'].ooss.hist()

#### some chi-squared

In [None]:
observed = pd.crosstab(single_head_house_df.ooss, single_head_house_df.churn)
observed

In [None]:
stats.chi2_contingency(observed)

In [None]:
chi2, p, degf, expected = stats.chi2_contingency(observed)

In [None]:
print('Observed')
print(observed.values)
print('\nExpected')
print(expected.astype(int))
print('\n----')
print(f'chi^2 = {chi2:.4f}')
print(f'p     = {p:.4f}')

#### considering monthly charges

In [None]:
observed = pd.crosstab(family_df.charge_bins, family_df.ihs)
observed

In [None]:
stats.chi2_contingency(observed)

In [None]:
chi2, p, degf, expected = stats.chi2_contingency(observed)

In [None]:
print('Observed')
print(observed.values)
print('\nExpected')
print(expected.astype(int))
print('\n----')
print(f'chi^2 = {chi2:.4f}')
print(f'p     = {p:.4f}')

In [None]:
observed = pd.crosstab(single_df.charge_bins, single_df.ihs)
observed

## baseline stats

In [None]:
single_df.churn.value_counts(normalize = True)

In [None]:
family_df.churn.value_counts(normalize = True)

In [None]:
dual_df.churn.value_counts(normalize = True)

In [None]:
single_head_house_df.churn.value_counts(normalize = True)

In [None]:
train_df.churn.value_counts(normalize = True)

In [None]:
train_df.ooss.value_counts(normalize = True)

In [None]:
family_df.ooss.value_counts(normalize = True)

In [None]:
single_df.ooss.value_counts(normalize = True)

In [None]:
dual_df.ooss.value_counts(normalize = True)

In [None]:
single_head_house_df.ooss.value_counts(normalize = True)

In [None]:
train_df.online_security.value_counts(normalize = True)