### Import Python Methods

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,recall_score,precision_score

import warnings
warnings.filterwarnings("ignore")

from acquire import get_telco_data
from prepare import prep_telco_data_explore
from prepare import prep_telco_data_model

### Estabolishing a Baseline

In [2]:
df = get_telco_data(cached = True)

In [3]:
# Churn is greater then 50 % for first 8 months of service
df = get_telco_data(cached = True)
month_one = df.tenure <= 8
test = df[month_one]
test.churn.value_counts(normalize = True)

Yes    0.505476
No     0.494524
Name: churn, dtype: float64

In [4]:
# Percent of Customers who Churned over 72 Months
df.churn.value_counts(normalize = True)

No     0.73463
Yes    0.26537
Name: churn, dtype: float64

In [5]:
# Annual Churn per year (6 Years, aka.72 Months)

In [6]:
# Year One
df['tenure_years'] = round(df.tenure / 12, 2)
year_one = df.tenure_years <= 1
test = df[year_one]
test.churn.value_counts(normalize = True)

No     0.525618
Yes    0.474382
Name: churn, dtype: float64

In [7]:
# Year two
df['tenure_years'] = round(df.tenure / 12, 2)
year_one = (df.tenure_years >= 2) & (df.tenure_years >=1)
test = df[year_one]
test.churn.value_counts(normalize = True)

No     0.857143
Yes    0.142857
Name: churn, dtype: float64

In [8]:
# Year three
df['tenure_years'] = round(df.tenure / 12, 2)
year_one = (df.tenure_years >= 3) & (df.tenure_years >=2)
test = df[year_one]
test.churn.value_counts(normalize = True)

No     0.879384
Yes    0.120616
Name: churn, dtype: float64

In [9]:
# Year four
df['tenure_years'] = round(df.tenure / 12, 2)
year_one = (df.tenure_years >= 4) & (df.tenure_years >=3)
test = df[year_one]
test.churn.value_counts(normalize = True)

No     0.903604
Yes    0.096396
Name: churn, dtype: float64

In [10]:
# Year Five
df['tenure_years'] = round(df.tenure / 12, 2)
year_one = (df.tenure_years >= 5) & (df.tenure_years >=4)
test = df[year_one]
test.churn.value_counts(normalize = True)

No     0.933243
Yes    0.066757
Name: churn, dtype: float64

In [11]:
# Year Six
df['tenure_years'] = round(df.tenure / 12, 2)
year_one = (df.tenure_years >= 6) & (df.tenure_years >=5)
test = df[year_one]
test.churn.value_counts(normalize = True)

No     0.983425
Yes    0.016575
Name: churn, dtype: float64

In [12]:
# Cleaned Data for Exploration
df_explore= prep_telco_data_explore(get_telco_data(cached = True))

In [18]:
df_explore.tech_support_cc.value_counts()

0    3473
2    2044
1    1526
Name: tech_support_cc, dtype: int64