# 1 Getting Ready

## 1.1 Import Required Libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from dateutil import relativedelta

## 1.2 Setting up Environment

In [4]:
plt.style.use('fivethirtyeight')
%matplotlib inline
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# 2 Load EDA-DPP Data

## 2.1 Load from pickle

In [5]:
# Load the churn frame from its pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
eda_pickle_path = './data/eda/churn_eda.pickle'
df_eda_churn = pd.read_pickle(eda_pickle_path)

## 2.2 Have a Look at the EDA-DPP Output Data

In [6]:
# Preview the first five rows of the loaded frame.
df_eda_churn.head()

Unnamed: 0,CUSTOMER_ID,GENDER,TENURE,MONTHLY_CHARGES,CHURN,CHURN_STATUS
0,7590-VHVEG,Female,1,29.85,No,0
1,5575-GNVDE,Male,34,56.95,No,0
2,3668-QPYBK,Male,2,53.85,Yes,1
3,7795-CFOCW,Male,45,42.3,No,0
4,9237-HQITU,Female,2,70.7,Yes,1


## 2.3 Make a copy of EDA-DPP Data to work on

In [7]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df = df_eda_churn.copy(deep=True)

# 3 Learn more about the Dataset - Meta Info

In [8]:
# (rows, columns) of the working copy.
df.shape

(7043, 6)

In [9]:
# Per-column dtypes and non-null counts, plus memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7043 entries, 0 to 7042
Data columns (total 6 columns):
CUSTOMER_ID        7043 non-null object
GENDER             7043 non-null object
TENURE             7043 non-null int64
MONTHLY_CHARGES    7043 non-null float64
CHURN              7043 non-null object
CHURN_STATUS       7043 non-null int64
dtypes: float64(1), int64(2), object(3)
memory usage: 385.2+ KB


In [10]:
# Column labels of the working copy.
df.columns

Index(['CUSTOMER_ID', 'GENDER', 'TENURE', 'MONTHLY_CHARGES', 'CHURN',
       'CHURN_STATUS'],
      dtype='object')

In [11]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,TENURE,MONTHLY_CHARGES,CHURN_STATUS
count,7043.0,7043.0,7043.0
mean,32.371149,64.761692,0.26537
std,24.559481,30.090047,0.441561
min,0.0,18.25,0.0
25%,9.0,35.5,0.0
50%,29.0,70.35,0.0
75%,55.0,89.85,1.0
max,72.0,118.75,1.0


# 4 Dummification

In [12]:
# Abbreviate the gender labels so the one-hot columns come out as GENDER_F / GENDER_M.
gender_map = {
    'Male': 'M',
    'Female': 'F',
}

# .replace (not .map) leaves labels that are not keys of gender_map untouched.
# With .map, re-running this cell would turn the already-abbreviated 'M'/'F'
# values into NaN, and get_dummies would then emit all-zero rows for them.
df["GENDER"] = df["GENDER"].replace(gender_map)

# Nominal (unordered categorical) columns to one-hot encode.
nominal_cols = ["GENDER"]

# Pin the dummy dtype: pandas >= 2.0 defaults to bool, older releases to uint8.
# An explicit uint8 keeps the 0/1 integer encoding regardless of pandas version.
df_dum_gender = pd.get_dummies(df[nominal_cols], dtype="uint8")

In [13]:
# Preview the one-hot encoded gender columns.
df_dum_gender.head()

Unnamed: 0,GENDER_F,GENDER_M
0,1,0
1,0,1
2,0,1
3,0,1
4,1,0


In [14]:
# GENDER now holds the abbreviated labels; the dummy columns live in df_dum_gender, not in df.
df.head()

Unnamed: 0,CUSTOMER_ID,GENDER,TENURE,MONTHLY_CHARGES,CHURN,CHURN_STATUS
0,7590-VHVEG,F,1,29.85,No,0
1,5575-GNVDE,M,34,56.95,No,0
2,3668-QPYBK,M,2,53.85,Yes,1
3,7795-CFOCW,M,45,42.3,No,0
4,9237-HQITU,F,2,70.7,Yes,1


## 4.1 Concatenate into Master DataFrame

In [12]:
# Append the one-hot gender columns to the working frame, column-wise (aligned on index).
# Dummy columns left in df by a previous run of this cell are dropped first, so
# re-running the cell does not create duplicate columns with the same names.
df = pd.concat(
    [df.drop(columns=df_dum_gender.columns, errors='ignore'), df_dum_gender],
    axis=1,
)

In [13]:
# Confirm the dummy columns were appended to the working frame.
df.head()

Unnamed: 0,CUSTOMER_ID,GENDER,TENURE,MONTHLY_CHARGES,CHURN,CHURN_STATUS,GENDER_F,GENDER_M
0,7590-VHVEG,F,1,29.85,No,0,1,0
1,5575-GNVDE,M,34,56.95,No,0,0,1
2,3668-QPYBK,M,2,53.85,Yes,1,0,1
3,7795-CFOCW,M,45,42.3,No,0,0,1
4,9237-HQITU,F,2,70.7,Yes,1,1,0


# 5 Drop Columns

## 5.1 Drop ID Columns

In [20]:
# Remove the customer identifier column from the working frame.
# `columns=` already names the axis, so no `axis` argument is needed; the result is
# reassigned rather than mutated in place.
df = df.drop(columns=['CUSTOMER_ID'])

## 5.2 Drop CHURN Column

In [21]:
# Remove the textual CHURN column; CHURN_STATUS stays in the frame.
# NOTE(review): CHURN_STATUS appears to be the 0/1 form of CHURN (No -> 0, Yes -> 1
# in the previewed rows) — confirm before relying on it as the label.
# `columns=` already names the axis, so no `axis` argument is needed; the result is
# reassigned rather than mutated in place.
df = df.drop(columns=['CHURN'])

## 5.3 Drop Columns That Were Dummified

In [14]:
# Remove the original GENDER column now that its one-hot columns are in the frame.
# `columns=` already names the axis, so no `axis` argument is needed; the result is
# reassigned rather than mutated in place.
df = df.drop(columns=['GENDER'])

# 6 Save the DataFrame for ML Modeling

In [15]:
# Persist the prepared frame as a pickle for the modeling step.
training_pickle_path = './data/training/churn.pickle'
df.to_pickle(training_pickle_path)