# IMPORTS

## Libraries

In [1]:
import warnings

import bisect

import numpy  as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from scipy import stats as ss

from IPython.display      import Image
from IPython.core.display import HTML


warnings.filterwarnings("ignore")

## Load Dataset

In [2]:
# Read the raw dataset from its Feather file (path is relative to the notebook's working directory).
dfRaw = pd.read_feather(
    '00-Data/FeatherData/df01.feather'
)

In [3]:
# Preview the first five rows of the raw frame.
dfRaw.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,True,True,101348.88,True
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,False,True,112542.58,False
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,True,False,113931.57,True
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,False,False,93826.63,False
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,True,True,79084.1,False


## Helper Functions

#### Jupyter Settings

In [4]:
def jupyter_settings():
    """Apply this notebook's global plotting and display configuration.

    Everything here mutates global state (IPython, matplotlib rcParams,
    pandas display options, seaborn theme); nothing is returned.
    """
    # IPython line magics: these lines are not plain Python and only work
    # when the cell is executed by IPython/Jupyter.
    %matplotlib inline
    # Populates the interactive namespace from numpy and matplotlib.
    %pylab inline
    
    # Matplotlib defaults: 'bmh' style sheet, 25x12 figures, font size 24.
    plt.style.use('bmh')
    plt.rcParams['figure.figsize'] = [25, 12]
    plt.rcParams['font.size'] = 24
    
    # Inject CSS so the page's `.container` element spans the full width.
    # NOTE(review): `display` is not imported in the import cell; presumably
    # it is provided by the IPython session (or by %pylab) - confirm.
    display( HTML('<style>.container { width:100% !important; }</style>'))
    # Remove pandas' limits on displayed columns and rows, and keep wide
    # frames on a single line instead of wrapping their repr.
    pd.options.display.max_columns = None
    pd.options.display.max_rows = None
    pd.set_option('display.expand_frame_repr', False)
    
    # Apply seaborn's default theme.
    # NOTE(review): this runs after the matplotlib settings above and may
    # override some of them (style / font size) - confirm the order is intended.
    sns.set()

In [5]:
# Apply the notebook-wide plotting and display configuration.
jupyter_settings()

Populating the interactive namespace from numpy and matplotlib


#### Functions

In [6]:
def groupAge(num, breakpoints=[10, 20, 30, 45, 60, 70, 80, 120], result='01234567'):
    """Return the life-stage label for an age.

    Each entry of ``breakpoints`` is the *inclusive* upper bound of one group,
    so with the defaults: 0-10 'Child', 11-20 'Teenager', 21-30 'Young',
    31-45 'Adult', 46-60 'Midlife', 61-70 'Senior', 71-80 'Mature Adulthood',
    and everything above 80 'Late Adulthood'.

    Parameters
    ----------
    num : int or float
        Age to classify.
    breakpoints : sequence of numbers, sorted ascending
        Inclusive upper bound of each group. Only read, never mutated, so the
        shared default list is safe.
    result : str
        Unused; kept only so existing callers that pass it keep working.

    Returns
    -------
    str or None
        The group label, or None when ``num`` is missing (None / NaN).
    """
    labels = (
        'Child',
        'Teenager',
        'Young',
        'Adult',
        'Midlife',
        'Senior',
        'Mature Adulthood',
        'Late Adulthood',
    )

    # A missing age has no group; without this guard NaN would be silently
    # bucketed because every comparison against NaN is False.
    if pd.isna(num):
        return None

    # bisect_left treats each breakpoint as an inclusive upper bound for any
    # numeric age (the previous `bisect(..., num - 1)` trick was only correct
    # for whole numbers: 10.5 was labelled 'Child').
    i = bisect.bisect_left(breakpoints, num)

    # Ages beyond the last breakpoint used to raise an opaque KeyError;
    # they belong to the oldest group.
    return labels[min(i, len(labels) - 1)]

# FEATURE ENGINEERING

In [7]:
# Work on an independent (deep) copy so the raw frame stays untouched.
df02 = dfRaw.copy(deep=True)

In [9]:
# Age group: label every customer's age with its life-stage bucket.
df02['AgeGroup'] = df02['Age'].apply(groupAge)

# Origin: coarse label derived from the customer's country.
# Countries missing from the mapping end up as NaN.
originMap = {'France': 'Latin', 'Spain': 'Latin', 'Germany': 'Anglo-Saxons'}
df02['Origin'] = df02['Geography'].map(originMap)

In [11]:
# Spot-check ten random rows, including the two engineered columns.
df02.sample(n=10)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,AgeGroup,Origin
5442,5443,15660417,Lambert,613,Germany,Female,43,10,120481.69,1,False,False,94875.03,True,Adult,Anglo-Saxons
3198,3199,15736397,Wang,544,France,Male,23,1,96471.2,1,True,False,35550.97,False,Young,Latin
8057,8058,15620836,Lo Duca,816,Germany,Female,34,2,108410.87,2,True,False,102908.91,False,Adult,Anglo-Saxons
3588,3589,15571958,McIntosh,489,Spain,Male,40,3,221532.8,1,True,False,171867.08,False,Adult,Latin
2915,2916,15617482,Milanesi,489,Germany,Female,52,1,131441.51,1,True,False,37240.11,True,Midlife,Anglo-Saxons
2999,3000,15593014,Evseyev,525,France,Male,33,1,112833.35,1,False,True,175178.56,False,Adult,Latin
3249,3250,15591279,Nwagugheuzo,734,France,Male,37,3,80387.81,1,False,True,77272.62,False,Adult,Latin
47,48,15771573,Okagbue,637,Germany,Female,39,9,137843.8,1,True,True,117622.8,True,Adult,Anglo-Saxons
1044,1045,15779059,Timms,670,France,Female,38,4,119624.54,2,True,True,110472.12,False,Adult,Latin
1251,1252,15814930,McGregor,588,Germany,Female,40,10,125534.51,1,True,False,121504.18,True,Adult,Anglo-Saxons


# Convert to .feather

In [13]:
# Persist the engineered frame for the next notebook stage.
df02.to_feather(
    '00-Data/FeatherData/df02.feather'
)