# Load Libraries

In [None]:
%load_ext autotime

import pandas as pd 
import numpy as np
import os

import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly
import plotly.express as px 
import plotly.graph_objects as go 
from plotly.subplots import make_subplots

# Load Dataset

In [None]:
# Directory holding the dataset and the name of the CSV file inside it.
path_dir = r'bank marketing/dataset'
bank = 'bank-additional-full.csv'

# Build the full path first, then parse the file using ';' as the field
# delimiter and '"' as the quote character.
csv_path = os.path.join(path_dir, bank)
df_bank = pd.read_csv(csv_path, sep=';', quotechar='"')

# Metadata

| No. | Variable | Description | Type |
| :--- | :-------- | :----------- | :---- |
| 1   | age      | Age of the client | numeric |
| 2   | job      | Type of job | categorical: "admin.","blue-collar","entrepreneur","housemaid","management","retired","self-employed","services","student","technician","unemployed","unknown" |
| 3   | marital  | Marital status | categorical: "divorced","married","single","unknown"; note: "divorced" means divorced or widowed |
| 4   | education | Education level | categorical: "basic.4y","basic.6y","basic.9y","high.school","illiterate","professional.course","university.degree","unknown" |
| 5   | default  | Has credit in default? | categorical: "no","yes","unknown" |
| 6   | housing  | Has housing loan? | categorical: "no","yes","unknown" |
| 7   | loan     | Has personal loan? | categorical: "no","yes","unknown" |
| 8   | contact  | Contact communication type | categorical: "cellular","telephone" |
| 9   | month    | Last contact month of year | categorical: "jan", "feb", "mar", ..., "nov", "dec" |
| 10  | day_of_week | Last contact day of the week | categorical: "mon","tue","wed","thu","fri" |
| 11  | duration | Last contact duration, in seconds | numeric |
| 12  | campaign | Number of contacts performed during this campaign and for this client | numeric, includes last contact |
| 13  | pdays    | Number of days that passed by after the client was last contacted from a previous campaign | numeric; 999 means client was not previously contacted |
| 14  | previous | Number of contacts performed before this campaign and for this client | numeric |
| 15  | poutcome | Outcome of the previous marketing campaign | categorical: "failure","nonexistent","success" |
| 16  | emp.var.rate | Employment variation rate - quarterly indicator | numeric |
| 17  | cons.price.idx | Consumer price index - monthly indicator | numeric |
| 18  | cons.conf.idx | Consumer confidence index - monthly indicator | numeric |
| 19  | euribor3m | Euribor 3 month rate - daily indicator | numeric |
| 20  | nr.employed | Number of employees - quarterly indicator | numeric |
| 21  | y        | Has the client subscribed to a term deposit? | binary: "yes","no" |


# Explore Dataset

In [None]:
# Quick structural overview of the loaded data.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly instead of being wrapped in print() (which would emit a
# stray "None" line after the report).
df_bank.info()
print('\n')

# (rows, columns) of the frame.
print(df_bank.shape, '\n')

# Column labels.
print(df_bank.columns, '\n')

# Summary statistics as computed by DataFrame.describe().
print(df_bank.describe())

# Bar/Histogram Plot

In [None]:
# Columns that this loop skips entirely.
boxplot_col = ['age','duration','campaign']
# Numeric columns that are plotted against the contact month.
his_col = ['pdays','previous','emp.var.rate','cons.price.idx','cons.conf.idx','euribor3m','nr.employed']

for col in df_bank.columns:
    # Guard clause: nothing to draw here for the skipped columns.
    if col in boxplot_col:
        continue

    series = df_bank[col]

    # Text columns are object dtype on older pandas but may use the dedicated
    # string dtype on newer versions; accept both so the bar charts are not
    # silently skipped.
    if pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series):
        # Compute the frequency table once and reuse it for data, x and y.
        counts = series.value_counts()
        fig_bar = px.bar(counts, x=counts.index, y=counts.values)
        fig_bar.update_layout(title=col)
        fig_bar.show()

    elif col in his_col:
        # With both x and y supplied, plotly aggregates the y column within
        # each month rather than counting rows.
        fig_his = px.histogram(df_bank, x='month', y=col)
        fig_his.update_layout(title=col)
        fig_his.show()

# Boxplot

In [None]:
# Draw one box plot per int64/float64 column, split by the target column 'y'.
numeric_dtypes = ('int64', 'float64')

for col in df_bank.columns:
    # Skip every column whose dtype is neither int64 nor float64.
    if df_bank[col].dtype not in numeric_dtypes:
        continue

    fig_box = px.box(df_bank, x=col, y='y')
    # update_layout returns the same figure, so the calls can be chained.
    fig_box.update_layout(title=col).show()

# Correlation Plot

In [None]:
# Pairwise correlation of the numeric columns, rendered as a heatmap.
# The frame also contains text columns; on recent pandas versions
# DataFrame.corr() no longer drops them silently and raises instead, so the
# frame is restricted to numeric columns explicitly before correlating.
corr_matrix = df_bank.select_dtypes(include='number').corr()

# The matrix is square, so the same labels are used on both axes.
fig_corr = go.Figure(
    data=go.Heatmap(
        z=corr_matrix.values,
        x=corr_matrix.columns,
        y=corr_matrix.columns,
        hoverongaps=False,
        colorscale='RdYlBu',
    )
)
fig_corr.show()