## Follow-up analysis notebook (continues from the successful MVP)

In [13]:
#Imports
import pandas as pd

import plotly.express as px

import numpy as np


In [7]:
# Read the semicolon-delimited bank-full.csv file into a DataFrame and preview it.
df = pd.read_csv(
    "../data/bank-full.csv",
    delimiter=";",
)
df

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,technician,married,tertiary,no,825,no,no,cellular,17,nov,977,3,-1,0,unknown,yes
45207,71,retired,divorced,primary,no,1729,no,no,cellular,17,nov,456,2,-1,0,unknown,yes
45208,72,retired,married,secondary,no,5715,no,no,cellular,17,nov,1127,5,184,3,success,yes
45209,57,blue-collar,married,secondary,no,668,no,no,telephone,17,nov,508,4,-1,0,unknown,no


In [11]:
# Join the `housing` and `loan` values into a single "<housing> & <loan>" label
# per row, then tally how many rows carry each pairing.
df['housing_loan_combo'] = df['housing'].str.cat(df['loan'], sep=" & ")
df['housing_loan_combo'].value_counts()

housing_loan_combo
yes & no     20763
no & no      17204
yes & yes     4367
no & yes      2877
Name: count, dtype: int64

In [14]:
# Human-readable label for each (housing, loan) value pair. Dict order fixes
# the order in which np.select evaluates the masks.
combo_labels = {
    ('yes', 'no'): "Housing + No Other Loan",
    ('yes', 'yes'): "Housing + Other Loan",
    ('no', 'no'): "No Housing + No Other Loan",
    ('no', 'yes'): "No Housing + Other Loan",
}

# One boolean mask per pair, in the same order as the labels.
conditions = [
    df['housing'].eq(housing_value) & df['loan'].eq(loan_value)
    for housing_value, loan_value in combo_labels
]

choices = list(combo_labels.values())

# Rows matching none of the four masks are labelled "Unknown".
df['housing_loan_combo'] = np.select(conditions, choices, default="Unknown")

In [17]:
# Row count per relabelled housing/loan category.
df.loc[:, 'housing_loan_combo'].value_counts()

housing_loan_combo
Housing + No Other Loan       20763
No Housing + No Other Loan    17204
Housing + Other Loan           4367
No Housing + Other Loan        2877
Name: count, dtype: int64

In [18]:
# Grouped bars: number of rows in each housing/loan category, split by the
# outcome column `y`.
fig1 = px.histogram(
    df,
    x='housing_loan_combo',
    color='y',
    barmode='group',
    title="Investment Decision by Housing & Loan Combination",
    labels={
        'housing_loan_combo': 'Housing + Loan Status',
        'y': 'Will Invest?'
    }
)
# px.histogram generates the count axis itself, so a 'count' key in `labels`
# is not applied to it. Set the y-axis title explicitly instead.
fig1.update_yaxes(title_text='Number of Customers')
fig1.show()

In [20]:
# Count rows for every (housing/loan category, outcome) pair, then express each
# count as a share of its own category's total.
combo_counts = df.groupby(['housing_loan_combo', 'y']).size().reset_index(name='count')
category_totals = combo_counts.groupby('housing_loan_combo')['count'].transform('sum')
combo_counts['proportion'] = combo_counts['count'] / category_totals
combo_counts.head()


Unnamed: 0,housing_loan_combo,y,count,proportion
0,Housing + No Other Loan,no,19093,0.919568
1,Housing + No Other Loan,yes,1670,0.080432
2,Housing + Other Loan,no,4102,0.939318
3,Housing + Other Loan,yes,265,0.060682
4,No Housing + No Other Loan,no,14069,0.817775


In [21]:
# Stacked bars: within each housing/loan category, the share of rows for each
# value of the outcome column `y`.
fig2_labels = dict(
    housing_loan_combo='Housing + Loan Status',
    proportion='Proportion',
    y='Will Invest?',
)
fig2 = px.bar(
    data_frame=combo_counts,
    x='housing_loan_combo',
    y='proportion',
    color='y',
    barmode='stack',
    title="Proportion of Investment Decisions by Housing & Loan Combination",
    labels=fig2_labels,
)
fig2.show()

In [22]:
# Count rows for every (marital status, outcome) pair, then convert each count
# into a share of its marital-status group's total.
marital_counts = df.groupby(['marital', 'y']).size().reset_index(name='count')
marital_totals = marital_counts.groupby('marital')['count'].transform('sum')
marital_counts['proportion'] = marital_counts['count'] / marital_totals


In [23]:
# Stacked bars: within each marital status, the share of rows for each value
# of the outcome column `y`.
fig3_labels = dict(
    marital='Marital Status',
    proportion='Proportion',
    y='Will Invest?',
)
fig3 = px.bar(
    data_frame=marital_counts,
    x='marital',
    y='proportion',
    color='y',
    barmode='stack',
    title="Proportion of Investment Decisions by Marital Status",
    labels=fig3_labels,
)
fig3.show()