In [64]:
import numpy as np
import pandas as pd

In [65]:
# Load the bank-marketing records from the CSV file into a DataFrame
# and display it as the cell's output.
data = pd.read_csv(filepath_or_buffer='bank_marketing.csv')
data

Unnamed: 0,client_id,age,job,marital,education,credit_default,mortgage,month,day,contact_duration,number_contacts,previous_campaign_contacts,previous_outcome,cons_price_idx,euribor_three_months,campaign_outcome
0,32884,,,married,,no,no,may,9,371,1,1,failure,92.893,1.299,no
1,3169,55.0,unknown,married,unknown,unknown,yes,may,24,285,2,0,nonexistent,93.994,4.860,no
2,32206,33.0,blue-collar,married,basic.9y,no,no,may,1,52,1,1,failure,92.893,1.313,no
3,9403,,,married,,no,no,jun,3,355,4,0,nonexistent,94.465,4.967,no
4,14020,27.0,housemaid,married,high.school,no,yes,jul,30,189,2,0,nonexistent,93.918,4.963,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9780,40291,,,divorced,,no,no,jul,21,169,2,2,success,94.215,0.899,yes
9781,1412,,,divorced,,no,yes,may,5,453,2,0,nonexistent,93.994,4.855,no
9782,26188,,,married,,no,yes,nov,2,348,3,0,nonexistent,93.200,4.076,no
9783,18326,,,married,,no,yes,jul,7,962,4,0,nonexistent,93.918,4.963,yes


In [66]:
# Flag every row that is an exact repeat of an earlier row
# (the first occurrence is marked False, later repeats True).
is_repeated_row = data.duplicated(keep='first')
is_repeated_row

0       False
1       False
2       False
3       False
4       False
        ...  
9780     True
9781     True
9782     True
9783     True
9784     True
Length: 9785, dtype: bool

In [67]:
# De-duplicate on client_id: keep the first row seen for each client_id
# and discard any later rows that repeat it.
is_repeat_client = data.duplicated(subset=['client_id'], keep='first')
# .copy() gives an independent frame so the column assignments in later
# cells do not operate on a slice of the original.
data = data[~is_repeat_client].copy()
data

Unnamed: 0,client_id,age,job,marital,education,credit_default,mortgage,month,day,contact_duration,number_contacts,previous_campaign_contacts,previous_outcome,cons_price_idx,euribor_three_months,campaign_outcome
0,32884,,,married,,no,no,may,9,371,1,1,failure,92.893,1.299,no
1,3169,55.0,unknown,married,unknown,unknown,yes,may,24,285,2,0,nonexistent,93.994,4.860,no
2,32206,33.0,blue-collar,married,basic.9y,no,no,may,1,52,1,1,failure,92.893,1.313,no
3,9403,,,married,,no,no,jun,3,355,4,0,nonexistent,94.465,4.967,no
4,14020,27.0,housemaid,married,high.school,no,yes,jul,30,189,2,0,nonexistent,93.918,4.963,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9495,39541,52.0,admin.,married,unknown,no,no,apr,23,387,2,0,nonexistent,93.749,0.654,yes
9496,3921,41.0,blue-collar,married,basic.4y,unknown,no,may,29,88,5,0,nonexistent,93.994,4.858,no
9497,18387,32.0,admin.,divorced,high.school,no,unknown,jul,1,81,4,0,nonexistent,93.918,4.968,no
9498,38711,61.0,management,married,university.degree,no,yes,nov,28,195,1,1,success,92.649,0.715,yes


In [68]:
# Lookup table from three-letter lowercase month abbreviation to its
# calendar number (jan -> 1, ..., dec -> 12), built from the ordered list.
month_mapping = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'),
        start=1,
    )
}

In [69]:
# The data has month and day columns but no year column; attach a constant
# placeholder year (2022) so a full date can be assembled in a later cell.
placeholder_year = 2022
data['year'] = placeholder_year

In [70]:
# Replace month names with numeric values.
#
# Series.map converts every value that is not a key of month_mapping to NaN.
# That would silently blank out unexpected labels (e.g. 'May', 'sept') and,
# if this cell is re-run on the already-numeric column, every single month.
# Validate the result before overwriting the column so such cases fail loudly
# instead of producing missing dates later on.
month_numbers = data['month'].map(month_mapping)

# Rows whose month was present but could not be translated. Months that were
# already missing in the input are left as NaN, as before.
unmapped_mask = month_numbers.isna() & data['month'].notna()
if unmapped_mask.any():
    unknown_labels = data.loc[unmapped_mask, 'month'].unique().tolist()
    raise ValueError(
        "Cannot convert 'month' to numbers; values not in month_mapping: "
        f"{unknown_labels}. If the column is already numeric, this cell has "
        "already been run -- re-load the data before running it again."
    )

data['month'] = month_numbers
data

Unnamed: 0,client_id,age,job,marital,education,credit_default,mortgage,month,day,contact_duration,number_contacts,previous_campaign_contacts,previous_outcome,cons_price_idx,euribor_three_months,campaign_outcome,year
0,32884,,,married,,no,no,5,9,371,1,1,failure,92.893,1.299,no,2022
1,3169,55.0,unknown,married,unknown,unknown,yes,5,24,285,2,0,nonexistent,93.994,4.860,no,2022
2,32206,33.0,blue-collar,married,basic.9y,no,no,5,1,52,1,1,failure,92.893,1.313,no,2022
3,9403,,,married,,no,no,6,3,355,4,0,nonexistent,94.465,4.967,no,2022
4,14020,27.0,housemaid,married,high.school,no,yes,7,30,189,2,0,nonexistent,93.918,4.963,no,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9495,39541,52.0,admin.,married,unknown,no,no,4,23,387,2,0,nonexistent,93.749,0.654,yes,2022
9496,3921,41.0,blue-collar,married,basic.4y,unknown,no,5,29,88,5,0,nonexistent,93.994,4.858,no,2022
9497,18387,32.0,admin.,divorced,high.school,no,unknown,7,1,81,4,0,nonexistent,93.918,4.968,no,2022
9498,38711,61.0,management,married,university.degree,no,yes,11,28,195,1,1,success,92.649,0.715,yes,2022


In [71]:
# Assemble a single datetime column from the numeric year / month / day
# columns; pandas builds the dates from columns with exactly these names.
date_parts = data[['year', 'month', 'day']]
data['date'] = pd.to_datetime(date_parts)
data

Unnamed: 0,client_id,age,job,marital,education,credit_default,mortgage,month,day,contact_duration,number_contacts,previous_campaign_contacts,previous_outcome,cons_price_idx,euribor_three_months,campaign_outcome,year,date
0,32884,,,married,,no,no,5,9,371,1,1,failure,92.893,1.299,no,2022,2022-05-09
1,3169,55.0,unknown,married,unknown,unknown,yes,5,24,285,2,0,nonexistent,93.994,4.860,no,2022,2022-05-24
2,32206,33.0,blue-collar,married,basic.9y,no,no,5,1,52,1,1,failure,92.893,1.313,no,2022,2022-05-01
3,9403,,,married,,no,no,6,3,355,4,0,nonexistent,94.465,4.967,no,2022,2022-06-03
4,14020,27.0,housemaid,married,high.school,no,yes,7,30,189,2,0,nonexistent,93.918,4.963,no,2022,2022-07-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9495,39541,52.0,admin.,married,unknown,no,no,4,23,387,2,0,nonexistent,93.749,0.654,yes,2022,2022-04-23
9496,3921,41.0,blue-collar,married,basic.4y,unknown,no,5,29,88,5,0,nonexistent,93.994,4.858,no,2022,2022-05-29
9497,18387,32.0,admin.,divorced,high.school,no,unknown,7,1,81,4,0,nonexistent,93.918,4.968,no,2022,2022-07-01
9498,38711,61.0,management,married,university.degree,no,yes,11,28,195,1,1,success,92.649,0.715,yes,2022,2022-11-28


In [72]:
# The separate month / year / day columns are now redundant with `date`
# (year was only a temporary helper), so remove them from the frame.
data = data.drop(['month', 'year', 'day'], axis='columns')
data

Unnamed: 0,client_id,age,job,marital,education,credit_default,mortgage,contact_duration,number_contacts,previous_campaign_contacts,previous_outcome,cons_price_idx,euribor_three_months,campaign_outcome,date
0,32884,,,married,,no,no,371,1,1,failure,92.893,1.299,no,2022-05-09
1,3169,55.0,unknown,married,unknown,unknown,yes,285,2,0,nonexistent,93.994,4.860,no,2022-05-24
2,32206,33.0,blue-collar,married,basic.9y,no,no,52,1,1,failure,92.893,1.313,no,2022-05-01
3,9403,,,married,,no,no,355,4,0,nonexistent,94.465,4.967,no,2022-06-03
4,14020,27.0,housemaid,married,high.school,no,yes,189,2,0,nonexistent,93.918,4.963,no,2022-07-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9495,39541,52.0,admin.,married,unknown,no,no,387,2,0,nonexistent,93.749,0.654,yes,2022-04-23
9496,3921,41.0,blue-collar,married,basic.4y,unknown,no,88,5,0,nonexistent,93.994,4.858,no,2022-05-29
9497,18387,32.0,admin.,divorced,high.school,no,unknown,81,4,0,nonexistent,93.918,4.968,no,2022-07-01
9498,38711,61.0,management,married,university.degree,no,yes,195,1,1,success,92.649,0.715,yes,2022-11-28
