In [61]:
import pandas as pd
import numpy as np

## Client

In [62]:
# Columns that describe the client; everything else in the raw file is skipped.
cols = [
    'client_id',
    'age',
    'job',
    'marital',
    'education',
    'credit_default',
    'mortgage',
]

# Load only the client columns from the raw marketing CSV.
client = pd.read_csv('bank_marketing.csv', usecols=cols)

In [63]:
# Treat the literal label 'unknown' as a missing value in these three columns.
# np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0.
client = client.replace({'education': {'unknown': np.nan},
                         'credit_default': {'unknown': np.nan},
                         'mortgage': {'unknown': np.nan}
                        })

In [64]:
# Convert the credit_default and mortgage columns to booleans.
# A plain .astype(bool) turns every non-empty string (and NaN) into True,
# so the 'yes'/'no' labels are mapped explicitly. Anything else (missing or
# unrecognised labels) becomes <NA> thanks to the nullable 'boolean' dtype.
yes_no_to_bool = {'yes': True, 'no': False}
for flag_col in ['mortgage', 'credit_default']:
    client[flag_col] = client[flag_col].map(yes_no_to_bool).astype('boolean')

In [65]:
# Replace literal dots in the education labels with underscores.
# regex=False makes the '.' a plain character on every pandas version
# (as a regular expression it would match any character).
client['education'] = client['education'].str.replace('.', '_', regex=False)

In [66]:
# Replace literal dots in the job labels with underscores.
# regex=False makes the '.' a plain character on every pandas version
# (as a regular expression it would match any character).
client['job'] = client['job'].str.replace('.', '_', regex=False)

## Campaign

In [67]:
# list of the columns we want to use
cols = ['client_id', 'number_contacts', 'contact_duration', 'previous_campaign_contacts', 'previous_outcome', 'campaign_outcome', 'month', 'day']

# Import the campaign columns. campaign_outcome is parsed straight to bool
# from its 'yes'/'no' labels. month and day are read as plain strings so they
# can be combined below without any implicit date parsing.
campaign = pd.read_csv('bank_marketing.csv',
                        usecols=cols,
                        dtype={'campaign_outcome': bool, 'month': str, 'day': str},
                        true_values=['yes'],
                        false_values=['no'])

# Build the combined "<month> <day>" text column explicitly instead of using
# parse_dates=[['month', 'day']]: combining columns inside read_csv is
# deprecated in recent pandas, and whether it yields strings or datetimes
# depends on the parser. The column keeps the name 'month_day' and its leading
# position, and the two source columns are removed.
month_day = campaign['month'] + ' ' + campaign['day']
campaign = campaign.drop(columns=['month', 'day'])
campaign.insert(0, 'month_day', month_day)

In [68]:
# Treat the 'nonexistent' label in previous_outcome as a missing value.
# np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0.
campaign = campaign.replace({'previous_outcome': {'nonexistent': np.nan},
                        })

In [69]:
# Convert previous_outcome to a boolean: 'success' -> True, 'failure' -> False.
# A plain .astype(bool) would make every label (and NaN) True. Values outside
# the mapping (e.g. missing or 'nonexistent') become <NA> in the nullable
# 'boolean' dtype.
campaign['previous_outcome'] = (
    campaign['previous_outcome']
    .map({'success': True, 'failure': False})
    .astype('boolean')
)

In [70]:
# Prefix the year to each "month day" string, parse the result into a datetime
# column named last_contact_date, then discard the helper month_day column.
campaign = (
    campaign
    .assign(last_contact_date=pd.to_datetime("2022 " + campaign['month_day'], format='%Y %b %d'))
    .drop(columns='month_day')
)

## Economics

In [71]:
# Columns holding the economic indicators, keyed by client_id.
cols = [
    'client_id',
    'cons_price_idx',
    'euribor_three_months',
]

# Load only the economics columns from the raw marketing CSV.
economics = pd.read_csv('bank_marketing.csv', usecols=cols)

### Save results to a csv

In [72]:
# Write each cleaned table to its own CSV file, leaving out the row index.
for frame, out_path in (
    (client, 'client.csv'),
    (campaign, 'campaign.csv'),
    (economics, 'economics.csv'),
):
    frame.to_csv(out_path, index=False)