In [26]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings('ignore')

In [8]:
df_yes_no = pd.read_csv(r'data/df_yes_no.csv')
df_yes_no_clean = pd.read_csv(r'data/df_yes_no_clean.csv')

In [9]:
df_yes_no.head(5)

Unnamed: 0.1,Unnamed: 0,Offer Accepted,Reward,Mailer Type,Income Level,# Bank Accounts Open,Overdraft Protection,Credit Rating,# Credit Cards Held,# Homes Owned,Household Size,Own Your Home,Average Balance,Q1 Balance,Q2 Balance,Q3 Balance,Q4 Balance
0,0,Yes,Air Miles,Letter,Medium,1,No,Low,1,1,2,No,769.5,54.0,515.0,1204.0,1305.0
1,1,Yes,Air Miles,Postcard,High,1,Yes,Low,2,1,5,Yes,851.25,206.0,467.0,1744.0,988.0
2,2,Yes,Air Miles,Postcard,Low,1,No,High,2,1,2,Yes,931.75,1536.0,1232.0,597.0,362.0
3,3,Yes,Air Miles,Postcard,Low,1,No,Medium,1,1,2,Yes,1192.75,1787.0,908.0,1343.0,733.0
4,4,Yes,Cash Back,Postcard,High,1,No,Low,2,2,5,Yes,1329.5,1073.0,1127.0,1603.0,1515.0


In [10]:
df_yes_no.drop('Unnamed: 0', axis=1, inplace=True)
df_yes_no_clean.drop('Unnamed: 0', axis=1, inplace=True)

# Encoding

In [11]:
df_yes_no.head(1)

Unnamed: 0,Offer Accepted,Reward,Mailer Type,Income Level,# Bank Accounts Open,Overdraft Protection,Credit Rating,# Credit Cards Held,# Homes Owned,Household Size,Own Your Home,Average Balance,Q1 Balance,Q2 Balance,Q3 Balance,Q4 Balance
0,Yes,Air Miles,Letter,Medium,1,No,Low,1,1,2,No,769.5,54.0,515.0,1204.0,1305.0


In [12]:
df_yes_no['Offer Accepted'].replace('No', 0, inplace=True)
df_yes_no['Offer Accepted'].replace('Yes', 1, inplace=True)

In [13]:
object_list = list(df_yes_no.select_dtypes(include = "object").columns)

In [14]:
object_list

['Reward',
 'Mailer Type',
 'Income Level',
 'Overdraft Protection',
 'Credit Rating',
 'Own Your Home']

In [15]:
def dummies(df, column):
    dummies = pd.get_dummies(df[column], prefix_sep = "_", prefix = column, dtype = int)
    df[dummies.columns] = dummies
    df.drop([column], axis = 1, inplace = True)
    return df

In [17]:
for i in object_list:
    dummies(df_yes_no, i)

In [18]:
df_yes_no.shape

(17461, 25)

In [19]:
df_yes_no.head(1)

Unnamed: 0,Offer Accepted,# Bank Accounts Open,# Credit Cards Held,# Homes Owned,Household Size,Average Balance,Q1 Balance,Q2 Balance,Q3 Balance,Q4 Balance,...,Income Level_High,Income Level_Low,Income Level_Medium,Overdraft Protection_No,Overdraft Protection_Yes,Credit Rating_High,Credit Rating_Low,Credit Rating_Medium,Own Your Home_No,Own Your Home_Yes
0,1,1,1,1,2,769.5,54.0,515.0,1204.0,1305.0,...,0,0,1,1,0,0,1,0,1,0


In [20]:
df_yes_no.to_csv(r'data/df_yesno_noscaled2.csv', index=False)

# Scaling

In [23]:
scaler = StandardScaler() 
scaler.fit(df_yes_no[['# Bank Accounts Open', '# Credit Cards Held', '# Homes Owned', 'Household Size', 'Average Balance', 'Q1 Balance', 'Q2 Balance', 'Q3 Balance', 'Q4 Balance']])
X_escaladas = scaler.transform(df_yes_no[['# Bank Accounts Open', '# Credit Cards Held', '# Homes Owned', 'Household Size', 'Average Balance', 'Q1 Balance', 'Q2 Balance', 'Q3 Balance', 'Q4 Balance']])
df_yes_no[['# Bank Accounts Open', '# Credit Cards Held', '# Homes Owned', 'Household Size', 'Average Balance', 'Q1 Balance', 'Q2 Balance', 'Q3 Balance', 'Q4 Balance']] = X_escaladas

In [27]:
pca = PCA(2)
data_transformed = pd.DataFrame(pca.fit_transform(df_yes_no), columns=["PC1","PC2"])
data_transformed.head()

Unnamed: 0,PC1,PC2
0,-0.65852,1.81428
1,-0.251259,1.791217
2,-0.208512,-1.584783
3,0.870136,-0.945227
4,1.82485,0.686832


In [30]:
df_yes_no = pd.concat([df_yes_no, data_transformed], axis=1)

In [32]:
df_yes_no.drop(['# Bank Accounts Open', '# Credit Cards Held', '# Homes Owned', 'Household Size', 'Average Balance', 'Q1 Balance', 'Q2 Balance', 'Q3 Balance', 'Q4 Balance'], inplace=True, axis=1)

In [33]:
df_yes_no.shape

(17461, 18)

In [34]:
df_yes_no.to_csv(r'data/df_yesno_scaled2.csv', index=False)

In [35]:
df_yes_no.head(5)

Unnamed: 0,Offer Accepted,Reward_Air Miles,Reward_Cash Back,Reward_Points,Mailer Type_Letter,Mailer Type_Postcard,Income Level_High,Income Level_Low,Income Level_Medium,Overdraft Protection_No,Overdraft Protection_Yes,Credit Rating_High,Credit Rating_Low,Credit Rating_Medium,Own Your Home_No,Own Your Home_Yes,PC1,PC2
0,1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,0,-0.65852,1.81428
1,1,1,0,0,0,1,1,0,0,0,1,0,1,0,0,1,-0.251259,1.791217
2,1,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,-0.208512,-1.584783
3,1,1,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0.870136,-0.945227
4,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,1.82485,0.686832
