In [1]:
import pandas as pd
import numpy as np

import os

import tabulate

from imblearn.over_sampling import SMOTE

import pickle

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, OrdinalEncoder

In [2]:
def save_models(name:str, path:str, model) -> None:
    try:
        with open(os.path.join(path,name), 'wb') as f:
            pickle.dump(model, f)

    except:
        print("Failed to save model")

In [3]:
df = pd.read_csv(r"D:\Credit-card-approval-predictor\data\clean_data\Clean_data.csv")

In [4]:
df.shape

(537215, 15)

In [5]:
df['TARGET'].value_counts()

TARGET
Yes    535253
No       1962
Name: count, dtype: int64

In [6]:
df.dtypes

CODE_GENDER             object
FLAG_OWN_CAR            object
FLAG_OWN_REALTY         object
CNT_CHILDREN             int64
AMT_INCOME_TOTAL       float64
NAME_INCOME_TYPE        object
NAME_EDUCATION_TYPE     object
NAME_FAMILY_STATUS      object
NAME_HOUSING_TYPE       object
CNT_FAM_MEMBERS        float64
STATUS                  object
TARGET                  object
AGE                      int64
YR_EMPLOYEED             int64
NEW_OCC_COL             object
dtype: object

#### `CODE_GENDER` column

In [7]:
df['CODE_GENDER'].nunique()

2

In [8]:
df['CODE_GENDER'].unique()

array(['M', 'F'], dtype=object)

In [9]:
df['CODE_GENDER'].isnull().sum()

0

In [10]:
gender_encoder = OneHotEncoder(drop = 'first',
                               sparse_output=False,
                               handle_unknown='ignore')

In [11]:
gender_encoder.fit(df[['CODE_GENDER']])

In [12]:
df['CODE_GENDER'] = gender_encoder.transform(df[['CODE_GENDER']])

In [13]:
df.head()

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL
0,1.0,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
1,1.0,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
2,1.0,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
3,1.0,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
4,1.0,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services


In [14]:
with open(r'D:\Credit-card-approval-predictor\artifacts\gender_encoder.pkl', 'wb') as f:
    pickle.dump(gender_encoder, f)

### `FLAG_OWN_CAR` column

In [15]:
df['FLAG_OWN_CAR'].nunique()

2

In [16]:
df['FLAG_OWN_CAR'].unique()

array(['Y', 'N'], dtype=object)

In [17]:
car_encoder = OneHotEncoder(drop = 'first',
                            sparse_output=False,
                            handle_unknown='ignore')

In [18]:
car_encoder.fit(df[['FLAG_OWN_CAR']])

In [19]:
df['FLAG_OWN_CAR'] = car_encoder.transform(df[['FLAG_OWN_CAR']])

In [20]:
df

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL
0,1.0,1.0,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
1,1.0,1.0,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
2,1.0,1.0,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
3,1.0,1.0,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
4,1.0,1.0,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,Y,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance
537211,1.0,0.0,Y,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,2,No,25,3,Labor and Maintenance
537212,1.0,0.0,Y,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,1,Yes,25,3,Labor and Maintenance
537213,1.0,0.0,Y,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance


In [21]:
save_models(name = 'car.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=car_encoder)

### `FLAG_OWN_REALTY` column

In [22]:
df['FLAG_OWN_REALTY'].nunique()

2

In [23]:
df['FLAG_OWN_REALTY'].unique()

array(['Y', 'N'], dtype=object)

In [24]:
realty_encoder = OneHotEncoder(drop = 'first',
                               sparse_output=False,
                               handle_unknown='ignore')

In [25]:
realty_encoder.fit(df[['FLAG_OWN_REALTY']])

In [26]:
df['FLAG_OWN_REALTY'] = realty_encoder.transform(df[['FLAG_OWN_REALTY']])

In [27]:
df

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL
0,1.0,1.0,1.0,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
1,1.0,1.0,1.0,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
2,1.0,1.0,1.0,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
3,1.0,1.0,1.0,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
4,1.0,1.0,1.0,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance
537211,1.0,0.0,1.0,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,2,No,25,3,Labor and Maintenance
537212,1.0,0.0,1.0,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,1,Yes,25,3,Labor and Maintenance
537213,1.0,0.0,1.0,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance


In [28]:
save_models(name = 'realty.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=realty_encoder)

### `AMT_INCOME_TOTAL` column

In [29]:
annual_amt_scaler = MinMaxScaler()

In [30]:
annual_amt_scaler.fit(df[['AMT_INCOME_TOTAL']])

In [31]:
df['AMT_INCOME_TOTAL'] = annual_amt_scaler.transform(df[['AMT_INCOME_TOTAL']])

In [32]:
df

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL
0,1.0,1.0,1.0,0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
1,1.0,1.0,1.0,0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
2,1.0,1.0,1.0,0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
3,1.0,1.0,1.0,0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
4,1.0,1.0,1.0,0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance
537211,1.0,0.0,1.0,0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,2,No,25,3,Labor and Maintenance
537212,1.0,0.0,1.0,0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,1,Yes,25,3,Labor and Maintenance
537213,1.0,0.0,1.0,0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance


In [33]:
save_models(name = 'annual_amt_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=annual_amt_scaler)

### `CNT_CHILDREN` column

In [34]:
df['CNT_CHILDREN'].nunique()

7

In [35]:
df['CNT_CHILDREN'].unique()

array([0, 3, 1, 2, 4, 5, 7], dtype=int64)

In [36]:
child_scaler = MinMaxScaler()

In [37]:
child_scaler.fit(df[['CNT_CHILDREN']])

In [38]:
df['CNT_CHILDREN'] = child_scaler.transform(df[['CNT_CHILDREN']])

In [39]:
df

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL
0,1.0,1.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
1,1.0,1.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
2,1.0,1.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
3,1.0,1.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
4,1.0,1.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance
537211,1.0,0.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,2,No,25,3,Labor and Maintenance
537212,1.0,0.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,1,Yes,25,3,Labor and Maintenance
537213,1.0,0.0,1.0,0.0,0.088785,Working,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance


In [40]:
save_models(name = 'child_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=child_scaler)

### `NAME_INCOME_TYPE` column

In [41]:
df['NAME_INCOME_TYPE'].nunique()

5

In [42]:
df['NAME_INCOME_TYPE'].unique()

array(['Working', 'Commercial associate', 'State servant', 'Student',
       'Pensioner'], dtype=object)

In [43]:
category_order = [['Working', 'Commercial associate', 'State servant', 'Student',
       'Pensioner']]

In [44]:
income_type_encoder = OrdinalEncoder(categories = category_order)

In [45]:
income_type_encoder.fit(df[['NAME_INCOME_TYPE']])

In [46]:
df['NAME_INCOME_TYPE'] = income_type_encoder.transform(df[['NAME_INCOME_TYPE']])

In [47]:
save_models(name = 'income_type_encoder.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=income_type_encoder)

In [48]:
income_type_scaler = MinMaxScaler()

In [49]:
income_type_scaler.fit(df[['NAME_INCOME_TYPE']])

In [50]:
income_type_scaler.transform(df[['NAME_INCOME_TYPE']])

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]])

In [51]:
save_models(name = 'income_type_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=income_type_scaler)

In [52]:
df

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL
0,1.0,1.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
1,1.0,1.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
2,1.0,1.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
3,1.0,1.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
4,1.0,1.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance
537211,1.0,0.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Single / not married,Rented apartment,1.0,2,No,25,3,Labor and Maintenance
537212,1.0,0.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Single / not married,Rented apartment,1.0,1,Yes,25,3,Labor and Maintenance
537213,1.0,0.0,1.0,0.0,0.088785,0.0,Secondary / secondary special,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance


### `NAME_EDUCATION_TYPE` column

In [53]:
df['NAME_EDUCATION_TYPE'].nunique()

5

In [54]:
df['NAME_EDUCATION_TYPE'].unique()

array(['Secondary / secondary special', 'Higher education',
       'Incomplete higher', 'Lower secondary', 'Academic degree'],
      dtype=object)

In [55]:
education_order = [['Lower secondary', 'Secondary / secondary special', 'Incomplete higher', 'Higher education', 'Academic degree']]

In [56]:
education_order_encoder = OrdinalEncoder(categories = education_order)

In [57]:
education_order_encoder.fit(df[['NAME_EDUCATION_TYPE']])

In [58]:
df['NAME_EDUCATION_TYPE'] = education_order_encoder.transform(df[['NAME_EDUCATION_TYPE']])

In [59]:
save_models(name = 'education_order_encoder.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=education_order_encoder)

In [60]:
education_scaler = MinMaxScaler()

In [61]:
education_scaler.fit(df[['NAME_EDUCATION_TYPE']])

In [62]:
df['NAME_EDUCATION_TYPE'] = education_scaler.transform(df[['NAME_EDUCATION_TYPE']])

In [63]:
save_models(name = 'education_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=education_scaler)

In [64]:
df

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,Married,House / apartment,2.0,C,Yes,58,3,Support and Services
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Single / not married,Rented apartment,1.0,2,No,25,3,Labor and Maintenance
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Single / not married,Rented apartment,1.0,1,Yes,25,3,Labor and Maintenance
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Single / not married,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance


### `NAME_FAMILY_STATUS` column

In [65]:
df['NAME_FAMILY_STATUS'].nunique()

5

In [66]:
df['NAME_FAMILY_STATUS'].unique()

array(['Married', 'Single / not married', 'Civil marriage', 'Separated',
       'Widow'], dtype=object)

In [67]:
df1 = df.drop(['NAME_FAMILY_STATUS'], axis = 'columns')

### `NAME_HOUSING_TYPE` column

In [68]:
df1['NAME_HOUSING_TYPE'].nunique()

6

In [69]:
df1['NAME_HOUSING_TYPE'].unique()

array(['House / apartment', 'Rented apartment', 'Municipal apartment',
       'With parents', 'Co-op apartment', 'Office apartment'],
      dtype=object)

In [70]:
housing_encoder = OneHotEncoder(drop = 'first',
                               sparse_output=False,
                               handle_unknown='ignore')

In [71]:
housing_encoder.fit(df1[['NAME_HOUSING_TYPE']])

In [72]:
housing_encoder.transform(df1[['NAME_HOUSING_TYPE']])

array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.]])

In [73]:
housing_encoder.categories

'auto'

In [74]:
feature_names = housing_encoder.get_feature_names_out()

In [75]:
feature_names = [name.split("_")[-1] for name in feature_names]

In [76]:
feature_names[0].split("_")[-1]

'House / apartment'

In [77]:
Rental_apar_df = pd.DataFrame(housing_encoder.transform(df[['NAME_HOUSING_TYPE']]), columns = feature_names)

In [78]:
df2 = pd.concat([df1,Rental_apar_df], axis = 'columns')

In [79]:
df2

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_HOUSING_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,House / apartment,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,House / apartment,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,House / apartment,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,House / apartment,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,House / apartment,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Rented apartment,1.0,2,No,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Rented apartment,1.0,1,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,Rented apartment,1.0,0,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0


In [80]:
df3 = df2.drop(['NAME_HOUSING_TYPE'], axis = 'columns')

In [81]:
df3

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,2.0,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,1.0,0,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,1.0,2,No,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,1.0,1,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,1.0,0,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0


In [82]:
save_models(name = 'housing_encoder.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=housing_encoder)

### `CNT_FAM_MEMBERS` columns

In [83]:
df3['CNT_FAM_MEMBERS'].nunique()

8

In [84]:
df3['CNT_FAM_MEMBERS'].unique()

array([2., 1., 5., 3., 4., 6., 7., 9.])

In [85]:
fam_mem_scaler = MinMaxScaler()

In [86]:
fam_mem_scaler.fit(df3[['CNT_FAM_MEMBERS']])

In [87]:
df3['CNT_FAM_MEMBERS'] = fam_mem_scaler.transform(df3[['CNT_FAM_MEMBERS']])

In [88]:
df3

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,C,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,2,No,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,1,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0


In [89]:
save_models(name = 'fam_mem_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=fam_mem_scaler)

### `STATUS` column

In [90]:
df3['STATUS'].unique()

array(['C', 'X', '0', '1', '5', '4', '3', '2'], dtype=object)

In [91]:
status = df3['STATUS'].to_list()

In [92]:
new_status = []

for i in status:
    if  i == '0' or i == '1':
        new_status.append('<60')

    elif i == '2' or i == '3' or  i == '4':
        new_status.append('60-150')

    elif i == '5':
        new_status.append('B')

    else:
        new_status.append(i)

In [93]:
df3['STATUS'] = new_status

In [94]:
category_order = [['X', 'C', '<60', '60-150', 'B']]

In [95]:
status_encoder = OrdinalEncoder(categories = category_order)

In [96]:
status_encoder.fit(df3[['STATUS']])

In [97]:
df3[['STATUS']] = status_encoder.transform(df3[['STATUS']])

In [98]:
save_models(name = 'status_encoder.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=status_encoder)

In [99]:
status_scaler = MinMaxScaler()

In [100]:
status_scaler.fit(df3[['STATUS']])

In [101]:
df3['STATUS'] = status_scaler.transform(df3[['STATUS']])

In [102]:
df3['STATUS']

0         0.25
1         0.25
2         0.25
3         0.25
4         0.25
          ... 
537210    0.50
537211    0.75
537212    0.50
537213    0.50
537214    0.50
Name: STATUS, Length: 537215, dtype: float64

In [103]:
df3

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,Yes,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.75,No,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,Yes,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0


In [104]:
save_models(name = 'status_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=status_scaler)

### `TARGET` column

In [105]:
target_encoder = OneHotEncoder(drop = 'first',
                               sparse_output=False,
                               handle_unknown='ignore')

In [106]:
target_encoder.fit(df3[['TARGET']])

In [107]:
df3[['TARGET']] = target_encoder.transform(df3[['TARGET']])

In [108]:
df3

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,58,3,Support and Services,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.75,0.0,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,25,3,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0


In [109]:
save_models(name = 'target_encoder.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=target_encoder)

### `AGE` columns

In [110]:
age_scaler = MinMaxScaler()

In [111]:
age_scaler.fit(df3[['AGE']])

In [112]:
df3[['AGE']] = age_scaler.transform(df3[['AGE']])

In [113]:
save_models(name = 'age_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=age_scaler)

### `YR_EMPLOYEED` column

In [114]:
employ_scaler = MinMaxScaler()

In [115]:
employ_scaler.fit(df3[['YR_EMPLOYEED']])

In [116]:
df3['YR_EMPLOYEED'] = employ_scaler.transform(df3[['YR_EMPLOYEED']])

In [117]:
save_models(name = 'employ_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=employ_scaler)

In [118]:
df3

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,Support and Services,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,Support and Services,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,Support and Services,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,Support and Services,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,Support and Services,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.75,0.0,0.106383,0.069767,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,Labor and Maintenance,0.0,0.0,0.0,1.0,0.0


### `NEW_OCC_COL` column

In [119]:
df3['NEW_OCC_COL'].unique()

array(['Support and Services', 'Customer Service and Hospitality',
       'Finance and Accounting', 'Labor and Maintenance',
       'Administrative and Management', 'Transportation and Logistics',
       'Technical and IT', 'Healthcare'], dtype=object)

In [120]:
category_order = [['Finance and Accounting', 'Technical and IT', 'Healthcare', 'Administrative and Management', 'Customer Service and Hospitality', 'Transportation and Logistics', 'Support and Services', 'Labor and Maintenance']]

In [121]:
occu_type_encoder = OrdinalEncoder(categories = category_order)

In [122]:
occu_type_encoder.fit(df3[['NEW_OCC_COL']])

In [123]:
df3['NEW_OCC_COL'] = occu_type_encoder.transform(df3[['NEW_OCC_COL']])

In [124]:
df3

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,6.0,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,6.0,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,6.0,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,6.0,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,6.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,7.0,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.75,0.0,0.106383,0.069767,7.0,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,7.0,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,7.0,0.0,0.0,0.0,1.0,0.0


In [125]:
save_models(name = 'occu_type_encoder.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=occu_type_encoder)

In [126]:
occu_type_scaler = MinMaxScaler()

In [127]:
occu_type_scaler.fit(df3[['NEW_OCC_COL']])

In [128]:
df3['NEW_OCC_COL'] = occu_type_scaler.transform(df3[['NEW_OCC_COL']])

In [129]:
df3

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,STATUS,TARGET,AGE,YR_EMPLOYEED,NEW_OCC_COL,House / apartment,Municipal apartment,Office apartment,Rented apartment,With parents
0,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,0.857143,1.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,0.857143,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,0.857143,1.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,0.857143,1.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,0.0,0.088785,0.0,0.25,0.125,0.25,1.0,0.808511,0.069767,0.857143,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537210,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,1.000000,0.0,0.0,0.0,1.0,0.0
537211,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.75,0.0,0.106383,0.069767,1.000000,0.0,0.0,0.0,1.0,0.0
537212,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,1.000000,0.0,0.0,0.0,1.0,0.0
537213,1.0,0.0,1.0,0.0,0.088785,0.0,0.25,0.000,0.50,1.0,0.106383,0.069767,1.000000,0.0,0.0,0.0,1.0,0.0


In [130]:
save_models(name = 'occu_type_scaler.pkl',
            path='D:/Credit-card-approval-predictor/artifacts',
            model=occu_type_scaler)

In [133]:
df3.dtypes

CODE_GENDER            float64
FLAG_OWN_CAR           float64
FLAG_OWN_REALTY        float64
CNT_CHILDREN           float64
AMT_INCOME_TOTAL       float64
NAME_INCOME_TYPE       float64
NAME_EDUCATION_TYPE    float64
CNT_FAM_MEMBERS        float64
STATUS                 float64
TARGET                 float64
AGE                    float64
YR_EMPLOYEED           float64
NEW_OCC_COL            float64
House / apartment      float64
Municipal apartment    float64
Office apartment       float64
Rented apartment       float64
With parents           float64
dtype: object

In [145]:
df3.to_csv("D:/Credit-card-approval-predictor/data/ready_data/modelling_data.csv", index=False)