## Preprocessing

In [None]:
# If using colab
!pip install keras-tuner

In [9]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, RobustScaler
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from warnings import filterwarnings
filterwarnings('ignore')



In [2]:
# Read credit_card_data.csv (resolved relative to the notebook's working directory)
# into a DataFrame and preview the first five rows.
credit_card_df = pd.read_csv("credit_card_data.csv")
credit_card_df.head()

Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,5008806,0.0,C,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,Security staff,2
1,5008808,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
2,5008810,0.0,C,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
3,5008811,0.0,C,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
4,5008815,0.0,0,M,Y,Y,0,270000.0,Working,Higher education,Married,House / apartment,-16872,-769,Accountants,2


In [3]:
# Encode the two label columns as integers so they can be used as model targets.
#
# The result is assigned back to the column instead of calling
# `Series.replace(..., inplace=True)` on a column selection: that pattern is
# chained assignment, which does not reliably update the parent DataFrame when
# pandas copy-on-write is active. Re-running this cell is harmless, because the
# string keys no longer match once the columns hold integers.

# STATUS -> binary flag: every numeric code becomes 1, "C" and "X" become 0.
# "1" is listed explicitly; previously it only ended up as 1 through astype(int).
status_codes = {"0": 1, "1": 1, "2": 1, "3": 1, "4": 1, "5": 1, "C": 0, "X": 0}

# NAME_INCOME_TYPE -> integer label per category.
income_type_codes = {
    'Commercial associate': 1,
    'Pensioner': 2,
    'State servant': 3,
    'Student': 4,
    'Working': 5,
    'Academic degree': 6,
}

credit_card_df['STATUS'] = (
    credit_card_df['STATUS'].replace(status_codes).astype(int)
)
credit_card_df['NAME_INCOME_TYPE'] = (
    credit_card_df['NAME_INCOME_TYPE'].replace(income_type_codes).astype(int)
)
credit_card_df.head()


Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,5008806,0.0,0,M,Y,Y,0,112500.0,5,Secondary / secondary special,Married,House / apartment,-21474,-1134,Security staff,2
1,5008808,0.0,1,F,N,Y,0,270000.0,1,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
2,5008810,0.0,0,F,N,Y,0,270000.0,1,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
3,5008811,0.0,0,F,N,Y,0,270000.0,1,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
4,5008815,0.0,1,M,Y,Y,0,270000.0,5,Higher education,Married,House / apartment,-16872,-769,Accountants,2


In [4]:
# One-hot encode the remaining object/category columns with `pd.get_dummies`,
# storing each indicator column as an integer (0/1) rather than a boolean.
credit_card_df_numeric = pd.get_dummies(data=credit_card_df, dtype=int)
credit_card_df_numeric

Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,CNT_FAM_MEMBERS,CODE_GENDER_F,...,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff
0,5008806,0.0,0,0,112500.0,5,-21474,-1134,2,0,...,0,0,0,0,0,0,0,0,1,0
1,5008808,0.0,1,0,270000.0,1,-19110,-3051,1,1,...,0,0,0,0,0,0,1,0,0,0
2,5008810,0.0,0,0,270000.0,1,-19110,-3051,1,1,...,0,0,0,0,0,0,1,0,0,0
3,5008811,0.0,0,0,270000.0,1,-19110,-3051,1,1,...,0,0,0,0,0,0,1,0,0,0
4,5008815,0.0,1,0,270000.0,5,-16872,-769,2,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17038,5149828,0.0,1,0,315000.0,5,-17348,-2420,2,0,...,0,0,1,0,0,0,0,0,0,0
17039,5149834,0.0,0,0,157500.0,1,-12387,-1325,2,1,...,0,0,0,1,0,0,0,0,0,0
17040,5149838,0.0,0,0,157500.0,2,-12387,-1325,2,1,...,0,0,0,1,0,0,0,0,0,0
17041,5150049,0.0,1,0,283500.0,5,-17958,-655,2,1,...,0,0,0,0,0,0,1,0,0,0


In [5]:
# Remove one indicator from each two-valued pair (the *_M / *_Y counterparts are
# kept) together with the ID and MONTHS_BALANCE columns.
columns_to_drop = [
    'CODE_GENDER_F',
    'FLAG_OWN_CAR_N',
    'FLAG_OWN_REALTY_N',
    'ID',
    'MONTHS_BALANCE',
]
credit_card_df_numeric = credit_card_df_numeric.drop(columns=columns_to_drop)

In [6]:
# List the column labels that remain after encoding and dropping columns.
credit_card_df_numeric.columns

Index(['STATUS', 'CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'NAME_INCOME_TYPE',
       'DAYS_BIRTH', 'DAYS_EMPLOYED', 'CNT_FAM_MEMBERS', 'CODE_GENDER_M',
       'FLAG_OWN_CAR_Y', 'FLAG_OWN_REALTY_Y',
       'NAME_EDUCATION_TYPE_Academic degree',
       'NAME_EDUCATION_TYPE_Higher education',
       'NAME_EDUCATION_TYPE_Incomplete higher',
       'NAME_EDUCATION_TYPE_Lower secondary',
       'NAME_EDUCATION_TYPE_Secondary / secondary special',
       'NAME_FAMILY_STATUS_Civil marriage', 'NAME_FAMILY_STATUS_Married',
       'NAME_FAMILY_STATUS_Separated',
       'NAME_FAMILY_STATUS_Single / not married', 'NAME_FAMILY_STATUS_Widow',
       'NAME_HOUSING_TYPE_Co-op apartment',
       'NAME_HOUSING_TYPE_House / apartment',
       'NAME_HOUSING_TYPE_Municipal apartment',
       'NAME_HOUSING_TYPE_Office apartment',
       'NAME_HOUSING_TYPE_Rented apartment', 'NAME_HOUSING_TYPE_With parents',
       'OCCUPATION_TYPE_Accountants', 'OCCUPATION_TYPE_Cleaning staff',
       'OCCUPATION_TYPE_Cooking sta

In [7]:
# Build two supervised problems from the same encoded frame. For each one the
# target column is pulled out as `y` and every other column is kept as `X`.

# Target: STATUS
X_status = credit_card_df_numeric.drop(columns="STATUS").to_numpy()
y_status = credit_card_df_numeric["STATUS"].to_numpy()

# Target: NAME_INCOME_TYPE
X_income = credit_card_df_numeric.drop(columns="NAME_INCOME_TYPE").to_numpy()
y_income = credit_card_df_numeric["NAME_INCOME_TYPE"].to_numpy()

# Train/test partitions (default split proportions, fixed seed for repeatability)
(X_train_status, X_test_status,
 y_train_status, y_test_status) = train_test_split(X_status, y_status, random_state=78)

(X_train_income, X_test_income,
 y_train_income, y_test_income) = train_test_split(X_income, y_income, random_state=78)

In [8]:
# Create the scaler instances: one set for the status features (`_s`) and one
# set for the income features (`_i`), so each is fitted on its own training matrix.
standard_scaler_s = StandardScaler()
max_abs_scaler_s = MaxAbsScaler()
robust_scaler_s = RobustScaler()

standard_scaler_i = StandardScaler()
max_abs_scaler_i = MaxAbsScaler()
robust_scaler_i = RobustScaler()


# Fit each scaler on the training split only, then apply it to both splits.
# NOTE: the robust-scaled training arrays are named `..._rascaled_...` while the
# test arrays are `..._rscaled_...`; the names are kept as-is for compatibility.

# Status data
# Standard scaler on status
X_sscaler_status = standard_scaler_s.fit(X_train_status)
X_train_sscaled_s = X_sscaler_status.transform(X_train_status)
X_test_sscaled_s = X_sscaler_status.transform(X_test_status)

# MaxAbs scaler on status
X_mascaler_status = max_abs_scaler_s.fit(X_train_status)
X_train_mascaled_s = X_mascaler_status.transform(X_train_status)
X_test_mascaled_s = X_mascaler_status.transform(X_test_status)

# Robust scaler on status
X_rscaler_status = robust_scaler_s.fit(X_train_status)
X_train_rascaled_s = X_rscaler_status.transform(X_train_status)
X_test_rscaled_s = X_rscaler_status.transform(X_test_status)

# Income data
# The income matrices must be transformed with the income-fitted scalers.
# Using the status-fitted ones runs without error (both matrices have the same
# number of columns) but applies statistics from the wrong columns.

# Standard scaler on income
X_sscaler_income = standard_scaler_i.fit(X_train_income)
X_train_sscaled_i = X_sscaler_income.transform(X_train_income)
X_test_sscaled_i = X_sscaler_income.transform(X_test_income)

# MaxAbs scaler on income
X_mascaler_income = max_abs_scaler_i.fit(X_train_income)
X_train_mascaled_i = X_mascaler_income.transform(X_train_income)
X_test_mascaled_i = X_mascaler_income.transform(X_test_income)

# Robust scaler on income
X_rscaler_income = robust_scaler_i.fit(X_train_income)
X_train_rascaled_i = X_rscaler_income.transform(X_train_income)
X_test_rscaled_i = X_rscaler_income.transform(X_test_income)


## Compile, Train and Evaluate the Model

### 01 - Standard scaler on status data

In [None]:
def create_model(hyper_p, input_dim=None):
    """Build and compile a binary classifier whose shape is chosen by the tuner.

    Args:
        hyper_p: hyperparameter container supplied by Keras Tuner; it is queried
            through its ``Choice`` and ``Int`` methods.
        input_dim: number of input features. When omitted, the width of the
            notebook-level ``X_train_sscaled_s`` array is used.

    Returns:
        A compiled ``tf.keras`` Sequential model with a single sigmoid output.
    """
    if input_dim is None:
        # Match the feature count of the standard-scaled status training data
        # instead of a hard-coded width.
        input_dim = X_train_sscaled_s.shape[1]

    nn_model = tf.keras.models.Sequential()

    # One activation function is shared by all hidden layers in a given trial.
    activation = hyper_p.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # First hidden layer; the tuner picks its width.
    nn_model.add(tf.keras.layers.Dense(
        units=hyper_p.Int('first_units', min_value=1, max_value=10, step=2),
        activation=activation,
        input_dim=input_dim,
    ))

    # Additional hidden layers; the tuner picks how many and how wide.
    # NOTE(review): the original cell stopped at the `for` header, so the width
    # range below mirrors the first layer's range. Confirm the intended search space.
    for i in range(hyper_p.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(
            units=hyper_p.Int(f'units_{i}', min_value=1, max_value=10, step=2),
            activation=activation,
        ))

    # Output layer and compile settings follow the hand-built `nn_01` model in
    # this notebook (single sigmoid unit, binary cross-entropy, Adam, accuracy).
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

    return nn_model

In [None]:
# Baseline deep network for the standard-scaled status data: three ReLU hidden
# layers followed by a single sigmoid output unit.
number_input_features = X_train_sscaled_s.shape[1]
hidden_nodes_layer1 = 30
hidden_nodes_layer2 = 30
hidden_nodes_layer3 = 30

nn_01 = tf.keras.models.Sequential()

# First hidden layer also declares the input width
nn_01.add(
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        input_dim=number_input_features,
        activation="relu",
    )
)

# Second and third hidden layers
for layer_width in (hidden_nodes_layer2, hidden_nodes_layer3):
    nn_01.add(tf.keras.layers.Dense(units=layer_width, activation="relu"))

# Output layer
nn_01.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Print the layer-by-layer structure
nn_01.summary()

# Compile for binary classification
nn_01.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Fit nn_01 on the standard-scaled status training data for 30 epochs and keep
# the object returned by `fit` in `fit_model`.
fit_model = nn_01.fit(
    x=X_train_sscaled_s,
    y=y_train_status,
    epochs=30,
)

In [None]:
# Score nn_01 on the held-out standard-scaled status test split and report
# the resulting loss and accuracy.
model_loss, model_accuracy = nn_01.evaluate(
    x=X_test_sscaled_s,
    y=y_test_status,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")