## Preprocessing

In [1]:
# Import our dependencies
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.datasets import make_classification

import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense




In [2]:
# Import and read the credit card data.csv.
credit_card_df = pd.read_csv("credit_card_data.csv")
credit_card_df.head()

Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,5008806,0.0,C,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,Security staff,2
1,5008808,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
2,5008810,0.0,C,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
3,5008811,0.0,C,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
4,5008815,0.0,0,M,Y,Y,0,270000.0,Working,Higher education,Married,House / apartment,-16872,-769,Accountants,2


In [3]:
# Remove non-significant categories in some of the columns.
credit_card_df = credit_card_df[
    ~credit_card_df['NAME_INCOME_TYPE'].isin(["Pensioner", "Student"]) &
    ~credit_card_df['OCCUPATION_TYPE'].isin(["HR staff", "IT staff", "Realty agents", "Waiters/barmen staff", "Secretaries"]) &
    ~credit_card_df['NAME_HOUSING_TYPE'].isin(["Rented apartment", "Co-op apartment", "Office apartment"]) &
    ~credit_card_df['NAME_EDUCATION_TYPE'].isin(["Academic degree", "Lower secondary"])
]

# Show the data frame
credit_card_df

Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,5008806,0.0,C,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,Security staff,2
1,5008808,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
2,5008810,0.0,C,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
3,5008811,0.0,C,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
4,5008815,0.0,0,M,Y,Y,0,270000.0,Working,Higher education,Married,House / apartment,-16872,-769,Accountants,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17036,5149190,0.0,5,M,Y,N,1,450000.0,Working,Higher education,Married,House / apartment,-9847,-502,Core staff,3
17037,5149775,0.0,C,F,Y,Y,0,130500.0,Working,Secondary / secondary special,Married,House / apartment,-16137,-9391,Laborers,2
17038,5149828,0.0,5,M,Y,Y,0,315000.0,Working,Secondary / secondary special,Married,House / apartment,-17348,-2420,Managers,2
17039,5149834,0.0,C,F,N,Y,0,157500.0,Commercial associate,Higher education,Married,House / apartment,-12387,-1325,Medicine staff,2


In [4]:
# Classify the STATUS column into two categories (ie. 0 and 1)
credit_card_df['STATUS'].replace({"0": 1, "2": 1, "3": 1, "4": 1, "5": 1, "C": 0, "X": 0}, inplace=True)

# Show the data frame
credit_card_df

Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,5008806,0.0,0,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,Security staff,2
1,5008808,0.0,1,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
2,5008810,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
3,5008811,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1
4,5008815,0.0,1,M,Y,Y,0,270000.0,Working,Higher education,Married,House / apartment,-16872,-769,Accountants,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17036,5149190,0.0,1,M,Y,N,1,450000.0,Working,Higher education,Married,House / apartment,-9847,-502,Core staff,3
17037,5149775,0.0,0,F,Y,Y,0,130500.0,Working,Secondary / secondary special,Married,House / apartment,-16137,-9391,Laborers,2
17038,5149828,0.0,1,M,Y,Y,0,315000.0,Working,Secondary / secondary special,Married,House / apartment,-17348,-2420,Managers,2
17039,5149834,0.0,0,F,N,Y,0,157500.0,Commercial associate,Higher education,Married,House / apartment,-12387,-1325,Medicine staff,2


In [5]:
# Define the current date
current_date = datetime.now()

# Convert the negative integers to birth dates
credit_card_df['DAYS_CONVERTED'] = credit_card_df['DAYS_BIRTH'].apply(lambda x: current_date + timedelta(days=x))

# Calculate the age in years
credit_card_df['AGE'] = credit_card_df['DAYS_CONVERTED'].apply(lambda x: (current_date - x).days // 365)

# Show the data frame
credit_card_df

Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS,DAYS_CONVERTED,AGE
0,5008806,0.0,0,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,Security staff,2,1965-08-06 22:58:55.022656,58
1,5008808,0.0,1,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1,1972-01-26 22:58:55.022656,52
2,5008810,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1,1972-01-26 22:58:55.022656,52
3,5008811,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1,1972-01-26 22:58:55.022656,52
4,5008815,0.0,1,M,Y,Y,0,270000.0,Working,Higher education,Married,House / apartment,-16872,-769,Accountants,2,1978-03-13 22:58:55.022656,46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17036,5149190,0.0,1,M,Y,N,1,450000.0,Working,Higher education,Married,House / apartment,-9847,-502,Core staff,3,1997-06-06 22:58:55.022656,26
17037,5149775,0.0,0,F,Y,Y,0,130500.0,Working,Secondary / secondary special,Married,House / apartment,-16137,-9391,Laborers,2,1980-03-17 22:58:55.022656,44
17038,5149828,0.0,1,M,Y,Y,0,315000.0,Working,Secondary / secondary special,Married,House / apartment,-17348,-2420,Managers,2,1976-11-22 22:58:55.022656,47
17039,5149834,0.0,0,F,N,Y,0,157500.0,Commercial associate,Higher education,Married,House / apartment,-12387,-1325,Medicine staff,2,1990-06-23 22:58:55.022656,33


In [6]:
# Define the current date
current_date = datetime.now()

# Convert the negative integers to employed days
credit_card_df['EMP_CONVERTED'] = credit_card_df['DAYS_EMPLOYED'].apply(lambda x: current_date + timedelta(days=x))

# Calculate the employed days in years
credit_card_df['YEARS_EXPERIENCE'] = credit_card_df['EMP_CONVERTED'].apply(lambda x: (current_date - x).days // 365)

# Show the data frame
credit_card_df

Unnamed: 0,ID,MONTHS_BALANCE,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,OCCUPATION_TYPE,CNT_FAM_MEMBERS,DAYS_CONVERTED,AGE,EMP_CONVERTED,YEARS_EXPERIENCE
0,5008806,0.0,0,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,Security staff,2,1965-08-06 22:58:55.022656,58,2021-04-14 22:58:55.214317,3
1,5008808,0.0,1,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1,1972-01-26 22:58:55.022656,52,2016-01-14 22:58:55.214317,8
2,5008810,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1,1972-01-26 22:58:55.022656,52,2016-01-14 22:58:55.214317,8
3,5008811,0.0,0,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,Sales staff,1,1972-01-26 22:58:55.022656,52,2016-01-14 22:58:55.214317,8
4,5008815,0.0,1,M,Y,Y,0,270000.0,Working,Higher education,Married,House / apartment,-16872,-769,Accountants,2,1978-03-13 22:58:55.022656,46,2022-04-14 22:58:55.214317,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17036,5149190,0.0,1,M,Y,N,1,450000.0,Working,Higher education,Married,House / apartment,-9847,-502,Core staff,3,1997-06-06 22:58:55.022656,26,2023-01-06 22:58:55.214317,1
17037,5149775,0.0,0,F,Y,Y,0,130500.0,Working,Secondary / secondary special,Married,House / apartment,-16137,-9391,Laborers,2,1980-03-17 22:58:55.022656,44,1998-09-05 22:58:55.214317,25
17038,5149828,0.0,1,M,Y,Y,0,315000.0,Working,Secondary / secondary special,Married,House / apartment,-17348,-2420,Managers,2,1976-11-22 22:58:55.022656,47,2017-10-06 22:58:55.214317,6
17039,5149834,0.0,0,F,N,Y,0,157500.0,Commercial associate,Higher education,Married,House / apartment,-12387,-1325,Medicine staff,2,1990-06-23 22:58:55.022656,33,2020-10-05 22:58:55.214317,3


In [7]:
# Define the columns and target
categorical = ['CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE' ]
numerical = ['AMT_INCOME_TOTAL', 'AGE', 'YEARS_EXPERIENCE']
target = ['STATUS']
cols = categorical + numerical + target
cols_to_drop = credit_card_df.columns[~credit_card_df.columns.isin(cols)]
cols_to_drop

Index(['ID', 'MONTHS_BALANCE', 'CNT_CHILDREN', 'DAYS_BIRTH', 'DAYS_EMPLOYED',
       'CNT_FAM_MEMBERS', 'DAYS_CONVERTED', 'EMP_CONVERTED'],
      dtype='object')

In [8]:
# Reduce the columns
credit_card_df_reduced = credit_card_df.drop(columns=cols_to_drop)

# Show the data frame
credit_card_df_reduced

Unnamed: 0,STATUS,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,OCCUPATION_TYPE,AGE,YEARS_EXPERIENCE
0,0,M,Y,Y,112500.0,Working,Secondary / secondary special,Married,House / apartment,Security staff,58,3
1,1,F,N,Y,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,Sales staff,52,8
2,0,F,N,Y,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,Sales staff,52,8
3,0,F,N,Y,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,Sales staff,52,8
4,1,M,Y,Y,270000.0,Working,Higher education,Married,House / apartment,Accountants,46,2
...,...,...,...,...,...,...,...,...,...,...,...,...
17036,1,M,Y,N,450000.0,Working,Higher education,Married,House / apartment,Core staff,26,1
17037,0,F,Y,Y,130500.0,Working,Secondary / secondary special,Married,House / apartment,Laborers,44,25
17038,1,M,Y,Y,315000.0,Working,Secondary / secondary special,Married,House / apartment,Managers,47,6
17039,0,F,N,Y,157500.0,Commercial associate,Higher education,Married,House / apartment,Medicine staff,33,3


In [9]:
# Define the categorical and numerical dataframe
categorical_df = credit_card_df_reduced[categorical]
numerical_df = credit_card_df_reduced[numerical]

# Encoding categorical columnss
categorical_df_encoded = pd.get_dummies(categorical_df, dtype=int)
categorical_df_encoded


Unnamed: 0,CODE_GENDER_F,CODE_GENDER_M,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,FLAG_OWN_REALTY_N,FLAG_OWN_REALTY_Y,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Working,NAME_EDUCATION_TYPE_Higher education,...,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Security staff
0,0,1,0,1,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
1,1,0,1,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,1,0,1,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,1,0,1,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,1,0,1,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17036,0,1,0,1,1,0,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
17037,1,0,0,1,0,1,0,0,1,0,...,0,0,0,1,0,0,0,0,0,0
17038,0,1,0,1,0,1,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
17039,1,0,1,0,0,1,1,0,0,1,...,0,0,0,0,0,0,1,0,0,0


In [10]:
# Combine encoded categorical and numerical dataframe
data_clean = pd.concat([numerical_df, categorical_df_encoded], axis=1)
data_clean

Unnamed: 0,AMT_INCOME_TOTAL,AGE,YEARS_EXPERIENCE,CODE_GENDER_F,CODE_GENDER_M,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,FLAG_OWN_REALTY_N,FLAG_OWN_REALTY_Y,NAME_INCOME_TYPE_Commercial associate,...,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Security staff
0,112500.0,58,3,0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,1
1,270000.0,52,8,1,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
2,270000.0,52,8,1,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
3,270000.0,52,8,1,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
4,270000.0,46,2,0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17036,450000.0,26,1,0,1,0,1,1,0,0,...,1,0,0,0,0,0,0,0,0,0
17037,130500.0,44,25,1,0,0,1,0,1,0,...,0,0,0,1,0,0,0,0,0,0
17038,315000.0,47,6,0,1,0,1,0,1,0,...,0,0,0,0,0,1,0,0,0,0
17039,157500.0,33,3,1,0,1,0,0,1,1,...,0,0,0,0,0,0,1,0,0,0


In [11]:
# Show the summary of the clean data frame
data_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 16039 entries, 0 to 17041
Data columns (total 36 columns):
 #   Column                                             Non-Null Count  Dtype  
---  ------                                             --------------  -----  
 0   AMT_INCOME_TOTAL                                   16039 non-null  float64
 1   AGE                                                16039 non-null  int64  
 2   YEARS_EXPERIENCE                                   16039 non-null  int64  
 3   CODE_GENDER_F                                      16039 non-null  int32  
 4   CODE_GENDER_M                                      16039 non-null  int32  
 5   FLAG_OWN_CAR_N                                     16039 non-null  int32  
 6   FLAG_OWN_CAR_Y                                     16039 non-null  int32  
 7   FLAG_OWN_REALTY_N                                  16039 non-null  int32  
 8   FLAG_OWN_REALTY_Y                                  16039 non-null  int32  
 9   NAME_INCOME

In [12]:
# Define the target column
target_col = credit_card_df_reduced[target]
target_col

Unnamed: 0,STATUS
0,0
1,1
2,0
3,0
4,1
...,...
17036,1
17037,0
17038,1
17039,0


In [13]:
# Split our preprocessed data into our features and target arrays
X = data_clean
y = target_col

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=28)

In [14]:
# Getting the shape of X_train
X_train.shape

(12029, 36)

## Compile, Train and Evaluate the Model

In [15]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = X_train.shape[1]
hidden_nodes_layer1 =  10
hidden_nodes_layer2 = 28
hidden_nodes_layer3 = 28

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="sigmoid"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="sigmoid"))



# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [17]:
# Check in summary of the data frame
data_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 16039 entries, 0 to 17041
Data columns (total 36 columns):
 #   Column                                             Non-Null Count  Dtype  
---  ------                                             --------------  -----  
 0   AMT_INCOME_TOTAL                                   16039 non-null  float64
 1   AGE                                                16039 non-null  int64  
 2   YEARS_EXPERIENCE                                   16039 non-null  int64  
 3   CODE_GENDER_F                                      16039 non-null  int32  
 4   CODE_GENDER_M                                      16039 non-null  int32  
 5   FLAG_OWN_CAR_N                                     16039 non-null  int32  
 6   FLAG_OWN_CAR_Y                                     16039 non-null  int32  
 7   FLAG_OWN_REALTY_N                                  16039 non-null  int32  
 8   FLAG_OWN_REALTY_Y                                  16039 non-null  int32  
 9   NAME_INCOME

In [18]:
# Change the type of the data to debug
X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.float32)
X_test = X_train.astype(np.float32)
y_test = y_train.astype(np.float32)


In [19]:
# Train the model
fit_model = nn.fit(X_train, y_train, epochs=20)

Epoch 1/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7123 - loss: 0.6005
Epoch 2/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7029 - loss: 0.6093
Epoch 3/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7050 - loss: 0.6071
Epoch 4/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6996 - loss: 0.6119
Epoch 5/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6994 - loss: 0.6125
Epoch 6/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7135 - loss: 0.5995
Epoch 7/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7030 - loss: 0.6091
Epoch 8/20
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7108 - loss: 0.6022
Epoch 9/20
[1m376/376[0m [32m━━━━━━━━

In [20]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

376/376 - 1s - 1ms/step - accuracy: 0.7061 - loss: 0.6059
Loss: 0.6058531999588013, Accuracy: 0.706126868724823


## Making Predictions Using the Random Forest Model

In [21]:
# Initialize the Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model to the training data
rf_model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = rf_model.predict(X_test)

  return fit_method(estimator, *args, **kwargs)


In [22]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, y_pred)

In [23]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, y_pred))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,7987,507
Actual 1,1351,2184


Accuracy Score : 0.8455399451325962
Classification Report
              precision    recall  f1-score   support

         0.0       0.86      0.94      0.90      8494
         1.0       0.81      0.62      0.70      3535

    accuracy                           0.85     12029
   macro avg       0.83      0.78      0.80     12029
weighted avg       0.84      0.85      0.84     12029



## Optimise the model using Grid Search

In [24]:
# Initialize the Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

# Fit the grid search to the training data
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 108 candidates, totalling 540 fits


  return fit_method(estimator, *args, **kwargs)


In [25]:
# Print the best parameters and best score
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Cross-validation Score: {grid_search.best_score_:.4f}")

# Evaluate the model on the test set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

Best Parameters: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 300}
Best Cross-validation Score: 0.7076
Test Accuracy: 0.7453


## Optimise the model using hyperparameter tuning.

In [26]:
# Define the hypermodel
def build_model(hp):
    model = Sequential()
    number_input_features = X_train.shape[1]
    
    model.add(
        Dense(
            units=hp.Int('units_layer1', min_value=32, max_value=512, step=32),
            activation='relu',
            input_dim=number_input_features
        )
    )
    
    for i in range(hp.Int('num_layers', 1, 5)):
        model.add(
            Dense(
                units=hp.Int(f'units_layer{i+2}', min_value=10, max_value=100, step=10),
                activation='relu'
            )
        )
    
    model.add(Dense(units=1, activation='sigmoid'))
    
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

# Initialize the tuner
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=2,
    directory='Keras Directory',
    project_name='credit_card_model'
)

# Run the hyperparameter search
tuner.search(X_train, y_train, epochs=10, validation_split=0.2)

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Optionally, train the best model further
best_model.fit(X_train, y_train, epochs=10, validation_split=0.2)

Trial 10 Complete [00h 00m 19s]
val_accuracy: 0.700332522392273

Best val_accuracy So Far: 0.7005403339862823
Total elapsed time: 00h 02m 55s
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5852 - loss: 428.5367 - val_accuracy: 0.7003 - val_loss: 461.3882
Epoch 2/10
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5910 - loss: 219.8824 - val_accuracy: 0.7003 - val_loss: 31.0117
Epoch 3/10
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5921 - loss: 65.8225 - val_accuracy: 0.2997 - val_loss: 64.6894
Epoch 4/10
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5982 - loss: 48.0830 - val_accuracy: 0.2997 - val_loss: 44.5513
Epoch 5/10
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5861 - loss: 34.3933 - val_accuracy: 0.7003 - val_loss: 6.6322
Epoch 6/10
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5889 - loss: 7.4599 - val_accuracy: 0.2997 - val_loss: 3.3122
Epoch 7/10
[1m301/301[0m 

<keras.src.callbacks.history.History at 0x2050af13250>