In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
import optuna
import category_encoders as ce


import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the labelled training data; its `bad_loans` column is split off as the target in a later cell.
train_loan = pd.read_csv('credit risk train.csv')

In [3]:
# Load the separate test CSV.
# NOTE(review): test_loan is not referenced by any of the cells visible in this notebook section.
test_loan = pd.read_csv('credit risk test.csv')

In [4]:
# Separate the target column (bad_loans) from the feature columns.
y = train_loan['bad_loans']
X = train_loan.drop(columns=['bad_loans'])

In [5]:
# pymnt_plan takes a single value for every row, so it is removed.
X = X.drop(columns=['pymnt_plan'])

In [6]:
# Remove the features that were flagged as collinear.
collinear_features = ['funded_amnt', 'delinq_2yrs_zero', 'pub_rec']
X = X.drop(columns=collinear_features)

In [7]:
# Cast the 'object' columns to the pandas 'category' dtype.
cat_features = ['grade', 'home_ownership', 'purpose']
X = X.astype({name: 'category' for name in cat_features})

<h3>Encoding</h3>

In [8]:
# Target-encode the categorical columns and replace the originals with
# their '<name>_enc' counterparts.
# NOTE(review): the encoder is fitted on every row of X / y, i.e. before
# the train/valid/test split done in a later cell, so held-out rows
# contribute their labels to the encoding -- confirm this is intended.
cat_features = ['grade', 'home_ownership', 'purpose']
encoder = ce.TargetEncoder(cols=cat_features)
encoder.fit(X[cat_features], y)

encoded_cats = encoder.transform(X[cat_features]).add_suffix('_enc')
X_enc = pd.concat([X, encoded_cats], axis=1).drop(columns=cat_features)

<h3>Handling Missing Values</h3>

In [9]:
# Impute missing values on a new frame (X_enc itself is left untouched):
#   * payment_inc_ratio -> 0
#   * every other column -> -1 (sentinel)
# The per-column fill is passed as a dict instead of calling
# `frame.column.fillna(..., inplace=True)`: that chained in-place call acts
# on a temporary object under pandas Copy-on-Write, which would leave the
# NaNs in payment_inc_ratio to be filled with -1 by the second step.
X_enc_imp = X_enc.fillna({'payment_inc_ratio': 0}).fillna(-1)

<h3>Train Test Split</h3>

In [10]:
# Stratified 70 / 15 / 15 split into train / test / validation.
# A fixed seed makes the split (and everything derived from it) repeatable,
# and the 30% hold-out gets its own name so that re-running the second line
# does not keep halving an already-split test set.
SPLIT_SEED = 42
Xtrain, Xhold, ytrain, yhold = train_test_split(
    X_enc_imp, y, stratify=y, test_size=0.3, random_state=SPLIT_SEED)
Xtest, Xvalid, ytest, yvalid = train_test_split(
    Xhold, yhold, stratify=yhold, test_size=0.5, random_state=SPLIT_SEED)

<h3>Handling Outliers</h3>

In [11]:
# Log Transform
# Each column in `cols` is replaced by log(value + 1.1) in a copy of every split.
# (Presumably the 1.1 offset keeps the argument positive for the -1
# missing-value sentinel introduced during imputation -- TODO confirm.)
cols = ['payment_inc_ratio', 'open_acc']


def _log_transformed(frame):
    """Return a copy of `frame` whose `cols` columns hold log(value + 1.1)."""
    return frame.assign(**{name: np.log(frame[name] + 1.1) for name in cols})


Xtrain_log = _log_transformed(Xtrain)
Xvalid_log = _log_transformed(Xvalid)
Xtest_log = _log_transformed(Xtest)

In [12]:
# z-score outlier handling: clip every listed column to mean +/- 3*std.
# The bounds are computed on the TRAINING split only and then re-used for
# the validation and test splits.
columns = ['loan_amnt', 'sub_grade_num', 'emp_length_num', 'dti', 'delinq_2yrs',
           'inq_last_6mths', 'revol_util', 'payment_inc_ratio', 'open_acc']

# Compute the bounds once. Only integer / float columns are clipped; the
# pandas dtype helpers are used instead of `dtype == int` / `dtype == float`
# because those comparisons depend on the platform's default integer width
# and silently skip e.g. int32 or float32 columns.
clip_bounds = {}
for c in columns:
    if pd.api.types.is_integer_dtype(Xtrain[c]) or pd.api.types.is_float_dtype(Xtrain[c]):
        center = Xtrain[c].mean()
        spread = Xtrain[c].std()
        clip_bounds[c] = (center - 3 * spread, center + 3 * spread)


def _clip_outliers(frame):
    """Return a copy of `frame` with each bounded column clipped to the training-split limits."""
    clipped = frame.copy()
    for name, (lower, upper) in clip_bounds.items():
        clipped[name] = clipped[name].clip(lower, upper)
    return clipped


########### train #############
Xtrain_clip = _clip_outliers(Xtrain)

########### valid #############
Xvalid_clip = _clip_outliers(Xvalid)

########### test #############
Xtest_clip = _clip_outliers(Xtest)

In [13]:
# Log and z-score
# Combine both treatments: the two log-transformed columns come from the
# *_log frames, every other column comes from the *_clip frames. The logged
# columns end up as the last two columns of the result.
log_cols = ['payment_inc_ratio', 'open_acc']


def _clip_with_logged(clipped, logged):
    """Swap the `log_cols` of `clipped` for their versions in `logged` (appended at the end)."""
    remaining = clipped.drop(columns=log_cols)
    return pd.concat([remaining, logged[log_cols]], axis=1)


Xtrain_log_clip = _clip_with_logged(Xtrain_clip, Xtrain_log)
Xvalid_log_clip = _clip_with_logged(Xvalid_clip, Xvalid_log)
Xtest_log_clip = _clip_with_logged(Xtest_clip, Xtest_log)

<h3>Normalization: MinMaxScaler</h3>

In [14]:
# Normalization
# One MinMaxScaler instance is re-fitted on the training split of each
# preprocessing variant and then applied to that variant's valid/test splits.
mmscaler = MinMaxScaler()


def _scale_splits(train, valid, test):
    """Fit `mmscaler` on `train`, scale all three splits, and return them as DataFrames.

    Column labels and row index of every input frame are carried over.
    """
    scaled_arrays = (
        mmscaler.fit_transform(train),
        mmscaler.transform(valid),
        mmscaler.transform(test),
    )
    return tuple(
        pd.DataFrame(values, columns=source.columns, index=source.index)
        for values, source in zip(scaled_arrays, (train, valid, test))
    )


# raw features
Xtrain_normal, Xvalid_normal, Xtest_normal = _scale_splits(Xtrain, Xvalid, Xtest)

# log-transformed features
Xtrain_log_normal, Xvalid_log_normal, Xtest_log_normal = _scale_splits(
    Xtrain_log, Xvalid_log, Xtest_log)

# clipped features
Xtrain_clip_normal, Xvalid_clip_normal, Xtest_clip_normal = _scale_splits(
    Xtrain_clip, Xvalid_clip, Xtest_clip)

# log-transformed + clipped features
Xtrain_log_clip_normal, Xvalid_log_clip_normal, Xtest_log_clip_normal = _scale_splits(
    Xtrain_log_clip, Xvalid_log_clip, Xtest_log_clip)

<h3>Over Sampling</h3>

In [15]:
# Over-Sampling
# SMOTE is applied to the training split of every preprocessing variant.
# A fixed random_state makes the synthetic samples repeatable between runs.
over_sampler = SMOTE(random_state=42)

########### train #############
Xtrain_over, ytrain_over = over_sampler.fit_resample(Xtrain, ytrain)
Xtrain_normal_over, ytrain_normal_over = over_sampler.fit_resample(Xtrain_normal, ytrain)
Xtrain_log_normal_over, ytrain_log_normal_over = over_sampler.fit_resample(Xtrain_log_normal, ytrain)
Xtrain_clip_normal_over, ytrain_clip_normal_over = over_sampler.fit_resample(Xtrain_clip_normal, ytrain)
Xtrain_log_clip_normal_over, ytrain_log_clip_normal_over = over_sampler.fit_resample(Xtrain_log_clip_normal, ytrain)

<h3>Under Sampling</h3>

In [16]:
# Under-Sampling
# Random under-sampling of the training split of every preprocessing variant.
# Without a fixed random_state each fit_resample call below would keep a
# different random subset of rows, so the variants would not be trained on
# the same samples and results would change from run to run.
under_sampler = RandomUnderSampler(random_state=42)

########### train #############
Xtrain_under, ytrain_under = under_sampler.fit_resample(Xtrain, ytrain)
Xtrain_normal_under, ytrain_normal_under = under_sampler.fit_resample(Xtrain_normal, ytrain)
Xtrain_log_normal_under, ytrain_log_normal_under = under_sampler.fit_resample(Xtrain_log_normal, ytrain)
Xtrain_clip_normal_under, ytrain_clip_normal_under = under_sampler.fit_resample(Xtrain_clip_normal, ytrain)
Xtrain_log_clip_normal_under, ytrain_log_clip_normal_under = under_sampler.fit_resample(Xtrain_log_clip_normal, ytrain)

<h3>PCA</h3>

In [17]:
# PCA with Under Sampled Data
# For every preprocessing variant, PCA is fitted on the under-sampled
# training frame and its component scores are appended as extra columns to
# the train / valid / test frames (the original features are kept).
from sklearn.decomposition import PCA
pca = PCA(n_components=0.99)


def _pca_scores(frame, refit=False):
    """Project `frame` with the shared `pca` object (re-fitting it first when `refit` is true).

    Returns the component scores as a DataFrame that shares `frame`'s index.
    """
    scores = pca.fit_transform(frame) if refit else pca.transform(frame)
    return pd.DataFrame(scores, index=frame.index)


def _append_scores(frame, scores):
    """Concatenate `scores` to the right of `frame` and make all column labels strings."""
    combined = pd.concat([frame, scores], axis=1)
    combined.columns = combined.columns.astype(str)
    return combined


# raw features
Xtrain_pca = _pca_scores(Xtrain_under, refit=True)
Xtrain_under_pca = _append_scores(Xtrain_under, Xtrain_pca)
Xvalid_pca = _append_scores(Xvalid, _pca_scores(Xvalid))
Xtest_pca = _append_scores(Xtest, _pca_scores(Xtest))

# normalized features
Xtrain_normal_pca = _pca_scores(Xtrain_normal_under, refit=True)
Xtrain_normal_under_pca = _append_scores(Xtrain_normal_under, Xtrain_normal_pca)
Xvalid_normal_pca = _append_scores(Xvalid_normal, _pca_scores(Xvalid_normal))
Xtest_normal_pca = _append_scores(Xtest_normal, _pca_scores(Xtest_normal))

# log + normalized features
Xtrain_log_normal_pca = _pca_scores(Xtrain_log_normal_under, refit=True)
Xtrain_log_normal_under_pca = _append_scores(Xtrain_log_normal_under, Xtrain_log_normal_pca)
Xvalid_log_normal_pca = _append_scores(Xvalid_log_normal, _pca_scores(Xvalid_log_normal))
Xtest_log_normal_pca = _append_scores(Xtest_log_normal, _pca_scores(Xtest_log_normal))

# clip + normalized features
Xtrain_clip_normal_pca = _pca_scores(Xtrain_clip_normal_under, refit=True)
Xtrain_clip_normal_under_pca = _append_scores(Xtrain_clip_normal_under, Xtrain_clip_normal_pca)
Xvalid_clip_normal_pca = _append_scores(Xvalid_clip_normal, _pca_scores(Xvalid_clip_normal))
Xtest_clip_normal_pca = _append_scores(Xtest_clip_normal, _pca_scores(Xtest_clip_normal))

# log + clip + normalized features
Xtrain_log_clip_normal_pca = _pca_scores(Xtrain_log_clip_normal_under, refit=True)
Xtrain_log_clip_normal_under_pca = _append_scores(Xtrain_log_clip_normal_under, Xtrain_log_clip_normal_pca)
Xvalid_log_clip_normal_pca = _append_scores(Xvalid_log_clip_normal, _pca_scores(Xvalid_log_clip_normal))
Xtest_log_clip_normal_pca = _append_scores(Xtest_log_clip_normal, _pca_scores(Xtest_log_clip_normal))

<h1>MLP</h1>

<h3 dir='rtl'>
    برای هر مدل، کد زیر در گوگل کولب اجرا و هایپرپارامترهای بهینه به این کد منتقل شده است.
</h3>

https://colab.research.google.com/drive/1HACqfi8j2Zzwt54cZg4XKmY0TR88RMeH?usp=sharing

<h3>Normal Data</h3>

In [18]:
# Encoded Imputed Normal Over-Sampled Data
# MLP with three ReLU hidden layers (9-16-56) and a sigmoid output, trained
# on the normalized, SMOTE over-sampled training set.
mlpno = Sequential([
    Input(shape=(Xtrain_normal_over.shape[1],)),   # input layer
    Dense(units=9, activation='relu'),             # hidden layers
    Dense(units=16, activation='relu'),
    Dense(units=56, activation='relu'),
    Dense(1, activation='sigmoid'),                # output layer
])

mlpno.compile(optimizer=Adam(learning_rate=0.0012369264413803532), loss='binary_crossentropy')
# Fit on the labels returned by the SAME fit_resample call that produced
# Xtrain_normal_over (ytrain_normal_over), not on the labels of another
# resampling call. The validation loss is computed but, with verbose=0 and
# the History object discarded, not inspected here.
mlpno.fit(Xtrain_normal_over, ytrain_normal_over, epochs=100, batch_size=10000, verbose=0,
          validation_data=(Xvalid_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlpno.predict(Xtest_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.38900414937759337


In [19]:
# Normalized, randomly under-sampled data.
# MLP with three ReLU hidden layers (44-9-9) and a sigmoid output.
mlpnu = Sequential([
    Input(shape=(Xtrain_normal_under.shape[1],)),  # input layer
    Dense(units=44, activation='relu'),            # hidden layers
    Dense(units=9, activation='relu'),
    Dense(units=9, activation='relu'),
    Dense(1, activation='sigmoid'),                # output layer
])

mlpnu.compile(optimizer=Adam(learning_rate=0.005704837988708971), loss='binary_crossentropy')
# Use the labels produced together with Xtrain_normal_under
# (ytrain_normal_under); labels from a different under-sampling call are only
# row-aligned by coincidence of the sampler's output ordering.
mlpnu.fit(Xtrain_normal_under, ytrain_normal_under, epochs=100, batch_size=10000, verbose=0,
          validation_data=(Xvalid_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlpnu.predict(Xtest_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.3931576769209546


In [20]:
# Normalized, under-sampled data with PCA components appended.
# MLP with three ReLU hidden layers (44-38-14) and a sigmoid output.
mlpnup = Sequential([
    Input(shape=(Xtrain_normal_under_pca.shape[1],)),  # input layer
    Dense(units=44, activation='relu'),                # hidden layers
    Dense(units=38, activation='relu'),
    Dense(units=14, activation='relu'),
    Dense(1, activation='sigmoid'),                    # output layer
])

mlpnup.compile(optimizer=Adam(learning_rate=0.0010010530931948824), loss='binary_crossentropy')
# Xtrain_normal_under_pca is built from Xtrain_normal_under, so it is paired
# with that frame's own labels (ytrain_normal_under) rather than with the
# labels of a different under-sampling call.
mlpnup.fit(Xtrain_normal_under_pca, ytrain_normal_under, epochs=100, batch_size=10000, verbose=0,
           validation_data=(Xvalid_normal_pca, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlpnup.predict(Xtest_normal_pca)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.39430363208779723


<h3>Log Normal Data</h3>

In [21]:
# Log-transformed, normalized, SMOTE over-sampled data.
# MLP with three ReLU hidden layers (9-22-17) and a sigmoid output.
mlplno = Sequential([
    Input(shape=(Xtrain_log_normal_over.shape[1],)),  # input layer
    Dense(units=9, activation='relu'),                # hidden layers
    Dense(units=22, activation='relu'),
    Dense(units=17, activation='relu'),
    Dense(1, activation='sigmoid'),                   # output layer
])

mlplno.compile(optimizer=Adam(learning_rate=0.0010655670130010795), loss='binary_crossentropy')
# Fit on the labels returned together with Xtrain_log_normal_over
# (ytrain_log_normal_over) instead of the labels of another resampling call.
mlplno.fit(Xtrain_log_normal_over, ytrain_log_normal_over, epochs=100, batch_size=10000, verbose=0,
           validation_data=(Xvalid_log_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlplno.predict(Xtest_log_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.3918421052631579


In [22]:
# Log-transformed, normalized, randomly under-sampled data.
# MLP with three ReLU hidden layers (58-54-16) and a sigmoid output.
mlplnu = Sequential([
    Input(shape=(Xtrain_log_normal_under.shape[1],)),  # input layer
    Dense(units=58, activation='relu'),                # hidden layers
    Dense(units=54, activation='relu'),
    Dense(units=16, activation='relu'),
    Dense(1, activation='sigmoid'),                    # output layer
])

mlplnu.compile(optimizer=Adam(learning_rate=0.009039811894517835), loss='binary_crossentropy')
# Use the labels produced together with Xtrain_log_normal_under
# (ytrain_log_normal_under), not those of a different under-sampling call.
mlplnu.fit(Xtrain_log_normal_under, ytrain_log_normal_under, epochs=100, batch_size=10000, verbose=0,
           validation_data=(Xvalid_log_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlplnu.predict(Xtest_log_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.38850828729281767


In [23]:
# Log-transformed, normalized, under-sampled data with PCA components appended.
# NOTE(review): despite the trailing "op" in the variable name, this model
# is trained on the UNDER-sampled + PCA frame.
# MLP with three ReLU hidden layers (50-50-23) and a sigmoid output.
mlplnop = Sequential([
    Input(shape=(Xtrain_log_normal_under_pca.shape[1],)),  # input layer
    Dense(units=50, activation='relu'),                    # hidden layers
    Dense(units=50, activation='relu'),
    Dense(units=23, activation='relu'),
    Dense(1, activation='sigmoid'),                        # output layer
])

mlplnop.compile(optimizer=Adam(learning_rate=0.002780072866542793), loss='binary_crossentropy')
# Xtrain_log_normal_under_pca is built from Xtrain_log_normal_under, so it is
# paired with that frame's own labels (ytrain_log_normal_under).
mlplnop.fit(Xtrain_log_normal_under_pca, ytrain_log_normal_under, epochs=100, batch_size=10000, verbose=0,
            validation_data=(Xvalid_log_normal_pca, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlplnop.predict(Xtest_log_normal_pca)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.39503571010420324


<h3>Clip Normal Data</h3>

In [24]:
# Clipped, normalized data without any resampling.
# MLP with three ReLU hidden layers (49-34-41) and a sigmoid output.
mlpcn = Sequential([
    Input(shape=(Xtrain_clip_normal.shape[1],)),  # input layer
    Dense(units=49, activation='relu'),           # hidden layers
    Dense(units=34, activation='relu'),
    Dense(units=41, activation='relu'),
    Dense(1, activation='sigmoid'),               # output layer
])

mlpcn.compile(optimizer=Adam(learning_rate=0.006765994491629928), loss='binary_crossentropy')
# The validation loss is computed each epoch but, with verbose=0 and the
# History object discarded, it is not inspected here.
mlpcn.fit(Xtrain_clip_normal, ytrain, epochs=100, batch_size=10000, verbose=0,
          validation_data=(Xvalid_clip_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlpcn.predict(Xtest_clip_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.10179869881362419


In [25]:
# Clipped, normalized, SMOTE over-sampled data.
# MLP with three ReLU hidden layers (11-26-36) and a sigmoid output.
mlpcno = Sequential([
    Input(shape=(Xtrain_clip_normal_over.shape[1],)),  # input layer
    Dense(units=11, activation='relu'),                # hidden layers
    Dense(units=26, activation='relu'),
    Dense(units=36, activation='relu'),
    Dense(1, activation='sigmoid'),                    # output layer
])

mlpcno.compile(optimizer=Adam(learning_rate=0.0016286903740268563), loss='binary_crossentropy')
# Fit on the labels returned together with Xtrain_clip_normal_over
# (ytrain_clip_normal_over) instead of the labels of another resampling call.
mlpcno.fit(Xtrain_clip_normal_over, ytrain_clip_normal_over, epochs=100, batch_size=10000, verbose=0,
           validation_data=(Xvalid_clip_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlpcno.predict(Xtest_clip_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.3885657715629254


In [26]:
# Clipped, normalized, randomly under-sampled data.
# MLP with three ReLU hidden layers (31-8-8) and a sigmoid output.
mlpcnu = Sequential([
    Input(shape=(Xtrain_clip_normal_under.shape[1],)),  # input layer
    Dense(units=31, activation='relu'),                 # hidden layers
    Dense(units=8, activation='relu'),
    Dense(units=8, activation='relu'),
    Dense(1, activation='sigmoid'),                     # output layer
])

mlpcnu.compile(optimizer=Adam(learning_rate=0.005572312091054415), loss='binary_crossentropy')
# Use the labels produced together with Xtrain_clip_normal_under
# (ytrain_clip_normal_under), not those of a different under-sampling call.
mlpcnu.fit(Xtrain_clip_normal_under, ytrain_clip_normal_under, epochs=100, batch_size=10000, verbose=0,
           validation_data=(Xvalid_clip_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlpcnu.predict(Xtest_clip_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.39361430395913155


In [27]:
# Clipped, normalized, under-sampled data with PCA components appended.
# MLP with three ReLU hidden layers (49-20-35) and a sigmoid output.
mlpcnup = Sequential([
    Input(shape=(Xtrain_clip_normal_under_pca.shape[1],)),  # input layer
    Dense(units=49, activation='relu'),                     # hidden layers
    Dense(units=20, activation='relu'),
    Dense(units=35, activation='relu'),
    Dense(1, activation='sigmoid'),                         # output layer
])

mlpcnup.compile(optimizer=Adam(learning_rate=0.002002357043019399), loss='binary_crossentropy')
# Xtrain_clip_normal_under_pca is built from Xtrain_clip_normal_under, so it
# is paired with that frame's own labels (ytrain_clip_normal_under).
mlpcnup.fit(Xtrain_clip_normal_under_pca, ytrain_clip_normal_under, epochs=100, batch_size=10000, verbose=0,
            validation_data=(Xvalid_clip_normal_pca, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlpcnup.predict(Xtest_clip_normal_pca)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.394907908992416


<h3>Log Clip Normal Data</h3>

In [28]:
# Log-transformed, clipped, normalized data without any resampling.
# MLP with three ReLU hidden layers (38-33-47) and a sigmoid output.
mlplcn = Sequential([
    Input(shape=(Xtrain_log_clip_normal.shape[1],)),  # input layer
    Dense(units=38, activation='relu'),               # hidden layers
    Dense(units=33, activation='relu'),
    Dense(units=47, activation='relu'),
    Dense(1, activation='sigmoid'),                   # output layer
])

mlplcn.compile(optimizer=Adam(learning_rate=0.006143152408454745), loss='binary_crossentropy')
# The validation loss is computed each epoch but, with verbose=0 and the
# History object discarded, it is not inspected here.
mlplcn.fit(Xtrain_log_clip_normal, ytrain, epochs=100, batch_size=10000, verbose=0,
           validation_data=(Xvalid_log_clip_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlplcn.predict(Xtest_log_clip_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.155005382131324


In [29]:
# Log-transformed, clipped, normalized, SMOTE over-sampled data.
# MLP with three ReLU hidden layers (10-15-10) and a sigmoid output.
mlplcno = Sequential([
    Input(shape=(Xtrain_log_clip_normal_over.shape[1],)),  # input layer
    Dense(units=10, activation='relu'),                    # hidden layers
    Dense(units=15, activation='relu'),
    Dense(units=10, activation='relu'),
    Dense(1, activation='sigmoid'),                        # output layer
])

mlplcno.compile(optimizer=Adam(learning_rate=0.0038168640881971858), loss='binary_crossentropy')
# Fit on the labels returned together with Xtrain_log_clip_normal_over
# (ytrain_log_clip_normal_over) instead of the labels of another resampling call.
mlplcno.fit(Xtrain_log_clip_normal_over, ytrain_log_clip_normal_over, epochs=100, batch_size=10000, verbose=0,
            validation_data=(Xvalid_log_clip_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlplcno.predict(Xtest_log_clip_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.3916048118761198


In [30]:
# Log-transformed, clipped, normalized, randomly under-sampled data.
# MLP with three ReLU hidden layers (13-20-36) and a sigmoid output.
mlplcnu = Sequential([
    Input(shape=(Xtrain_log_clip_normal_under.shape[1],)),  # input layer
    Dense(units=13, activation='relu'),                     # hidden layers
    Dense(units=20, activation='relu'),
    Dense(units=36, activation='relu'),
    Dense(1, activation='sigmoid'),                         # output layer
])

mlplcnu.compile(optimizer=Adam(learning_rate=0.006757752320006498), loss='binary_crossentropy')
# Use the labels produced together with Xtrain_log_clip_normal_under
# (ytrain_log_clip_normal_under), not those of a different under-sampling call.
mlplcnu.fit(Xtrain_log_clip_normal_under, ytrain_log_clip_normal_under, epochs=100, batch_size=10000, verbose=0,
            validation_data=(Xvalid_log_clip_normal, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlplcnu.predict(Xtest_log_clip_normal)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.3966372657111356


In [31]:
# Log-transformed, clipped, normalized, under-sampled data with PCA components appended.
# MLP with three ReLU hidden layers (49-39-51) and a sigmoid output.
mlplcnup = Sequential([
    Input(shape=(Xtrain_log_clip_normal_under_pca.shape[1],)),  # input layer
    Dense(units=49, activation='relu'),                         # hidden layers
    Dense(units=39, activation='relu'),
    Dense(units=51, activation='relu'),
    Dense(1, activation='sigmoid'),                             # output layer
])

mlplcnup.compile(optimizer=Adam(learning_rate=0.0012724824346682212), loss='binary_crossentropy')

# Xtrain_log_clip_normal_under_pca is built from Xtrain_log_clip_normal_under,
# so it is paired with that frame's own labels (ytrain_log_clip_normal_under).
mlplcnup.fit(Xtrain_log_clip_normal_under_pca, ytrain_log_clip_normal_under, epochs=100, batch_size=10000, verbose=0,
             validation_data=(Xvalid_log_clip_normal_pca, yvalid))

# Threshold the predicted probabilities at 0.5 and score F1 on the test split.
pred = mlplcnup.predict(Xtest_log_clip_normal_pca)
pred = (pred > 0.5).astype(int)

print('Xtest result: ', f1_score(y_true=ytest, y_pred=pred))

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Xtest result:  0.392287418504647
