In [1]:
import pandas as pd
import os

In [2]:
# Project directories, given as relative paths (resolved against the working directory).
DATA_FINAL_DIR = 'data/'  # train.csv and test.csv are read from here
MODEL_FINAL_DIR = 'models/'  # not referenced by the cells visible in this notebook section
RESULT_FINAL_DIR = 'result/'  # per-model result CSVs and the merged result files live here

In [3]:
# Read the test and train splits from DATA_FINAL_DIR.
test_csv_path = os.path.join(DATA_FINAL_DIR, 'test.csv')
train_csv_path = os.path.join(DATA_FINAL_DIR, 'train.csv')
test = pd.read_csv(test_csv_path)
train = pd.read_csv(train_csv_path)

In [5]:
# Count the rows for each value of `islulus` in the training data.
train['islulus'].value_counts()

0.0    10070
1.0     9930
Name: islulus, dtype: int64

In [6]:
# Count the rows for each value of `islulus` in the test data.
test['islulus'].value_counts()

1.0    172
0.0    172
Name: islulus, dtype: int64

In [10]:
# Hyper-parameter grid. `list_model` drives which per-model result files are loaded below.
list_model = [
    'dense',
    'cnn',
    'lstm',
    'bilstm',
    'gru',
    'bigru'
]
list_optimizer = ['adam', 'sgd', 'rmsprop']
list_learning_rate = [0.01, 0.001, 0.0001, 0.00001]
list_batch_size = [8, 16, 32, 64, 128]

# Load every per-model results file that exists and tag its rows with the model name.
# The frames are collected in a list and concatenated once: calling pd.concat inside
# the loop re-copies all accumulated rows on every iteration, and concatenating onto
# an empty DataFrame triggers a FutureWarning in recent pandas versions.
frames = []
for model in list_model:
    result_file = os.path.join(RESULT_FINAL_DIR, f'results_{model}.csv')
    if not os.path.exists(result_file):
        # A model without a results file is skipped rather than treated as an error.
        continue
    frames.append(pd.read_csv(result_file).assign(model=model))

# pd.concat raises ValueError on an empty list, so keep the empty-frame fallback
# for the case where no results file was found at all.
result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [11]:
# Save the concatenated per-model rows (without the index); later cells read this file back.
result.to_csv(os.path.join(RESULT_FINAL_DIR, 'result.csv'), index=False)

In [12]:
# Average the numeric columns over all rows that share the same
# (model, optimizer, learning_rate, batch_size) combination.
#
# reset_index() first turns the original row position into an 'index' column, so the
# mean position of each group is available for ordering the averaged rows afterwards.
#
# numeric_only=True is passed explicitly: the frame contains non-numeric columns
# (e.g. the stringified 'confusion_matrix'). Older pandas dropped them with a
# FutureWarning; pandas >= 2.0 raises TypeError when they reach mean().
result = (
    result
    .reset_index()
    .groupby(['model', 'optimizer', 'learning_rate', 'batch_size'])
    .mean(numeric_only=True)
    .reset_index()
)

  result = result.groupby(['model', 'optimizer', 'learning_rate', 'batch_size']).mean().reset_index()


In [13]:
# Order the averaged rows by their 'index' helper column, then discard that column.
result = result.sort_values(by='index').drop(columns='index')

In [14]:
# Display the averaged results table.
result

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score
195,dense,adam,0.01000,8,3.0,0.860465,0.840725,0.889535,0.864423
196,dense,adam,0.01000,16,3.0,0.864535,0.846910,0.890698,0.868046
197,dense,adam,0.01000,32,3.0,0.863953,0.845667,0.890698,0.867534
198,dense,adam,0.01000,64,3.0,0.862209,0.839864,0.895349,0.866656
199,dense,adam,0.01000,128,3.0,0.870349,0.851601,0.897674,0.873844
...,...,...,...,...,...,...,...,...,...
20,bigru,rmsprop,0.00001,8,3.0,0.687791,0.653401,0.800000,0.719248
21,bigru,rmsprop,0.00001,16,3.0,0.685465,0.654441,0.787209,0.714486
22,bigru,rmsprop,0.00001,32,3.0,0.687791,0.660503,0.773256,0.712354
23,bigru,rmsprop,0.00001,64,3.0,0.686628,0.657840,0.777907,0.712845


In [15]:
# Export the current `result` table to an Excel workbook, omitting the index.
result.to_excel(os.path.join(RESULT_FINAL_DIR, 'result.xlsx'), index=False)

In [16]:
# Reload the rows that were written to result.csv by an earlier cell.
result_raw = pd.read_csv(os.path.join(RESULT_FINAL_DIR, 'result.csv'))

In [17]:
# For every (model, optimizer, batch_size, learning_rate) combination, keep only
# the single row with the highest f1_score.
group_keys = ['model', 'optimizer', 'batch_size', 'learning_rate']
best_row_labels = result_raw.groupby(group_keys)['f1_score'].idxmax()
result_raw = result_raw.loc[best_row_labels]

In [18]:
# Put the selected rows back in row-label order, then export them without the index.
result_raw = result_raw.sort_index()
raw_xlsx_path = os.path.join(RESULT_FINAL_DIR, 'result_raw.xlsx')
result_raw.to_excel(raw_xlsx_path, index=False)

In [19]:
# Top 20 rows of `result`, highest f1_score first.
ranked_by_f1 = result.sort_values(by='f1_score', ascending=False)
ranked_by_f1.head(20)

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score
150,cnn,rmsprop,0.001,8,3.0,0.875,0.848819,0.912791,0.879554
182,dense,adam,1e-05,32,3.0,0.874419,0.84929,0.910465,0.878805
155,cnn,rmsprop,0.01,8,3.0,0.873837,0.857822,0.896512,0.876609
146,cnn,rmsprop,0.0001,16,3.0,0.872674,0.85196,0.902326,0.876356
177,cnn,sgd,0.01,32,3.0,0.872093,0.854826,0.896512,0.875133
205,dense,rmsprop,0.0001,8,3.0,0.872674,0.858913,0.89186,0.87507
141,cnn,rmsprop,1e-05,16,3.0,0.873256,0.863261,0.887209,0.875029
179,cnn,sgd,0.01,128,3.0,0.872674,0.861426,0.888372,0.874646
145,cnn,rmsprop,0.0001,8,3.0,0.869767,0.845878,0.904651,0.874165
156,cnn,rmsprop,0.01,16,3.0,0.870349,0.850419,0.9,0.874072


In [45]:
# Top 20 rows of `result`, highest accuracy first.
ranked_by_accuracy = result.sort_values(by='accuracy', ascending=False)
ranked_by_accuracy.head(20)

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score
118,cnn,rmsprop,0.001,8,3.0,0.875,0.848819,0.912791,0.879554
123,cnn,rmsprop,0.01,8,3.0,0.873837,0.857822,0.896512,0.876609
161,dense,rmsprop,0.0001,8,3.0,0.872674,0.858913,0.89186,0.87507
114,cnn,rmsprop,0.0001,16,3.0,0.872674,0.85196,0.902326,0.876356
143,cnn,sgd,0.01,128,3.0,0.872674,0.861426,0.888372,0.874646
141,cnn,sgd,0.01,32,3.0,0.872093,0.854826,0.896512,0.875133
152,dense,adam,0.001,32,3.0,0.87093,0.854632,0.894186,0.873885
159,dense,adam,0.01,128,3.0,0.870349,0.851601,0.897674,0.873844
124,cnn,rmsprop,0.01,16,3.0,0.870349,0.850419,0.9,0.874072
170,dense,rmsprop,0.001,128,3.0,0.870349,0.851235,0.897674,0.87381


In [20]:
# Top 20 rows of `result`, highest precision first.
ranked_by_precision = result.sort_values(by='precision', ascending=False)
ranked_by_precision.head(20)

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score
346,lstm,sgd,0.0001,16,3.0,0.637209,0.930848,0.305814,0.423875
122,cnn,adam,1e-05,32,3.0,0.872093,0.86366,0.883721,0.873561
141,cnn,rmsprop,1e-05,16,3.0,0.873256,0.863261,0.887209,0.875029
179,cnn,sgd,0.01,128,3.0,0.872674,0.861426,0.888372,0.874646
121,cnn,adam,1e-05,16,3.0,0.870349,0.860015,0.884884,0.872225
205,dense,rmsprop,0.0001,8,3.0,0.872674,0.858913,0.89186,0.87507
142,cnn,rmsprop,1e-05,32,3.0,0.869186,0.858761,0.883721,0.871048
206,dense,rmsprop,0.0001,16,3.0,0.869767,0.858198,0.886047,0.87186
120,cnn,adam,1e-05,8,3.0,0.865698,0.857837,0.876744,0.867175
155,cnn,rmsprop,0.01,8,3.0,0.873837,0.857822,0.896512,0.876609


In [47]:
# Top 20 rows of `result`, highest recall first.
ranked_by_recall = result.sort_values(by='recall', ascending=False)
ranked_by_recall.head(20)

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score
131,cnn,sgd,0.0001,32,3.0,0.537791,0.520153,0.984884,0.680701
228,gru,sgd,0.0001,64,3.0,0.622674,0.585284,0.943023,0.716852
130,cnn,sgd,0.0001,16,3.0,0.607558,0.573097,0.938372,0.707856
178,dense,sgd,0.0001,16,3.0,0.652326,0.600517,0.936047,0.729704
129,cnn,sgd,0.0001,8,3.0,0.726744,0.662071,0.926744,0.772112
132,cnn,sgd,0.0001,64,3.0,0.534302,0.519151,0.919767,0.662136
221,gru,rmsprop,0.01,32,3.0,0.792442,0.774513,0.918605,0.829137
184,dense,sgd,0.001,32,3.0,0.848837,0.806946,0.918605,0.85889
186,dense,sgd,0.001,128,3.0,0.748256,0.696418,0.917442,0.787178
138,cnn,sgd,0.001,128,3.0,0.729651,0.667545,0.917442,0.771739


In [21]:
# Top 20 rows for the 'bigru' model only, highest f1_score first.
is_bigru = result['model'] == 'bigru'
result[is_bigru].sort_values(by='f1_score', ascending=False).head(20)

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score
18,bigru,adam,0.01,64,3.0,0.863953,0.846388,0.889535,0.867363
10,bigru,adam,0.001,8,3.0,0.860465,0.828486,0.909302,0.866977
14,bigru,adam,0.001,128,3.0,0.860465,0.839274,0.89186,0.864747
17,bigru,adam,0.01,32,3.0,0.856977,0.823895,0.909302,0.864244
12,bigru,adam,0.001,32,3.0,0.859302,0.83518,0.895349,0.864207
19,bigru,adam,0.01,128,3.0,0.860465,0.843709,0.884884,0.86373
30,bigru,rmsprop,0.001,8,3.0,0.85814,0.832437,0.897674,0.86365
11,bigru,adam,0.001,16,3.0,0.857558,0.828945,0.901163,0.863502
13,bigru,adam,0.001,64,3.0,0.855233,0.83005,0.894186,0.86077
33,bigru,rmsprop,0.001,64,3.0,0.852326,0.821052,0.901163,0.859202


# DIRTY: merge `result_dirty.csv` into the results

In [3]:
# Load the rows from result_dirty.csv and the previously saved result.csv.
dirty_csv_path = os.path.join(RESULT_FINAL_DIR, 'result_dirty.csv')
result_csv_path = os.path.join(RESULT_FINAL_DIR, 'result.csv')
result_dirty = pd.read_csv(dirty_csv_path)
result = pd.read_csv(result_csv_path)

In [4]:
# Inspect the rows loaded from result_dirty.csv.
result_dirty

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score,confusion_matrix
0,dense,adam,0.00001,16,1,0.872093,0.851648,0.901163,0.875706,"[[145, 27], [17, 155]]"
1,dense,adam,0.00001,16,2,0.860465,0.829787,0.906977,0.866667,"[[140, 32], [16, 156]]"
2,dense,adam,0.00001,16,3,0.872093,0.847826,0.906977,0.876404,"[[144, 28], [16, 156]]"
3,dense,adam,0.00001,16,4,0.872093,0.847826,0.906977,0.876404,"[[144, 28], [16, 156]]"
4,dense,adam,0.00001,16,5,0.866279,0.838710,0.906977,0.871508,"[[142, 30], [16, 156]]"
...,...,...,...,...,...,...,...,...,...,...
355,bigru,rmsprop,0.00001,128,1,0.697674,0.639344,0.906977,0.750000,"[[84, 88], [16, 156]]"
356,bigru,rmsprop,0.00001,128,2,0.688953,0.670157,0.744186,0.705234,"[[109, 63], [44, 128]]"
357,bigru,rmsprop,0.00001,128,3,0.694767,0.675393,0.750000,0.710744,"[[110, 62], [43, 129]]"
358,bigru,rmsprop,0.00001,128,4,0.691860,0.691860,0.691860,0.691860,"[[119, 53], [53, 119]]"


In [5]:
# Inspect the rows loaded from result.csv.
result

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score,confusion_matrix
0,dense,adam,0.01000,8,1,0.851744,0.830601,0.883721,0.856338,"[[141, 31], [20, 152]]"
1,dense,adam,0.01000,8,2,0.854651,0.838889,0.877907,0.857955,"[[143, 29], [21, 151]]"
2,dense,adam,0.01000,8,3,0.860465,0.836957,0.895349,0.865169,"[[142, 30], [18, 154]]"
3,dense,adam,0.01000,8,4,0.877907,0.861111,0.901163,0.880682,"[[147, 25], [17, 155]]"
4,dense,adam,0.01000,8,5,0.857558,0.836066,0.889535,0.861972,"[[142, 30], [19, 153]]"
...,...,...,...,...,...,...,...,...,...,...
1435,bigru,rmsprop,0.00001,8,1,0.688953,0.651163,0.813953,0.723514,"[[97, 75], [32, 140]]"
1436,bigru,rmsprop,0.00001,8,2,0.686047,0.655340,0.784884,0.714286,"[[101, 71], [37, 135]]"
1437,bigru,rmsprop,0.00001,8,3,0.688953,0.652582,0.808140,0.722078,"[[98, 74], [33, 139]]"
1438,bigru,rmsprop,0.00001,8,4,0.686047,0.655340,0.784884,0.714286,"[[101, 71], [37, 135]]"


In [6]:
# Append the result_dirty rows after the existing rows and renumber the index.
# NOTE(review): re-running this cell appends result_dirty again; reload `result` first.
frames_to_merge = [result, result_dirty]
result = pd.concat(frames_to_merge, ignore_index=True)

In [7]:
# Inspect the combined table after appending the result_dirty rows.
result

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score,confusion_matrix
0,dense,adam,0.01000,8,1,0.851744,0.830601,0.883721,0.856338,"[[141, 31], [20, 152]]"
1,dense,adam,0.01000,8,2,0.854651,0.838889,0.877907,0.857955,"[[143, 29], [21, 151]]"
2,dense,adam,0.01000,8,3,0.860465,0.836957,0.895349,0.865169,"[[142, 30], [18, 154]]"
3,dense,adam,0.01000,8,4,0.877907,0.861111,0.901163,0.880682,"[[147, 25], [17, 155]]"
4,dense,adam,0.01000,8,5,0.857558,0.836066,0.889535,0.861972,"[[142, 30], [19, 153]]"
...,...,...,...,...,...,...,...,...,...,...
1795,bigru,rmsprop,0.00001,128,1,0.697674,0.639344,0.906977,0.750000,"[[84, 88], [16, 156]]"
1796,bigru,rmsprop,0.00001,128,2,0.688953,0.670157,0.744186,0.705234,"[[109, 63], [44, 128]]"
1797,bigru,rmsprop,0.00001,128,3,0.694767,0.675393,0.750000,0.710744,"[[110, 62], [43, 129]]"
1798,bigru,rmsprop,0.00001,128,4,0.691860,0.691860,0.691860,0.691860,"[[119, 53], [53, 119]]"


In [8]:
import pandas as pd
from pandas.api.types import CategoricalDtype

# Presentation order for each hyper-parameter; sorting follows the list order,
# not alphabetical or numeric order.
list_model = ['dense', 'cnn', 'lstm', 'bilstm', 'gru', 'bigru']
list_optimizer = ['adam', 'sgd', 'rmsprop']
list_learning_rate = [0.01, 0.001, 0.0001, 0.00001]
list_batch_size = [8, 16, 32, 64, 128]

# Column name -> allowed values, in the order they should sort.
category_orders = {
    'model': list_model,
    'optimizer': list_optimizer,
    'learning_rate': list_learning_rate,
    'batch_size': list_batch_size,
}

for column, categories in category_orders.items():
    # Casting to a CategoricalDtype silently replaces any value that is not in
    # `categories` with NaN. Fail loudly instead, so that rows with an unexpected
    # model/optimizer/learning rate/batch size are not quietly corrupted.
    unexpected = set(result[column].dropna().unique()) - set(categories)
    if unexpected:
        raise ValueError(
            f"Column '{column}' contains values outside the expected list: "
            f"{sorted(unexpected, key=str)}"
        )
    result[column] = result[column].astype(
        CategoricalDtype(categories=categories, ordered=True))

# Sort by the custom category order, then by fold, and renumber the rows.
result = result.sort_values(
    by=['model', 'optimizer', 'learning_rate', 'batch_size', 'fold']).reset_index(drop=True)
result

Unnamed: 0,model,optimizer,learning_rate,batch_size,fold,accuracy,precision,recall,f1_score,confusion_matrix
0,dense,adam,0.01000,8,1,0.851744,0.830601,0.883721,0.856338,"[[141, 31], [20, 152]]"
1,dense,adam,0.01000,8,2,0.854651,0.838889,0.877907,0.857955,"[[143, 29], [21, 151]]"
2,dense,adam,0.01000,8,3,0.860465,0.836957,0.895349,0.865169,"[[142, 30], [18, 154]]"
3,dense,adam,0.01000,8,4,0.877907,0.861111,0.901163,0.880682,"[[147, 25], [17, 155]]"
4,dense,adam,0.01000,8,5,0.857558,0.836066,0.889535,0.861972,"[[142, 30], [19, 153]]"
...,...,...,...,...,...,...,...,...,...,...
1795,bigru,rmsprop,0.00001,128,1,0.697674,0.639344,0.906977,0.750000,"[[84, 88], [16, 156]]"
1796,bigru,rmsprop,0.00001,128,2,0.688953,0.670157,0.744186,0.705234,"[[109, 63], [44, 128]]"
1797,bigru,rmsprop,0.00001,128,3,0.694767,0.675393,0.750000,0.710744,"[[110, 62], [43, 129]]"
1798,bigru,rmsprop,0.00001,128,4,0.691860,0.691860,0.691860,0.691860,"[[119, 53], [53, 119]]"


In [9]:
# Write the rows of each model to its own results_<model>.csv (index omitted).
# These are the same paths the results-collection cell reads, so existing files are overwritten.
for model_name in list_model:
    rows_for_model = result.loc[result['model'] == model_name]
    rows_for_model.to_csv(
        os.path.join(RESULT_FINAL_DIR, f'results_{model_name}.csv'),
        index=False,
    )