In [39]:
from sklearn.metrics import mean_absolute_error
from itertools import combinations
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, MinMaxScaler,StandardScaler
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf
import tensorflow
from tensorflow_addons.optimizers import AdamW
from keras import backend as K
from sklearn.model_selection import train_test_split
import os

In [8]:
df = pd.read_csv('csv/Regression_data_preprocessing.csv')

In [24]:
def preprocessing_add_features(df):
    
    df['water'] = df['Whole weight'] - (df['Shucked weight'] + df['Viscera weight'] + df['Shell weight'])
    df['ratio'] = df['Shucked weight'] / df['Whole weight']
    
    #whole weight의 경우 상당수가 다른 변수들의 값과 많이 겹치므로 categorize 실시
    df['Whole weight'] = df['Whole weight'].map(lambda x: 1 if x <= 0.5 else (2 if x <= 1 else (3 if x<=2.5 else 4)))

    #임시변수
    df['new1'] = df['Length'] + df['Diameter'] + df['Height']
    df['new2'] = df['Length'] * df['Diameter'] * df['Height']
    df['new3'] = df['Shell weight']+df['Height']
    df['new4'] = df['Viscera weight'] + df['Shucked weight']

    return df

In [20]:
df

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,Sex_F,Sex_I,Sex_M
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15,0,0,1
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7,0,0,1
2,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9,1,0,0
3,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10,0,0,1
4,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11,1,0,0
4173,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10,0,0,1
4174,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9,0,0,1
4175,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10,1,0,0


In [25]:
df = preprocessing_add_features(df)

In [27]:
df = df.rename(columns={'Rings' : 'Target'})

In [28]:
df = df.set_index('Target').reset_index()

In [30]:
df.groupby("Target").mean()

Unnamed: 0_level_0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Sex_F,Sex_I,Sex_M,water,ratio,new1,new2,new3,new4
Target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,0.075,0.055,0.01,2.0,0.001,0.0005,0.0015,0.0,1.0,0.0,0.997,0.001,0.14,4.1e-05,0.0115,0.0015
2,0.15,0.1,0.025,2.0,0.0045,0.004,0.005,0.0,1.0,0.0,0.9865,0.0045,0.275,0.000375,0.03,0.0085
3,0.176,0.128667,0.041667,2.0,0.011767,0.006267,0.008933,0.0,0.8,0.2,0.973033,0.011767,0.346333,0.001047,0.0506,0.018033
4,0.221491,0.161579,0.053947,2.0,0.024719,0.012956,0.018,0.0,0.894737,0.105263,0.944325,0.024719,0.437018,0.002239,0.071947,0.037675
5,0.285739,0.210696,0.069913,2.0,0.061696,0.02733,0.03677,0.034783,0.869565,0.095652,0.874204,0.061696,0.566348,0.004771,0.106683,0.089026
6,0.369363,0.278861,0.092065,2.084942,0.123158,0.058371,0.078388,0.061776,0.833977,0.104247,0.825025,0.109508,0.740289,0.010725,0.170453,0.181529
7,0.422033,0.321535,0.105921,2.276215,0.182657,0.085899,0.111648,0.112532,0.682864,0.204604,0.89601,0.138183,0.849488,0.015846,0.217569,0.268556
8,0.498776,0.384798,0.127007,2.765845,0.293773,0.138502,0.178609,0.214789,0.482394,0.302817,1.154961,0.162672,1.010581,0.026233,0.305616,0.432275
9,0.546865,0.425218,0.142721,3.153846,0.387938,0.187803,0.236509,0.345428,0.251089,0.403483,1.341597,0.174605,1.114804,0.035425,0.379231,0.57574
10,0.574629,0.44929,0.153526,3.38959,0.447217,0.223128,0.282976,0.391167,0.14511,0.463722,1.43627,0.180797,1.177445,0.042198,0.436501,0.670345


In [35]:
#조합변수 생성(3개의 칼럼사용)
def abcd(df,col1,col2,col3,set_col,df_update = False, test = False):
    result1 = []
    df[set_col] = 2*df[col1]+df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 2*df[col1]-df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 2*df[col1]/df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 2*df[col1]*df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] =3*df[col1]+df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 3*df[col1]-df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 3*df[col1]/df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 3*df[col1]*df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)

    
    
    df[set_col] = df[col1]+df[col2]+df[col3] #8
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = df[col1]*df[col2]*df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = df[col1]-df[col2]-df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = df[col1]/df[col2]/df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    
    df[set_col] = df[col1]/df[col2]+df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = (df[col1]+df[col2])*df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = df[col1]-df[col2]*df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 2*df[col1]*df[col2]/df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    
    df[set_col] = df[col1]/df[col2]-df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = df[col1]*df[col2]-df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = df[col1]+df[col2]-df[col3]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)


                     
    max_corr = 0
    for n, i in enumerate(result1):
        if max_corr < abs(i):
            max_corr = abs(i)
            num = n
            
    if df_update != False:

        if num == 0:
            df[set_col] = 2*df[col1]+df[col2]
            test[set_col] = 2*test[col1]+test[col2]
        elif num == 1:
            df[set_col] = 2*df[col1]-df[col2]
            test[set_col] = 2*test[col1]-test[col2]
        elif num == 2:
            df[set_col] = 2*df[col1]/df[col2]
            test[set_col] = 2*test[col1]/test[col2]
        elif num == 3:
            df[set_col] = 2*df[col1]*df[col2]
            test[set_col] = 2*test[col1]*test[col2]
        if num == 4:
            df[set_col] = 3*df[col1]+df[col2]
            test[set_col] = 3*test[col1]+test[col2]
        elif num == 5:
            df[set_col] = 3*df[col1]-df[col2]
            test[set_col] = 3*test[col1]-test[col2]
        elif num == 6:
            df[set_col] = 3*df[col1]/df[col2]
            test[set_col] = 3*test[col1]/test[col2]
        elif num == 7:
            df[set_col] = 3*df[col1]*df[col2]
            test[set_col] = 3*test[col1]*test[col2]
            
        elif num == 8:
            df[set_col] = df[col1]+df[col2]+df[col3]
            test[set_col] = test[col1]+test[col2]+df[col3]
        elif num == 9:
            df[set_col] = df[col1]*df[col2]*df[col3]
            test[set_col] = test[col1]*test[col2]*test[col3]
        elif num == 10:
            df[set_col] = df[col1]-df[col2]-df[col3]
            test[set_col] = test[col1]-test[col2]-test[col3]
        elif num == 11:
            df[set_col] = df[col1]/df[col2]/df[col3]
            test[set_col] = test[col1]/test[col2]/test[col3]
            
                                           
        elif num == 12:
            df[set_col] = df[col1]/df[col2]+df[col3]
            test[set_col] = test[col1]/test[col2]+test[col3]                             
        elif num == 13:
            df[set_col] = (df[col1]+df[col2])*df[col3]
            test[set_col] = (test[col1]+test[col2])*test[col3]   
        elif num == 14:
            df[set_col] = df[col1]-df[col2]*df[col3]
            test[set_col] = test[col1]-test[col2]*test[col3] 
        elif num == 15:
            df[set_col] = 2*df[col1]*df[col2]/df[col3]
            test[set_col] = 2*test[col1]*test[col2]/test[col3] 
            
        elif num == 16:
            df[set_col] = df[col1]/df[col2]-df[col3]
            test[set_col] = test[col1]/test[col2]-test[col3]                             
        elif num == 17:
            df[set_col] = df[col1]*df[col2]-df[col3]
            test[set_col] = test[col1]*test[col2]-test[col3]   
        elif num == 18:
            df[set_col] = df[col1]+df[col2]-df[col3]
            test[set_col] = test[col1]+test[col2]-test[col3] 


        return df, test
                     
                     
    else:
        return max_corr

In [40]:
train, test = train_test_split(df, test_size=0.2, random_state = 42)

In [33]:
X_features = df.drop(['Target','Sex_F',	'Sex_I', 'Sex_M'],axis=1)

In [50]:
max_result = 0
max_result_total = 0

for col1 in X_features.columns:
    for col2 in X_features.columns: 
        if (col1=='Target')|(col2=='bcd'):
            continue
        else:
            result = abcd1(df, col1, col2,'new6')
#             print(result,col1,col2)
            if max_result < abs(result):
                max_result = result
                print(max_result)
                columns = [col1, col2]
df,test = abcd1(df,columns[0],columns[1],'new6',df_update=True,test = test)

0.5567195769296182
0.5635045502751335
0.5824580524394525
0.5970773185643968
0.6330777205886574
0.6375729688557358
0.6689083585983355
0.6883295542726244
0.7038205938320047


In [51]:
df_clean = df.drop(['new4','new2','new1'], axis = 1)

In [52]:
df_clean.to_csv('clean_model1.csv')

In [36]:
#조합변수 생성, 2개의 칼럼사용
def abcd1(df,col1,col2,set_col,df_update = False, test = False):
    result1 = []
    df[set_col] = 2*df[col1]+df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 2*df[col1]-df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 2*df[col1]/df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 2*df[col1]*df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] =3*df[col1]+df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 3*df[col1]-df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 3*df[col1]/df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    df[set_col] = 3*df[col1]*df[col2]
    result = df.set_index(set_col).reset_index().corr()[set_col]['Target']
    result1.append(result)
    max_corr = 0
    for n, i in enumerate(result1):
        if max_corr < abs(i):
            max_corr = abs(i)
            num = n
            
    if df_update != False:

        if num == 0:
            df[set_col] = 2*df[col1]+df[col2]
            test[set_col] = 2*test[col1]+test[col2]
        elif num == 1:
            df[set_col] = 2*df[col1]-df[col2]
            test[set_col] = 2*test[col1]-test[col2]
        elif num == 2:
            df[set_col] = 2*df[col1]/df[col2]
            test[set_col] = 2*test[col1]/test[col2]
        elif num == 3:
            df[set_col] = 2*df[col1]*df[col2]
            test[set_col] = 2*test[col1]*test[col2]
        if num == 4:
            df[set_col] = 3*df[col1]+df[col2]
            test[set_col] = 3*test[col1]+test[col2]
        elif num == 5:
            df[set_col] = 3*df[col1]-df[col2]
            test[set_col] = 3*test[col1]-test[col2]
        elif num == 6:
            df[set_col] = 3*df[col1]/df[col2]
            test[set_col] = 3*test[col1]/test[col2]
        elif num == 7:
            df[set_col] = 3*df[col1]*df[col2]
            test[set_col] = 3*test[col1]*test[col2]

        return df, test
    else:
        return max_corr

In [26]:
corr = df.corr()['Rings']
corr

Length            0.556720
Diameter          0.574660
Height            0.610292
Whole weight      0.512265
Shucked weight    0.420884
Viscera weight    0.503819
Shell weight      0.627574
Rings             1.000000
Sex_F             0.250279
Sex_I            -0.436063
Sex_M             0.181831
water             0.402079
ratio             0.302868
new1              0.580124
new2              0.553462
new3              0.635700
new4              0.455207
Name: Rings, dtype: float64

In [56]:
max_result = 0
max_result_total = 0

# abc = shell+ height


X_features = df.drop(['Target','Sex_F',	'Sex_I', 'Sex_M', 'new6'],axis=1)
for col1 in X_features.columns:
    for col2 in X_features.columns: 
        for col3 in X_features.columns:

            if (col1=='Target')|(col2=='bcd'):
                continue
            else:

                result = abcd(df, col1, col2,col3,'a1')
    #             print(result,col1,col2)

                if max_result < abs(result):

                    max_result = result
                    print(max_result)
                    columns = [col1, col2,col3]
df,test = abcd(df,columns[0],columns[1],columns[2],'a1',df_update=True,test = test)

max_result = 0
max_result_total = 0

print('a2')
max_result = 0
max_result_total = 0
X_features = df.drop(['Target','Sex_F',	'Sex_I', 'Sex_M','new6'],axis=1)
for col2 in X_features.columns: 
    for col3 in X_features.columns:
        col1 = 'a1'
        if (col1=='Target')|(col2=='bcd'):
            continue
        else:

            result = abcd(df, col1, col2,col3,'a2')
#             print(result,col1,col2)

            if max_result < abs(result):

                max_result = result
                print(max_result)
                columns = [col1, col2,col3]
df,test = abcd(df,columns[0],columns[1],columns[2],'a2',df_update=True,test = test)

X_features = df.drop(['Target','Sex_F',	'Sex_I', 'Sex_M', 'new6'],axis=1)
max_result = 0
max_result_total = 0
print('a3')                
col_lists = ['a1','a2']             
max_result = 0
max_result_total = 0


for col1 in col_lists:
    for col2 in X_features.columns: 
        for col3 in X_features.columns:

            if (col1=='Target')|(col2=='bcd'):
                continue
            else:

                result = abcd(df, col1, col2,col3,'a3')
    #             print(result,col1,col2)

                if max_result < abs(result):

                    max_result = result
                    print(max_result)
                    columns = [col1, col2,col3]
df,test = abcd(df,columns[0],columns[1],columns[2],'a3',df_update=True,test = test)
print('a4') 
X_features = df.drop(['Target','Sex_F',	'Sex_I', 'Sex_M','new6'],axis=1)
col_lists = ['a1','a2','a3']  
for col1 in col_lists:
    for col2 in X_features.columns: 
        for col3 in X_features.columns:

            if (col1=='Target')|(col2=='bcd'):
                continue
            else:

                result = abcd(df, col1, col2,col3,'a4')
    #             print(result,col1,col2)

                if max_result < abs(result):

                    max_result = result
                    print(max_result)
                    columns = [col1, col2,col3]
df,test = abcd(df,columns[0],columns[1],columns[2],'a4',df_update=True,test = test)
print('a5') 
X_features = df.drop(['Target','Sex_F',	'Sex_I', 'Sex_M','new6'],axis=1)
col_lists = ['a4']  
for col1 in col_lists:
    for col2 in X_features.columns: 
        for col3 in X_features.columns:

            if (col1=='Target')|(col2=='bcd'):
                continue
            else:

                result = abcd(df, col1, col2,col3,'a5')
    #             print(result,col1,col2)

                if max_result < abs(result):

                    max_result = result
                    print(max_result)
                    columns = [col1, col2,col3]
df,test = abcd(df,columns[0],columns[1],columns[2],'a5',df_update=True,test = test)

X_features = df.drop(['Target','Sex_F',	'Sex_I', 'Sex_M','new6'],axis=1)
col_lists = ['a1','a2','a3','a4','a5']  
for col1 in col_lists:
    for col2 in X_features.columns: 
        for col3 in X_features.columns:

            if (col1=='Target')|(col2=='bcd'):
                continue
            else:

                result = abcd(df, col1, col2,col3,'a6')
    #             print(result,col1,col2)

                if max_result < abs(result):

                    max_result = result
                    print(max_result)
                    columns = [col1, col2,col3]
df,test = abcd(df,columns[0],columns[1],columns[2],'a6',df_update=True,test = test)

0.5567195769296183
0.5746598513059247
0.6102920256576804
0.6275740445103185
0.635699794021367
0.7235713796162117
0.7248146264945484
0.7257223988669331
a2
0.7059253713204919
0.7074871201421244
0.7076163635121877
0.7080355791148196
0.708986758729633
a3
0.7059253713204919
0.7074871201421244
0.7076163635121877
0.7080355791148196
0.708986758729633
0.718102385784465
0.7224647160160703
a4
0.7224647160160717
0.7244981660043733
0.7248889849827949
0.7250927241639409
0.7261848693362479
0.726260756388801
a5
0.7264346143287239
0.72644687717704
0.7267486628761616
0.7277256731770174
0.7290272518295857
0.7299107544364131
0.7303509602827355
0.7303639528242662
0.7304979512776675
0.7308650663563389


In [57]:
df.drop(['new1','new2','new3','new4','a1','a3','a4','a5','a6'],axis=1,inplace=True)

In [59]:
df.corr()

Unnamed: 0,Target,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Sex_F,Sex_I,Sex_M,water,ratio,new6,a2
Target,1.0,0.55672,0.57466,0.610292,0.512265,0.420884,0.503819,0.627574,0.250279,-0.436063,0.181831,0.402079,0.302868,0.703821,-0.708987
Length,0.55672,1.0,0.986812,0.900808,0.877635,0.897914,0.903018,0.897706,0.309666,-0.551465,0.236543,0.646363,0.772807,0.657793,-0.775896
Diameter,0.57466,0.986812,1.0,0.907106,0.878494,0.893162,0.899724,0.90533,0.318626,-0.564315,0.240376,0.649156,0.763109,0.6779,-0.797337
Height,0.610292,0.900808,0.907106,1.0,0.833599,0.83676,0.866261,0.890891,0.316806,-0.557686,0.2357,0.608466,0.689508,0.763744,-0.847649
Whole weight,0.512265,0.877635,0.878494,0.833599,1.0,0.866991,0.874543,0.862312,0.306389,-0.567531,0.255274,0.897198,0.522701,0.613037,-0.66596
Shucked weight,0.420884,0.897914,0.893162,0.83676,0.866991,1.0,0.931961,0.882617,0.263991,-0.521842,0.251793,0.58314,0.82614,0.483261,-0.630747
Viscera weight,0.503819,0.903018,0.899724,0.866261,0.874543,0.931961,1.0,0.907656,0.308444,-0.556081,0.242194,0.603966,0.721885,0.616937,-0.726357
Shell weight,0.627574,0.897706,0.90533,0.890891,0.862312,0.882617,0.907656,1.0,0.306319,-0.546953,0.235391,0.598829,0.670495,0.828688,-0.895674
Sex_F,0.250279,0.309666,0.318626,0.316806,0.306389,0.263991,0.308444,0.306319,1.0,-0.464298,-0.512528,0.252752,0.188495,0.280121,-0.300124
Sex_I,-0.436063,-0.551465,-0.564315,-0.557686,-0.567531,-0.521842,-0.556081,-0.546953,-0.464298,1.0,-0.522541,-0.461618,-0.37722,-0.435277,0.491957


In [61]:
feature_cols = df.columns.tolist()
feature_cols.remove('Target')

target_cols = ['Target']
X = df[feature_cols]
y = df[target_cols]

remove_list = ['Sex_I','Sex_F','Sex_M']
for col in remove_list:
    feature_cols.remove(col)
pipeline = Pipeline([('normalizer', Normalizer()),
                     ('scaler', StandardScaler())])
scaler = StandardScaler()
# X[feature_cols] = scaler.fit_transform(X[feature_cols])

X[feature_cols] = pipeline.fit_transform(X[feature_cols])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature_cols] = pipeline.fit_transform(X[feature_cols])


In [75]:
df.to_csv('final_model1.csv')

In [67]:
X_train, X_test = train_test_split(df, test_size= 0.2, random_state = 42)

In [68]:
y = df['Target']

In [69]:
y_train, y_test = train_test_split(y, test_size = 0.2, random_state = 42)

In [76]:
X_train, X_val = train_test_split(X_train, test_size = 0.2, random_state = 42)
y_train, y_val = train_test_split(y_train, test_size = 0.2, random_state = 42)

In [90]:
schedule = tensorflow.optimizers.schedules.PiecewiseConstantDecay(
                [100, 150], [1e-0, 1e-1, 1e-2])
step = tf.Variable(0, trainable=False)
wd = lambda: 1e-3 * schedule(step)
optimizer = AdamW(learning_rate=0.001, weight_decay=wd)
def custom_opt(n):
    schedule = tensorflow.optimizers.schedules.PiecewiseConstantDecay([100, 150], [1e-0, 1e-1, 1e-2])
    step = tf.Variable(0, trainable=False)
    wd = lambda: 1e-3 * schedule(step)
    opt = AdamW(learning_rate = n, weight_decay = wd)
    return opt

In [77]:
# method_custom_metric 구현
def accuracy(y_true, y_pred):
    return 1 - tf.abs((y_true - y_pred) / y_true) 

# 모델 구현
model = Sequential()
model.add(Dense(256, activation='elu', input_dim=X_train.shape[1]))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(32, activation='elu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))              
model.add(Dense(16, activation='elu'))
model.add(Dense(16, activation='elu'))
model.add(Dense(1, activation='elu'))

# 모델 컴파일
model.compile(loss='mae', optimizer=optimizer , metrics=[accuracy])

# early stopping 구현 - 커스텀 정확도 기준
early_stopping = EarlyStopping(monitor='val_accuracy', patience=25)
model.fit(X_train, y_train, epochs=1000, batch_size=256, validation_data=(X_test, y_test), callbacks=[early_stopping])

print("====== Final_model =======")
print("train loss, train accuracy")
train_loss, train_acc = model.evaluate(X_train, y_train, verbose =2)
print("validation loss, validation accuracy")
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=2)





Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [95]:
def custom_opt2(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt


In [96]:
func = 'elu'
opti = 'adam'
batch = 256
for i in [0.01, 0.009, 0.006, 0.003, 0.001, 0.0005]:
    model = Sequential()
    model.add(Dense(256, activation=func, input_dim=X_train.shape[1]))
    model.add(BatchNormalization())  # BatchNormalization 추가
    model.add(Dropout(0.2))
    model.add(Dense(128, activation=func))
    model.add(BatchNormalization())  # BatchNormalization 추가
    model.add(Dropout(0.2))
    model.add(Dense(64, activation=func))
    model.add(BatchNormalization())  # BatchNormalization 추가
    model.add(Dropout(0.2))
    model.add(Dense(32, activation=func))
    model.add(Dense(16, activation=func))
    model.add(Dense(8, activation=func))
    model.add(Dense(8, activation=func))
    model.add(Dense(1, activation=func))

    # 모델 컴파일
    model.compile(loss='mae', optimizer=custom_opt2(i) , metrics=[accuracy])

    # early stopping 구현 - 커스텀 정확도 기준
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
    model.fit(X_train, y_train, epochs=1000, batch_size=24, validation_data=(X_test, y_test), callbacks=[early_stopping])
    print(i)

    print("====== Final_model =======")
    print("train loss, train accuracy")
    train_loss, train_acc = model.evaluate(X_train, y_train, verbose =2)
    print("validation loss, validation accuracy")
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=2)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
0.01
train loss, train accuracy
84/84 - 0s - loss: 0.3807 - accuracy: 0.9592 - 106ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.4099 - accuracy: 0.9554 - 42ms/epoch - 2ms/step
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/100