<font color='tomato'><font color="#CC3D3D"><p>
# A DNN Model for Multiclass Classification

##### Import modules

In [96]:
import pandas as pd
import numpy as np
import os
import random
import pickle
from tqdm import tqdm
from IPython.display import Image, clear_output
import seaborn as sns
import matplotlib.pylab as plt
from matplotlib import font_manager, rc
%matplotlib inline

from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
import kerastuner as kt
# Print the installed TensorFlow version so the environment is recorded next to the results.
print(tf.__version__)

2.9.1


##### Set random seeds to make your results reproducible

In [97]:
# Run the code below to get the same result every time a model is built.

def reset_seeds(s1,s2,s3, reset_graph_with_backend=None):
    """Seed NumPy, Python's ``random`` and TensorFlow, optionally resetting Keras/TF state first.

    Parameters
    ----------
    s1 : seed passed to ``np.random.seed``.
    s2 : seed passed to ``random.seed``.
    s3 : seed passed to ``tf.compat.v1.set_random_seed``.
    reset_graph_with_backend : optional object exposing ``clear_session()``
        (presumably the Keras backend module). When given, the session is
        cleared and the default TensorFlow graph is reset before seeding,
        and a confirmation line is printed.

    Returns
    -------
    None
    """
    backend = reset_graph_with_backend
    if backend is not None:
        # Drop existing Keras/TensorFlow state so seeding starts from a clean slate.
        backend.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    # Seed every random number generator in the same order: NumPy, random, TensorFlow.
    seeders = (
        (np.random.seed, s1),
        (random.seed, s2),
        (tf.compat.v1.set_random_seed, s3),
    )
    for seed_fn, seed_value in seeders:
        seed_fn(seed_value)
    # Disabled options kept from the original cell:
    #    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # for GPU
    #    print("RANDOM SEEDS RESET")  # optional

In [98]:
# Seed NumPy (1), Python's random (2) and TensorFlow (3) before any data or model work.
reset_seeds(s1=1, s2=2, s3=3)

### Step 1: Load and process the data

##### Read data

In [99]:
# All input CSVs are read from ../input (relative to the working directory); resolve it once.
input_dir = os.path.abspath("../input")

# FIX: the training numeric features used to be read from the *test* CSV (copy-paste slip),
# so `num_features_train` silently held test rows.
# NOTE(review): assumes choi_num_features_train.csv sits next to the test file — confirm.
num_features_train = pd.read_csv(os.path.join(input_dir, "choi_num_features_train.csv"), encoding='utf-8')
num_features_test = pd.read_csv(os.path.join(input_dir, "choi_num_features_test.csv"), encoding='utf-8')
df_train = pd.read_csv(os.path.join(input_dir, "choi_select_547_train.csv"), encoding='utf-8')
df_test = pd.read_csv(os.path.join(input_dir, "choi_select_547_test.csv"), encoding='utf-8')
# The target file is read with cp949 encoding; only its `group` column is kept.
y_train = pd.read_csv(os.path.join(input_dir, "y_train.csv"), encoding='cp949').group

# Distinct customer ids from the test features; used later as the ID column of the submission.
IDtest = num_features_test.custid.unique()

df_train.head()

Unnamed: 0,19_x,19_y,dis_rate,sales_time,남성,비남성,남성part,비화장품,화장품_x,real_amt,...,customer_info_v287,customer_info_v288,customer_info_v290,customer_info_v291,customer_info_v293,customer_info_v294,customer_info_v296,customer_info_v297,customer_info_v298,customer_info_v299
0,0.480424,0.892344,3.932657,1.295963,-0.179521,-0.277657,0.624123,0.612743,2.065492,0.026492,...,0.002763,0.042127,0.082274,-0.029668,-0.014897,0.00668,-0.022601,-0.017812,0.01776,0.070474
1,0.081857,1.929858,2.357874,-0.202027,0.349653,2.417547,-0.255893,2.298417,-0.245812,2.957209,...,0.091217,-0.008662,0.003983,-0.002832,0.042365,0.012988,0.071694,-0.024969,-0.011674,-0.037639
2,-0.117426,0.548752,0.119978,0.254014,-0.012162,-0.254931,0.961806,1.42956,-0.141677,-0.590541,...,-0.000277,-0.006757,0.015132,0.050091,0.033917,0.02893,-0.072096,-0.100285,0.043172,-0.034632
3,-0.515993,-0.404549,-0.398339,-0.903977,-0.359631,-0.348907,-0.689248,0.850321,-0.464243,-0.031264,...,-0.050163,0.103102,0.107627,-0.043774,-0.022241,0.077513,-0.028061,0.064289,-0.004013,-0.030925
4,0.679707,1.265075,2.037846,0.018223,2.705752,-0.300516,1.863673,0.153425,0.711728,0.639562,...,0.014402,0.009355,0.06934,-0.007804,-0.03551,-0.026667,-0.00182,0.03315,-0.007561,0.010482


In [100]:
# Show (rows, columns) of the train and test feature tables, one per line.
print(df_train.shape, df_test.shape, sep="\n")

(21587, 547)
(14380, 547)


***

##### One-hot encode the target variable

In [101]:
# The 8 categorical target values must be turned into 8 columns through one-hot encoding.
# Step 1: map each label to its integer category code.
target_codes = y_train.astype('category').cat.codes
# Step 2: expand the integer codes into a one-hot matrix.
y_train = keras.utils.to_categorical(target_codes)

In [102]:
# Convert both feature tables to NumPy arrays (the names are reused for the array versions).
df_train, df_test = np.array(df_train), np.array(df_test)

##### Split data into train and validation sets

In [103]:
# Index of the first validation row: 80% of the rows, rounded to the nearest integer.
i = int(round(len(df_train) * 0.8))
# Rows before `i` are used for training, the rest for validation (file order, no shuffling).
# Tuple assignment evaluates the right-hand side first, so both slices of y_train
# are taken from the full, un-split target array.
X_train, X_valid = df_train[:i], df_train[i:]
y_train, y_valid = y_train[:i], y_train[i:]

In [104]:
# Shapes of the validation features and one-hot targets, one per line.
print(X_valid.shape, y_valid.shape, sep="\n")

(4317, 547)
(4317, 8)


In [105]:
# Shapes of the training features and one-hot targets, one per line.
print(X_train.shape, y_train.shape, sep="\n")

(17270, 547)
(17270, 8)


### Step 2: Define the hyper-model

In [106]:
def model_fn(hp):
    """Build and compile one candidate dense network from a set of hyperparameters.

    Parameters
    ----------
    hp : object providing ``Int``, ``Float`` and ``Choice`` samplers
        (presumably a Keras Tuner ``HyperParameters`` instance, since this
        function is handed to ``kt.Hyperband``).

    Returns
    -------
    A compiled ``keras.Model`` whose input width is ``X_train.shape[1]`` (read
    from the notebook's global ``X_train``) and whose output is an 8-way softmax.

    Tuned hyperparameters
    ---------------------
    num_layers    : 2 or 3 hidden blocks.
    unit_<k>      : width of hidden block k, 16 to 64 in steps of 16.
    dropout_<k>   : dropout rate after block k, 0 to 0.5 in steps of 0.25 (default 0.5).
    learning_rate : Adam learning rate, one of 1e-2, 1e-3, 1e-4.
    """
    n_features = X_train.shape[1]
    inputs = keras.Input(shape=(n_features,))

    # Stack `num_layers` blocks of Dense(relu) followed by Dropout.
    hidden = inputs
    n_blocks = hp.Int('num_layers', 2, 3)
    for block in range(n_blocks):
        dense = keras.layers.Dense(hp.Int(f'unit_{block}', 16, 64, step=16), activation='relu')
        hidden = dense(hidden)
        dropout = keras.layers.Dropout(hp.Float(f'dropout_{block}', 0, 0.5, step=0.25, default=0.5))
        hidden = dropout(hidden)

    # There are 8 kinds of predicted values, so 8 output neurons are needed.
    outputs = keras.layers.Dense(8, activation='softmax')(hidden)
    model = keras.Model(inputs, outputs)

    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    model.compile(
        loss='categorical_crossentropy',  # loss function used for multiclass classification
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        metrics=[keras.metrics.CategoricalCrossentropy()],  # evaluation metric used for multiclass classification
    )
    return model

### Step 3: Build multiple hyper-tuned models

In [107]:
# Objective of the search: minimise the validation value of the metric compiled in model_fn.
search_objective = kt.Objective('val_categorical_crossentropy', direction="min")

# Hyperband search over model_fn; previous results in 'dnn_tuning' are overwritten.
tuner = kt.Hyperband(
    model_fn,
    objective=search_objective,
    max_epochs=10,
    hyperband_iterations=3,
    overwrite=True,
    directory='dnn_tuning',
)

# Each trial trains on the training split, is scored on the validation split,
# and stops early with a patience of one epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=1)
tuner.search(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

# Keep the single best model for prediction and print the summary of the best trial.
best_models = tuner.get_best_models(1)
model = best_models[0]
tuner.results_summary(1)

Trial 60 Complete [00h 00m 02s]
val_categorical_crossentropy: 1.4748642444610596

Best val_categorical_crossentropy So Far: 1.4619978666305542
Total elapsed time: 00h 02m 32s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in dnn_tuning\untitled_project
Showing 1 best trials
<keras_tuner.engine.objective.Objective object at 0x0000020E801861C0>
Trial summary
Hyperparameters:
num_layers: 3
unit_0: 48
dropout_0: 0.25
unit_1: 64
dropout_1: 0.0
learning_rate: 0.001
unit_2: 48
dropout_2: 0.0
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0053
Score: 1.4619978666305542


### Step 4: Make submissions

In [108]:
# Timestamped file name (month, day, hour, minute) so successive submissions do not overwrite each other.
t = pd.Timestamp.now()
fname = f"dnn_submission_{t.month:02}{t.day:02}{t.hour:02}{t.minute:02}.csv"

# One probability column per class.
# NOTE(review): the column order presumably matches the sorted category codes used when
# one-hot encoding the target — confirm against the labels in y_train.csv.
pred = pd.DataFrame(model.predict(df_test),
                    columns=['F20','F30','F40','F50','M20','M30','M40','M50'])

# IDtest and df_test come from different CSV files. pd.concat(axis=1) aligns on the index
# and would silently pad the shorter side with NaN, so fail loudly on a length mismatch
# instead of writing a corrupted submission.
if len(IDtest) != len(pred):
    raise ValueError(
        f"ID count ({len(IDtest)}) does not match prediction count ({len(pred)}); "
        "cannot build the submission."
    )

pd.concat([pd.Series(IDtest, name="ID"), pred], axis=1).to_csv(fname, index=False)
print(f"'{fname}' is ready to submit.")

'dnn_submission_06082328.csv' is ready to submit.


<font color="#CC3D3D"><p>
# End