In [8]:
# import package
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder


In [9]:
path_root = os.path.dirname(os.path.abspath('')) # get path root folder
path_dataset = os.path.join(path_root, 'dataset', 'dataset.xlsx')
path_result_file = os.path.join(path_root, 'result', 'customer')
file_save_result = os.path.join(path_result_file, 'result.csv')

In [10]:
def load_sensor_data(path):
    return pd.read_excel(path, skiprows = range(0, 2))

def drop_column(df):
    x = [0, 1, 2, 3, 4] #  0: Unnamed-0, Unnamed-1, Unnamed-2,  データＩＤ, Time
    df_droped = df.drop(df.columns[x], axis=1)
    return df_droped


# Task 1: Loading data from file excel 
data = load_sensor_data(path=path_dataset)

# Task 2: change name column
data_copy = data.copy()
data_copy.rename(columns={'Unnamed: 0': 'datecheck-ID', 
                          'Unnamed: 1': 'shoes-ID',
                          'Unnamed: 2': 'balance-status',
                          'データＩＤ':'set-ID',
                         }, inplace = True)

# Task 3: removing feature which relate to train
data_copy = drop_column(df=data_copy)

In [11]:
# function from customer ... 
# def label_Integerization(label):
#     uniq_label = np.unique(label)
#     int_label=[]
#     for i in range(len(label)):
#         c_nn = label[i]
#         for j in range(len(uniq_label)):
#             if c_nn == uniq_label[j]:
#                 int_label.append((j+1))
#                 break
#     return np.array(int_label)

# Cái này không cần thiết
# X = np.array(y_train.reshape(-1,1))
# enc = OneHotEncoder(categories="auto", sparse=False, dtype=np.int32)
# y_train_xx = enc.fit_transform(X)

In [136]:
selcol = ["L-Fx1","L-Fy1","L-Fz1","L-Mx1","L-My1","L-Mz1","L-Fx2","L-Fy2","L-Fz2","L-Mx2","L-My2","L-Mz2","L-Fx3","L-Fy3","L-Fz3","L-Mx3","L-My3","L-Mz3",
             "L-AccelX","L-AccelY","L-AccelZ","L-GyroX","L-GyroY","L-GyroZ","R-Fx1","R-Fy1","R-Fz1","R-Mx1","R-My1","R-Mz1","R-Fx2","R-Fy2","R-Fz2","R-Mx2","R-My2","R-Mz2","R-Fx3","R-Fy3",
             "R-Fz3","R-Mx3","R-My3","R-Mz3","R-AccelX","R-AccelY","R-AccelZ","R-GyroX","R-GyroY","R-GyroZ"]
    
lblcol = ["L-FX","L-FY","L-FZ","R-FX","R-FY","R-FZ"]

x_train = data_copy[selcol].values # input 
y_train = data_copy[lblcol].values # labels <--- 

print("X Shape is: ", x_train.shape)
print("Y Shape is: ", y_train.shape)

X Shape is:  (236388, 48)
Y Shape is:  (236388, 6)


In [137]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, BatchNormalization
from tensorflow.keras import optimizers
from tensorflow.keras.models import load_model

class LossHistory(tf.keras.callbacks.Callback):
    def on_train_begin(self,logs={}):
        self.losses = []
    def on_batch_end(self,batch,logs={}):
        self.losses.append(logs.get('loss'))

def set_callback(name_type):
    if name_type == 'history':
        return LossHistory
    elif 'tensorboard' in name_type:
        check_folds = name_type.split('_')
        if len(check_folds) > 1:
            numberfolds = check_folds[1]
            logs = os.path.join(path_result_file, "folds", "tensorboard_%s" % (str(numberfolds)))
            return tf.keras.callbacks.TensorBoard(log_dir=logs, 
                                                  histogram_freq=1, 
                                                  write_graph=True,  
                                                  write_images=False)
            
        else:
            logs = os.path.join(path_result_file, "tensorboard")
            return tf.keras.callbacks.TensorBoard(log_dir=logs, 
                                                  histogram_freq=1, 
                                                  write_graph=True,  
                                                  write_images=False)
    
    elif 'checkpoint' in name_type:
        # "weights_%s_{epoch:02d}_{accuracy:.2f}.h5" % (str(numberfolds))
        check_folds = name_type.split('_')
        if len(check_folds) > 1:
            numberfolds = check_folds[1]
            saving_path = os.path.join(path_result_file, "checkpoint", "weights_%s.h5" % (str(numberfolds)))
            return tf.keras.callbacks.ModelCheckpoint(saving_path,
                                                      verbose=1,
                                                      monitor="val_accuracy",
                                                      save_best_only=True,
                                                      mode='max') # save_best_only=True, mode='auto'
            
        else:
            saving_path = os.path.join(path_result_file, "checkpoint", "weights_{epoch:02d}_{accuracy:.2f}.h5")
            return tf.keras.callbacks.ModelCheckpoint(saving_path,
                                                      verbose=1,
                                                      monitor="val_accuracy",
                                                      save_best_only=True,
                                                      mode='max') # save_best_only=True, mode='auto'


class DNN(object):
    def __init__(self, n_input, n_hiddens, n_out, activation='relu',
                 kernel_initializer='he_normal', optimize='Adam', param={'rate': 0.001, "beta1": 0.9, "beta2": 0.999},
                 loss='categorical_crossentropy', on_softmax=True):

        self.n_in = n_input
        self.n_hiddens = n_hiddens
        self.n_out = n_out
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.optimize = self.set_optimizer(optimize, param)
        self.loss = loss
        self.on_softmax = on_softmax
        self.callback = None
        self.hist = None

    def set_optimizer(self, optimize, param):
        if optimize == 'Adam':
            learning_rate = param['rate']
            beta1 = param['beta1']
            beta2 = param['beta2']
            optimizer = optimizers.Adam(lr=learning_rate, beta_1=beta1, beta_2=beta2)
        elif optimize == 'SGD':
            lr = prm['lr']
            momentum = param['mometum']
            self.optimizer = optimizers.SGD(lr, momentum)
        elif optimize == 'Nest':
            lr = param['lr']
            momentum = param['mometum']
            optimizer = optimizers.SGD(lr, momentum, nesterov=True)
        elif optimize == 'Adagrad':
            lr = param['lr']
            optimizer = optimizers.Adagrad(lr)
        elif optimize == 'Adadelta':
            rho = param['rho']
            optimizer = optimizers.Adadelta(rho=rho)
        elif optimize == 'RMSProp':
            lr = param['lr']
            optimizer = optimizers.RMSprop(lr)
        else:
            learning_rate = param['rate']
            beta1 = param['beta1']
            beta2 = param['beta2']
            optimizer = optimizers.Adam(lr=learning_rate, beta_1=beta1, beta_2=beta2)

        return optimizer

    def make_model_bach_normalization(self):
        model = Sequential()
        model.add(Dense(self.n_hiddens[0], kernel_initializer=self.kernel_initializer,
                        input_shape=(self.n_in,)))
        model.add(BatchNormalization())
        model.add(Activation(self.activation))
        model.add(Dropout(0.5))
        for i in range(1, len(self.n_hiddens)):
            model.add(Dense(self.n_hiddens[i], kernel_initializer=self.kernel_initializer))
            model.add(BatchNormalization())
            model.add(Activation(self.activation))
            model.add(Dropout(0.5))
        if self.on_softmax:
            model.add(Dense(self.n_out, kernel_initializer=self.kernel_initializer,
                            activation='softmax'))
        else:
            model.add(Dense(self.n_out))

        model.summary()
        self.model = model
        self.model.compile(optimizer=self.optimize,
                           loss=self.loss,
                           metrics=['accuracy'])

    def make_model(self):
        model = Sequential()
        model.add(Dense(self.n_hiddens[0], kernel_initializer=self.kernel_initializer,
                        input_shape=(self.n_in,)))
        model.add(Activation(self.activation))
        model.add(Dropout(0.5))
        for i in range(1, len(self.n_hiddens)):
            model.add(Dense(self.n_hiddens[i], kernel_initializer=self.kernel_initializer))
            model.add(Activation(self.activation))
            model.add(Dropout(0.5))
        if self.on_softmax:
            model.add(Dense(self.n_out, kernel_initializer=self.kernel_initializer,
                            activation='softmax'))
        else:
            model.add(Dense(self.n_out))

        model.summary()
        self.model = model
        self.model.compile(optimizer=self.optimize,
                           loss=self.loss,
                           metrics=['accuracy'])

    def set_callback(self, callback_type):
        list_callback = []
        for name_callback in callback_type:
            callback = set_callback(name_type=name_callback)
            list_callback.append(callback)
        self.callback = list_callback        

    def calc_fit(self, x_data, y_data, epoch=10,
                 batch_size=5, verbose=2, validation=None):
        print(">>> Calc fit shape is : %s  -  %s" % (x_data.shape, y_data.shape))
        if validation is None:
            if self.callback is None:
                self.hist = self.model.fit(x_data, y_data, epochs=epoch,
                                           batch_size=batch_size, 
                                           verbose=verbose)
            else:
                self.hist = self.model.fit(x_data, y_data, epochs=epoch,
                                           batch_size=batch_size, 
                                           verbose=verbose, 
                                           callbacks=self.callback)
        else:
            if self.callback is None:
                self.hist = self.model.fit(x_data, y_data, epochs=epoch,
                                           batch_size=batch_size, 
                                           verbose=verbose, 
                                           validation_data=validation)
            else:
                self.hist = self.model.fit(x_data, y_data, epochs=epoch,
                                           batch_size=batch_size, 
                                           verbose=verbose, 
                                           validation_data=validation,
                                           callbacks=self.callback)

    def get_history_data(self):
        return self.hist

    def evaluate(self, x_test, y_test, varbose=0):
        loss, acc = self.model.evaluate(x_test, y_test, verbose=varbose)
        return loss, acc

    def predict(self, x_test, varbose=0):
        pred = self.model.predict(x_test)
        return pred

    def save_model(self, outf):
        self.model.save(outf)  # モデルを保存

    def load_model(self, inpf, show=True):
        self.model = None
        self.model = load_model(inpf)
        if show:
            self.model.summary()


In [138]:
def main_1(x_train, y_train, file_save):
    n_inp = 48
    n_hidden = [150, 50, 10]
    n_out = 6 # n_out = 1 - is wrong about when MSE calculate the loss 
    epoch = 1000
    
    # setting training 
    dnn = DNN(n_inp, n_hidden, n_out, loss='mse', optimize='RMSProp', param={'lr':0.001}, on_softmax=False)
    dnn.make_model_bach_normalization()
    dnn.set_callback(callback_type=['tensorboard', 'checkpoint'])
    dnn.calc_fit(x_train, y_train, batch_size=30, epoch=epoch)
    
    # saving model
    
    
    # write result
    hist = dnn.get_history_data()
    list_d = []
    step = [x + 1 for x in range(epoch)]
    acc = hist.history['accuracy']
    loss = hist.history['loss']
    for i in range(len(step)):
        c_d = [step[i], acc[i], loss[i]]
        list_d.append(c_d)
    
    # write file csv 
    d_hist = pd.DataFrame(list_d, columns=['step','accuracy','loss'])
    d_hist.to_csv(file_save, index=False)

In [139]:
# !wget "https://drive.google.com/uc?export=download&id=1xT_3uZk82jL5obv7SYzNoFh8c2MML1xn" -O ./dataset/dataset.xlsx

In [146]:
from sklearn.model_selection import StratifiedKFold 

def main_crossvalidation(x_train, y_train, file_save):
    # get info
    basename = os.path.basename(file_save_result)
    dirname = os.path.dirname(file_save_result)
    
    # parameter for training 
    n_inp = 48
    n_hidden = [150, 50, 10]
    n_out = 6 # n_out = 1 - is wrong about when MSE calculate the loss 
    epoch = 5
    
    # init cross validation
    from sklearn.model_selection import KFold
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    
    
    VALIDATION_ACCURACY = []
    VALIDATION_LOSS = []
    
    for index, (train_index, test_index) in enumerate(kfold.split(x_train, y_train)):
        print(">>> FOLD is %s" % (str(index)))
        file_save = str(basename.split('.')[0] + '_%s' % (index + 1)) + str(basename.split('.')[1])
        abs_path = os.path.join(dirname, file_save)
        
        X_train_folds = x_train[train_index]
        y_train_folds = y_train[train_index]
        
        X_test_fold = x_train[test_index]
        y_test_fold = y_train[test_index]
        
        # Clone model 
        dnn = DNN(n_inp, n_hidden, n_out, loss='mse', optimize='RMSProp', param={'lr':0.0001}, on_softmax=False)
        dnn.make_model_bach_normalization()
        dnn.set_callback(callback_type=['tensorboard_%s' % (index + 1), 'checkpoint_%s' % (index + 1)])
        dnn.calc_fit(X_train_folds, y_train_folds, 
                     batch_size=128, 
                     epoch=epoch) # FJN: 128
        
        
        # write result
        hist = dnn.get_history_data()
        list_d = []
        step = [x + 1 for x in range(epoch)]
        acc = hist.history['accuracy']
        loss = hist.history['loss']
        for i in range(len(step)):
            c_d = [step[i], acc[i], loss[i]]
            list_d.append(c_d)

        # write file csv 
        d_hist = pd.DataFrame(list_d, columns=['step','accuracy','loss'])
        d_hist.to_csv(abs_path, index=False)
        
        # LOAD BEST MODEL to evaluate the performance of the model
        savedmodel_path = os.path.join(path_result_file, "checkpoint", "weights_%s.h5" % (str(index + 1)))
        dnn.load_model(savedmodel_path, show=False)
        
        results = dnn.evaluate(x_test=X_test_fold, y_test=y_test_fold)
        VALIDATION_ACCURACY.append(results[1] * 100)
        VALIDATION_LOSS.append(results[0])
        
        tf.keras.backend.clear_session()
        
    return VALIDATION_ACCURACY, VALIDATION_LOSS

In [147]:
print(f"Shape Training Data: {x_train.shape, y_train.shape}")

Shape Training Data: ((236388, 48), (236388, 6))


In [148]:
file_save_result = os.path.join(path_result_file, 'result.csv')
VALIDATION_ACCURACY, VALIDATION_LOSS = main_crossvalidation(x_train, y_train, file_save_result)

>>> FOLD is 0
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 150)               7350      
                                                                 
 batch_normalization (BatchN  (None, 150)              600       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 150)               0         
                                                                 
 dropout (Dropout)           (None, 150)               0         
                                                                 
 dense_1 (Dense)             (None, 50)                7550      
                                                                 
 batch_normalization_1 (Batc  (None, 50)               200       
 hNormalization)                          

Epoch 1/5
1478/1478 - 10s - loss: 57490.6680 - accuracy: 0.3964 - 10s/epoch - 6ms/step
Epoch 2/5
1478/1478 - 5s - loss: 57161.6211 - accuracy: 0.5067 - 5s/epoch - 3ms/step
Epoch 3/5
1478/1478 - 6s - loss: 56756.7969 - accuracy: 0.5492 - 6s/epoch - 4ms/step
Epoch 4/5
1478/1478 - 5s - loss: 56259.5039 - accuracy: 0.5701 - 5s/epoch - 3ms/step
Epoch 5/5
1478/1478 - 5s - loss: 55668.4062 - accuracy: 0.5854 - 5s/epoch - 3ms/step
>>> FOLD is 3
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 150)               7350      
                                                                 
 batch_normalization (BatchN  (None, 150)              600       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 150)               0         
           

In [151]:
print(VALIDATION_ACCURACY)
print("%.2f%% (+/- %.2f%%)" % (np.mean(VALIDATION_ACCURACY), np.std(VALIDATION_ACCURACY)))

[74.02174472808838, 45.43762505054474, 74.13807511329651, 67.62061715126038, 63.91691565513611]
65.03% (+/- 10.54%)


In [79]:
# predict model 
model = tf.keras.models.load_model("/Users/thaihoc/Desktop/TCH_AI/result/customer/checkpoint/weights_05_0.73.hdf5")

# rmse 
x_predicted = model.predict(x_train[:5])
x_labeled = y_train[:5]
mse = tf.reduce_mean(tf.square(tf.subtract(x_predicted, x_labeled)))
rmse = tf.sqrt(mse)
print(">> RMSE: ", rmse.numpy())

>> RMSE:  48.85347


In [42]:
# Cross validation 
from sklearn.model_selection import KFold, StratifiedKFold

kf = KFold(n_splits = 5)
skf = StratifiedKFold(n_splits = 5, random_state = 7, shuffle = True) 
kFold = StratifiedKFold(n_splits=10)


for train_index, val_index in kFold.split(x_train,y_train):
    training_data = x_train[train_index]
    validation_data = x_train[val_index]
    

ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous-multioutput' instead.

In [21]:
x_train

array([[   5.56862745,    4.54901961,    2.82352941, ...,    0.91      ,
          -3.08      ,   -3.29      ],
       [  -4.43137255,   -1.45098039,    2.82352941, ...,    0.91      ,
          -3.01      ,   -3.29      ],
       [   3.56862745,   -3.45098039,    0.82352941, ...,    1.19      ,
          -3.08      ,   -3.43      ],
       ...,
       [ 166.06122449,  190.40816327,  556.08163265, ..., -282.1       ,
        -182.3       ,   -4.6       ],
       [ 193.06122449,  197.40816327,  624.08163265, ..., -281.6       ,
        -188.7       ,   14.8       ],
       [ 219.06122449,  209.40816327,  696.08163265, ..., -270.5       ,
        -190.4       ,   16.6       ]])

In [98]:
# --------------

In [32]:
import numpy as np
y_check = np.asarray([[0.5, 0.3, 0.6, 0.1111, 0, 9]])
y_predict = np.asarray([0.6])

In [101]:
y_predict.shape

(1, 3)

In [126]:
tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(y_check, y_predict), 2)))

<tf.Tensor: shape=(), dtype=float64, numpy=0.31622776601683794>

In [123]:
oss = tf.reduce_sum(tf.pow(y_check - y_predict, 2)) / (2 * 1)
oss

<tf.Tensor: shape=(), dtype=float64, numpy=0.049999999999999996>

In [111]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_check, y_predict)

ValueError: y_true and y_pred have different number of output (1!=3)

In [4]:
import tensorflow as tf
mse = tf.keras.losses.MeanSquaredError()

In [115]:
mse(y_predict, y_check)

<tf.Tensor: shape=(), dtype=float64, numpy=0.03333333507180214>

In [121]:
tf.square(2)

<tf.Tensor: shape=(), dtype=int32, numpy=4>

In [33]:
np.mean((y_predict-y_check)**2)

11.876503868333335

In [37]:
tf.reduce_sum(tf.square(y_check - y_predict), axis=-1) / 7

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([10.17986046])>

In [5]:
tf.reduce_mean(tf.pow(y_check - y_predict, 2))

2021-12-01 19:16:31.599692: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


<tf.Tensor: shape=(), dtype=float64, numpy=0.03333333333333333>

In [34]:
tf.reduce_mean(tf.square(y_predict - y_check), axis=-1)

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([11.87650387])>