In [1]:
import sys
from sklearn import preprocessing, model_selection
import tensorflow as tf
import pandas as pd
import numpy as np
import joblib
import argparse
from argparse import RawTextHelpFormatter, RawDescriptionHelpFormatter
import os
from scipy import stats
from tqdm import tqdm
import csv
from tensorflow.python.client import device_lib 
from numba import cuda
import gc

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
def rmse(y_true, y_pred):
    dev = np.square(y_true.ravel() - y_pred.ravel())
    return np.sqrt(np.sum(dev) / y_true.shape[0])


def pcc(y_true, y_pred):
    p = stats.pearsonr(y_true, y_pred)
    return p[0]


def pcc_rmse(y_true, y_pred):
    global alpha

    dev = np.square(y_true.ravel() - y_pred.ravel())
    r = np.sqrt(np.sum(dev) / y_true.shape[0])

    p = stats.pearsonr(y_true, y_pred)[0]

    return (1-p)*alpha + r * (1 - alpha)


def PCC_RMSE(y_true, y_pred):
    global alpha

    fsp = y_pred - tf.keras.backend.mean(y_pred)
    fst = y_true - tf.keras.backend.mean(y_true)

    devP = tf.keras.backend.std(y_pred)
    devT = tf.keras.backend.std(y_true)

    r = tf.keras.backend.sqrt(tf.keras.backend.mean(tf.keras.backend.square(y_pred - y_true), axis=-1))

    p = 1.0 - tf.keras.backend.mean(fsp * fst) / (devP * devT)

    #p = tf.where(tf.is_nan(p), 0.25, p)

    return alpha * p + (1 - alpha) * r


def RMSE(y_true, y_pred):
    return tf.keras.backend.sqrt(tf.keras.backend.mean(tf.keras.backend.square(y_pred - y_true), axis=-1))


def PCC(y_true, y_pred):
    fsp = y_pred - tf.keras.backend.mean(y_pred)
    fst = y_true - tf.keras.backend.mean(y_true)

    devP = tf.keras.backend.std(y_pred)
    devT = tf.keras.backend.std(y_true)

    return tf.keras.backend.mean(fsp * fst) / (devP * devT)

In [4]:
def create_model(input_size, lr=0.0001, maxpool=True, dropout=0.1):
    model = tf.keras.Sequential()

    model.add(tf.keras.layers.Conv2D(32, kernel_size=4, strides=1,
                                     padding="valid", input_shape=input_size))
    model.add(tf.keras.layers.Activation("relu"))
    if maxpool:
        model.add(tf.keras.layers.MaxPooling2D(
            pool_size=2,
            strides=2,
            padding='same',  # Padding method
        ))

    model.add(tf.keras.layers.Conv2D(64, 4, 1, padding="valid"))
    model.add(tf.keras.layers.Activation("relu"))
    if maxpool:
        model.add(tf.keras.layers.MaxPooling2D(
            pool_size=2,
            strides=2,
            padding='same',  # Padding method
        ))

    model.add(tf.keras.layers.Conv2D(128, 4, 1, padding="valid"))
    model.add(tf.keras.layers.Activation("relu"))
    if maxpool:
        model.add(tf.keras.layers.MaxPooling2D(
            pool_size=2,
            strides=2,
            padding='same',  # Padding method
        ))

    model.add(tf.keras.layers.Flatten())

    model.add(tf.keras.layers.Dense(400, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout))

    model.add(tf.keras.layers.Dense(200,
                                    kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout))

    model.add(tf.keras.layers.Dense(100, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout))

    #model.add(tf.keras.layers.Dense(20, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    #model.add(tf.keras.layers.Activation("relu"))
    #model.add(tf.keras.layers.BatchNormalization())
    #model.add(tf.keras.layers.Dropout(dropout))

    model.add(tf.keras.layers.Dense(1, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))

    sgd = tf.keras.optimizers.SGD(lr=lr, momentum=0.9, decay=1e-6, )
    model.compile(optimizer=sgd, loss=PCC_RMSE, metrics=['mse'])

    return model

In [5]:
reshape = [60,169,1]

In [6]:
train_file = "train_test_validate_set/train.csv"
val_file = "train_test_validate_set/validate.csv"
test_file = "train_test_validate_set/test.csv"
path = ""

In [7]:
Xtrain, ytrain = None, []
df = pd.read_csv(path + train_file,index_col=0,header = 0,names = None).dropna()
for index,row in tqdm(df.iterrows()):
  if(index==0):
    Xtrain = row.values[1:-2].reshape(1,-1)
  else:
    Xtrain = np.concatenate((Xtrain,row.values[1:-2].reshape(1,-1)), axis=0)
  ytrain = ytrain + [row.values[-1]]


3243it [36:46,  1.47it/s]


In [8]:
Xval, yval = None, []
df = pd.read_csv(path + val_file,index_col=0,header = 0,names = None).dropna()
# print(df.head())
index = 0
for _,row in tqdm(df.iterrows()):
  if(index==0):
    Xval = row.values[1:-2].reshape(1,-1)
  else:
    Xval = np.concatenate((Xval,row.values[1:-2].reshape(1,-1)), axis=0)
  yval = yval + [row.values[-2]]
  index = index + 1

Xtest, ytest = None, []
df = pd.read_csv(path + test_file,index_col=0,header = 0,names = None).dropna()
index = 0
for _,row in tqdm(df.iterrows()):
  if(index==0):
    Xtest = row.values[1:-2].reshape(1,-1)
  else:
    Xtest = np.concatenate((Xtest,row.values[1:-2].reshape(1,-1)), axis=0)
  ytest = ytest + [row.values[-2]]
  index = index + 1

1000it [02:52,  5.81it/s]
350it [00:19, 17.77it/s]


In [9]:
scaler = preprocessing.StandardScaler()
X_train_val = np.concatenate((Xtrain, Xval), axis=0)
scaler.fit(X_train_val)

StandardScaler()

In [10]:
Xtrain = scaler.transform(Xtrain).reshape(-1, reshape[0],reshape[1],reshape[2])
Xval = scaler.transform(Xval).reshape(-1, reshape[0],reshape[1],reshape[2])
Xtest = scaler.transform(Xtest).reshape(-1, reshape[0],reshape[1],reshape[2])
ytrain = np.array(ytrain).reshape(-1, 1)
yval = np.array(yval).reshape(-1, 1)
ytest = np.array(ytest).reshape(-1, 1)

In [11]:
print(device_lib.list_local_devices()) 

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4533476029235673188
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 16054186291945188912
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10875295488
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 8762557081286554191
physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:86:00.0, compute capability: 6.1"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 14053156051851713456
physical_device_desc: "device: XLA_GPU device"
]


In [12]:
log = []
stop = [[0,999.9], ]
batch_size = 128
epochs = 150
global alpha
alpha = 1
patience = 10
path_model = "models/"
path_log = "logs/"
model_name = "model5.h5"
log_name = "log5.csv"
delta_loss = 0.0001
lr = 0.0005
dropout = 0
maxpool = False
model = create_model((reshape[0], reshape[1], reshape[2]),
                                 lr=lr, dropout=dropout, maxpool=maxpool)

In [13]:
# cuda.select_device(0)
# cuda.close()

In [14]:
with open('generation.csv', mode='a') as generation_file:
    generation_writer = csv.writer(generation_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    generation_writer.writerow([model_name, log_name, lr, dropout, maxpool, alpha, batch_size])

for e in range(1, epochs+1):
    model.fit(Xtrain, ytrain, validation_data=(Xval, yval),batch_size=batch_size, epochs=1, verbose=1)

    ytrain_pred = model.predict(Xtrain).ravel()
    loss = pcc_rmse(ytrain.ravel(), ytrain_pred)
    pcc_train = pcc(ytrain.ravel(), ytrain_pred)
    rmse_train = rmse(ytrain.ravel(), ytrain_pred)

    yval_pred = model.predict(Xval).ravel()
    loss_val = pcc_rmse(yval.ravel(), yval_pred)
    pcc_val = pcc(yval.ravel(), yval_pred)
    rmse_val = rmse(yval.ravel(), yval_pred)

    ytest_pred = model.predict(Xtest).ravel()
    loss_test = pcc_rmse(ytest.ravel(), ytest_pred)
    pcc_test = pcc(ytest.ravel(), ytest_pred)
    rmse_test = rmse(ytest.ravel(), ytest_pred)

    log.append([e, loss, pcc_train, rmse_train,
                    loss_val, pcc_val, rmse_val,
                    loss_test, pcc_test, rmse_test])
    logs    = pd.DataFrame(log, columns=['epoch', 'loss', 'pcc_train', 'rmse_train',
                                             'loss_val', 'pcc_val', 'rmse_val',
                                             'loss_test', 'pcc_test', 'rmse_test'])

    print("EPOCH:%d Loss:%.3f RMSE:%.3f PCC:%.3f LOSS_VAL:%.3f RMSE:%.3f PCC:%.3f LOSS_TEST:%.3f RMSE_TEST:%.3f PCC_TEST:%.3f"%
          (e, loss, rmse_train, pcc_train, loss_val, rmse_val, pcc_val, loss_test, rmse_test, pcc_test ))            

    if(stop[-1][1] - loss_val >= delta_loss):
        print("Model improve from %.3f to %.3f. Save model to %s."% (stop[-1][1], loss_val, path_model + model_name))
        model.save(path_model + model_name)
        stop.append([e, loss_val])
    else:
        if(e - stop[-1][0] >= patience):
            print("Get best model at epoch = %d." % stop[-1][0])
            break
            
logs.to_csv(path_log + log_name)

EPOCH:1 Loss:0.659 RMSE:6.866 PCC:0.341 LOSS_VAL:0.670 RMSE:7.153 PCC:0.330 LOSS_TEST:0.702 RMSE_TEST:6.998 PCC_TEST:0.298
Model improve from 999.900 to 0.670. Save model to models/model5.h5.
EPOCH:2 Loss:0.633 RMSE:6.820 PCC:0.367 LOSS_VAL:0.597 RMSE:7.112 PCC:0.403 LOSS_TEST:0.595 RMSE_TEST:6.956 PCC_TEST:0.405
Model improve from 0.670 to 0.597. Save model to models/model5.h5.
EPOCH:3 Loss:0.551 RMSE:6.759 PCC:0.449 LOSS_VAL:0.491 RMSE:7.059 PCC:0.509 LOSS_TEST:0.512 RMSE_TEST:6.901 PCC_TEST:0.488
Model improve from 0.597 to 0.491. Save model to models/model5.h5.
EPOCH:4 Loss:0.504 RMSE:6.742 PCC:0.496 LOSS_VAL:0.461 RMSE:7.032 PCC:0.539 LOSS_TEST:0.500 RMSE_TEST:6.873 PCC_TEST:0.500
Model improve from 0.491 to 0.461. Save model to models/model5.h5.
EPOCH:5 Loss:0.443 RMSE:6.713 PCC:0.557 LOSS_VAL:0.440 RMSE:6.999 PCC:0.560 LOSS_TEST:0.446 RMSE_TEST:6.843 PCC_TEST:0.554
Model improve from 0.461 to 0.440. Save model to models/model5.h5.
EPOCH:6 Loss:0.368 RMSE:6.640 PCC:0.632 LOSS_VAL

EPOCH:30 Loss:0.104 RMSE:5.196 PCC:0.896 LOSS_VAL:0.260 RMSE:5.428 PCC:0.740 LOSS_TEST:0.243 RMSE_TEST:5.298 PCC_TEST:0.757
Model improve from 0.262 to 0.260. Save model to models/model5.h5.
EPOCH:31 Loss:0.102 RMSE:5.204 PCC:0.898 LOSS_VAL:0.270 RMSE:5.432 PCC:0.730 LOSS_TEST:0.241 RMSE_TEST:5.307 PCC_TEST:0.759
EPOCH:32 Loss:0.106 RMSE:5.210 PCC:0.894 LOSS_VAL:0.267 RMSE:5.440 PCC:0.733 LOSS_TEST:0.246 RMSE_TEST:5.322 PCC_TEST:0.754
EPOCH:33 Loss:0.103 RMSE:5.290 PCC:0.897 LOSS_VAL:0.263 RMSE:5.512 PCC:0.737 LOSS_TEST:0.245 RMSE_TEST:5.394 PCC_TEST:0.755
EPOCH:34 Loss:0.101 RMSE:5.150 PCC:0.899 LOSS_VAL:0.260 RMSE:5.370 PCC:0.740 LOSS_TEST:0.244 RMSE_TEST:5.252 PCC_TEST:0.756
EPOCH:35 Loss:0.105 RMSE:5.192 PCC:0.895 LOSS_VAL:0.258 RMSE:5.407 PCC:0.742 LOSS_TEST:0.247 RMSE_TEST:5.287 PCC_TEST:0.753
Model improve from 0.260 to 0.258. Save model to models/model5.h5.
EPOCH:36 Loss:0.102 RMSE:5.165 PCC:0.898 LOSS_VAL:0.260 RMSE:5.423 PCC:0.740 LOSS_TEST:0.250 RMSE_TEST:5.311 PCC_TEST:0.75

EPOCH:62 Loss:0.088 RMSE:5.139 PCC:0.912 LOSS_VAL:0.261 RMSE:5.415 PCC:0.739 LOSS_TEST:0.248 RMSE_TEST:5.322 PCC_TEST:0.752
EPOCH:63 Loss:0.086 RMSE:5.177 PCC:0.914 LOSS_VAL:0.255 RMSE:5.469 PCC:0.745 LOSS_TEST:0.244 RMSE_TEST:5.354 PCC_TEST:0.756
EPOCH:64 Loss:0.086 RMSE:5.088 PCC:0.914 LOSS_VAL:0.256 RMSE:5.355 PCC:0.744 LOSS_TEST:0.247 RMSE_TEST:5.252 PCC_TEST:0.753
EPOCH:65 Loss:0.088 RMSE:5.190 PCC:0.912 LOSS_VAL:0.254 RMSE:5.467 PCC:0.746 LOSS_TEST:0.245 RMSE_TEST:5.360 PCC_TEST:0.755
Get best model at epoch = 55.
