In [1]:
import sys
from sklearn import preprocessing, model_selection
import tensorflow as tf
import pandas as pd
import numpy as np
import joblib
import argparse
from argparse import RawTextHelpFormatter, RawDescriptionHelpFormatter
import os
from scipy import stats
from tqdm import tqdm
import csv
from tensorflow.python.client import device_lib 
from numba import cuda
import gc
import math

In [2]:
def rmse(y_true, y_pred):
    dev = np.square(y_true.ravel() - y_pred.ravel())
    return np.sqrt(np.sum(dev) / y_true.shape[0])


def pcc(y_true, y_pred):
    p = stats.pearsonr(y_true, y_pred)
    if(math.isnan(p[0])): return 0.25
    return p[0]


def pcc_rmse(y_true, y_pred):

    return (1-pcc(y_true, y_pred))*alpha + rmse(y_true, y_pred) * (1 - alpha)


def PCC_RMSE(y_true, y_pred):
    global alpha

    fsp = y_pred - tf.keras.backend.mean(y_pred)
    fst = y_true - tf.keras.backend.mean(y_true)

    devP = tf.keras.backend.std(y_pred)
    devT = tf.keras.backend.std(y_true)

    r = tf.keras.backend.sqrt(tf.keras.backend.mean(tf.keras.backend.square(y_pred - y_true), axis=-1))
    r = tf.where(tf.math.is_nan(r), 1.5, r)
    
    p = tf.keras.backend.mean(fsp * fst) / (devP * devT)
    p = tf.where(tf.math.is_nan(p), 0.75, p)
    p = 1 - p
   

    return alpha * p + (1 - alpha) * r


def RMSE(y_true, y_pred):
    return tf.keras.backend.sqrt(tf.keras.backend.mean(tf.keras.backend.square(y_pred - y_true), axis=-1))


def PCC(y_true, y_pred):
    fsp = y_pred - tf.keras.backend.mean(y_pred)
    fst = y_true - tf.keras.backend.mean(y_true)

    devP = tf.keras.backend.std(y_pred)
    devT = tf.keras.backend.std(y_true)

    p = tf.keras.backend.mean(fsp * fst) / (devP * devT)
    
    p = tf.where(tf.math.is_nan(p), 0.25, p)
    
    return p

In [3]:
def create_model(input_size, lr=0.0001, maxpool=True, dropout=0.1):
    model = tf.keras.Sequential()

    model.add(tf.keras.layers.Conv2D(32, kernel_size=4, strides=1,
                                     padding="valid", input_shape=input_size))
    model.add(tf.keras.layers.Activation("relu"))
    if maxpool:
        model.add(tf.keras.layers.MaxPooling2D(
            pool_size=2,
            strides=2,
            padding='same',  # Padding method
        ))

    model.add(tf.keras.layers.Conv2D(64, 4, 1, padding="valid"))
    model.add(tf.keras.layers.Activation("relu"))
    if maxpool:
        model.add(tf.keras.layers.MaxPooling2D(
            pool_size=2,
            strides=2,
            padding='same',  # Padding method
        ))

    model.add(tf.keras.layers.Conv2D(128, 4, 1, padding="valid"))
    model.add(tf.keras.layers.Activation("relu"))
    if maxpool:
        model.add(tf.keras.layers.MaxPooling2D(
            pool_size=2,
            strides=2,
            padding='same',  # Padding method
        ))

    model.add(tf.keras.layers.Flatten())

    model.add(tf.keras.layers.Dense(400, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout))

    model.add(tf.keras.layers.Dense(200,
                                    kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout))

    model.add(tf.keras.layers.Dense(100, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout))

    #model.add(tf.keras.layers.Dense(20, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    #model.add(tf.keras.layers.Activation("relu"))
    #model.add(tf.keras.layers.BatchNormalization())
    #model.add(tf.keras.layers.Dropout(dropout))

    model.add(tf.keras.layers.Dense(1, kernel_regularizer=tf.keras.regularizers.l2(0.01), ))
    model.add(tf.keras.layers.Activation("relu"))

    sgd = tf.keras.optimizers.SGD(lr=lr, momentum=0.9, decay=1e-6, )
    model.compile(optimizer=sgd, loss=PCC_RMSE, metrics=['mse'])

    return model

In [4]:
reshape = [81,60,1]

In [5]:
train_file = "train_test_validate_set/train.csv"
val_file = "train_test_validate_set/validate.csv"
test_file = "train_test_validate_set/test.csv"
path = ""

In [7]:
# Xtrain1, ytrain = None, []
# df = pd.read_csv(path + train_file,index_col=0,header = 0,names = None).dropna()[0:4000]
# for index,row in tqdm(df.iterrows()):
#   if(index==0):
#     Xtrain1 = row.values[1:-2].reshape(1,-1)
#   else:
#     Xtrain1 = np.concatenate((Xtrain1,row.values[1:-2].reshape(1,-1)), axis=0)
#   ytrain = ytrain + [row.values[-1]]


In [8]:
# Xtrain2 = None
# df = pd.read_csv(path + train_file,index_col=0,header = 0,names = None).dropna()[4000:8000]
# for index,row in tqdm(df.iterrows()):
#   if(index==1000):
#     Xtrain2 = row.values[1:-2].reshape(1,-1)
#   else:
#     Xtrain2 = np.concatenate((Xtrain2,row.values[1:-2].reshape(1,-1)), axis=0)
#   ytrain = ytrain + [row.values[-1]]

In [9]:
# Xtrain3 = None
# df = pd.read_csv(path + train_file,index_col=0,header = 0,names = None).dropna()[8000:12000]
# for index,row in tqdm(df.iterrows()):
#   if(index==8000):
#     Xtrain3 = row.values[1:-2].reshape(1,-1)
#   else:
#     Xtrain3 = np.concatenate((Xtrain3,row.values[1:-2].reshape(1,-1)), axis=0)
#   ytrain = ytrain + [row.values[-1]]

In [10]:
# Xtrain4 = None
# df = pd.read_csv(path + train_file,index_col=0,header = 0,names = None).dropna()[12000:]
# for index,row in tqdm(df.iterrows()):
#   if(index==12000):
#     Xtrain4 = row.values[1:-2].reshape(1,-1)
#   else:
#     Xtrain4 = np.concatenate((Xtrain4,row.values[1:-2].reshape(1,-1)), axis=0)
#   ytrain = ytrain + [row.values[-1]]

In [11]:
# Xval, yval = None, []
# df = pd.read_csv(path + val_file,index_col=0,header = 0,names = None).dropna()
# # print(df.head())
# index = 0
# for _,row in tqdm(df.iterrows()):
#   if(index==0):
#     Xval = row.values[1:-2].reshape(1,-1)
#   else:
#     Xval = np.concatenate((Xval,row.values[1:-2].reshape(1,-1)), axis=0)
#   yval = yval + [row.values[-2]]
#   index = index + 1

# Xtest, ytest = None, []
# df = pd.read_csv(path + test_file,index_col=0,header = 0,names = None).dropna()
# index = 0
# for _,row in tqdm(df.iterrows()):
#   if(index==0):
#     Xtest = row.values[1:-2].reshape(1,-1)
#   else:
#     Xtest = np.concatenate((Xtest,row.values[1:-2].reshape(1,-1)), axis=0)
#   ytest = ytest + [row.values[-2]]
#   index = index + 1

In [12]:
# Xtrain = Xtrain1
# Xtrain = np.concatenate((Xtrain,Xtrain2),axis=0)
# Xtrain = np.concatenate((Xtrain,Xtrain3),axis=0)
# Xtrain = np.concatenate((Xtrain,Xtrain4),axis=0)

In [6]:
Xtrain = pd.read_csv(path + train_file,index_col=0,header = 0,names = None).dropna().values[:,1:-2]
Xtest = pd.read_csv(path + test_file,index_col=0,header = 0,names = None).dropna().values[:,1:-2]
Xval = pd.read_csv(path + val_file,index_col=0,header = 0,names = None).dropna().values[:,1:-2]

In [7]:
ytrain = []
df = pd.read_csv(path + train_file,index_col=0,header = 0,names = None).dropna()
for index,row in tqdm(df.iterrows()):
  ytrain = ytrain + [row.values[-1]]

ytest = []
df = pd.read_csv(path + test_file,index_col=0,header = 0,names = None).dropna()
for index,row in tqdm(df.iterrows()):
  ytest = ytest + [row.values[-2]]

yval = []
df = pd.read_csv(path + val_file,index_col=0,header = 0,names = None).dropna()
for index,row in tqdm(df.iterrows()):
  yval = yval + [row.values[-2]]

15969it [00:19, 811.97it/s]
350it [00:01, 313.88it/s]
1000it [00:01, 926.34it/s]


In [15]:
# Xtrain.shape

In [8]:
scaler = preprocessing.StandardScaler()
X_train_val = np.concatenate((Xtrain, Xval), axis=0)
scaler.fit(X_train_val)

StandardScaler()

In [9]:
Xtrain = scaler.transform(Xtrain).reshape(-1, reshape[0],reshape[1],reshape[2])
Xval = scaler.transform(Xval).reshape(-1, reshape[0],reshape[1],reshape[2])
Xtest = scaler.transform(Xtest).reshape(-1, reshape[0],reshape[1],reshape[2])
ytrain = np.array(ytrain).reshape(-1, 1)
yval = np.array(yval).reshape(-1, 1)
ytest = np.array(ytest).reshape(-1, 1)

In [10]:
print(device_lib.list_local_devices()) 

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7016825684483915276
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 13160815377946648250
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10875295488
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 6854271780933700617
physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:86:00.0, compute capability: 6.1"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 18390862442731187378
physical_device_desc: "device: XLA_GPU device"
]


In [16]:
log = []
stop = [[0,999.9], ]
batch_size = 128
epochs = 200
global alpha
alpha = 1
patience = 40
path_model = "models/Met/"
path_log = "logs/Met/"
model_name = "model9.h5"
log_name = "log9.csv"
delta_loss = 0.01
lr = 0.001
dropout = 0
maxpool = False
model = create_model((reshape[0], reshape[1], reshape[2]),
                                 lr=lr, dropout=dropout, maxpool=maxpool)

In [20]:
# cuda.select_device(0)
# cuda.close()

In [17]:
with open('generation.csv', mode='a') as generation_file:
    generation_writer = csv.writer(generation_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    generation_writer.writerow([model_name, log_name, lr, dropout, maxpool, alpha, batch_size])

for e in range(1, epochs+1):
    model.fit(Xtrain, ytrain, validation_data=(Xval, yval),batch_size=batch_size, epochs=1, verbose=1)

    ytrain_pred = model.predict(Xtrain).ravel()
    print(ytrain_pred)
    loss = pcc_rmse(ytrain.ravel(), ytrain_pred)
    pcc_train = pcc(ytrain.ravel(), ytrain_pred)
    rmse_train = rmse(ytrain.ravel(), ytrain_pred)

    yval_pred = model.predict(Xval).ravel()
    loss_val = pcc_rmse(yval.ravel(), yval_pred)
    pcc_val = pcc(yval.ravel(), yval_pred)
    rmse_val = rmse(yval.ravel(), yval_pred)

    ytest_pred = model.predict(Xtest).ravel()
    loss_test = pcc_rmse(ytest.ravel(), ytest_pred)
    pcc_test = pcc(ytest.ravel(), ytest_pred)
    rmse_test = rmse(ytest.ravel(), ytest_pred)

    log.append([e, loss, pcc_train, rmse_train,
                    loss_val, pcc_val, rmse_val,
                    loss_test, pcc_test, rmse_test])
    logs    = pd.DataFrame(log, columns=['epoch', 'loss', 'pcc_train', 'rmse_train',
                                             'loss_val', 'pcc_val', 'rmse_val',
                                             'loss_test', 'pcc_test', 'rmse_test'])

    print("EPOCH:%d Loss:%.3f RMSE:%.3f PCC:%.3f LOSS_VAL:%.3f RMSE_VAL:%.3f PCC_VAL:%.3f LOSS_TEST:%.3f RMSE_TEST:%.3f PCC_TEST:%.3f"%
          (e, loss, rmse_train, pcc_train, loss_val, rmse_val, pcc_val, loss_test, rmse_test, pcc_test ))            

    if(stop[-1][1] - loss_val >= delta_loss):
#         print("Model improve from %.3f to %.3f. Save model to %s."% (stop[-1][1], loss_val, path_model + model_name))
#         model.save(path_model + model_name)
        stop.append([e, loss_val])
    else:
        if(e - stop[-1][0] >= patience):
            print("Get best model at epoch = %d." % stop[-1][0])
            break
            
logs.to_csv(path_log + log_name)

[0.         0.98252654 1.5252817  ... 0.         0.3273311  0.99691325]
EPOCH:1 Loss:0.433 RMSE:5.764 PCC:0.567 LOSS_VAL:0.410 RMSE_VAL:5.961 PCC_VAL:0.590 LOSS_TEST:0.409 RMSE_TEST:5.874 PCC_TEST:0.591
[0.         1.168913   1.8717841  ... 0.         0.62916464 1.6787645 ]
EPOCH:2 Loss:0.329 RMSE:5.512 PCC:0.671 LOSS_VAL:0.355 RMSE_VAL:5.723 PCC_VAL:0.645 LOSS_TEST:0.357 RMSE_TEST:5.653 PCC_TEST:0.643
[0.        1.148878  2.0622213 ... 0.        1.534082  1.6566328]
EPOCH:3 Loss:0.284 RMSE:5.352 PCC:0.716 LOSS_VAL:0.338 RMSE_VAL:5.548 PCC_VAL:0.662 LOSS_TEST:0.336 RMSE_TEST:5.490 PCC_TEST:0.664
[0.        1.0041655 2.3901665 ... 0.        1.1845499 2.0982537]
EPOCH:4 Loss:0.249 RMSE:5.358 PCC:0.751 LOSS_VAL:0.331 RMSE_VAL:5.551 PCC_VAL:0.669 LOSS_TEST:0.348 RMSE_TEST:5.461 PCC_TEST:0.652
[0.        1.3961931 3.0488749 ... 0.        1.1074485 1.9899758]
EPOCH:5 Loss:0.217 RMSE:5.153 PCC:0.783 LOSS_VAL:0.327 RMSE_VAL:5.349 PCC_VAL:0.673 LOSS_TEST:0.349 RMSE_TEST:5.289 PCC_TEST:0.651
[0.

[0.        0.8557505 2.397995  ... 0.        1.4383624 1.89152  ]
EPOCH:26 Loss:0.066 RMSE:5.428 PCC:0.934 LOSS_VAL:0.261 RMSE_VAL:5.597 PCC_VAL:0.739 LOSS_TEST:0.269 RMSE_TEST:5.546 PCC_TEST:0.731
[0.        0.7713926 2.1558087 ... 0.        1.2631917 1.8437026]
EPOCH:27 Loss:0.063 RMSE:5.442 PCC:0.937 LOSS_VAL:0.264 RMSE_VAL:5.635 PCC_VAL:0.736 LOSS_TEST:0.277 RMSE_TEST:5.579 PCC_TEST:0.723
[0.        0.906616  2.2354236 ... 0.        1.1291103 1.8025361]
EPOCH:28 Loss:0.060 RMSE:5.459 PCC:0.940 LOSS_VAL:0.261 RMSE_VAL:5.642 PCC_VAL:0.739 LOSS_TEST:0.275 RMSE_TEST:5.589 PCC_TEST:0.725
[0.        0.8688034 2.1877897 ... 0.        1.4394927 1.6914539]
EPOCH:29 Loss:0.058 RMSE:5.456 PCC:0.942 LOSS_VAL:0.259 RMSE_VAL:5.618 PCC_VAL:0.741 LOSS_TEST:0.269 RMSE_TEST:5.565 PCC_TEST:0.731
[0.         0.7961769  2.1527226  ... 0.         0.95842314 2.0113504 ]
EPOCH:30 Loss:0.062 RMSE:5.450 PCC:0.938 LOSS_VAL:0.271 RMSE_VAL:5.585 PCC_VAL:0.729 LOSS_TEST:0.289 RMSE_TEST:5.529 PCC_TEST:0.711
[0. 

[0.55041146 0.56003565 1.4603003  ... 0.         0.76849544 1.1318    ]
EPOCH:51 Loss:0.041 RMSE:5.846 PCC:0.959 LOSS_VAL:0.251 RMSE_VAL:5.989 PCC_VAL:0.749 LOSS_TEST:0.265 RMSE_TEST:5.903 PCC_TEST:0.735
[0.33277836 0.5078316  1.2992563  ... 0.         0.74615616 1.210913  ]
EPOCH:52 Loss:0.044 RMSE:5.864 PCC:0.956 LOSS_VAL:0.264 RMSE_VAL:6.022 PCC_VAL:0.736 LOSS_TEST:0.254 RMSE_TEST:5.916 PCC_TEST:0.746
[0.38752013 0.48702186 1.2222949  ... 0.         0.76461303 1.0769329 ]
EPOCH:53 Loss:0.045 RMSE:5.875 PCC:0.955 LOSS_VAL:0.253 RMSE_VAL:6.020 PCC_VAL:0.747 LOSS_TEST:0.254 RMSE_TEST:5.930 PCC_TEST:0.746
[0.60121125 0.4482084  1.4343373  ... 0.         0.7742927  1.082534  ]
EPOCH:54 Loss:0.040 RMSE:5.877 PCC:0.960 LOSS_VAL:0.255 RMSE_VAL:6.018 PCC_VAL:0.745 LOSS_TEST:0.260 RMSE_TEST:5.922 PCC_TEST:0.740
[0.524175  0.5131694 1.1399081 ... 0.        0.6992044 1.0388411]
EPOCH:55 Loss:0.037 RMSE:5.908 PCC:0.963 LOSS_VAL:0.250 RMSE_VAL:6.061 PCC_VAL:0.750 LOSS_TEST:0.262 RMSE_TEST:5.966 P

[0.55437076 0.5417467  0.9393907  ... 0.2939273  0.7028112  0.87790906]
EPOCH:76 Loss:0.011 RMSE:5.935 PCC:0.989 LOSS_VAL:0.246 RMSE_VAL:6.078 PCC_VAL:0.754 LOSS_TEST:0.244 RMSE_TEST:5.977 PCC_TEST:0.756
[0.48994654 0.58430773 1.0015132  ... 0.2687644  0.70114124 0.9086908 ]
EPOCH:77 Loss:0.009 RMSE:5.929 PCC:0.991 LOSS_VAL:0.236 RMSE_VAL:6.078 PCC_VAL:0.764 LOSS_TEST:0.238 RMSE_TEST:5.975 PCC_TEST:0.762
[0.5315353  0.57889974 1.0079024  ... 0.260469   0.74340725 0.89892733]
EPOCH:78 Loss:0.009 RMSE:5.934 PCC:0.991 LOSS_VAL:0.237 RMSE_VAL:6.083 PCC_VAL:0.763 LOSS_TEST:0.242 RMSE_TEST:5.981 PCC_TEST:0.758
[0.53019094 0.5853218  0.97847414 ... 0.21438754 0.67518586 0.9425698 ]
EPOCH:79 Loss:0.008 RMSE:5.926 PCC:0.992 LOSS_VAL:0.232 RMSE_VAL:6.073 PCC_VAL:0.768 LOSS_TEST:0.240 RMSE_TEST:5.970 PCC_TEST:0.760
[0.5452083  0.5487875  0.99196565 ... 0.25338453 0.70505255 0.8879413 ]
EPOCH:80 Loss:0.009 RMSE:5.940 PCC:0.991 LOSS_VAL:0.238 RMSE_VAL:6.086 PCC_VAL:0.762 LOSS_TEST:0.249 RMSE_TEST:5

[0.62556106 0.6403287  1.0440594  ... 0.35417506 0.7378947  0.94710195]
EPOCH:101 Loss:0.009 RMSE:5.888 PCC:0.991 LOSS_VAL:0.236 RMSE_VAL:6.031 PCC_VAL:0.764 LOSS_TEST:0.253 RMSE_TEST:5.928 PCC_TEST:0.747
[0.6603547  0.6291518  1.0598204  ... 0.31965154 0.72951627 0.9652565 ]
EPOCH:102 Loss:0.009 RMSE:5.898 PCC:0.991 LOSS_VAL:0.233 RMSE_VAL:6.045 PCC_VAL:0.767 LOSS_TEST:0.240 RMSE_TEST:5.945 PCC_TEST:0.760
[0.54873997 0.62799823 1.0115107  ... 0.34035861 0.7333526  0.9134549 ]
EPOCH:103 Loss:0.008 RMSE:5.904 PCC:0.992 LOSS_VAL:0.237 RMSE_VAL:6.051 PCC_VAL:0.763 LOSS_TEST:0.245 RMSE_TEST:5.949 PCC_TEST:0.755
[0.60573196 0.5664406  1.0737566  ... 0.32346317 0.7520925  0.9927838 ]
EPOCH:104 Loss:0.009 RMSE:5.893 PCC:0.991 LOSS_VAL:0.236 RMSE_VAL:6.034 PCC_VAL:0.764 LOSS_TEST:0.248 RMSE_TEST:5.931 PCC_TEST:0.752
[0.5619448  0.61827624 1.0206158  ... 0.39033955 0.73644817 0.91154397]
EPOCH:105 Loss:0.011 RMSE:5.888 PCC:0.989 LOSS_VAL:0.232 RMSE_VAL:6.034 PCC_VAL:0.768 LOSS_TEST:0.251 RMSE_T

[0.5879173  0.6068156  0.95713705 ... 0.42545158 0.7411958  0.8654684 ]
EPOCH:126 Loss:0.006 RMSE:5.914 PCC:0.994 LOSS_VAL:0.230 RMSE_VAL:6.060 PCC_VAL:0.770 LOSS_TEST:0.242 RMSE_TEST:5.954 PCC_TEST:0.758
[0.6326573  0.6079557  0.93523526 ... 0.39649856 0.69156295 0.8598685 ]
EPOCH:127 Loss:0.011 RMSE:5.916 PCC:0.989 LOSS_VAL:0.233 RMSE_VAL:6.059 PCC_VAL:0.767 LOSS_TEST:0.249 RMSE_TEST:5.953 PCC_TEST:0.751
Get best model at epoch = 87.


In [31]:
test1 = [1,1,1,1]
test2= [1,1,1.1,1]
test3 = stats.pearsonr(test1,test2)
print(test3)
if(math.isnan(test3[0])):print("yesh")

(nan, nan)
yesh


In [32]:
ytrain

array([[5.49],
       [5.72],
       [8.99],
       ...,
       [3.77],
       [6.7 ],
       [8.25]])