In [1]:
%matplotlib widget
import sys
import os
import re
#sys.path = [p for p in sys.path if p.find('/opt/apps/software/') == -1]
from glob import glob
from IPython.display import display, HTML
from matplotlib import pyplot as plt

from utils.constants import UNIVARIATE_ARCHIVE_NAMES as ARCHIVE_NAMES
from utils.constants import UNIVARIATE_DATASET_NAMES as DATASET_NAMES
from utils.utils import read_all_datasets, transform_labels, create_directory, run_length_xps, generate_results_csv, plot_epochs_metric
from utils.data_loading import get_multiple_data_cf, predict, shifted_zscore_cf, zscore, print_metric, tf_rmse, tf_pmse_cf, rmse
import utils
from classifiers import inception

import numpy as np
import pandas as pd
import sklearn
import keras
# keras.backend.tensorflow_backend._get_available_gpus()

# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
# Expose only GPU 0 to this process.
# NOTE(review): this assignment runs after `import keras` above, and the log
# printed by the classifier cell lists only a CPU device -- confirm the setting
# actually takes effect, or move it ahead of the keras/TensorFlow imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def tf_pmse_DA(y_true, y_pred):
    """Keras metric wrapper: call `tf_pmse_cf` with ``idx=0``.

    Index 0 presumably selects the DA output (first entry of `names`) --
    confirm against `tf_pmse_cf`. The function name is what shows up in the
    compile log and is the key used in `custom_objects` when reloading a model.
    """
    return tf_pmse_cf(y_true, y_pred, idx=0)

def tf_pmse_5HT(y_true, y_pred):
    """Keras metric wrapper: call `tf_pmse_cf` with ``idx=1``.

    Index 1 presumably selects the 5HT output (second entry of `names`) --
    confirm against `tf_pmse_cf`. The function name is what shows up in the
    compile log and is the key used in `custom_objects` when reloading a model.
    """
    return tf_pmse_cf(y_true, y_pred, idx=1)

def tf_pmse_pH(y_true, y_pred):
    """Keras metric wrapper: call `tf_pmse_cf` with ``idx=2``.

    Index 2 presumably selects the pH output (third entry of `names`) --
    confirm against `tf_pmse_cf`. The function name is what shows up in the
    compile log and is the key used in `custom_objects` when reloading a model.
    """
    return tf_pmse_cf(y_true, y_pred, idx=2)

def tf_pmse_NE(y_true, y_pred):
    """Keras metric wrapper: call `tf_pmse_cf` with ``idx=3``.

    Index 3 presumably selects the NE output (fourth entry of `names`) --
    confirm against `tf_pmse_cf`. The function name is what shows up in the
    compile log and is the key used in `custom_objects` when reloading a model.
    """
    return tf_pmse_cf(y_true, y_pred, idx=3)



Instructions for updating:
non-resource variables are not supported in the long term


Using TensorFlow backend.


In [2]:
# ---- Experiment configuration ----
names = ['DA', '5HT', 'pH', 'NE']
speed = 'slow'
data_prefix = '/mnt/nfs/proj/in-vitro/Leonardo/cf_data'

probes = ['CF025', 'CF027', 'CF057', 'CF064', 'CF066', 'CF078', 'CF081', 'CF082']

# Probe that is left out, and the per-probe folder that receives the results.
hold_probe = probes[5]
output_directory = f'/mnt/nfs/proj/in-vitro/Leonardo/inception/results/cf/{hold_probe}/'

directory_missing = not os.path.exists(output_directory)
if directory_missing:
    os.makedirs(output_directory, exist_ok=True)

# Probe used for validation (alternative kept from the original cell: val_probe=None).
val_probe = probes[0]

for message in (f'Leaving out probe {hold_probe}',
                f'Validation probe {val_probe}',
                f'Loading data'):
    print(message, flush=True)

# ---- Normalisation ----
# Alternatives kept from the original cell:
#   normalize_data = minmax / revert_data = lambda x: minmax(x, inverse=True)
#   normalize_data = lambda x: x / revert_data = lambda x: x
normalize_data = shifted_zscore_cf


def revert_data(x):
    """Call `shifted_zscore_cf` on `x` with ``inverse=True``."""
    return shifted_zscore_cf(x, inverse=True)


# This is actually the number of records per UNIQUE CONCENTRATION per probe;
# -1 means all of them (alternative kept from the original cell: 1).
n_records_per_probe = -1

# ---- Load the train / validation / test arrays ----
x_train, y_train, x_val, y_val, x_test, y_test = get_multiple_data_cf(
    data_prefix,
    probes=probes,
    hold_probe=hold_probe,
    val_probe=val_probe,
    normalize_data=normalize_data,
    n_records_per_probe=n_records_per_probe,
)

print('Data loaded')

if x_train.ndim == 2:  # univariate input
    print('adding singleton')
    # Append a trailing axis of length 1 so the data is multivariate with one dimension.
    x_train, x_val, x_test = (
        arr.reshape(arr.shape[:2] + (1,)) for arr in (x_train, x_val, x_test)
    )

output_shape = y_train.shape[1]
input_shape = x_train.shape[1:]


Leaving out probe CF078
Validation probe CF025
Loading data
loading probe CF025
loading probe CF027
loading probe CF057
loading probe CF064
loading probe CF066
loading probe CF078
loading probe CF081
loading probe CF082
Shuffling training dataset
Data loaded
adding singleton


In [3]:
# Report the shape of each split array, followed by the output and input shapes.
split_arrays = (x_train, y_train, x_val, y_val, x_test, y_test)
print(*(arr.shape for arr in split_arrays), sep='\n')
print(output_shape, input_shape, sep='\n')

(369900, 999, 1)
(369900, 4)
(61650, 999, 1)
(61650, 4)
(61650, 999, 1)
(61650, 4)
4
(999, 1)


In [4]:

# classifier = inception.Regression_INCEPTION(output_directory, input_shape, output_shape, verbose=1, build=True, nb_epochs=10, metrics='CF')
# Build the Regression_INCEPTION wrapper with one metric per output and the
# function used to map normalised values back.
classifier = inception.Regression_INCEPTION(
    output_directory, input_shape, output_shape,
    verbose=1, build=True, nb_epochs=100,
    metrics=[tf_pmse_DA, tf_pmse_5HT, tf_pmse_pH, tf_pmse_NE],
    revert_data=revert_data,
)
# classifier = Regression_INCEPTION(output_directory, input_shape, output_shape, verbose=1, build=True, nb_epochs=100)

# Join the path instead of concatenating strings, so the lookup does not rely
# on output_directory ending with a path separator (the checkpoint-loading cell
# further down builds its path the same way).
model_path = os.path.join(classifier.output_directory, 'best_model.hdf5')
if os.path.isfile(model_path):
    print(f'Best model already fit: {model_path}')
    best_model = classifier.get_best_model()
else:
    # Leave best_model unset so the next cell knows it has to train.
    print('Model not fit yet')
    best_model = None

INFO:tensorflow:Using MirroredStrategy with devices ('/replica:0/task:0/device:CPU:0',)
starting model from scratch...
Compiling with Adam and metrics:  ['tf_pmse_DA', 'tf_pmse_5HT', 'tf_pmse_pH', 'tf_pmse_NE']
Best model already fit: /mnt/nfs/proj/in-vitro/Leonardo/inception/results/cf/CF078/best_model.hdf5


In [5]:
# Fit the model, unless a saved best model was already found by the previous cell.

if best_model is not None:
    # A checkpoint exists: only score it on the validation split.
    print('Model alread fit, computing prediction of validation data')
    metrics = classifier.predict(x_val, y_val, x_train, y_train, return_df_metrics=True)
else:
    # No checkpoint: train, then pick up the best model the classifier provides.
    print('Fitting new model...')
    metrics = classifier.fit(x_train, y_train, x_val, y_val, plot_test_acc=True)
    best_model = classifier.get_best_model()

metrics_html = metrics.to_html()
display(HTML(metrics_html))


Model alread fit, computing prediction of validation data


Unnamed: 0,rmse_DA,rmse_5HT,rmse_pH,rmse_NE,duration
0,466.477978,344.976229,0.094177,716.69803,0.0


In [6]:
# Hold CF082, validation CF025, all data, 100 epochs
# Epoch 1/100
# 369900/369900 [==============================] - 332s 897us/step - loss: 0.3839 - tf_pmse_DA: 488.6883 - tf_pmse_5HT: 418.8771 - tf_pmse_pH: 0.0728 - tf_pmse_NE: 545.8939 - val_loss: 0.9853 - val_tf_pmse_DA: 879.1813 - val_tf_pmse_5HT: 389.8340 - val_tf_pmse_pH: 0.1891 - val_tf_pmse_NE: 1166.5785
# Epoch 10/100
# 369900/369900 [==============================] - 324s 875us/step - loss: 0.0139 - tf_pmse_DA: 132.5645 - tf_pmse_5HT: 111.5507 - tf_pmse_pH: 0.0200 - tf_pmse_NE: 139.9987 - val_loss: 0.1967 - val_tf_pmse_DA: 340.3430 - val_tf_pmse_5HT: 337.9328 - val_tf_pmse_pH: 0.0613 - val_tf_pmse_NE: 453.8609
# Epoch 97/100
# 369900/369900 [==============================] - 323s 872us/step - loss: 0.0021 - tf_pmse_DA: 50.4458 - tf_pmse_5HT: 46.5694 - tf_pmse_pH: 0.0077 - tf_pmse_NE: 54.5813 - val_loss: 0.1685 - val_tf_pmse_DA: 356.7854 - val_tf_pmse_5HT: 305.9508 - val_tf_pmse_pH: 0.0534 - val_tf_pmse_NE: 400.9060
# Epoch 100/100
# 369900/369900 [==============================] - 322s 871us/step - loss: 0.0020 - tf_pmse_DA: 49.6475 - tf_pmse_5HT: 46.6628 - tf_pmse_pH: 0.0076 - tf_pmse_NE: 53.9088 - val_loss: 0.2012 - val_tf_pmse_DA: 375.4945 - val_tf_pmse_5HT: 356.6755 - val_tf_pmse_pH: 0.0558 - val_tf_pmse_NE: 441.8727
# predicting validation set... 
# 	rmse_DA 	rmse_5HT 	rmse_pH 	rmse_NE 	duration
# 0 	483.575696 	504.289784 	0.072682 	604.31947 	32320.917521
# Epoch 70/100
# 369900/369900 [==============================] - 321s 868us/step - loss: 0.0026 - tf_pmse_DA: 55.4761 - tf_pmse_5HT: 51.8563 - tf_pmse_pH: 0.0084 - tf_pmse_NE: 63.0837 - val_loss: 0.2494 - val_tf_pmse_DA: 403.2530 - val_tf_pmse_5HT: 277.9370 - val_tf_pmse_pH: 0.0658 - val_tf_pmse_NE: 549.2294
# Epoch 100/100
# 369900/369900 [==============================] - 321s 868us/step - loss: 0.0019 - tf_pmse_DA: 49.0351 - tf_pmse_5HT: 46.0001 - tf_pmse_pH: 0.0072 - tf_pmse_NE: 55.4475 - val_loss: 0.2707 - val_tf_pmse_DA: 410.6053 - val_tf_pmse_5HT: 278.3472 - val_tf_pmse_pH: 0.0673 - val_tf_pmse_NE: 557.2681
# rmse_DA 	rmse_5HT 	rmse_pH 	rmse_NE 	duration
# 0 	466.478029 	344.97594 	0.094177 	716.697981 	32189.755459

In [7]:
# Score the model on x_test/y_test and display the per-output RMSE table.
# This rebinds `metrics`, replacing the table produced by the fit/validation cell.
metrics = classifier.predict(x_test, y_test, x_train, y_train, return_df_metrics=True)
display(HTML(metrics.to_html()))

# Reference numbers copied from a different run (hold CF082, validation CF025, all data, 100 epochs):
# rmse_DA 	rmse_5HT 	rmse_pH 	rmse_NE 	duration
# 289.592247 	148.701677 	0.082032 	416.473678 	0.0

Unnamed: 0,rmse_DA,rmse_5HT,rmse_pH,rmse_NE,duration
0,372.568581,143.972704,0.06122,322.67593,0.0


In [8]:

# Choose which saved checkpoint to evaluate (swap the active line to compare).
# model_path = os.path.join(classifier.output_directory, 'best_model.hdf5')
model_path = os.path.join(classifier.output_directory, 'last_model.hdf5')
# model_path = os.path.join(classifier.output_directory, 'model_init.hdf5')

print(model_path)

# The custom metric functions are passed by name so Keras can resolve them
# while deserialising the model.
model = keras.models.load_model(model_path, custom_objects={"tf_pmse_DA": tf_pmse_DA, "tf_pmse_5HT": tf_pmse_5HT, "tf_pmse_pH": tf_pmse_pH, "tf_pmse_NE": tf_pmse_NE})

# Data to score (swap the active line to use the validation split instead).
xt, yt = x_test, y_test
# xt, yt = x_val, y_val

yp = model.predict(xt, batch_size=64)

# Apply revert_data to every row of the predictions and of the targets before
# computing the error.
yp = np.apply_along_axis(revert_data, axis=1, arr=yp)
yt = np.apply_along_axis(revert_data, axis=1, arr=yt)

rmse4 = rmse(yt, yp)

# One-row table with the same columns as the classifier's metrics table.
# The builtin `float` replaces the `np.float` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24 (it raises AttributeError there).
metric_columns = ['rmse_DA', 'rmse_5HT', 'rmse_pH', 'rmse_NE', 'duration']
metrics2 = pd.DataFrame(data=np.zeros((1, len(metric_columns)), dtype=float), index=[0], columns=metric_columns)
for position, column in enumerate(metric_columns[:-1]):
    metrics2[column] = rmse4[position]
metrics2['duration'] = 0.0  # nothing is timed in this cell

display(HTML(metrics2.to_html()))


/mnt/nfs/proj/in-vitro/Leonardo/inception/results/cf/CF078/last_model.hdf5


Unnamed: 0,rmse_DA,rmse_5HT,rmse_pH,rmse_NE,duration
0,249.979295,117.124644,0.076045,336.884448,0.0


In [9]:
# def plot_concentrations(y):
#     fig, axs = plt.subplots(2, 2, sharey=False, tight_layout=True)
#     for ip in range(0,4):
#         axs[np.unravel_index(ip, axs.shape)].hist(y[:,ip])
# plot_concentrations(y_test)
# plot_concentrations(y_pred)

def plot_compare_test_pred(yt, yp, labels=None):
    """Draw histograms of target and predicted values, one column per output.

    The top row shows each column of `yt`, the bottom row the matching column
    of `yp`. The number of panels follows ``yt.shape[1]`` instead of being
    fixed at four.

    Parameters
    ----------
    yt : 2-D array
        Target values, one column per output.
    yp : 2-D array
        Predicted values, with the same column layout as `yt`.
    labels : sequence of str, optional
        One title per output column. Defaults to ``'output 0'``, ``'output 1'``, ...
    """
    n_outputs = yt.shape[1]
    if labels is None:
        labels = [f'output {col}' for col in range(n_outputs)]

    # squeeze=False keeps `axs` two-dimensional even for a single output column;
    # 3 inches per column reproduces the original 12-inch width for four outputs.
    fig, axs = plt.subplots(2, n_outputs, sharey=False, squeeze=False,
                            tight_layout=True, figsize=(3 * n_outputs, 5))
    for col in range(n_outputs):
        axs[0, col].hist(yt[:, col])
        axs[0, col].set_title(f'{labels[col]} (true)')
        axs[1, col].hist(yp[:, col])
        axs[1, col].set_title(f'{labels[col]} (predicted)')

plot_compare_test_pred(yt, yp, labels=names)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
# best_model.summary()

In [11]:
# Use the best checkpoint for the class-activation analysis. This rebinds
# `model`, which an earlier cell set to the checkpoint loaded from model_path.
model = best_model

# Pick 1000 validation samples through a random permutation of the row indices.
# NOTE(review): no random seed is set, so the selection -- and every number and
# figure derived from it -- changes from run to run.
idxs = np.random.permutation(x_val.shape[0])
x_cam = x_val[idxs[:1000],:,:]
y_cam = y_val[idxs[:1000], :]

w_k_c = model.layers[-1].get_weights()[0] # first weight array of the last layer: one row per filter k, one column per output c

new_input_layer = model.inputs # same input as the original model

# NOTE(review): "conv1d_31" is a hard-coded layer name; presumably it depends on
# how Keras auto-numbered the layers when this model was built -- confirm with
# model.summary() before relying on it in a fresh session.
new_outpu_layer = [model.get_layer("conv1d_31").output, model.layers[-1].output] # two outputs: the conv1d_31 layer's output and the model's own last-layer output
# new_outpu_layer = [model.get_layer("activation_8").output, model.layers[-1].output] # alternative: tap the "activation_8" layer instead

# Backend function that maps the model inputs to both requested outputs in one call.
new_function = keras.backend.function(new_input_layer,new_outpu_layer)

new_feed_forward = new_function

[conv_out, y_pred] = new_feed_forward((x_cam,))

# Metrics on the sampled subset only; the duration argument is passed as 0.0.
# This rebinds `metrics` again.
metrics = classifier._calculate_metrics(y_cam, y_pred, 0.0)
display(HTML(metrics.to_html()))
print(conv_out.shape)

Unnamed: 0,rmse_DA,rmse_5HT,rmse_pH,rmse_NE,duration
0,445.641809,341.767141,0.095284,694.319579,0.0


(1000, 999, 128)


In [12]:
w_k_c.shape

(128, 4)

In [15]:
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm

# print("original_label: "+str(encoder.inverse_transform(np.argmax(original_binary_class))))
# print("original_shape: "+str(time_series_original.shape))
# print("predicted_label:"+str(encoder.inverse_transform(np.argmax(predicted))))
# print("predicted_shape:"+str(conv_out.shape))

print(w_k_c.shape)
print(conv_out.shape)

# Feature maps of the first CAM sample only: (series positions, filters).
conv_out_0 = conv_out[0,:,:]
# conv_out_0 = np.squeeze(np.mean(conv_out, axis=0))

# The trace drawn in every panel is the mean of all x_cam inputs. It does not
# depend on the output index, so it and its line segments are built once.
# NOTE(review): the colouring below comes from sample 0 alone while the trace
# is the mean over the whole x_cam batch -- confirm this pairing is intended.
y = np.squeeze(np.mean(x_cam, axis=0))
x = np.arange(y.shape[0])

# Points as an N x 1 x 2 array, then consecutive points paired into
# (N - 1) x 2 x 2 segments, the layout LineCollection expects.
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)

fig, axs = plt.subplots(2, 2, sharey=False, tight_layout=True, figsize=(12, 5))
for j in range(y_val.shape[1]):

    axsidx = np.unravel_index(j, axs.shape)

    # Class activation sequence for output j: at every position, the filter
    # activations weighted by that output's last-layer weights. The builtin
    # `float` replaces the `np.float` alias removed in NumPy 1.24.
    cas = np.dot(conv_out_0.astype(float), w_k_c[:, j])

    # Rescale to integer percentages of the activation range. A flat activation
    # has a zero range, so the division is skipped rather than producing NaNs.
    cas = cas - cas.min()
    peak = cas.max()
    if peak > 0:
        cas = cas / peak
    cas = (cas * 100).astype(int)

    # One colour value per segment (the value at the segment's first point);
    # the colour scale still spans the full range of `cas`.
    dydx = cas[:-1]
    norm = plt.Normalize(cas.min(), cas.max())
    lc = LineCollection(segments, cmap='jet', norm=norm)
    lc.set_array(dydx)
    lc.set_linewidth(4)
    line = axs[axsidx].add_collection(lc)
    fig.colorbar(line, ax=axs[axsidx])

    # add_collection does not rescale the axes, so set the limits explicitly.
    axs[axsidx].set_xlim(x.min(), x.max())
    axs[axsidx].set_ylim(y.min(), y.max())
    axs[axsidx].set_title(names[j])

plt.show()



(128, 4)
(1000, 999, 128)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm

# print("original_label: "+str(encoder.inverse_transform(np.argmax(original_binary_class))))
# print("original_shape: "+str(time_series_original.shape))
# print("predicted_label:"+str(encoder.inverse_transform(np.argmax(predicted))))
# print("predicted_shape:"+str(conv_out.shape))

print(w_k_c.shape)
print(conv_out.shape)

# Feature maps of the first CAM sample only: (series positions, filters).
conv_out_0 = conv_out[0,:,:]

# The trace drawn in every panel is the mean of all x_cam inputs. It does not
# depend on the output index, so it and its line segments are built once.
# NOTE(review): the colouring below comes from sample 0 alone while the trace
# is the mean over the whole x_cam batch -- confirm this pairing is intended.
y = np.squeeze(np.mean(x_cam, axis=0))
x = np.arange(y.shape[0])

# Points as an N x 1 x 2 array, then consecutive points paired into
# (N - 1) x 2 x 2 segments, the layout LineCollection expects.
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)

fig, axs = plt.subplots(2, 2, sharey=False, tight_layout=True, figsize=(12, 5))
for j in range(y_val.shape[1]):

    axsidx = np.unravel_index(j, axs.shape)

    # Class activation sequence for output j: at every position, the filter
    # activations weighted by that output's last-layer weights. The builtin
    # `float` replaces the `np.float` alias removed in NumPy 1.24.
    cas = np.dot(conv_out_0.astype(float), w_k_c[:, j])

    # Rescale to integer percentages of the activation range. A flat activation
    # has a zero range, so the division is skipped rather than producing NaNs.
    cas = cas - cas.min()
    peak = cas.max()
    if peak > 0:
        cas = cas / peak
    cas = (cas * 100).astype(int)

    # One colour value per segment (the value at the segment's first point);
    # the colour scale still spans the full range of `cas`.
    dydx = cas[:-1]
    norm = plt.Normalize(cas.min(), cas.max())
    lc = LineCollection(segments, cmap='CMRmap', norm=norm)
    lc.set_array(dydx)
    lc.set_linewidth(4)
    line = axs[axsidx].add_collection(lc)
    fig.colorbar(line, ax=axs[axsidx])

    # add_collection does not rescale the axes, so set the limits explicitly.
    axs[axsidx].set_xlim(x.min(), x.max())
    axs[axsidx].set_ylim(y.min(), y.max())
    axs[axsidx].set_title(names[j])

plt.show()



(128, 4)
(1000, 999, 128)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …