In [None]:
import pandas as pd
import numpy as np
import math
from keras import backend as K
from keras.models import Sequential
from keras.models import load_model
from keras.layers import LSTM,Dense
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import os 
import random
import tensorflow as tf

In [None]:
# Load the reversal data set and stitch each track's 'right' segment onto the
# end of its 'left' segment so that every track becomes one continuous series.
dfr = pd.read_csv('data/cncc_reverse.csv')
df_left = dfr.loc[dfr['set'] == 'left']
df_right = dfr.loc[dfr['set'] == 'right']

# Loop-invariant quantities, computed once instead of once per track.
is_right = dfr['set'] == 'right'
last_left_slice = df_left['slice'].max()
last_slice = int(dfr['slice'].max())
n_tracks = int(df_right['track'].max())

for i in range(1, n_tracks + 1):
    print('track: {}'.format(i))

    # Position of this track in the last slice of the 'left' segment.
    left_end = (df_left['track'] == i) & (df_left['slice'] == last_left_slice)
    last_x = df_left.loc[left_end, 'x'].to_numpy()[0]
    last_y = df_left.loc[left_end, 'y'].to_numpy()[0]

    # x2/y2 hold the 'right' segment translated so that its slice 1 sits
    # exactly where the 'left' segment ended.
    right_track = is_right & (dfr['track'] == i)
    prev = right_track & (dfr['slice'] == 1)
    dfr.loc[prev, 'x2'] = last_x
    dfr.loc[prev, 'y2'] = last_y

    # Carry the slice-to-slice displacement of the original coordinates over
    # to the translated ones, one slice at a time.
    # NOTE(review): every slice 1..last_slice must exist for every 'right'
    # track, otherwise the `[0]` lookups raise IndexError.
    for j in range(2, last_slice + 1):
        curr = right_track & (dfr['slice'] == j)
        deltax = dfr.loc[curr, 'x'].to_numpy()[0] - dfr.loc[prev, 'x'].to_numpy()[0]
        deltay = dfr.loc[curr, 'y'].to_numpy()[0] - dfr.loc[prev, 'y'].to_numpy()[0]
        prevx = dfr.loc[prev, 'x2'].to_numpy()[0]
        prevy = dfr.loc[prev, 'y2'].to_numpy()[0]

        dfr.loc[curr, 'x2'] = prevx + deltax
        dfr.loc[curr, 'y2'] = prevy + deltay
        prev = curr

# x2 divided by the distance of (x2, y2) from the origin.
# Rows without x2/y2 (the 'left' rows) come out as NaN here.
dfr['dir2'] = dfr['x2']/np.sqrt(dfr['x2']**2 + dfr['y2']**2)

# Take an explicit copy of the filtered frame: the assignments below would
# otherwise be made on a slice of the original frame and raise
# SettingWithCopyWarning.
# NOTE(review): 25 / 24 presumably mean "each segment has 25 slices, drop the
# last one and renumber the right segment to 25..48" -- confirm with the data.
dfr = dfr[dfr['slice'] != 25].copy()
right = dfr['set'] == 'right'
dfr.loc[right, 'x'] = dfr.loc[right, 'x2']
dfr.loc[right, 'y'] = dfr.loc[right, 'y2']
dfr.loc[right, 'cum_dir'] = dfr.loc[right, 'dir2']
dfr.loc[right, 'slice'] = dfr.loc[right, 'slice'] + 24
# The 'right' segment gets the opposite field sign; 'volt' keeps that sign
# (scaled by 1/1000) while 'ef' is reduced to its magnitude afterwards.
dfr.loc[right, 'ef'] = -dfr.loc[right, 'ef']
dfr['volt'] = dfr['ef'] / 1000
dfr['ef'] = dfr['ef'].abs()

df_right = dfr.loc[dfr['set'] == 'right']
df_right.head(55)

In [None]:
# Inspect every row of track 1 after the left/right segments were stitched.
dfr.loc[dfr['track'] == 1]

In [None]:
#creates dataset to use for training/testing - updated scaling
def create_dataset(df, lookback=20, in_cols=['volt', 'cum_dir'], out_cols='cum_dir', tracks=(1,51)):
    """Build sliding-window samples for sequence-to-one prediction.

    For every track id in ``range(tracks[0], tracks[1])`` the rows of that
    track are taken in the order they appear in ``df``. Each run of
    ``lookback`` consecutive rows becomes one input window, and the row
    immediately after the window supplies the target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``"track"`` column plus the columns named in
        ``in_cols`` and ``out_cols``.
    lookback : int
        Number of consecutive rows per input window.
    in_cols : list of str
        Feature columns of the input windows. The default list is never
        mutated.
    out_cols : str or list of str
        Target column(s). They do not have to be part of ``in_cols``.
    tracks : tuple of (int, int)
        Half-open range ``[start, stop)`` of track ids to include.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Inputs of shape ``(n_samples, lookback, len(in_cols))`` and targets
        of shape ``(n_samples, 1)`` when ``out_cols`` is a single column
        name. Tracks with ``lookback`` rows or fewer contribute no samples;
        if no track contributes, both arrays are empty with shape ``(0,)``.
    """
    windows, targets = [], []
    for track in range(tracks[0], tracks[1]):
        cell = df.loc[df["track"] == track] #all rows of data pertaining to this cell
        # Select features and targets independently from the full track
        # frame, so that out_cols may name a column outside in_cols.
        features = cell[in_cols].to_numpy()
        labels = cell[out_cols].to_numpy()
        for start in range(len(cell) - lookback):
            stop = start + lookback
            windows.append(features[start:stop])
            # One-row slice (not a scalar) keeps the trailing axis of length 1.
            targets.append(labels[stop:stop + 1])

    return np.array(windows), np.array(targets)

In [None]:
# Split by track id: tracks 1-10 train, 11-20 validate, 21-50 test.
(trainX, trainY), (valX, valY), (testX, testY) = (
    create_dataset(dfr, tracks=span)
    for span in ((1, 11), (11, 21), (21, 51))
)

In [None]:
# Train 50 LSTM models from scratch on the reversal training split, saving
# each one to disk and reporting its RMSE on all three splits.
models = [] #list of models

# Make sure the output directory exists before any training starts, so a
# missing folder cannot make model.save() fail after a full training run.
model_dir = "models/reversal_lstm"
os.makedirs(model_dir, exist_ok=True)

for i in range(50):
    #build the model
    print('Training model number {}'.format(i))
    model = Sequential()
    # Window length and feature count are taken from the training data so
    # the network always matches what create_dataset produced.
    model.add(LSTM(80, input_shape=trainX.shape[1:]))
    # Single output squashed by tanh, i.e. predictions lie in (-1, 1).
    model.add(Dense(1, activation='tanh'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    history = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=25, batch_size=1, verbose=1)
    models.append(model)

    model.save("{}/model{}.h5".format(model_dir, i))
    print("Saved model {} to disk".format(i))

    #Predict on training, validation, and test sets
    trainPredict = model.predict(trainX)
    valPredict = model.predict(valX)
    testPredict = model.predict(testX)

    #Calculate RMSEs
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
    print('Training RMSE: {}'.format(trainScore))
    valScore = math.sqrt(mean_squared_error(valY, valPredict))
    print('Validation RMSE: {}'.format(valScore))
    testScore = math.sqrt(mean_squared_error(testY, testPredict))
    print('Testing RMSE: {}'.format(testScore))

In [None]:
# print results to file
# For every trained model, predict cum_dir for each slice that has 20
# preceding slices in its track, store the prediction and its error as new
# columns of dfr, then write the whole frame to CSV.

# Remove prediction columns left behind by an earlier run or another cell so
# the CSV only contains columns produced by the models handled here.
dfr = dfr.drop(columns=[c for c in dfr.columns if str(c).startswith(('pred_dir', 'pred_error'))])

maxtrack = int(dfr['track'].max())
for i, model in enumerate(models):
    print('Writing results for model {}'.format(i))
    pred_col = 'pred_dir{}'.format(i)
    for track in range(1, maxtrack+1):
        cell = dfr.loc[dfr['track']==track]
        if len(cell)==0:
            continue
        minslice = int(cell['slice'].min())
        maxslice = int(cell['slice'].max())
        # First slice with a full 20-slice history, up to and including the
        # track's last slice (also correct when the track does not start at 1).
        for sl in range(minslice+20, maxslice+1):
            # The 20 slices immediately before sl: sl-20 .. sl-1.
            x = cell.loc[(cell['slice']>sl-21) & (cell['slice']<sl), ['volt','cum_dir']].to_numpy()
            x = x.reshape(1, 20, 2)
            # predict() returns a size-1 array; store it as a plain scalar.
            dfr.loc[(dfr['track']==track) & (dfr['slice']==sl), pred_col] = model.predict(x).item()

    dfr['pred_error{}'.format(i)] = dfr[pred_col] - dfr['cum_dir']
dfr.to_csv('data/reversal_predictions.csv', index=False)
dfr.head(40)

In [None]:
# Transfer learning: fine-tune every model stored in models/cncc_lstm on the
# reversal training split, save it, report RMSEs and write per-slice
# predictions to CSV.

# Create the output directory up front so model.save() cannot fail on a
# missing folder after a full fine-tuning run.
os.makedirs('models/reversal_lstm_transfer', exist_ok=True)

# Remove prediction columns left behind by an earlier run or another cell so
# the CSV only contains columns produced by the models handled here.
dfr = dfr.drop(columns=[c for c in dfr.columns if str(c).startswith(('pred_dir', 'pred_error'))])

maxtrack = int(dfr['track'].max())

# os.listdir returns entries in arbitrary order; sort them so that model index
# i maps to the same file on every run, and skip hidden entries (e.g.
# .ipynb_checkpoints) that are not models.
model_files = sorted(name for name in os.listdir('models/cncc_lstm') if not name.startswith('.'))

for i, filename in enumerate(model_files):
    print('Loading models/cncc_lstm/{}'.format(filename))

    # now do transfer learning using cncc
    model = load_model('models/cncc_lstm/{}'.format(filename))
    model.compile(loss='mean_squared_error', optimizer='adam')
    history = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=1, verbose=1)
    model.save("models/reversal_lstm_transfer/model{}.h5".format(i))
    print("Saved model{}.h5 to disk".format(i))

    #Predict on training, validation, and test sets
    trainPredict = model.predict(trainX)
    valPredict = model.predict(valX)
    testPredict = model.predict(testX)

    #Calculate RMSEs
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
    print('Training RMSE: {}'.format(trainScore))
    valScore = math.sqrt(mean_squared_error(valY, valPredict))
    print('Validation RMSE: {}'.format(valScore))
    testScore = math.sqrt(mean_squared_error(testY, testPredict))
    print('Testing RMSE: {}'.format(testScore))

    print('Writing results for model {}'.format(i))
    pred_col = 'pred_dir{}'.format(i)
    for track in range(1, maxtrack+1):
        cell = dfr.loc[dfr['track']==track]
        if len(cell)==0:
            continue
        minslice = int(cell['slice'].min())
        maxslice = int(cell['slice'].max())
        # First slice with a full 20-slice history, up to and including the
        # track's last slice (also correct when the track does not start at 1).
        for sl in range(minslice+20, maxslice+1):
            # The 20 slices immediately before sl: sl-20 .. sl-1.
            x = cell.loc[(cell['slice']>sl-21) & (cell['slice']<sl), ['volt','cum_dir']].to_numpy()
            x = x.reshape(1, 20, 2)
            # predict() returns a size-1 array; store it as a plain scalar.
            dfr.loc[(dfr['track']==track) & (dfr['slice']==sl), pred_col] = model.predict(x).item()

    dfr['pred_error{}'.format(i)] = dfr[pred_col] - dfr['cum_dir']
dfr.to_csv('data/reversal_transfer_predictions.csv', index=False)
dfr.head(40)

In [None]:
# Score the models stored in models/cncc_lstm on the reversal data without
# any further training, and write their per-slice predictions to CSV.

# Remove prediction columns left behind by an earlier run or another cell so
# the CSV only contains columns produced by the models handled here.
dfr = dfr.drop(columns=[c for c in dfr.columns if str(c).startswith(('pred_dir', 'pred_error'))])

maxtrack = int(dfr['track'].max())

# os.listdir returns entries in arbitrary order; sort them so that model index
# i maps to the same file on every run, and skip hidden entries (e.g.
# .ipynb_checkpoints) that are not models.
model_files = sorted(name for name in os.listdir('models/cncc_lstm') if not name.startswith('.'))

for i, filename in enumerate(model_files):
    print('Loading models/cncc_lstm/{}'.format(filename))
    # Load this file's model explicitly; without it every iteration would
    # score whatever `model` object an earlier cell left in the kernel.
    model = load_model('models/cncc_lstm/{}'.format(filename))

    #Predict on training, validation, and test sets
    trainPredict = model.predict(trainX)
    valPredict = model.predict(valX)
    testPredict = model.predict(testX)

    #Calculate RMSEs
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
    print('Training RMSE: {}'.format(trainScore))
    valScore = math.sqrt(mean_squared_error(valY, valPredict))
    print('Validation RMSE: {}'.format(valScore))
    testScore = math.sqrt(mean_squared_error(testY, testPredict))
    print('Testing RMSE: {}'.format(testScore))

    print('Writing results for model {}'.format(i))
    pred_col = 'pred_dir{}'.format(i)
    for track in range(1, maxtrack+1):
        cell = dfr.loc[dfr['track']==track]
        if len(cell)==0:
            continue
        minslice = int(cell['slice'].min())
        maxslice = int(cell['slice'].max())
        # First slice with a full 20-slice history, up to and including the
        # track's last slice (also correct when the track does not start at 1).
        for sl in range(minslice+20, maxslice+1):
            # The 20 slices immediately before sl: sl-20 .. sl-1.
            x = cell.loc[(cell['slice']>sl-21) & (cell['slice']<sl), ['volt','cum_dir']].to_numpy()
            x = x.reshape(1, 20, 2)
            # predict() returns a size-1 array; store it as a plain scalar.
            dfr.loc[(dfr['track']==track) & (dfr['slice']==sl), pred_col] = model.predict(x).item()

    dfr['pred_error{}'.format(i)] = dfr[pred_col] - dfr['cum_dir']
dfr.to_csv('data/cncc_lstm_reversal_predictions.csv', index=False)
dfr.head(40)