In [None]:
import pandas as pd
import numpy as np
import math
from keras import backend as K
from keras.models import Sequential
from keras.models import load_model
from keras.layers import LSTM,Dense
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import os 
import random
import tensorflow as tf

In [None]:
# Load the raw table and add 'volt', which is the 'ef' column divided by 1000.
df = pd.read_csv('data/cncc.csv')
df['volt'] = df['ef'] / 1000

# Split on the 'set' column: rows with set == 2 go to `df2`, rows with
# set == 1 stay in `df`. Both masks are evaluated on the full frame because
# the right-hand side is built before either name is rebound.
df2, df = df[df['set'] == 2], df[df['set'] == 1]
df.head()

In [None]:
#creates sliding-window inputs/targets for training/testing
def create_dataset(df, lookback=20, in_cols=['volt', 'cum_dir'], out_cols='cum_dir', efs=[0,15,30,50,75,100,200], tracks=(1,51)):
    """Build sliding-window samples from every (ef, track) group of ``df``.

    For each ``ef`` in ``efs`` and each ``track`` in
    ``range(tracks[0], tracks[1])`` the matching rows of ``df`` are taken in
    their existing order. Every run of ``lookback`` consecutive rows (columns
    ``in_cols``) becomes one input sample, and the row right after the run
    (columns ``out_cols``) becomes its target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'track' and 'ef' plus ``in_cols``/``out_cols``.
    lookback : int
        Number of consecutive rows per input window.
    in_cols : list of str (or a single column name)
        Columns used as model input. The default lists are never mutated.
    out_cols : str or list of str
        Column(s) used as target. Does not need to be part of ``in_cols``.
    efs : iterable
        Values of the 'ef' column to include.
    tracks : (int, int)
        Half-open range of 'track' values to include.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Inputs stacked along axis 0 (one ``lookback``-row window each) and the
        matching targets (one row each). Both are empty arrays when no group
        has more than ``lookback + 1`` rows.
    """
    inputs, targets = [], []
    for ef in efs:
        for track in range(tracks[0], tracks[1]):
            cell = df.loc[(df["track"] == track) & (df["ef"] == ef)] #all rows of data pertaining to this cell
            # Take inputs and targets from the same unreduced group so that
            # out_cols may name a column that is not among in_cols.
            in_values = cell[in_cols].to_numpy()
            out_values = cell[out_cols].to_numpy()
            # NOTE(review): the "- 1" means the last possible window of each
            # group is not emitted; kept so the number of samples is unchanged.
            for i in range(len(cell) - lookback - 1):
                inputs.append(in_values[i:i + lookback])
                targets.append(out_values[i + lookback:i + lookback + 1])

    return np.array(inputs), np.array(targets)

In [None]:
# Build the train / validation / test splits for the run whose `efs` list
# leaves out 30: tracks 1-40 and 41-50 of `df`, and tracks 1-50 of `df2`.
(trainX, trainY), (valX, valY), (testX, testY) = [
    create_dataset(frame, tracks=span, efs=[0,15,50,75,100,200])
    for frame, span in [(df, (1,41)), (df, (41,51)), (df2, (1,51))]
]

In [None]:
#trains one model per entry of `numbers` and finds the one closest to the ensemble mean
def typical_model(trainX,trainY,valX,valY,testX,testY,numbers,epochs=100,batch_size=1):
    """Train several identical LSTM models and locate the "most average" one.

    One ``LSTM(80) -> Dense(1, tanh)`` model is built and fitted per element
    of ``numbers``. Each model's predictions on the training, validation and
    test inputs are flattened into a single vector; the model whose vector has
    the smallest RMSE to the element-wise mean of all vectors is reported.

    Parameters
    ----------
    trainX, trainY, valX, valY, testX, testY
        Inputs/targets for the three splits. The LSTM input shape is taken
        from ``trainX`` (everything after the sample axis).
    numbers : iterable
        One model is trained per element; the element itself is only printed.
        NOTE(review): the values are not used to seed any random generator,
        although the original header comment said they "initialize the
        models" - confirm whether seeding was intended.
    epochs, batch_size : int
        Forwarded to ``model.fit``.

    Returns
    -------
    (list, int)
        All trained models, and the index (into that list) of the model whose
        predictions are closest to the ensemble mean.
    """
    models = [] #list of models
    predictions = [] #list of prediction vectors

    for i in numbers:
        #build the model
        print('Training model number {}'.format(i))
        model = Sequential()
        model.add(LSTM(80, input_shape=tuple(np.shape(trainX)[1:])))
        model.add(Dense(1, activation='tanh'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(trainX, trainY, validation_data=(valX, valY), epochs=epochs, batch_size=batch_size, verbose=1)
        models.append(model)

        #Predict on training, validation, and test sets (once; reused below)
        trainPredict = model.predict(trainX)
        valPredict = model.predict(valX)
        testPredict = model.predict(testX)

        #Calculate RMSEs
        trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
        print('Training RMSE: {}'.format(trainScore))
        valScore = math.sqrt(mean_squared_error(valY, valPredict))
        print('Validation RMSE: {}'.format(valScore))
        testScore = math.sqrt(mean_squared_error(testY, testPredict))
        print('Testing RMSE: {}'.format(testScore))

        #flatten all three prediction arrays into one vector for this model
        preds = np.concatenate([trainPredict,valPredict,testPredict], axis=None)
        predictions.append(preds)

    #find the average of all prediction vectors
    mean_pred = np.mean(predictions,axis=0)

    #find the prediction vector that is closest to the mean
    closest = 0 #index of the prediction vector that is closest to the mean
    dist = math.inf #no finite sentinel: any real distance must be able to win
    for i, preds in enumerate(predictions):
        thisdist = math.sqrt(mean_squared_error(preds, mean_pred))

        print('Model number: {}, distance from mean: {}'.format(i,thisdist))

        if thisdist < dist:
            dist = thisdist
            closest = i

    #return all models and the index of the "most average" one
    print('Returning model {}, whose distance is {}'.format(closest,dist))
    return models, closest

In [None]:
# Create the output directory up front so a missing folder cannot abort the
# run after training has finished ("models/" is created as a parent).
os.makedirs("models/cncc_no30", exist_ok=True)

# Train the ensemble on the current train/val/test arrays and save the member
# whose predictions are closest to the ensemble mean.
models, closest = typical_model(trainX,trainY,valX,valY,testX,testY,range(50))
model = models[closest]
model.save("models/cncc_no30.h5")
print("Saved model to disk")

# Predictions are written against the full table (both values of 'set').
df = pd.read_csv('data/cncc.csv')
df['volt'] = df['ef'] / 1000

# Collect every input window once; the windows do not depend on the model.
# For slice `sl` the window holds the rows with slice sl-20 .. sl-1.
windows = []      # one (20, 2) array of ['volt', 'cum_dir'] per predicted row
target_rows = []  # index label in df of the row each window predicts
for s in [1,2]:
    for ef in [0,15,30,50,75,100,200]:
        for track in range(1,51):
            cell = df.loc[(df['ef']==ef) & (df['track']==track) & (df['set']==s)]
            for sl in range(21,38):
                x = cell.loc[(cell['slice']>sl-21) & (cell['slice']<sl), ['volt','cum_dir']].to_numpy()
                x = x.reshape(20, 2) # raises if the window does not hold exactly 20 rows
                for row in cell.index[cell['slice']==sl]:
                    windows.append(x)
                    target_rows.append(row)
windows = np.array(windows)

# One batched predict per model instead of one call per row.
# The loop variable is not called `model`, so `model` stays models[closest].
for i, member in enumerate(models):
    member.save("models/cncc_no30/model{}.h5".format(i))
    print("Saved model {} to disk".format(i))

    pred_col = 'pred_dir{}'.format(i)
    df[pred_col] = np.nan # rows without a prediction stay NaN
    df.loc[target_rows, pred_col] = member.predict(windows).ravel()
    df['pred_error{}'.format(i)] = df[pred_col] - df['cum_dir']

# Drop the saved-index column if the CSV has one; do not fail if it is absent.
df = df.drop(columns='Unnamed: 0', errors='ignore')
df.to_csv('data/cncc_no30.csv', index=False)
df.head(40)

In [None]:
# Regenerates the per-model predictions from the models currently held in
# `models`, without retraining.
# NOTE(review): this cell writes the same model files and the same CSV as the
# prediction pass of the training cell for this run - confirm it is still needed.
os.makedirs("models/cncc_no30", exist_ok=True)

df = pd.read_csv('data/cncc.csv')
df['volt'] = df['ef'] / 1000

# Collect every input window once; the windows do not depend on the model.
# For slice `sl` the window holds the rows with slice sl-20 .. sl-1.
windows = []      # one (20, 2) array of ['volt', 'cum_dir'] per predicted row
target_rows = []  # index label in df of the row each window predicts
for s in [1,2]:
    for ef in [0,15,30,50,75,100,200]:
        for track in range(1,51):
            cell = df.loc[(df['ef']==ef) & (df['track']==track) & (df['set']==s)]
            for sl in range(21,38):
                x = cell.loc[(cell['slice']>sl-21) & (cell['slice']<sl), ['volt','cum_dir']].to_numpy()
                x = x.reshape(20, 2) # raises if the window does not hold exactly 20 rows
                for row in cell.index[cell['slice']==sl]:
                    windows.append(x)
                    target_rows.append(row)
windows = np.array(windows)

# One batched predict per model instead of one call per row. The loop variable
# is deliberately not called `model`, so an existing `model` is left untouched.
for i, member in enumerate(models):
    member.save("models/cncc_no30/model{}.h5".format(i))
    print("Saved model {} to disk".format(i))

    pred_col = 'pred_dir{}'.format(i)
    df[pred_col] = np.nan # rows without a prediction stay NaN
    df.loc[target_rows, pred_col] = member.predict(windows).ravel()
    df['pred_error{}'.format(i)] = df[pred_col] - df['cum_dir']

# Drop the saved-index column if the CSV has one; do not fail if it is absent.
df = df.drop(columns='Unnamed: 0', errors='ignore')
df.to_csv('data/cncc_no30.csv', index=False)
df.head(40)

In [None]:
# Reload and split by 'set' before building the windows. The cell above leaves
# the unfiltered table (both sets) in `df`, and create_dataset only filters on
# 'track' and 'ef', so without this step rows of set 2 would be mixed into the
# training and validation windows.
df = pd.read_csv('data/cncc.csv')
df['volt'] = df['ef'] / 1000
df2 = df.loc[df['set']==2]
df = df.loc[df['set']==1]

# Splits for the run whose `efs` list leaves out 200.
trainX, trainY = create_dataset(df,tracks=(1,41), efs=[0,15,30,50,75,100])
valX, valY = create_dataset(df, tracks=(41,51), efs=[0,15,30,50,75,100])
testX, testY = create_dataset(df2, tracks=(1,51), efs=[0,15,30,50,75,100])

In [None]:
# Create the output directory up front so a missing folder cannot abort the
# run after training has finished ("models/" is created as a parent).
os.makedirs("models/cncc_no200", exist_ok=True)

# Train the ensemble on the current train/val/test arrays and save the member
# whose predictions are closest to the ensemble mean.
models, closest = typical_model(trainX,trainY,valX,valY,testX,testY,range(50))
model = models[closest]
model.save("models/cncc_no200.h5")
print("Saved model to disk")

# Predictions are written against the full table (both values of 'set').
df = pd.read_csv('data/cncc.csv')
df['volt'] = df['ef'] / 1000

# Collect every input window once; the windows do not depend on the model.
# For slice `sl` the window holds the rows with slice sl-20 .. sl-1.
windows = []      # one (20, 2) array of ['volt', 'cum_dir'] per predicted row
target_rows = []  # index label in df of the row each window predicts
for s in [1,2]:
    for ef in [0,15,30,50,75,100,200]:
        for track in range(1,51):
            cell = df.loc[(df['ef']==ef) & (df['track']==track) & (df['set']==s)]
            for sl in range(21,38):
                x = cell.loc[(cell['slice']>sl-21) & (cell['slice']<sl), ['volt','cum_dir']].to_numpy()
                x = x.reshape(20, 2) # raises if the window does not hold exactly 20 rows
                for row in cell.index[cell['slice']==sl]:
                    windows.append(x)
                    target_rows.append(row)
windows = np.array(windows)

# One batched predict per model instead of one call per row.
# The loop variable is not called `model`, so `model` stays models[closest].
for i, member in enumerate(models):
    member.save("models/cncc_no200/model{}.h5".format(i))
    print("Saved model {} to disk".format(i))

    pred_col = 'pred_dir{}'.format(i)
    df[pred_col] = np.nan # rows without a prediction stay NaN
    df.loc[target_rows, pred_col] = member.predict(windows).ravel()
    df['pred_error{}'.format(i)] = df[pred_col] - df['cum_dir']

# Drop the saved-index column if the CSV has one; do not fail if it is absent.
df = df.drop(columns='Unnamed: 0', errors='ignore')
df.to_csv('data/cncc_no200.csv', index=False)
df.head(40)