In [None]:
# %load "../batchedRNN/newTrainUtils.py"
import logging, sys
import torch
import h5py
import os
import numpy as np
import torch.utils.data as torchUtils
import torch.optim as optim
from functools import partial
import torch.nn as nn
import json
from shutil import copy2, copyfile, copytree
import argparse

# Configure the root logger once at import time: records of level DEBUG and
# above are written to stderr.
logging.basicConfig(stream=sys.stderr,level=logging.DEBUG)

def getSaveDir():
    """Create and return the first unused ``../save/models/model<N>/`` directory.

    Candidates are probed in order (``model0``, ``model1``, ...) relative to
    the current working directory; the first one that is not an existing
    directory is created and returned.

    Returns:
        The path of the newly created directory, including the trailing slash.

    Raises:
        FileExistsError: if the chosen path already exists as a non-directory.
    """
    baseDir = '../save/models/'
    index = 0
    saveDir = "{}model{}/".format(baseDir, index)
    while os.path.isdir(saveDir):
        index += 1
        saveDir = "{}model{}/".format(baseDir, index)
    # makedirs (rather than mkdir) so the very first run works even when
    # '../save/models/' has not been created yet.
    os.makedirs(saveDir)
    return saveDir

def saveUsefulData():
    """Snapshot the run configuration and source files into ``args.save_dir``.

    Reads the module-level ``args`` namespace. Its attributes are written as
    JSON to ``args.txt``, and the training scripts plus the ``./model``
    directory are copied next to it. Destination paths are built by plain
    string concatenation, so ``args.save_dir`` must end with a path separator.
    """
    destination = args.save_dir
    with open(destination + "args.txt", "w") as argsHandle:
        argsHandle.write(json.dumps(vars(args)))

    # (source path, file name inside the save directory). Note that the grid
    # search script is stored under a lower-case "s" name.
    snapshots = (
        ("./train.py", "train.py"),
        ("./utils.py", "utils.py"),
        ("./gridSearchOptimize.py", "gridsearchOptimize.py"),
    )
    for source, targetName in snapshots:
        copy2(source, destination + targetName)
    copytree("./model", destination + "model/")

def getLoaderAndScaler(dataDir, category):
    """Build a DataLoader and scaler from ``<dataDir>/<category>.npz``.

    The archive must contain the arrays ``inputs`` and ``targets``; ``inputs``
    must have at least four dimensions. The batch size is read from the
    module-level ``args.batch_size``.

    Args:
        dataDir: directory holding the ``.npz`` files.
        category: file stem to load; only ``"train"`` is shuffled.

    Returns:
        A tuple ``(loader, scaler, sequence_len, x_dim, channels)`` where the
        last three values are ``inputs.shape[1]``, ``inputs.shape[2]`` and
        ``inputs.shape[3]``.
    """
    logging.info("Getting {} loader".format(category))
    # An NpzFile re-reads a member from the archive on every key access and
    # keeps the file open until closed, so load each array once and release
    # the handle immediately.
    with np.load(os.path.join(dataDir, category + '.npz')) as f:
        inputs = f["inputs"]
        targets = f["targets"]
    my_dataset = torchUtils.TensorDataset(torch.Tensor(inputs), torch.Tensor(targets))
    scaler = getScaler(inputs)
    sequence_len = inputs.shape[1]
    x_dim = inputs.shape[2]
    channels = inputs.shape[3]
    loader = torchUtils.DataLoader(
        my_dataset,
        batch_size=args.batch_size,
        shuffle=(category == "train"),
        num_workers=0,
        pin_memory=False,
        drop_last=True  # incomplete final batches are discarded
        )
    return loader, scaler, sequence_len, x_dim, channels

def getDataLoaders(dataDir, debug=False):
    """Collect the data loaders and dataset metadata into one dictionary.

    Args:
        dataDir: directory passed through to ``getLoaderAndScaler``.
        debug: when truthy only the ``"test"`` split is loaded and it also
            supplies the scaler/metadata; otherwise ``"train"``, ``"val"``
            and ``"test"`` are loaded and ``"train"`` supplies them.

    Returns:
        A dict with one loader per loaded split name plus the keys
        ``"scaler"``, ``"sequence_len"``, ``"x_dim"`` and ``"channels"``.
    """
    loaders = {}
    logging.info("Getting loaders")
    categories = ["test"] if debug else ["train", "val", "test"]
    # The first split in either mode is the one whose statistics are kept.
    scalerSet = categories[0]
    for category in categories:
        loader, scaler, sequence_len, x_dim, channels = getLoaderAndScaler(dataDir, category)
        if category == scalerSet:
            loaders.update(
                scaler=scaler,
                sequence_len=sequence_len,
                x_dim=x_dim,
                channels=channels,
            )
        loaders[category] = loader
    return loaders

def transformBatch(batch, scaler=None):
    """Scale one ``(inputs, targets)`` batch and reorder its axes.

    Args:
        batch: indexable whose item 0 is the 4-D input tensor and item 1 the
            4-D target tensor.
        scaler: object exposing ``transform(tensor)``. Despite the ``None``
            default it is required.

    Returns:
        ``(x, y)``: ``x`` is the scaled input with axes 0/1 and 2/3 swapped;
        ``y`` is index 0 of the last axis of the scaled target with axes 0/1
        swapped.
    """
    scaledInputs = scaler.transform(batch[0])
    x = scaledInputs.permute(1, 0, 3, 2)
    scaledTargets = scaler.transform(batch[1])
    firstChannel = scaledTargets[..., 0]
    y = firstChannel.permute(1, 0, 2)
    return x, y

class StandardScaler:
    """
    Standardise tensors whose last axis holds (at least) two channels.

    Index 0 of the last axis is scaled with ``mean0``/``std0`` and index 1
    with ``mean1``/``std1``. Any further indices are left unchanged
    (mean 0, std 1).
    """

    def __init__(self, mean0, std0, mean1=0, std1=1):
        self.mean0 = mean0
        self.mean1 = mean1
        self.std0 = std0
        self.std1 = std1

    def _channelStats(self, data):
        """Return ``(mean, std)`` tensors shaped like ``data``, filled per channel."""
        # Allocate on the input's device so non-CPU tensors can be scaled too.
        mean = torch.zeros(data.size(), device=data.device)
        mean[..., 0] = self.mean0
        mean[..., 1] = self.mean1
        std = torch.ones(data.size(), device=data.device)
        std[..., 0] = self.std0
        std[..., 1] = self.std1
        return mean, std

    def transform(self, data):
        """Return ``(data - mean) / std`` with the statistics applied on the last axis.

        ``data`` must have a last axis of size two or more.
        """
        mean, std = self._channelStats(data)
        return torch.div(torch.sub(data, mean), std)

    def inverse_transform(self, data):
        """
        Inverse transform is applied to output and target.
        These are only the speeds, so only the channel-0 statistics are used.

        ``data`` must be 3-D; the result has axes 0 and 1 swapped.
        """
        mean = torch.ones(data.size(), device=data.device) * self.mean0
        std = torch.ones(data.size(), device=data.device) * self.std0
        transformed = torch.add(torch.mul(data, std), mean)
        return transformed.permute(1, 0, 2)

    def inverse_transform_both_layers(self, data):
        """Undo ``transform`` for a two-channel tensor whose axes were permuted.

        ``data`` must be 4-D with the channel axis at position 2, which is the
        layout produced by ``transform(...).permute(1, 0, 3, 2)``. The result
        has axes 0/1 and 2/3 swapped back.

        NOTE(review): assumes callers pass that permuted layout; confirm
        against call sites outside this file.
        """
        # Permute first so the channels are on the last axis again before the
        # per-channel statistics are applied. Applying them before the permute
        # would rescale along the wrong axis.
        restored = data.permute(1, 0, 3, 2)
        mean, std = self._channelStats(restored)
        return torch.add(torch.mul(restored, std), mean)

def getScaler(trainX):
    """Build a ``StandardScaler`` from index 0 of the last axis of ``trainX``.

    Only ``mean0``/``std0`` are computed from the data; the second channel
    keeps the scaler's defaults (mean 0, std 1).
    """
    firstChannel = trainX[..., 0]
    return StandardScaler(np.mean(firstChannel), np.std(firstChannel))

def getLoss(output, target, scaler, criterion="RMSE"):
    """Compute the loss between ``output`` and ``target`` in unscaled units.

    Both tensors are first passed through ``scaler.inverse_transform``.

    Args:
        output: scaled model output.
        target: scaled target, same shape as ``output``.
        scaler: object exposing ``inverse_transform(tensor)``.
        criterion: ``"RMSE"`` (default, square root of the mean squared error)
            or ``"L1Loss"`` (mean absolute error).

    Returns:
        A scalar tensor holding the loss.

    Raises:
        AssertionError: if the inverse-transformed shapes differ.
        ValueError: if ``criterion`` is not a supported name.
    """
    output = scaler.inverse_transform(output)
    target = scaler.inverse_transform(target)
    assert output.size() == target.size(), "output size: {}, target size: {}".format(output.size(), target.size())
    if criterion == "RMSE":
        return torch.sqrt(nn.MSELoss()(output, target))
    if criterion == "L1Loss":
        return nn.L1Loss()(output, target)
    raise ValueError("bad loss function: {!r}".format(criterion))



In [None]:
# Mock inputs: normally distributed speeds (mean 65, std 10) and uniform
# time-of-day values in [0, 1), each of shape (5, 4, 3).
speeds = (np.random.randn(5, 4,3)* 10) + 65
timeOfDay = np.random.rand(5, 4,3)
print(speeds)
print(timeOfDay)
# Stack the two arrays on a new trailing axis -> shape (5, 4, 3, 2).
inputData = np.stack((speeds, timeOfDay), axis=3)

In [None]:
inputData.shape

In [None]:
inputData

In [None]:
# Mock targets built the same way as the inputs, from fresh random draws.
speedsTarget = (np.random.randn(5,4,3)* 10) + 65
timeOfDayTarget = np.random.rand(5,4,3)
targetData = np.stack((speedsTarget, timeOfDayTarget), axis=3)
print(targetData)

In [None]:
# getScaler accepts a single array and derives its statistics from channel 0
# of it; passing targetData as a second positional argument raises TypeError.
scaler = getScaler(inputData)

In [None]:
# Inspect the fitted statistics. mean0/std0 come from channel 0 of the data
# passed to getScaler; mean1/std1 keep the StandardScaler defaults (0 and 1).
scaler.mean0

In [None]:
scaler.std0

In [None]:
scaler.mean1

In [None]:
scaler.std1

In [None]:
# Scale both mock tensors and permute them with transformBatch.
inputTTransformed, targetTTransformed = transformBatch([torch.FloatTensor(inputData), torch.FloatTensor(targetData)], scaler)

In [None]:
inputTTransformed.size()

In [None]:
# Mean/std of the scaled inputs at index 0 of axis 2 (after the permute in
# transformBatch, axis 2 is the original trailing channel axis).
print(np.mean(inputTTransformed[:,:,0,:].numpy()), np.std(inputTTransformed[:,:,0,:].numpy()))

In [None]:
targetTTransformed.size()

In [None]:
# Mean/std of the scaled targets (transformBatch keeps only channel 0).
print(np.mean(targetTTransformed.numpy()), np.std(targetTTransformed.numpy()))

In [None]:
targetTTransformed.size()

In [None]:
# Mean/std over scaled input channel 0 and scaled targets, concatenated
# along axis 1.
print(np.mean(np.concatenate((inputTTransformed.numpy()[:,:,0,:], targetTTransformed.numpy()),axis=1)),np.std(np.concatenate((inputTTransformed.numpy()[:,:,0,:], targetTTransformed.numpy()),axis=1)))

In [None]:
# Raw mean of input channel 0, for comparison with scaler.mean0.
np.mean(inputData[...,0])

In [None]:
# Mean of channel 0 after calling scaler.transform directly (no permute).
scaler.transform(torch.FloatTensor(inputData))[...,0].mean()

In [None]:
# Raw channel-0 values of the first sample ...
inputData[0,:,:,0]

In [None]:
# ... and the same elements after scaling, indexed in the permuted layout.
inputTTransformed[:,0,0,:]

In [None]:
scaler.mean0

In [None]:
inputData.shape

In [None]:
targetTTransformed.shape

In [None]:
inputData.shape

In [None]:
# Build the batch as a two-item list and run it through transformBatch again.
data = []
data.append(torch.FloatTensor(inputData))
data.append(torch.FloatTensor(targetData))
transedX, transedY = transformBatch(data, scaler)
print(transedX.shape)
print(transedY.shape)

In [None]:
# Use index 0 of axis 2 of the scaled inputs as a stand-in for a model output.
mockOutput = transedX[:,:,0,:]

In [None]:
invTransOutput = scaler.inverse_transform(mockOutput)

In [None]:
invTransOutput.shape

In [None]:
inputData.shape

In [None]:
# First sample after the inverse transform ...
invTransOutput[0]

In [None]:
# ... next to the raw channel-0 values of the first input sample.
inputData[0,:,:,0]

In [None]:
# Same comparison for the targets: inverse-transformed first sample ...
scaler.inverse_transform(transedY)[0]

In [None]:
# ... next to the raw channel-0 values of the first target sample.
targetData[0,:,:,0]

In [None]:
# Check that applying permute(1,0,3,2) twice returns the original tensor.
(torch.FloatTensor(inputData) == torch.FloatTensor(inputData).permute(1,0,3,2).permute(1,0,3,2)).all()

In [None]:
# Loss between the mock output and the scaled targets.
getLoss(mockOutput, targetTTransformed, scaler)

In [None]:
# NumPy RMSE between raw input and target channel 0, for comparison with the
# getLoss result above.
np.sqrt(np.mean((inputData[...,0] - targetData[...,0])**2))