In [1]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt
import pandas as pd
import sklearn as sk
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
from typing import List, Union
import os
from matplotlib import colormaps
import matplotlib as mpl
import tqdm
from typing import Dict, Any

%matplotlib inline
%load_ext autoreload
%autoreload 2

from data import GaussianPreprocessor, SequenceDataset, SequencePredictionDataset, SequenceReconstructionDataset
from torch.utils.tensorboard import SummaryWriter


import pyro
from pyro.contrib.timeseries import IndependentMaternGP, LinearlyCoupledMaternGP, DependentMaternGP

ModuleNotFoundError: No module named 'sklearn'

In [None]:
class CustomFeatureExtractor(torch.nn.Module):
    """Fuse sequence features with optional action features into one embedding.

    The sequence input is encoded by ``LSTMFeatureExtractor``. When
    ``num_action > 0`` the action input is encoded by ``MLPFeatureExtractor``
    and concatenated with the sequence features along dimension 1. The result
    is passed through a linear layer followed by ``tanh``.

    NOTE(review): ``LSTMFeatureExtractor`` and ``MLPFeatureExtractor`` are not
    defined or imported in the visible part of this notebook; they must be in
    scope before this class is instantiated.

    Args:
        input_size: Feature size handed to the LSTM extractor.
        hidden_size: Hidden sizes keyed by ``"lstm"`` and, when actions are
            used, ``"linear"``.
        output_size: Output sizes keyed by ``"lstm"``, ``"final"`` and, when
            actions are used, ``"linear"``.
        dropout: Dropout rates keyed by ``"lstm"`` and, when actions are used,
            ``"linear"``.
        bidirectional: Forwarded to the LSTM extractor.
        num_action: Size of the action input; ``0`` disables the action branch.
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: Dict[str, Union[int, List[int]]],
        output_size: Dict[str, int],
        dropout: Dict[str, float],
        bidirectional: bool = True,
        num_action: int = 0,
    ):
        super().__init__()
        self.num_action = num_action

        self.lstm_feature_extractor = LSTMFeatureExtractor(
            input_size,
            hidden_size["lstm"],
            output_size["lstm"],
            bidirectional=bidirectional,
            dropout=dropout["lstm"],
        )

        # Width of the tensor that reaches the output layer. The "linear"
        # entries are only read when the action branch exists, so the dicts
        # do not need a "linear" key when num_action == 0.
        fused_size = output_size["lstm"]
        if num_action > 0:
            self.linear_feature_extractor = MLPFeatureExtractor(
                num_action,
                hidden_size["linear"],
                output_size["linear"],
                dropout=dropout["linear"],
            )
            fused_size += output_size["linear"]

        # torch.nn has no `tanh` callable and a Module cannot be wrapped by an
        # activation function; keep the linear layer as the module and apply
        # tanh in forward().
        self.output_layer = torch.nn.Linear(fused_size, output_size["final"])

    def forward(self, seq, a=None):
        """Encode ``seq`` (and ``a`` when the action branch is enabled).

        Args:
            seq: Input for the LSTM feature extractor.
            a: Input for the action feature extractor. Required when
                ``num_action > 0``; ignored otherwise.

        Returns:
            ``tanh`` of the linear projection of the (concatenated) features.

        Raises:
            ValueError: If the action branch is enabled but ``a`` is ``None``.
        """
        x = self.lstm_feature_extractor(seq)
        if self.num_action > 0:
            if a is None:
                raise ValueError("`a` is required when num_action > 0")
            # The action branch consumes the action input `a`, not the LSTM
            # features; torch.cat takes a sequence of tensors.
            x = torch.cat((x, self.linear_feature_extractor(a)), dim=1)
        return torch.tanh(self.output_layer(x))

In [None]:
# Account name used to build the dataset location under /home.
# USERNAME is usually unset on Linux (where USER is the common variable), and
# an unset value would be formatted as the literal "None" in the path below,
# so fall back to USER and finally to the name of the home directory.
username = (
    os.getenv("USERNAME")
    or os.getenv("USER")
    or os.path.basename(os.path.expanduser("~"))
)

# Identifier of the downloaded task; it is part of the dataset directory name.
task = 1876652

data_paths = [f"/home/{username}/workspace/dataset_downloader/{task}/processed"]

# Numeric directory ids selected for use.
include_dirs=[
5798514803372,  5798515131052,  5798515442348,  5798515778220,  5798516081324,  5798516392620,
5798514811564,  5798515139244,  5798515450540,  5798515786412,  5798516089516,  5798516400812,
5798514827948,  5798515147436,  5798515458732,  5798515794604,  5798516097708,  5798516409004,
5798514836140,  5798515155628,  5798515466924,  5798515802796,  5798516105900,  5798516417196,
5798514844332,  5798515172012,  5798515475116,  5798515810988,  5798516114092,  5798516425388,
5798514852524,  5798515180204,  5798515483308,  5798515819180,  5798516122284,  5798516433580,
5798514860716,  5798515188396,  5798515491500,  5798515827372,  5798516130476,  5798516441772,
5798514877100,  5798515196588,  5798515499692,  5798515835564,  5798516138668,  5798516449964,
5798514885292,  5798515204780,  5798515507884,  5798515843756,  5798516146860,  5798516458156,
5798514893484,  5798515212972,  5798515524268,  5798515851948,  5798516155052,  5798516466348,
5798514909868,  5798515221164,  5798515532460,  5798515860140,  5798516163244,  5798516474540,
5798514918060,  5798515229356,  5798515540652,  5798515868332,  5798516171436,  5798516482732,
5798514926252,  5798515237548,  5798515548844,  5798515876524,  5798516179628,  5798694355628,
]

# Further directory ids kept for reference; nothing in this cell reads them.
backup_dirs =[
5798514934444,  5798515245740,  5798515557036,  5798515884716,  5798516187820,  5798694363820,
5798514950828,  5798515253932,  5798515565228,  5798515892908,  5798516196012,  5798694478508,
5798514959020,  5798515262124,  5798515581612,  5798515901100,  5798516204204,  5798694486700,
5798514967212,  5798515270316,  5798515589804,  5798515909292,  5798516212396,  5798694494892,
5798514975404,  5798515278508,  5798515597996,  5798515917484,  5798516220588,  5798694503084,
5798514983596,  5798515286700,  5798515606188,  5798515925676,  5798516228780,  5798694511276,
5798514999980,  5798515294892,  5798515614380,  5798515933868,  5798516236972,  5798694519468,
5798515008172,  5798515303084,  5798515638956,  5798515942060,  5798516245164,  5798694527660,
5798515016364,  5798515311276,  5798515655340,  5798515950252,  5798516261548,  5798694535852,
5798515024556,  5798515319468,  5798515663532,  5798515958444,  5798516269740,  5798694544044,
5798515032748,  5798515327660,  5798515671724,  5798515966636,  5798516277932,  5798694552236,
5798515040940,  5798515335852,  5798515688108,  5798515974828,  5798516286124,  5798694560428,
5798515049132,  5798515344044,  5798515696300,  5798515983020,  5798516302508,  5798694568620,
5798515057324,  5798515352236,  5798515704492,  5798515991212,  5798516310700,  5798694576812,
5798515065516,  5798515360428,  5798515712684,  5798515999404,  5798516318892,  5798694585004,
5798515073708,  5798515368620,  5798515720876,  5798516007596,  5798516327084,  5798694617772,
5798515081900,  5798515385004,  5798515729068,  5798516015788,  5798516335276,  5798694625964,
5798515090092,  5798515393196,  5798515737260,  5798516023980,  5798516343468,  5798694634156,
5798515098284,  5798515401388,  5798515745452,  5798516032172,  5798516351660,  5798694642348,
5798515106476,  5798515409580,  5798515753644,  5798516048556,  5798516368044,
5798515114668,  5798515417772,  5798515761836,  5798516056748,  5798516376236,
5798515122860,  5798515434156,  5798515770028,  5798516064940,  5798516384428
]

# Drop duplicate ids and convert them to strings. dict.fromkeys keeps the
# first occurrence of each id in the order written above, whereas set()
# iterates in hash order (and, for strings, in an order that can differ
# between interpreter runs).
include_dirs = [str(x) for x in dict.fromkeys(include_dirs)]

In [None]:
# Root directory of the CSV recordings (overrides any earlier `data_paths`).
data_paths = ["/home/qb/Repos/experiments/Dataset/"]

# Files to load, grouped under integer keys; each entry names one CSV file.
include_ids = {1: [{"fn": "C1-1001_performance_20220915103555.csv"}], 
               2: [{"fn": 'C1-1001_performance_20220915151041.csv'}]}

# include_ids = {1: [{"fn": "toy_1.csv"}], 
#                2: [{"fn": 'toy_2.csv'}]}

# Column names passed to the preprocessor as `masks`.
# NOTE(review): presumably the columns to exclude — confirm against
# GaussianPreprocessor.
masks = ['steering_percentage', 'fr_pressure', 'chassis_drive_mode', 
         'pitch', 'position_y', 'yaw_rate', 'real_axe_pressure', 'roll', 
         'rr_pressure', 'retarder_torque_feedback', 'control_cmd_brake', 
         'position_z','speed_FL', 'position_x', 'cmd_drive_mode', 'speed_RL', 
         'test_id', 'steering_target', 'front_axe_pressure', 'retarder_torque', 
         'imu_vel_forward', 'throttle', 'limited_retarder_torque', 'speed_RR', 
         'acc_cmd', 'yaw', 'heading', 'rl_pressure', 'imu_sideshift_acceleration', 
         'throttle_percentage', 'speed_FR', 'fl_pressure', 'driving_mode', 'timestamp']

# Column names passed to the preprocessor as `normalized_features`.
normalized_features = ['engine_rpm', 'imu_forward_acceleration', "chassis_acc", 
                       "vehicle_speed", 'drive_motor_torque_nm', 'speed_cmd']

# Column names passed to the preprocessor as `onehot_features`.
onehot_features = ['gear_location_num']

# Plain lists: the original lines ended in a trailing comma, which silently
# wrapped each value in a one-element tuple.
filtermasks = ["driving_mode"]
filtervalues = [[1, 4]]

batch_size = 10
seq_length = 100






In [None]:
# Preprocessor configured with the column groups from the configuration cell.
preprocessor = GaussianPreprocessor(
    data_paths,
    onehot_features=onehot_features,
    normalized_features=normalized_features,
    masks=masks,
)

# Sequence dataset over the selected files, sharing the preprocessor above.
dataset = SequenceReconstructionDataset(
    data_paths,
    include_ids=include_ids,
    preprocessor=preprocessor,
    seq_length=seq_length,
    batch_size=batch_size,
)


In [None]:
# Size of the last axis of one dataset sample; used as the GP's obs_dim.
obs_dim = dataset.sample().shape[-1]

# Optimisation settings: the learning rate starts at `init_learning_rate`
# and is decayed towards `final_learning_rate` over `num_steps` steps;
# `beta1` is the first Adam beta coefficient.
num_steps = 50
beta1 = 0.1
init_learning_rate = 0.1
final_learning_rate = 0.001

In [None]:
# Dependent Matern GP (nu = 1.5) over `obs_dim` outputs, with every initial
# length scale set to 1.5 and parameters converted to double precision.
gp = DependentMaternGP(nu=1.5, obs_dim=obs_dim, length_scale_init=1.5 * torch.ones(obs_dim))
gp = gp.double()

# Adam (AMSGrad variant) over all GP parameters.
adam = torch.optim.Adam(gp.parameters(), lr=init_learning_rate, betas=(beta1, 0.999), amsgrad=True)

# Per-step decay factor chosen so that init_learning_rate * gamma ** num_steps
# equals final_learning_rate.
gamma = (final_learning_rate / init_learning_rate) ** (1.0 / num_steps)
scheduler = torch.optim.lr_scheduler.ExponentialLR(adam, gamma=gamma)

In [None]:
# Full-batch training: gradients from every sequence of every batch are
# accumulated (summed) before a single optimiser step per outer iteration.
for step in range(num_steps):
    adam.zero_grad()
    losses = []
    for batch in dataset.batches():
        for x in batch:
            # Negative log-likelihood of one sequence, scaled by seq_length.
            loss = -gp.log_prob(x).sum() / seq_length
            loss.backward()
            losses.append(loss.item())
    adam.step()
    scheduler.step()

    # `losses` holds one value per sequence across all batches, so the mean is
    # taken over its length (dividing by batch_size over-reports the loss as
    # soon as there is more than one batch). NaN marks an empty pass.
    mean_loss = sum(losses) / len(losses) if losses else float("nan")
    print("[step %03d]  loss: %.3f" % (step, mean_loss))

In [None]:
def ReadCSV(root: str):
    """Load every ``.csv`` file directly inside ``root`` into one DataFrame.

    Only regular files whose name ends in ``.csv`` are read; sub-directories
    are not searched. Files are read in sorted name order, concatenated
    row-wise with a fresh integer index, and exact duplicate rows are dropped
    (the first occurrence is kept, so the returned index may have gaps).

    Args:
        root: Directory containing the CSV files.

    Returns:
        A ``pandas.DataFrame`` with the de-duplicated rows of all CSV files.

    Raises:
        FileNotFoundError: If ``root`` contains no ``.csv`` file.
    """
    # os.listdir gives no ordering guarantee; sort for a reproducible result.
    csv_files = sorted(
        name
        for name in os.listdir(root)
        if name.endswith('.csv') and os.path.isfile(os.path.join(root, name))
    )
    if not csv_files:
        raise FileNotFoundError(f"no .csv files found in {root!r}")

    # Read each listed file (not a stale loop variable) and concatenate once
    # instead of growing the frame inside the loop.
    frames = [pd.read_csv(os.path.join(root, name)) for name in csv_files]
    raw_data = pd.concat(frames, axis=0, ignore_index=True)
    return raw_data.drop_duplicates()

In [None]:
T_multistep = 49
T_onestep = 5

        # do rolling prediction
print("doing one-step-ahead forecasting...")


%matplotlib inline

test = dataset.sample().squeeze()

print(test.shape)

T_onestep = 5


onestep_means, onestep_stds = np.zeros((T_onestep, obs_dim)), np.zeros((T_onestep, obs_dim))
        
for t in range(T_onestep):
            # predict one step into the future, conditioning on all previous data.
            # note that each call to forecast() conditions on more data than the previous call
    dts = torch.tensor([1.0]).double()
    pred_dist = gp.forecast(test[0 : T_onestep + t, :], dts)
    onestep_means[t, :] = pred_dist.loc.data.numpy()
    onestep_stds[t, :] = pred_dist.covariance_matrix.diagonal(dim1=-1, dim2=-2).data.numpy()
    
    
test_y = test[T_onestep:, :]

ts = np.linspace(0, test.shape[0] - T_onestep, test.shape[0] - T_onestep);

with torch.no_grad():

        
    f = plt.figure(figsize=(10, 30))
    for i in range(5):
        ax = f.add_subplot(5, 1, i+1)

        # Get upper and lower confidence bounds
#         lower, upper = observed_pred.confidence_region()
        # Plot training data as black stars
        ax.plot(ts, test_y[:, i], 'k*', markersize=2, label='predictions')
        # Plot predictive means as blue line
        ax.plot(ts, onestep_means[:, i], 'bo', markersize=2, label='observed data')
        # Shade between the lower and upper confidence bounds
#         ax.plot(overlap_t, overlap_y[:, i], "rx", markersize=20, label='overlap between trainset and testset')
#         ax.set_title(observ_columns[i])
#         ax.fill_between(ts, lowers[:, i], uppers[:, i], alpha=0.5)

In [None]:
# print(rnn)

In [None]:
# rnn(train_x)