In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from torch.utils.data import Dataset
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle
import torch
import os

class dataset(Dataset):
    def __init__(self, X, y, lookback, horizon, label_len=0, data_stamp=None):
        self.seq_len = lookback
        self.pred_len = horizon
        self.label_len = label_len
        self.X = []
        self.y = []
        self.add_date = True if data_stamp is not None else False
        if self.add_date:
            self.X_mark = []
            self.y_mark = []
            
        for index in range(0, len(X) - (self.seq_len + self.pred_len)):  
            s_begin = index
            s_end = s_begin + self.seq_len
            r_begin = s_end - self.label_len
            r_end = r_begin + self.label_len + self.pred_len

            seq_x =X[s_begin:s_end]
            seq_y = y[r_begin:r_end]
            seq_x_mark = data_stamp[s_begin:s_end]
            seq_y_mark = data_stamp[r_begin:r_end]
            
            self.X.append(seq_x)
            self.y.append(seq_y)
            if self.add_date:
                self.X_mark.append(seq_x_mark)
                self.y_mark.append(seq_y_mark)

        self.X = torch.stack(self.X)
        self.y = torch.stack(self.y)
        if self.add_date:
            self.X_mark = np.stack(self.X_mark)
            self.y_mark = np.stack(self.y_mark)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if self.add_date: return self.X[idx], self.y[idx], self.X_mark[idx], self.y_mark[idx]
        return self.X[idx], self.y[idx]
   
 

In [2]:
data = 'traffic'
base_path = f'/home/noam.koren/multiTS/NFT/data/{data}/' 
lookback=96
label_len=0

for horizon in [1, 16, 32, 48]:
    for flag in ['train', 'val', 'test']:
        X = torch.tensor(pd.read_pickle(f'{base_path}{flag}_X.pkl'))
        y = torch.tensor(pd.read_pickle(f'{base_path}{flag}_y.pkl'))
        data_stamp = torch.tensor(pd.read_pickle(f'{base_path}{flag}_data_stamp.pkl'))
        
        d = dataset(
            X=X, 
            y=y, 
            lookback=lookback, 
            horizon=horizon, 
            label_len=label_len, 
            data_stamp=data_stamp
        )
        
        path = f'{base_path}{data}_{lookback}l_{horizon}h_{label_len}label/'
        mark_path = f'{base_path}{data}_date_stamp_{lookback}l_{horizon}h_{label_len}label/'

        # Check if directories exist, create if not
        if not os.path.exists(path):
            os.makedirs(path)
        if not os.path.exists(mark_path):
            os.makedirs(mark_path)


        # Convert tensor to a NumPy array if it's on GPU, you might need to call .cpu() before .numpy()
        X_np = d.X.cpu().numpy() if d.X.is_cuda else d.X.numpy()
        y_np = d.y.cpu().numpy() if d.y.is_cuda else d.y.numpy()

        # Save using pandas.to_pickle
        pd.to_pickle(X_np, f'{path}/{flag}_X.pkl')
        pd.to_pickle(y_np, f'{path}/{flag}_y.pkl')

        # For the date_stamp tensor, do the same if it's a tensor
        if isinstance(d.X_mark, torch.Tensor):
            X_mark_np = d.X_mark.cpu().numpy() if d.X_mark.is_cuda else d.X_mark.numpy()
            pd.to_pickle(X_mark_np, f'{mark_path}/{flag}_X.pkl')
            y_mark_np = d.y_mark.cpu().numpy() if d.y_mark.is_cuda else d.y_mark.numpy()
            pd.to_pickle(y_mark_np, f'{mark_path}/{flag}_y.pkl')
        else:
            pd.to_pickle(d.X_mark, f'{mark_path}/{flag}_X.pkl')
            pd.to_pickle(d.y_mark, f'{mark_path}/{flag}_y.pkl')


        # pd.to_pickle(f'{base_path}electricity_{lookback}l_{horizon}h_{label_len}label/{flag}_X.pkl', d.X)
        # pd.to_pickle(f'{base_path}electricity_{lookback}l_{horizon}h_{label_len}label/{flag}_y.pkl', d.y)
        # pd.to_pickle(f'{base_path}electricity_date_Stamp_{lookback}l_{horizon}h_{label_len}label/{flag}_X.pkl', d.X_mark)
        # pd.to_pickle(f'{base_path}electricity_date_Stamp_{lookback}l_{horizon}h_{label_len}label/{flag}_y.pkl', d.y_mark)

        
    
    
    


In [6]:

base_path = f'/home/noam.koren/multiTS/NFT/data/{data}/{data}_96l_' 
lookback=96
label_len=0

for horizon in [1, 16, 32]:
    for flag in ['train', 'val', 'test']:
        X = pd.read_pickle(f'{base_path}{horizon}h_0label/{flag}_X.pkl')
        y = pd.read_pickle(f'{base_path}{horizon}h_0label/{flag}_y.pkl')
        print(f"{flag} {horizon} X: {X.shape}")
        print(f"{flag} {horizon} y: {y.shape}")

train 1 X: (18315, 96, 321)
train 1 y: (18315, 1, 321)
val 1 X: (2631, 96, 321)
val 1 y: (2631, 1, 321)
test 1 X: (5259, 96, 321)
test 1 y: (5259, 1, 321)
train 16 X: (18300, 96, 321)
train 16 y: (18300, 16, 321)
val 16 X: (2616, 96, 321)
val 16 y: (2616, 16, 321)
test 16 X: (5244, 96, 321)
test 16 y: (5244, 16, 321)
train 32 X: (18284, 96, 321)
train 32 y: (18284, 32, 321)
val 32 X: (2600, 96, 321)
val 32 y: (2600, 32, 321)
test 32 X: (5228, 96, 321)
test 32 y: (5228, 32, 321)
