In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import sys

In [2]:
class Dataset(Dataset):
    """Sliding-window dataset over a CSV file, split 70/10/20 by row order.

    Each item is a pair ``(seq_x, seq_y)``:

    * ``seq_x`` -- ``seq_len`` consecutive rows of every non-target column.
    * ``seq_y`` -- ``label_len + pred_len`` consecutive rows of the target
      column, starting ``label_len`` rows before the end of ``seq_x``.

    Note: this class has the same name as the ``torch.utils.data.Dataset``
    base class it inherits from, so once this cell has run the name
    ``Dataset`` refers to this class.
    """

    def __init__(self, root_path, flag='train', size=None, features='M', data_path='Data2.csv', target='kp', scale=True, inverse=False, cols=None):
        """Configure the window sizes and split, then load the data.

        Args:
            root_path: Directory that contains ``data_path``.
            flag: Which split to expose: ``'train'``, ``'val'`` or ``'test'``.
            size: Optional ``(seq_len, label_len, pred_len)``. When ``None``
                the defaults are ``24*4*4``, ``24*4`` and ``24*4`` rows.
            features: Stored on the instance; not used elsewhere in this class.
            data_path: CSV file name, joined onto ``root_path``.
            target: Name of the target column (placed last in the data matrix).
            scale: When true, standardize all columns with a ``StandardScaler``
                fitted on the training rows only.
            inverse: When true, ``seq_y`` is taken from the unscaled values.
            cols: Stored on the instance; not used elsewhere in this class.

        Raises:
            AssertionError: If ``flag`` is not one of the three split names.
        """
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            # Fix: this branch used to assign ``self.seq_lens``, which left
            # ``self.seq_len`` undefined whenever an explicit size was given.
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        # Total number of target rows returned per item.
        self.output_len = self.label_len + self.pred_len

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.cols = cols
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Read the CSV, select this split's rows and build ``data_x``/``data_y``.

        Fix: the method was previously defined as ``__reat_data__`` while
        ``__init__`` called ``__read_data__``, so construction always failed.
        """
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Drop the 'Tim' column and move the target to the last position.
        # NOTE(review): 'Tim' is kept exactly as written; if the CSV's time
        # column is actually named differently this raises ValueError -- confirm.
        cols = list(df_raw.columns)
        cols.remove('Tim')
        cols.remove(self.target)
        df_data = df_raw[cols + [self.target]]

        total_rows = len(df_raw)
        num_train = int(total_rows * 0.7)
        num_test = int(total_rows * 0.2)
        num_vali = total_rows - num_train - num_test

        # The val and test splits start ``seq_len`` rows early so that their
        # first window has a full input history.
        border1s = [0, num_train - self.seq_len, total_rows - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, total_rows]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.scale:
            # Fit on the training rows only, then transform every row.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # Features are every column except the last; the target is the last.
        self.data_x = data[border1:border2, :-1]
        if self.inverse:
            self.data_y = df_data.values[border1:border2, -1:]
        else:
            self.data_y = data[border1:border2, -1:]

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y)`` for the window that starts at ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window overlaps the last ``label_len`` input rows.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.output_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]

        return seq_x, seq_y

    def __len__(self):
        """Return the number of complete windows available in this split.

        The last row touched by ``__getitem__`` is ``index + seq_len + pred_len``
        (the target window starts ``label_len`` rows *before* the end of the
        input window), so only ``pred_len`` rows are needed past the input.
        Fix: the previous formula subtracted ``output_len`` and discarded
        ``label_len`` valid windows. The result is clamped at 0 because a
        negative ``__len__`` makes ``len()`` raise ``ValueError``.
        """
        return max(0, len(self.data_x) - self.seq_len - self.pred_len + 1)

    def inverse_transform(self, data):
        """Map scaled values back through ``self.scaler.inverse_transform``.

        NOTE(review): the scaler is fitted on every column of the data matrix
        (features plus target), and only when ``scale`` is true -- presumably
        ``data`` must use that same column layout; confirm before passing
        target-only predictions.
        """
        return self.scaler.inverse_transform(data)