In [2]:
import sys

# Make the PatchTST_supervised package root importable (needed for `utils.timefeatures`).
# NOTE(review): this is a machine-specific absolute path; adjust it when running elsewhere.
project_root = '/Users/Ella/Desktop/PatchTST-main/PatchTST_supervised'
# Guarded so that re-running the cell does not keep appending duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)
print(sys.path)


['/Users/Ella/Desktop/PatchTST-main/PatchTST_supervised/data_provider', '/Users/Ella/anaconda3/envs/finance/lib/python310.zip', '/Users/Ella/anaconda3/envs/finance/lib/python3.10', '/Users/Ella/anaconda3/envs/finance/lib/python3.10/lib-dynload', '', '/Users/Ella/anaconda3/envs/finance/lib/python3.10/site-packages', '/Users/Ella/anaconda3/envs/finance/lib/python3.10/site-packages/mpmath-1.2.1-py3.10.egg', '/Users/Ella/Desktop/PatchTST-main/PatchTST_supervised']


In [3]:
import os
import numpy as np
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from utils.timefeatures import time_features
import warnings

# NOTE(review): with no category/module filter this suppresses *every* warning for the
# whole kernel session, including pandas/NumPy deprecation warnings — consider narrowing it.
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [62]:

class Dataset_Custom(Dataset):
    """Sliding-window dataset over a CSV time series that has a ``date`` column.

    The rows are split chronologically into train / val / test using 70% for
    train, 20% for test and the remainder for val. The val and test ranges
    start ``seq_len`` rows before their nominal start so that their first
    window has a full look-back. When ``scale`` is true the values are
    transformed with a ``StandardScaler`` fitted on the training range only.

    Parameters
    ----------
    root_path, data_path : str
        Joined with ``os.path.join`` to locate the CSV file.
    flag : {'train', 'val', 'test'}
        Which split this instance serves.
    size : sequence of 3 ints or None
        ``[seq_len, label_len, pred_len]``; ``None`` means ``[384, 96, 96]``.
    features : {'M', 'MS', 'S'}
        'M' / 'MS' keep every non-date column (target last); 'S' keeps only
        the target column.
    target : str
        Name of the target column.
    scale : bool
        Whether to standardise the values.
    timeenc : {0, 1}
        0 -> integer month / day / weekday / hour columns;
        1 -> delegate to ``time_features(dates, freq=freq)``.
    freq : str
        Forwarded to ``time_features`` when ``timeenc == 1``.

    Each item is the tuple ``(seq_x, seq_y, seq_x_mark, seq_y_mark)``.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, select the split, scale the values and build time marks.

        Raises
        ------
        ValueError
            If ``features`` or ``timeenc`` has an unsupported value, if the
            target / ``date`` column is missing (from ``list.remove``), or if
            ``seq_len`` is larger than the number of rows preceding the
            requested split.
        """
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        # Reorder columns to: ['date', ...(other features), target feature]
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]
        print(cols)

        # Chronological split; border1s are the (inclusive) starts and
        # border2s the (exclusive) ends of the train / val / test row ranges.
        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]
        if border1 < 0:
            # A negative start would silently wrap around and slice from the
            # end of the file instead of failing.
            raise ValueError(
                f'seq_len={self.seq_len} is larger than the number of rows '
                f'available before this split ({border1 + self.seq_len})')

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]  # every column except 'date', target last
        elif self.features == 'S':
            df_data = df_raw[[self.target]]  # target column only
        else:
            raise ValueError(
                f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        # Scale using statistics from the training range only.
        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            print(self.scaler.mean_)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values
        print(f'normalized data:{data}')

        # Time marks for the selected split. Working on a freshly parsed
        # Series avoids chained assignment on a slice of df_raw.
        stamp_dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Integer month, day, weekday and hour, one column each.
            data_stamp = np.column_stack([
                stamp_dates.dt.month.to_numpy(),
                stamp_dates.dt.day.to_numpy(),
                stamp_dates.dt.weekday.to_numpy(),
                stamp_dates.dt.hour.to_numpy(),
            ]).astype(np.int64)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(stamp_dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f'timeenc must be 0 or 1, got {self.timeenc!r}')
        print(data_stamp.shape)

        # Inputs and targets are windows over the same (scaled) array.
        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        covers the last ``label_len`` rows of that window followed by the next
        ``pred_len`` rows. The ``*_mark`` arrays are the matching time marks.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete (seq_len + pred_len) windows in this split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the scaling applied in ``__read_data__`` via the fitted scaler."""
        return self.scaler.inverse_transform(data)
    

In [56]:

# Training split of the national-illness CSV: 36-step look-back, 4-step label
# overlap, 24-step horizon, all feature columns ('M'), integer time marks.
root_path = "/Users/Ella/Desktop/PatchTST-main/PatchTST_supervised/dataset/"
data_path = "national_illness.csv"
Data = Dataset_Custom
dataset = Data(
    root_path=root_path,
    data_path=data_path,
    flag='train',
    size=[36, 4, 24],
    features='M',
    timeenc=0,
    freq='h',
)

['% WEIGHTED ILI', '%UNWEIGHTED ILI', 'AGE 0-4', 'AGE 5-24', 'ILITOTAL', 'NUM. OF PROVIDERS']
     % WEIGHTED ILI  %UNWEIGHTED ILI  AGE 0-4  AGE 5-24  ILITOTAL  \
0          1.222620         1.166680      582       805      2060   
1          1.333440         1.216500      683       872      2267   
2          1.319290         1.130570      642       878      2176   
3          1.494840         1.252460      728      1045      2599   
4          1.471950         1.302370      823      1189      2907   
..              ...              ...      ...       ...       ...   
961        0.839059         0.846722     2756      3528     12913   
962        0.895958         0.908885     3203      3778     13979   
963        0.910926         0.941625     3478      3796     14389   
964        0.946945         0.972185     3734      3818     14999   
965        0.963716         1.013760     3955      3843     15307   

     NUM. OF PROVIDERS       OT  
0                  754   176569  
1        

In [44]:
# Display the window at index 500 as the tuple (seq_x, seq_y, seq_x_mark, seq_y_mark).
dataset[500]

(array([[-0.28092858, -0.30618003, -0.07675461, -0.01746728, -0.04896526,
          0.49005021,  0.73869823],
        [-0.33793365, -0.3493638 , -0.06971086, -0.06528867, -0.0959488 ,
          0.45357634,  0.70741865],
        [-0.41500736, -0.38534447, -0.09741626, -0.15504213, -0.15303991,
          0.36644431,  0.62374566],
        [-0.41525985, -0.37631672, -0.05984962, -0.17294571, -0.17347719,
          0.1881276 ,  0.53816276],
        [-0.38391073, -0.35738364, -0.09037252, -0.2695308 , -0.25400453,
          0.17596965,  0.24905062],
        [-0.51500039, -0.47566545, -0.15799248, -0.32818857, -0.29710055,
          0.08681129,  0.38491161],
        [-0.52496141, -0.54243954, -0.20729871, -0.40545664, -0.36729818,
         -0.0671895 ,  0.3104822 ],
        [-0.51297235, -0.56791536, -0.19415038, -0.4172353 , -0.37673931,
         -0.05503154,  0.34381154],
        [-0.51635241, -0.54887801, -0.23688244, -0.4464464 , -0.40584023,
         -0.12595296,  0.1868411 ],
        [-

In [79]:
# Test split of the ^IXIC CSV: 104-step look-back, 12-step label overlap,
# 24-step horizon, all feature columns ('M'), integer time marks.
root_path = "/Users/Ella/Desktop/PatchTST-main/PatchTST_supervised/dataset/"
data_path = "^IXIC_short.csv"
Data = Dataset_Custom

test_set = Data(
    root_path=root_path,
    data_path=data_path,
    flag='test',
    size=[104, 12, 24],
    features='M',
    timeenc=0,
    freq='h',
)

['Open', 'High', 'Low', 'Adj Close', 'Volume']
[1.13558567e+04 1.14766495e+04 1.12428677e+04 1.13656166e+04
 4.95423771e+09 1.13656166e+04]
normalized data:[[ 0.85745383  1.07967789  0.96121221  1.20037772 -0.68836451  1.20037772]
 [ 1.19064088  1.14119232  1.20546613  1.06847937 -0.31091606  1.06847937]
 [ 0.99384564  0.95971432  0.75783349  0.57618015  0.57582287  0.57618015]
 ...
 [ 2.76006201  2.67519224  2.80290544  2.78020406 -0.66868963  2.78020406]
 [ 2.83227462  2.78709766  2.90574828  2.76339099 -0.77325048  2.76339099]
 [ 2.77376641  2.75035665  2.86213441  2.83306127 -0.18169821  2.83306127]]
(154, 4)


In [80]:
# Display the first test window as the tuple (seq_x, seq_y, seq_x_mark, seq_y_mark).
test_set[0]

(array([[-5.39920207e-01, -4.00279983e-01, -3.66028433e-01,
         -2.46856744e-01,  2.37724181e-01, -2.46856744e-01],
        [-5.81969312e-01, -4.07104972e-01, -4.38356255e-01,
         -5.85094565e-01,  3.42538579e-01, -5.85094565e-01],
        [-5.77560326e-01, -7.18450044e-01, -6.88251780e-01,
         -8.49481985e-01, -2.29097947e-01, -8.49481985e-01],
        [-8.87096691e-01, -5.40596664e-01, -7.09537570e-01,
         -3.90184841e-01, -2.81809396e-01, -3.90184841e-01],
        [-4.92590916e-01, -6.44473292e-01, -4.89517458e-01,
         -5.59651975e-01, -2.54173283e-01, -5.59651975e-01],
        [-3.02565616e-01, -4.81503051e-01, -5.38370282e-01,
         -7.03915420e-01, -3.50798263e-01, -7.03915420e-01],
        [-7.08356071e-01, -7.26703457e-01, -1.07169850e+00,
         -1.24664082e+00,  6.11268089e-01, -1.24664082e+00],
        [-1.43884620e+00, -1.48921702e+00, -1.37440804e+00,
         -1.51627546e+00,  1.87560832e-01, -1.51627546e+00],
        [-1.21515341e+00, -1.387

In [83]:
from torch.utils.data import DataLoader

# Loader settings: fixed order (no shuffling), in-process loading, and the
# final incomplete batch is dropped.
batch_size, shuffle_flag, num_workers, drop_last = 16, False, 0, True

test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=shuffle_flag,
                         num_workers=num_workers, drop_last=drop_last)

In [90]:
# Walk the test loader and show, for every batch, the forecast-horizon part of the targets.
# The horizon length is read from the dataset instead of repeating the literal 24, so this
# cell stays correct if `size` is changed where the dataset is built.
horizon = test_loader.dataset.pred_len
for batch_idx, batch_data in enumerate(test_loader):
    # Each batch is (seq_x, seq_y, seq_x_mark, seq_y_mark) as returned by Dataset_Custom.__getitem__.
    batch_x, batch_y, batch_x_mark, batch_y_mark = batch_data
    print(f"Batch {batch_idx + 1}:")
    print(batch_y.shape)
    # Keep only the last `horizon` time steps (dropping the label_len overlap), all features.
    batch_y = batch_y[:, -horizon:, 0:]
    print(batch_y)
    print(batch_y.shape)
    # Last feature column (the target) of the first sample in the batch.
    print(batch_y[0, :, -1])

Batch 1:
torch.Size([16, 36, 6])
tensor([[[ 0.7713,  0.6412,  0.7386,  0.5978, -0.7909,  0.5978],
         [ 0.5971,  0.4151,  0.5837,  0.5196, -1.1629,  0.5196],
         [ 0.7518,  0.6990,  0.8641,  0.8312, -0.5840,  0.8312],
         ...,
         [ 0.8385,  0.7386,  0.8785,  0.7246,  0.4155,  0.7246],
         [ 0.9272,  1.0183,  1.0540,  1.1515,  0.3796,  1.1515],
         [ 1.1459,  1.1291,  1.2499,  1.2765,  0.4781,  1.2765]],

        [[ 0.5971,  0.4151,  0.5837,  0.5196, -1.1629,  0.5196],
         [ 0.7518,  0.6990,  0.8641,  0.8312, -0.5840,  0.8312],
         [ 0.9848,  0.8541,  1.0576,  0.9605, -0.2730,  0.9605],
         ...,
         [ 0.9272,  1.0183,  1.0540,  1.1515,  0.3796,  1.1515],
         [ 1.1459,  1.1291,  1.2499,  1.2765,  0.4781,  1.2765],
         [ 1.2851,  1.1796,  1.3966,  1.2558,  0.2715,  1.2558]],

        [[ 0.7518,  0.6990,  0.8641,  0.8312, -0.5840,  0.8312],
         [ 0.9848,  0.8541,  1.0576,  0.9605, -0.2730,  0.9605],
         [ 1.0165,  1.129

In [None]:
def test(self, setting, test=0):
        """Run the model over the test loader and collect predictions, targets and inputs.

        Written as a method (takes ``self``) although it sits at module level in this
        notebook; it reads ``self.model``, ``self.device``, ``self.args`` and calls
        ``self._get_data``.

        Args:
            setting: string used to build ``./checkpoints/<setting>/checkpoint.pth`` and
                the ``./test_results/<setting>/`` output folder.
            test: when truthy, the model weights are loaded from the checkpoint first.

        NOTE(review): no ``return`` is visible in this excerpt; the stacked ``preds``,
        ``trues`` and ``inputx`` arrays are built but not used further here — the
        function may continue beyond what is shown.
        NOTE(review): ``visual`` and ``test_params_flop`` are not defined or imported
        anywhere in the visible notebook — confirm where they come from.
        """
        # NOTE(review): `test_data` is not used in the visible part of this function.
        test_data, test_loader = self._get_data(flag='test')
        
        if test:
            print('loading model')
            self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))

        preds = []
        trues = []
        inputx = []
        # Plots are written under ./test_results/<setting>/ (created on demand).
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        # Inference only: eval mode and no gradient tracking.
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input: the first label_len target steps followed by zeros
                # standing in for the pred_len steps to be predicted
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                # encoder - decoder
                # Models whose name contains 'Linear' or 'TST' are called with batch_x only;
                # all others also receive the time marks and the decoder input.
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                        else:
                            if self.args.output_attention:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if 'Linear' in self.args.model or 'TST' in self.args.model:
                            outputs = self.model(batch_x)
                    else:
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]

                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                # 'MS' keeps only the last feature column; otherwise all columns are kept.
                f_dim = -1 if self.args.features == 'MS' else 0
                #print(outputs.shape,batch_y.shape)
                # Keep only the final pred_len steps of both the prediction and the target.
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                outputs = outputs.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()

                pred = outputs  # outputs.detach().cpu().numpy()  # .squeeze()
                true = batch_y  # batch_y.detach().cpu().numpy()  # .squeeze()

                preds.append(pred)
                trues.append(true)
                inputx.append(batch_x.detach().cpu().numpy())
                # Every 20th batch: plot input history + truth vs. input history + prediction
                # for the last feature of the first sample.
                if i % 20 == 0:
                    # NOTE(review): `input` shadows the builtin of the same name.
                    input = batch_x.detach().cpu().numpy()
                    gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
                    # NOTE(review): assigning `pd` makes it a local name for the whole
                    # function, hiding the module-level pandas alias inside `test`.
                    pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))

        if self.args.test_flop:
            # Uses the shape of the last batch seen; `exit()` then terminates the interpreter.
            test_params_flop((batch_x.shape[1],batch_x.shape[2]))
            exit()
        # NOTE(review): stacking per-batch arrays like this presumably relies on every batch
        # having the same size (e.g. a loader built with drop_last=True) — confirm.
        preds = np.array(preds)
        trues = np.array(trues)
        inputx = np.array(inputx)

        # Merge the (num_batches, batch_size) leading axes into a single sample axis.
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        inputx = inputx.reshape(-1, inputx.shape[-2], inputx.shape[-1])
