# Experiment Setup

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
from torch.optim import lr_scheduler

import os, sys
import time

import warnings
import numpy as np
from matplotlib import pyplot as plt

# Add the PatchTST_supervised directory to the import search path and make it
# the working directory. This has to run before the project imports that
# follow (utils, data_provider, models) -- presumably they are only resolvable
# from that directory; TODO confirm.
# NOTE(review): 'path/to/...' looks like a placeholder to be replaced with the
# real checkout location.
sys.path.append('path/to/PatchTST/PatchTST_supervised')
os.chdir('path/to/PatchTST/PatchTST_supervised')

from utils.tools import EarlyStopping, adjust_learning_rate, visual, test_params_flop
from utils.metrics import metric
from data_provider.data_factory import data_provider
from models import Informer, Autoformer, Transformer, DLinear, Linear, NLinear, PatchTST
from models.PatchTST import Model

In [None]:
class Args:
    """Plain attribute container with the default experiment settings.

    Every entry of the table in ``__init__`` becomes an instance attribute of
    the same name. Attributes can be reassigned freely after construction.
    """

    def __init__(self):
        # Default value for every attribute, assigned in this order.
        defaults = {
            'enc_in': 1,
            'seq_len': 96,
            'label_len': 0,
            'pred_len': 96,
            'e_layers': 6,
            'n_heads': 16,
            'd_model': 128,
            'd_ff': 256,
            'dropout': 0.2,
            'fc_dropout': 0.2,
            'head_dropout': 0.0,
            'individual': False,
            'patch_len': 16,
            'stride': 8,
            'padding_patch': 'end',
            'revin': True,
            'affine': False,
            'subtract_last': False,
            'decomposition': False,
            'kernel_size': 25,

            'batch_size': 128,
            'data': 'ETTh1',
            'embed': 'timeF',
            'freq': 'h',
            'root_path': 'path/to/PatchTST/PatchTST_supervised/dataset',
            'data_path': 'ETTh1.csv',
            'features': 'M',
            'target': 'OT',
            'num_workers': 10,
        }
        for name, value in defaults.items():
            setattr(self, name, value)

In [None]:
# Per-dataset settings, keyed by dataset name. Each entry holds the 'data',
# 'data_path' and 'enc_in' values for that dataset.
# NOTE(review): the shared prefix ends with '/' and every suffix starts with
# '/', so the resulting 'data_path' strings contain '//'.
dataset_configs = {
    name: {
        'data': data,
        'data_path': 'path/to/PatchTST/PatchTST_supervised/dataset/' + suffix,
        'enc_in': enc_in,
    }
    for name, data, suffix, enc_in in (
        ('ETTm2', 'ETTm2', '/ETT-small/ETTm2.csv', 7),
        ('Electricity', 'custom', '/electricity/electricity.csv', 321),
        ('weather', 'custom', '/weather/weather.csv', 21),
        ('traffic', 'custom', '/traffic/traffic.csv', 862),
        ('Dataset_Patch_dependent', 'Dataset_Patch_dependent', '/traffic/traffic.csv', 1),
    )
}

In [None]:
# Start from the Args defaults, then override selected settings for this run.
args = Args()

args.seq_len = 336
args.stride = 16
args.e_layers = 3
dataset = 'ETTm2'
model_path = 'path/to/checkpoint.pth'

# Copy every entry of the chosen dataset configuration onto args.
for attr, value in dataset_configs[dataset].items():
    setattr(args, attr, value)

# One (dataset, loader) pair per split.
train_data, train_loader = data_provider(args, flag='train')
vali_data, vali_loader = data_provider(args, flag='val')
test_data, test_loader = data_provider(args, flag='test')

# The model is wrapped in DataParallel only while the state dict is loaded and
# is unwrapped again right after -- presumably because the checkpoint was saved
# from a DataParallel model, so its keys carry the `module.` prefix (TODO confirm).
model = Model(args).to('cuda')
model = nn.DataParallel(model)
# map_location remaps the stored tensors onto the current CUDA device, so the
# load does not depend on which device the checkpoint was saved from.
model.load_state_dict(torch.load(model_path, map_location='cuda'))
model = model.module
model.eval()

In [None]:
# Score the model on the test split: average of the per-batch MSE losses.
from tqdm import tqdm
device = 'cuda'
criterion = nn.MSELoss().to(device)
batch_losses = []
with torch.no_grad():
    # The loader also yields two `*_mark` tensors per batch; they are not
    # passed to the model, so they are not copied to the device.
    # Iterate over train_loader instead to score the training split.
    for batch_x, batch_y, _x_mark, _y_mark in tqdm(test_loader):
        batch_x = batch_x.float().to(device)

        outputs = model(batch_x)
        # Keep the last pred_len steps of the prediction, all channels.
        pred = outputs[:, -args.pred_len:, 0:].cpu()
        # The loss is computed on the CPU, so the target never leaves it.
        true = batch_y.float()[:, -args.pred_len:, 0:]

        loss = criterion(pred, true)

        # Store plain Python floats rather than 0-dim tensors.
        batch_losses.append(loss.item())
total_loss = np.average(batch_losses)
print('loss:', total_loss)

# Positional Encoding Zeroing-Out Experiment

In [None]:
# Keep a reference to the current positional embedding so it can be restored,
# then replace it with a constant tensor of the same shape, dtype and device.
# NOTE(review): the fill value is 0.5, not 0 -- every position gets the same
# embedding, but the embedding is not literally zeroed.
pos_embed_raw = model.model.backbone.W_pos
model.model.backbone.W_pos = nn.Parameter(torch.full_like(pos_embed_raw, 0.5))

In [None]:
# Put back the positional embedding that was saved in pos_embed_raw.
patch_backbone = model.model.backbone
patch_backbone.W_pos = pos_embed_raw

In [None]:
# Re-run the test-split evaluation with the model's current positional
# embedding: average of the per-batch MSE losses.
from tqdm import tqdm
device = 'cuda'
criterion = nn.MSELoss().to(device)
batch_losses = []
with torch.no_grad():
    # The loader also yields two `*_mark` tensors per batch; they are not
    # passed to the model, so they are not copied to the device.
    # Iterate over train_loader instead to score the training split.
    for batch_x, batch_y, _x_mark, _y_mark in tqdm(test_loader):
        batch_x = batch_x.float().to(device)

        outputs = model(batch_x)
        # Keep the last pred_len steps of the prediction, all channels.
        pred = outputs[:, -args.pred_len:, 0:].cpu()
        # The loss is computed on the CPU, so the target never leaves it.
        true = batch_y.float()[:, -args.pred_len:, 0:]

        loss = criterion(pred, true)

        # Store plain Python floats rather than 0-dim tensors.
        batch_losses.append(loss.item())
total_loss = np.average(batch_losses)
print('loss:', total_loss)

In [None]:
def cosine_similarity_matrix(vectors, eps=1e-12):
    """Return the pairwise cosine similarity between the rows of ``vectors``.

    Args:
        vectors: 2-D tensor whose rows are the vectors to compare.
        eps: Lower bound applied to each row's L2 norm before dividing, so
            that an all-zero row yields similarity 0 instead of NaN.

    Returns:
        Square tensor whose entry ``[i, j]`` is the cosine similarity between
        row ``i`` and row ``j`` of ``vectors``.
    """
    norms = vectors.norm(p=2, dim=1, keepdim=True)
    # Clamp the norms so a zero-length row does not cause a 0/0 division.
    unit_rows = vectors / norms.clamp_min(eps)
    return torch.mm(unit_rows, unit_rows.T)

# Pairwise cosine similarity between the rows of the positional embedding,
# with its overall maximum and minimum printed for reference.
similarity_matrix = cosine_similarity_matrix(model.model.backbone.W_pos)
vmax = similarity_matrix.max().item()
vmin = similarity_matrix.min().item()
print(vmax, vmin)

# Drop the last row and column before aggregating (presumably the position
# added by the end-padding patch -- TODO confirm).
similarity_matrix = similarity_matrix[:-1, :-1]
from collections import defaultdict
distance_similarity = defaultdict(list)
n_rows, n_cols = similarity_matrix.shape
for row in range(n_rows):
    for col in range(n_cols):
        if row == col:
            continue  # the diagonal (distance 0) is not part of the curve
        gap = abs(row - col)
        distance_similarity[gap].append(similarity_matrix[row, col].item())

# Collapse each distance bucket to its mean similarity.
mean_by_distance = {}
for gap, sims in distance_similarity.items():
    mean_by_distance[gap] = np.mean(sims)
distance_similarity = mean_by_distance

# Plot mean similarity as a function of the index distance between tokens.
plt.style.use("ggplot")
plt.plot(
    list(distance_similarity.keys()),
    list(distance_similarity.values()),
    color='dodgerblue',
    linestyle='-',
    linewidth=2,
    marker='o',
    markersize=6,
    markerfacecolor='red',
)
plt.xlabel('Token Distance')
plt.ylabel('Token Similarity')
# plt.title('distance_similarity')
plt.xticks(range(1, len(distance_similarity) + 1))
plt.show()
