In [1]:
import torch
import numpy as np
import pandas as pd
from scipy.sparse import diags
from scipy.integrate import quad
from scipy.special import p_roots

import sys
sys.path.append("../../src/")
import models as TF_model
import data_generator as TF_data
import utilis as TF_tool

# Introduction:
In this workflow, we generate the data for several distribution-shift analyses. Specifically:

-- Task shifts: a(x) and V(x)

-- Covariate shifts: Y

(In this workflow, the model paths have been removed, so users need to set `Model_dir` and `best_model_path` to point to their own trained models.)

## Functions of task shift implementation

In [3]:
def KL_rf(x, tau=5, alpha=2, dim=1, order=20):
    """Sample a positive random field on the points ``x`` from a truncated sine series.

    For each mode ``k = 1..order`` the term
    ``(k^2 * pi^2 + tau) ** (-alpha / 2) * xi_k * sin(k * pi * x)`` is added to a
    running sum, and the exponential of that sum is returned.

    Parameters
    ----------
    x : numpy array; ``x.shape[0]`` sets the number of random draws per mode.
    tau : shift added to every ``k^2 * pi^2`` term.
    alpha : decay exponent applied to the shifted term.
    dim : accepted for interface compatibility; not used by the body.
    order : number of sine modes summed.

    Returns
    -------
    ``np.exp`` of the accumulated series (the scalar ``1.0`` when ``order == 0``).

    Notes
    -----
    Draws from the global ``np.random`` state. For every mode a fresh vector of
    ``x.shape[0]`` standard normals is drawn, i.e. one value per evaluation
    point rather than one scalar per mode.
    """
    log_field = 0
    for mode in range(1, order + 1):
        eigenvalue = (mode ** 2) * (np.pi ** 2) + tau
        basis = np.sin(mode * np.pi * x)
        noise = np.random.normal(0, 1, x.shape[0])
        log_field = log_field + (eigenvalue ** (-alpha / 2)) * (noise * basis)
    return np.exp(log_field)

def gauss_root_weight(n,a,b):
    """Return Gauss-Legendre nodes mapped to ``[a, b]`` together with the raw weights.

    ``p_roots(n + 1)`` supplies ``n + 1`` nodes and weights. The nodes are
    mapped affinely to ``[a, b]``; the weights are returned unchanged, so a
    caller integrating ``f`` must apply the interval factor itself:
    ``0.5 * (b - a) * sum(w * f(x_scaled))``.

    Parameters
    ----------
    n : the rule uses ``n + 1`` points.
    a, b : interval end points.

    Returns
    -------
    (x_scaled, w) : mapped nodes and unscaled weights.
    """
    nodes, weights = p_roots(n + 1)
    half_width = 0.5 * (b - a)
    midpoint = 0.5 * (b + a)
    return half_width * nodes + midpoint, weights

def get_a_and_V(x_np, tau=5, alpha=3, low=1, high=2, v_type="uniform"):
    """Draw one sample of the coefficient pair ``(a, V)`` on the points ``x_np``.

    Parameters
    ----------
    x_np : numpy array of evaluation points; ``x_np.shape[0]`` values are drawn for V.
    tau, alpha : forwarded to ``KL_rf`` to sample ``a``.
    low, high : for ``v_type == "uniform"`` the bounds of the uniform
        distribution (default [1, 2]); for any other ``v_type`` they are passed
        to ``np.random.lognormal`` as its first and second positional
        arguments (mean and sigma of the underlying normal).
    v_type : ``"uniform"`` selects the uniform draw; every other value selects
        the log-normal draw.

    Returns
    -------
    (a, V) : the ``KL_rf`` sample and the V sample.
    """
    n_points = x_np.shape[0]
    # V is drawn before a(x); keeping this order keeps the global RNG stream
    # (and therefore every seeded result) unchanged.
    if v_type == "uniform":
        v_value = np.random.uniform(low=low, high=high, size=n_points)
    else:
        v_value = np.random.lognormal(low, high, size=n_points)
    a_value = KL_rf(x_np, tau=tau, alpha=alpha)
    return a_value, v_value

def get_A(x_np, a_np, v_np, input_dim):
    """Assemble an ``input_dim x input_dim`` matrix from point values of ``a`` and ``V``.

    With ``c = sqrt((1 - 0) / 2) * sqrt(2)`` the entry for modes ``i, j`` (1-based) is

        sum_q c^2 * [ i*j*pi^2 * cos(i*pi*x_q) * cos(j*pi*x_q) * a_q
                      + sin(i*pi*x_q) * sin(j*pi*x_q) * v_q ]

    Parameters
    ----------
    x_np : evaluation points.
    a_np, v_np : non-negative per-point values (their square roots are taken);
        the caller in this file passes coefficient values already multiplied
        by quadrature weights.
    input_dim : number of sine/cosine modes, i.e. the matrix size.

    Returns
    -------
    The sum of the cosine-based and sine-based Gram matrices.
    """
    scale = np.sqrt((1-0) / 2) * np.sqrt(2)
    sqrt_a = np.sqrt(a_np)
    sqrt_v = np.sqrt(v_np)

    grad_rows = []
    mass_rows = []
    for mode in range(1, input_dim + 1):
        grad_rows.append(scale * mode * np.pi * np.cos(mode * np.pi * x_np) * sqrt_a)
        mass_rows.append(scale * np.sin(mode * np.pi * x_np) * sqrt_v)
    grad_basis = np.array(grad_rows)
    mass_basis = np.array(mass_rows)

    stiffness_part = np.einsum("ab,bc->ac", grad_basis, grad_basis.T)
    potential_part = np.einsum("ab,bc->ac", mass_basis, mass_basis.T)
    return stiffness_part + potential_part
    
def _galerkin_matrices(input_dim, total_num, quad_order, left, right, tau, alpha, v_type):
    """Build ``total_num`` matrices with ``get_A`` on one shared quadrature rule over [0, 1]."""
    x_np, w_np = gauss_root_weight(quad_order, 0, 1)
    A_list = []
    for _ in range(total_num):
        a_np, v_np = get_a_and_V(x_np, tau=tau, alpha=alpha, low=left, high=right, v_type=v_type)
        # The coefficient samples are multiplied by the quadrature weights before assembly.
        A_list.append(get_A(x_np, a_np * w_np, v_np * w_np, input_dim))
    return A_list


def _random_field_laplacian_matrices(input_dim, total_num, tau, alpha, draw_diagonal):
    """Build ``total_num`` tridiagonal matrices from ``KL_rf`` samples plus a random diagonal."""
    grid_np = np.arange(input_dim) / input_dim
    offset = [-1, 0, 1]
    A_list = []
    for _ in range(total_num):
        # KL_rf is sampled before the diagonal so the RNG stream order is preserved.
        diag_item = KL_rf(grid_np, tau=tau, alpha=alpha, dim=1, order=20)
        k = [-diag_item[:-1], diag_item*2, -diag_item[1:]]
        Lap_rf = diags(k, offset).toarray()
        A_temp = np.diag(draw_diagonal())
        A_list.append((A_temp+Lap_rf/((1/input_dim)**2)))
    return A_list


def get_random_invertible_matrix(input_dim, total_num, left=1.0,right=2.0,matrix_type="laplacian",tau=5,alpha=1,seed_value=100):
    """Generate ``total_num`` random ``input_dim x input_dim`` matrices of a given family.

    The global NumPy RNG is re-seeded with ``seed_value`` on every call, so the
    result is reproducible and the global random state is reset as a side effect.

    Parameters
    ----------
    input_dim : matrix size.
    total_num : number of matrices to generate.
    left, right : parameters of the random zeroth-order part. They are the
        uniform bounds for "diag", "Galerkin", "laplacian" and "laplacian_rf";
        for "Galerkin_sv" they are forwarded to the log-normal draw in
        ``get_a_and_V``; for "laplacian_rf_sv" only ``left`` is used (log-normal
        with sigma fixed to 1).
    matrix_type : one of
        "diag"            random uniform diagonal matrices;
        "symetric"        placeholder, returns 0 (not implemented);
        "Galerkin"        ``get_A`` matrices, quadrature order ``5*input_dim+1``, uniform V;
        "Galerkin_sv"     ``get_A`` matrices, quadrature order ``2*input_dim+1``, log-normal V;
        "laplacian"       ``input_dim**2 * tridiag(-1, 2, -1)`` plus a uniform diagonal;
        "laplacian_rf"    tridiagonal matrix built from a ``KL_rf`` sample plus a uniform diagonal;
        "laplacian_rf_sv" as "laplacian_rf" but with a log-normal diagonal.
    tau, alpha : forwarded to the random-field sampler where one is used.
    seed_value : seed passed to ``np.random.seed``.

    Returns
    -------
    A list of ``total_num`` NumPy arrays. For "symetric" or an unknown
    ``matrix_type`` the integer 0 is returned (after printing a message in the
    unknown case); this legacy behaviour is kept for existing callers.
    """
    np.random.seed(seed_value)
    if matrix_type=="diag":
        # Fix: honour left/right like every other branch instead of the former
        # hard-coded [1.0, 2.0]; identical output for the default arguments.
        return [
            np.diag(np.random.uniform(low=left, high=right, size=input_dim))
            for _ in range(total_num)
        ]

    elif matrix_type == "symetric":
        # (A + A.T) / 2 -- not implemented.
        return 0

    elif matrix_type == "Galerkin":
        return _galerkin_matrices(input_dim, total_num, 5*input_dim+1, left, right, tau, alpha, "uniform")

    elif matrix_type == "Galerkin_sv":
        return _galerkin_matrices(input_dim, total_num, 2*input_dim+1, left, right, tau, alpha, "lognormal")

    elif matrix_type == "laplacian":
        k = [-np.ones(input_dim-1),2*np.ones(input_dim),-np.ones(input_dim-1)]
        offset = [-1,0,1]
        Lap_basic = diags(k,offset).toarray()
        A_list = []
        for _ in range(total_num):
            diagonal_entries = np.random.uniform(low=left, high=right, size=input_dim)
            A_list.append((np.diag(diagonal_entries)+Lap_basic * (input_dim**2)))
        return A_list

    elif matrix_type == "laplacian_rf":
        return _random_field_laplacian_matrices(
            input_dim, total_num, tau, alpha,
            lambda: np.random.uniform(low=left, high=right, size=input_dim),
        )

    elif matrix_type == "laplacian_rf_sv":
        return _random_field_laplacian_matrices(
            input_dim, total_num, tau, alpha,
            lambda: np.random.lognormal(left, 1, size=input_dim),
        )
    else:
        print("Unknown input type!")
        return 0


## Shift on coefficients $a(x)$
Here we shift the parameters $\alpha$ and $\tau$ of the random field $a(x)$, using the values listed in `alpha_tau_pairs`.

In [None]:
# Evaluation under a task shift of a(x): for every (alpha, tau) pair, every
# matrix seed and every test-time context length, load a trained model,
# form its prediction in closed form from P and Q, and record several error
# measures. One CSV row is written per (pair, seed, context length).

# Context lengths used at test time.
incontext_len_list_new = [20, 50, 100, 150] + [200, 500, 1000] 
d_list = [50]
n_list = [300]
N_list = [5000]

# Initial values; input_dim, incontext_len and total_num_train are overwritten
# inside the loops below from d_list, n_list and N_list.
input_dim = 10
incontext_len = 100
total_num_train = 5000
total_num_test = 1000
total_sep_num_test = 1
total_sep_num_train = 1


tau = 5
alpha = 3
# Model paths were removed from this notebook; set Model_dir (and the suffix
# used for best_model_path below) before running.
Model_dir = ""

# NOTE(review): apart from result_ot and data_list_at, the containers below
# are not read or written again in this cell.
test_loss_array_best = []
test_loss_array_identity = []
result = []
result_t = []
result_ot = []

data_list_at = []
data_list_lr = []
test_loss_array_pred_all = []
# Each entry is [alpha, tau].
alpha_tau_pairs = [[1,5], [2,5], [3,5], [4,5]]
# Only left_right_pairs[0] is used in this cell (V is not shifted here).
left_right_pairs = [[1,2], [2,3], [3,5], [5,10], [10,20]]
seed_for_A_list = [300 + k*1000 for k in range(5)]

print("Shift on alpha, tau")
for index in range(len(alpha_tau_pairs)):
    alpha_temp, tau_temp = alpha_tau_pairs[index]
    left_, right_ = left_right_pairs[0]
    for i in range(len(d_list)):
        input_dim = d_list[i] # d  
        print("d" + str(input_dim))
        for ss in seed_for_A_list:
            # Test matrices for this seed, and their inverses stacked into one array.
            A_list_test = get_random_invertible_matrix(input_dim, total_num_test, tau=tau_temp, alpha=alpha_temp, left=left_, right=right_, matrix_type="Galerkin",seed_value=ss)
            A_list_inv = [0] * len(A_list_test)
            for z in range(len(A_list_test)):
                A_list_inv[z] = np.linalg.inv(A_list_test[z])
            A_list_inv_np = np.array(A_list_inv)
        
            for N in N_list:
                total_num_train = N
                print("-N: " + str(total_num_train))
                for k in range(len(n_list)):
                    incontext_len = n_list[k]
                    # The value printed with the "--m" label is incontext_len,
                    # which is stored in the CSV column "n"; the column "m"
                    # holds incontext_len_test.
                    print("--m: " + str(incontext_len))
                    test_loss_array_pred_n_temp = []
                    for j in range(len(incontext_len_list_new)):
                        best_model_path = Model_dir + ""
                        incontext_len_test = incontext_len_list_new[j]
                        # Project helper; presumably returns per-matrix sequences of
                        # (source, solution) pairs of length incontext_len_test + 1 -- TODO confirm.
                        input_test, output_test = TF_data.generate_data_LN_fi_ne(A_list_test, input_dim, total_sep_num_test+incontext_len_test, alpha=1, tau=1, seed_value=600)
                        
                        input_test_np = np.array(input_test) 
                        output_test_np = np.array(output_test) 
                        # NOTE(review): every full output array is kept here and the list is
                        # not read in this cell; this can use a lot of memory for long contexts.
                        result_ot.append(output_test_np)
                        device = "cpu"
                        model = TF_model.TF_linear_att(incontext_len_test, input_dim, device=device)
                        # NOTE(review): no map_location is passed; a checkpoint saved on GPU
                        # may need torch.load(..., map_location=device) -- confirm.
                        model.load_state_dict(torch.load(best_model_path))
                        P = np.array(model.params[0].detach())
                        Q = np.array(model.params[1].detach())
                        
                        # Drop the last row along axis 1 and swap axes 1 and 2.
                        Y_temp = input_test_np[:,:-1,:].transpose((0,2,1))
                        # Batched Y_temp @ Y_temp^T, divided by the context length.
                        Y_n = np.einsum("abc,acd->abd", Y_temp, Y_temp.transpose((0,2,1)))
                        Y_n = Y_n / incontext_len_test #Y_temp.shape[1]
                        # pred[d] = P @ A_d^{-1} @ Y_n[d] @ Q @ (last input row of sample d)
                        input_ = np.einsum("abc,acd->abd", A_list_inv_np, Y_n)
                        pred = np.einsum("ab,dbe->dae", P, input_)
                        pred = np.einsum("dab,bc->dac", pred, Q)
                        pred = np.einsum("dab,db->da", pred, input_test_np[:,-1,:])
                        # Mean over samples of the squared Euclidean error against the last output row.
                        err_temp = np.mean(np.sum((pred - output_test_np[:,-1,:])**2,axis=1))
                        
                        # Average the four values returned by the project helper over all samples.
                        err_l2 = 0
                        err_l2_relative = 0
                        err_h1 = 0
                        err_h1_relative = 0
                        for item in range(pred.shape[0]):

                            err_l2_temp, err_l2_r_temp, err_h1_temp, err_h1_r_temp = TF_tool.l2_H1_err_l2_H1_relative_G(pred[item,:].reshape(1,-1), output_test_np[item,-1,:].reshape(1,-1))
                            err_l2 += err_l2_temp
                            err_l2_relative += err_l2_r_temp
                            err_h1 += err_h1_temp
                            err_h1_relative += err_h1_r_temp

                        err_l2 = err_l2 / pred.shape[0]
                        err_l2_relative = err_l2_relative / pred.shape[0]
                        err_h1 = err_h1 / pred.shape[0]
                        err_h1_relative = err_h1_relative / pred.shape[0]
                        print(err_temp)
                        print(err_h1)
                        print(err_h1_relative)
                        print(err_l2)
                        print(err_l2_relative)
            
                        # Row order must match the column list of the DataFrame below.
                        data_temp = [index, ss, alpha_temp, tau_temp, left_, right_, input_dim, incontext_len, total_num_train, total_num_test, incontext_len_test, err_temp, err_h1, err_h1_relative, err_l2, err_l2_relative]
                        data_list_at.append(data_temp)
                    
# err_temp is stored under the column name "err_l2_coeff".
data_shift_at = pd.DataFrame(data=data_list_at, columns=["index", "seed", "alpha", "tau", "v_l", "v_r", "d", "n", "N_train", "N_test", "m", "err_l2_coeff", "err_h1", "err_h1_relative", "err_l2", "err_l2_relative"])
data_shift_at.to_csv("testdistribution_at.csv")

Shift on alpha, tau
d50
-N: 5000
--m: 300
4.509075477051874e-06
0.007321878641737986
0.6442518856118284
0.001210095215207259
0.5382386734912827
1.8661807196568525e-06
0.006073046027838808
0.5530919316921571
0.000785022192698817
0.3595120512076353
9.578147091927625e-07
0.0055830199513786675
0.5129371538078157
0.0005924212004225667
0.2782375512947769
6.849624596792352e-07
0.0054073688060262415
0.4988243814547838
0.000512981612355152
0.2437119457931958
5.477487739895431e-07
0.00527819764180647
0.4813041459937081
0.0004576993051134137
0.2144883837177822
3.084240447488808e-07
0.005085871525631408
0.47436242542144524
0.0003545874517550569
0.17647748287484963
2.2633762509071216e-07
0.005095958885617632
0.474606445895833
0.00031371874831719835
0.161556984066654
-N: 5000
--m: 300
4.52463812794679e-06
0.007319070043453065
0.6436988740848986
0.0012095952860901374
0.5380079452972911
1.8585052689703359e-06
0.006049772320800713
0.5527139402870158
0.0007855951516596264
0.3603361847103916
9.5874956849

## Shift on coefficients $V(x)$
Here we change the left and right boundaries of the uniform distribution that defines $V(x)$, using the values listed in `left_right_pairs`.

In [None]:
# Evaluation under a task shift of V(x): for every [left, right] pair, every
# matrix seed and every test-time context length, load a trained model,
# form its prediction in closed form from P and Q, and record several error
# measures. One CSV row is written per (pair, seed, context length).

# Context lengths used at test time.
incontext_len_list_new = [20, 50, 100, 150] + [200, 500, 1000] 
d_list = [50]
n_list = [300]
N_list = [5000]

# Initial values; input_dim, incontext_len and total_num_train are overwritten
# inside the loops below from d_list, n_list and N_list.
input_dim = 10
incontext_len = 100
total_num_train = 5000
total_num_test = 1000
total_sep_num_test = 1
total_sep_num_train = 1


tau = 5
alpha = 3
# Model paths were removed from this notebook; set Model_dir (and the suffix
# used for best_model_path below) before running.
Model_dir = ""

# NOTE(review): apart from result_ot and data_list_lr, the containers below
# are not read or written again in this cell.
test_loss_array_best = []
test_loss_array_identity = []
result = []
result_t = []
result_ot = []

data_list_at = []
data_list_lr = []
test_loss_array_pred_all = []
# Each entry is [left, right], passed on as the bounds for V.
left_right_pairs = [[1,2], [2,3], [3,5], [5,10], [10,20]]
seed_for_A_list = [300 + k*1000 for k in range(5)]

print("Shift on V")
for index in range(len(left_right_pairs)):
    # alpha and tau are held fixed while the V bounds vary.
    alpha_temp, tau_temp = [3,5]
    left_, right_ = left_right_pairs[index]
    for i in range(len(d_list)):
        input_dim = d_list[i] # d  
        print("d" + str(input_dim))
        for ss in seed_for_A_list:
            # Test matrices for this seed, and their inverses stacked into one array.
            A_list_test = get_random_invertible_matrix(input_dim, total_num_test, tau=tau_temp, alpha=alpha_temp, left=left_, right=right_, matrix_type="Galerkin",seed_value=ss)
            A_list_inv = [0] * len(A_list_test)
            for z in range(len(A_list_test)):
                A_list_inv[z] = np.linalg.inv(A_list_test[z])
            A_list_inv_np = np.array(A_list_inv)
        
            for N in N_list:
                total_num_train = N
                print("-N: " + str(total_num_train))
                for k in range(len(n_list)):
                    incontext_len = n_list[k]
                    # The value printed with the "--m" label is incontext_len,
                    # which is stored in the CSV column "n"; the column "m"
                    # holds incontext_len_test.
                    print("--m: " + str(incontext_len))
                    test_loss_array_pred_n_temp = []
                    for j in range(len(incontext_len_list_new)):
                        best_model_path = Model_dir + ""
                        incontext_len_test = incontext_len_list_new[j]
                        # Project helper, called here with its default alpha/tau
                        # (the alpha/tau-shift cell passes alpha=1, tau=1 explicitly).
                        # NOTE(review): confirm the defaults match.
                        input_test, output_test = TF_data.generate_data_LN_fi_ne(A_list_test, input_dim, total_sep_num_test+incontext_len_test, seed_value=600)
                        
                        input_test_np = np.array(input_test) 
                        output_test_np = np.array(output_test) 
                        # NOTE(review): every full output array is kept here and the list is
                        # not read in this cell; this can use a lot of memory for long contexts.
                        result_ot.append(output_test_np)
                        device = "cpu"
                        model = TF_model.TF_linear_att(incontext_len_test, input_dim, device=device)
                        # NOTE(review): no map_location is passed; a checkpoint saved on GPU
                        # may need torch.load(..., map_location=device) -- confirm.
                        model.load_state_dict(torch.load(best_model_path))
                        P = np.array(model.params[0].detach())
                        Q = np.array(model.params[1].detach())
                        
                        # Drop the last row along axis 1 and swap axes 1 and 2.
                        Y_temp = input_test_np[:,:-1,:].transpose((0,2,1))
                        # Batched Y_temp @ Y_temp^T, divided by the context length.
                        Y_n = np.einsum("abc,acd->abd", Y_temp, Y_temp.transpose((0,2,1)))
                        Y_n = Y_n / incontext_len_test #Y_temp.shape[1]
                        # pred[d] = P @ A_d^{-1} @ Y_n[d] @ Q @ (last input row of sample d)
                        input_ = np.einsum("abc,acd->abd", A_list_inv_np, Y_n)
                        pred = np.einsum("ab,dbe->dae", P, input_)
                        pred = np.einsum("dab,bc->dac", pred, Q)
                        pred = np.einsum("dab,db->da", pred, input_test_np[:,-1,:])
                        # Mean over samples of the squared Euclidean error against the last output row.
                        err_temp = np.mean(np.sum((pred - output_test_np[:,-1,:])**2,axis=1))
                        
                        # Average the four values returned by the project helper over all samples.
                        err_l2 = 0
                        err_l2_relative = 0
                        err_h1 = 0
                        err_h1_relative = 0
                        for item in range(pred.shape[0]):

                            err_l2_temp, err_l2_r_temp, err_h1_temp, err_h1_r_temp = TF_tool.l2_H1_err_l2_H1_relative_G(pred[item,:].reshape(1,-1), output_test_np[item,-1,:].reshape(1,-1))
                            err_l2 += err_l2_temp
                            err_l2_relative += err_l2_r_temp
                            err_h1 += err_h1_temp
                            err_h1_relative += err_h1_r_temp

                        err_l2 = err_l2 / pred.shape[0]
                        err_l2_relative = err_l2_relative / pred.shape[0]
                        err_h1 = err_h1 / pred.shape[0]
                        err_h1_relative = err_h1_relative / pred.shape[0]
                        print(err_temp)
                        print(err_h1)
                        print(err_h1_relative)
                        print(err_l2)
                        print(err_l2_relative)
            
                        # Row order must match the column list of the DataFrame below.
                        data_temp = [index, ss, alpha_temp, tau_temp, left_, right_, input_dim, incontext_len, total_num_train, total_num_test, incontext_len_test, err_temp, err_h1, err_h1_relative, err_l2, err_l2_relative]
                        data_list_lr.append(data_temp)

# err_temp is stored under the column name "err_l2_coeff".
data_shift_V = pd.DataFrame(data=data_list_lr, columns=["index", "seed", "alpha", "tau", "v_l", "v_r", "d", "n", "N_train", "N_test", "m", "err_l2_coeff", "err_h1", "err_h1_relative", "err_l2", "err_l2_relative"])
data_shift_V.to_csv("testdistribution_V.csv")


Shift on V
d50
-N: 5000
--m: 300
4.621442508059479e-06
0.007282029763374657
0.6385589494973611
0.0012246730051168773
0.5380954815458181
1.900706136409797e-06
0.005975664071088749
0.5446373509938381
0.0007929502635658222
0.3595084695741965
9.735410816246802e-07
0.005464821241026837
0.5030593930818215
0.0005965926535559956
0.2775268525715093
6.966554757176695e-07
0.005276092580054658
0.4880105382020371
0.0005150416102260475
0.2418001731185876
5.570648776307959e-07
0.005140769861478299
0.4707014579092907
0.00046100299954751996
0.21387220934183113
3.0510455954707314e-07
0.00494832312166132
0.4631962994900916
0.0003534849494376501
0.17444382828454152
2.224652064261362e-07
0.004945167255570247
0.463065894560013
0.0003114817981050071
0.15946215202062825
-N: 5000
--m: 300
4.626006573481249e-06
0.007282788835873904
0.6385498041764627
0.0012249162857860196
0.5381715454140404
1.9014239234176681e-06
0.005974650343147973
0.5445229897106413
0.0007930269302721118
0.3594710159756054
9.745777209667983e

-N: 5000
--m: 300
3.395040723760407e-06
0.006844547691426549
0.6636215898792148
0.001062123243293974
0.5420444008699709
1.4135442772378156e-06
0.005761135026642091
0.577785479843031
0.0006952036412505622
0.3667508932353558
7.476870812577169e-07
0.0053386334563504416
0.5388290764528465
0.0005308441465485186
0.2870956438249738
5.612996083922954e-07
0.005199981984071747
0.5257365996024855
0.00046970656787358016
0.25612225388092646
4.58926424370577e-07
0.005085489404185548
0.5095960570596733
0.000425642881303437
0.22777809740947152
2.769755020612627e-07
0.004936658155338213
0.5032627521979677
0.00034099196314026125
0.1934307624513486
2.1714346888592786e-07
0.004944967047187658
0.504046896648759
0.0003091961569434517
0.17998483888392208
-N: 5000
--m: 300
3.394936504304838e-06
0.00684509876843576
0.6635985806889197
0.0010622760270834375
0.5418928062121285
1.413830071134556e-06
0.0057613804163945915
0.5777039252868712
0.0006953124355256143
0.36672798274692087
7.46956442675628e-07
0.0053384287

## Covariate shift in the source term $f$
Here we change the scale of the covariance matrix with parameters defined in "scaler_list"

In [None]:
# Evaluation under a covariate shift of the source term: for every value in
# scaler_list, every matrix seed and every test-time context length, load a
# trained model, form its prediction in closed form from P and Q, and record
# several error measures. One CSV row is written per (scaler, seed, context length).

# Context lengths used at test time.
incontext_len_list_new = [20, 50, 100, 150] + [200, 500, 1000] 
d_list = [50]
n_list = [300]
N_list = [5000]

# Initial values; input_dim, incontext_len and total_num_train are overwritten
# inside the loops below from d_list, n_list and N_list.
input_dim = 10
incontext_len = 100
total_num_train = 5000
total_num_test = 1000
total_sep_num_test = 1
total_sep_num_train = 1


tau = 5
alpha = 3
# Model paths were removed from this notebook; set Model_dir (and the suffix
# used for best_model_path below) before running.
Model_dir = ""

# NOTE(review): apart from result_ot and data_list_y, the containers below
# are not read or written again in this cell.
test_loss_array_best = []
test_loss_array_identity = []
result = []
result_t = []
result_ot = []

data_list_y = []

test_loss_array_pred_all = []
# Values passed as `scaler` to the data generator.
scaler_list = [1,3,5]
seed_for_A_list = [300 + k*1000 for k in range(5)]

print("Shift on Y")
for index in range(len(scaler_list)):
    # The operator parameters and (beta, c) are held fixed; only the scaler varies.
    alpha_temp, tau_temp = [3,5]
    left_, right_ = [1,2]
    beta_y, c_y = [1,1]
    scaler_temp = scaler_list[index]
    
    for i in range(len(d_list)):
        input_dim = d_list[i] # d  
        print("d" + str(input_dim))
        for ss in seed_for_A_list:
            # Test matrices for this seed, and their inverses stacked into one array.
            A_list_test = get_random_invertible_matrix(input_dim, total_num_test, tau=tau_temp, alpha=alpha_temp, left=left_, right=right_, matrix_type="Galerkin",seed_value=ss)
            A_list_inv = [0] * len(A_list_test)
            for z in range(len(A_list_test)):
                A_list_inv[z] = np.linalg.inv(A_list_test[z])
            A_list_inv_np = np.array(A_list_inv)
        
            for N in N_list:
                total_num_train = N
                print("-N: " + str(total_num_train))
                for k in range(len(n_list)):
                    incontext_len = n_list[k]
                    # The value printed with the "--m" label is incontext_len,
                    # which is stored in the CSV column "n"; the column "m"
                    # holds incontext_len_test.
                    print("--m: " + str(incontext_len))
                    test_loss_array_pred_n_temp = []
                    for j in range(len(incontext_len_list_new)):
                        best_model_path = Model_dir + ""
                        incontext_len_test = incontext_len_list_new[j]
                        # Project helper; beta_y and c_y are passed through its
                        # `alpha` and `tau` keyword arguments.
                        input_test, output_test = TF_data.generate_data_LN_fi_ne(A_list_test, input_dim, total_sep_num_test+incontext_len_test, scaler=scaler_temp, alpha=beta_y, tau=c_y, seed_value=600)
                        
                        input_test_np = np.array(input_test) 
                        output_test_np = np.array(output_test) 
                        # NOTE(review): every full output array is kept here and the list is
                        # not read in this cell; this can use a lot of memory for long contexts.
                        result_ot.append(output_test_np)
                        device = "cpu"
                        model = TF_model.TF_linear_att(incontext_len_test, input_dim, device=device)
                        # NOTE(review): no map_location is passed; a checkpoint saved on GPU
                        # may need torch.load(..., map_location=device) -- confirm.
                        model.load_state_dict(torch.load(best_model_path))
                        P = np.array(model.params[0].detach())
                        Q = np.array(model.params[1].detach())
                        
                        # Drop the last row along axis 1 and swap axes 1 and 2.
                        Y_temp = input_test_np[:,:-1,:].transpose((0,2,1))
                        # Batched Y_temp @ Y_temp^T, divided by the context length.
                        Y_n = np.einsum("abc,acd->abd", Y_temp, Y_temp.transpose((0,2,1)))
                        Y_n = Y_n / incontext_len_test
                        # pred[d] = P @ A_d^{-1} @ Y_n[d] @ Q @ (last input row of sample d)
                        input_ = np.einsum("abc,acd->abd", A_list_inv_np, Y_n)
                        pred = np.einsum("ab,dbe->dae", P, input_)
                        pred = np.einsum("dab,bc->dac", pred, Q)
                        pred = np.einsum("dab,db->da", pred, input_test_np[:,-1,:])
                        # Mean over samples of the squared Euclidean error against the last output row.
                        err_temp = np.mean(np.sum((pred - output_test_np[:,-1,:])**2,axis=1))
                        
                        # Average the four values returned by the project helper over all samples.
                        err_l2 = 0
                        err_l2_relative = 0
                        err_h1 = 0
                        err_h1_relative = 0
                        for item in range(pred.shape[0]):

                            err_l2_temp, err_l2_r_temp, err_h1_temp, err_h1_r_temp = TF_tool.l2_H1_err_l2_H1_relative_G(pred[item,:].reshape(1,-1), output_test_np[item,-1,:].reshape(1,-1))
                            err_l2 += err_l2_temp
                            err_l2_relative += err_l2_r_temp
                            err_h1 += err_h1_temp
                            err_h1_relative += err_h1_r_temp

                        err_l2 = err_l2 / pred.shape[0]
                        err_l2_relative = err_l2_relative / pred.shape[0]
                        err_h1 = err_h1 / pred.shape[0]
                        err_h1_relative = err_h1_relative / pred.shape[0]
                        print(err_temp)
                        print(err_h1)
                        print(err_h1_relative)
                        print(err_l2)
                        print(err_l2_relative)
            
            
                        # Row order must match the column list of the DataFrame below.
                        # NOTE(review): scaler_temp itself is not stored; the shifted value
                        # can only be recovered as scaler_list[index].
                        data_temp = [index, ss, alpha_temp, tau_temp, left_, right_, input_dim, incontext_len, total_num_train, total_num_test, incontext_len_test, err_temp, err_h1, err_h1_relative, err_l2, err_l2_relative]
                        data_list_y.append(data_temp)

# err_temp is stored under the column name "err_l2_coeff".
data_shift_Y = pd.DataFrame(data=data_list_y, columns=["index", "seed", "alpha", "tau", "v_l", "v_r", "d", "n", "N_train", "N_test", "m", "err_l2_coeff", "err_h1", "err_h1_relative", "err_l2", "err_l2_relative"])
data_shift_Y.to_csv("testdistribution_Y.csv")  


## Covariate shift in the source term $f$
Here we shift the parameters $\beta$ and $c$ of the source-term distribution, using the values listed in `beta_c_y_pairs`.

In [None]:
# Evaluation under a covariate shift of the source term: for every (beta, c)
# pair, every matrix seed and every test-time context length, load a trained
# model, form its prediction in closed form from P and Q, and record several
# error measures. One CSV row is written per (pair, seed, context length).

# Context lengths used at test time.
incontext_len_list_new = [20, 50, 100, 150] + [200, 500, 1000] 
d_list = [50]
n_list = [300]
N_list = [5000]

# Initial values; input_dim, incontext_len and total_num_train are overwritten
# inside the loops below from d_list, n_list and N_list.
input_dim = 10
incontext_len = 100
total_num_train = 5000
total_num_test = 1000
total_sep_num_test = 1
total_sep_num_train = 1


tau = 5
alpha = 3
# Model paths were removed from this notebook; set Model_dir (and the suffix
# used for best_model_path below) before running.
Model_dir = ""

# NOTE(review): apart from result_ot and data_list_y_at, the containers below
# are not read or written again in this cell.
test_loss_array_best = []
test_loss_array_identity = []
result = []
result_t = []
result_ot = []

data_list_y_at = []

test_loss_array_pred_all = []
# Not used in this cell: the scaler is fixed to 1 below.
scaler_list = [1,3,5]

# Each entry is [beta, c].
beta_c_y_pairs = [[1,1], [1,2], [2,1], [2,2]]
seed_for_A_list = [300 + k*1000 for k in range(5)]

print("Shift on Y")
for index in range(len(beta_c_y_pairs)):
    # The operator parameters and the scaler are held fixed; only (beta, c) varies.
    alpha_temp, tau_temp = [3,5]
    left_, right_ = [1,2]
    beta_y, c_y = beta_c_y_pairs[index] #[1,1]
    scaler_temp = 1 #scaler_list[index]
    
    for i in range(len(d_list)):
        input_dim = d_list[i] # d  
        print("d" + str(input_dim))
        for ss in seed_for_A_list:
            # Test matrices for this seed, and their inverses stacked into one array.
            A_list_test = get_random_invertible_matrix(input_dim, total_num_test, tau=tau_temp, alpha=alpha_temp, left=left_, right=right_, matrix_type="Galerkin",seed_value=ss)
            A_list_inv = [0] * len(A_list_test)
            for z in range(len(A_list_test)):
                A_list_inv[z] = np.linalg.inv(A_list_test[z])
            A_list_inv_np = np.array(A_list_inv)
        
            for N in N_list:
                total_num_train = N
                print("-N: " + str(total_num_train))
                for k in range(len(n_list)):
                    incontext_len = n_list[k]
                    # The value printed with the "--m" label is incontext_len,
                    # which is stored in the CSV column "n"; the column "m"
                    # holds incontext_len_test.
                    print("--m: " + str(incontext_len))
                    test_loss_array_pred_n_temp = []
                    for j in range(len(incontext_len_list_new)):
                        best_model_path = Model_dir + ""
                        incontext_len_test = incontext_len_list_new[j]
                        # Project helper; beta_y and c_y are passed through its
                        # `alpha` and `tau` keyword arguments.
                        input_test, output_test = TF_data.generate_data_LN_fi_ne(A_list_test, input_dim, total_sep_num_test+incontext_len_test, scaler=scaler_temp, alpha=beta_y, tau=c_y, seed_value=600)
                        
                        input_test_np = np.array(input_test) 
                        output_test_np = np.array(output_test) 
                        # NOTE(review): every full output array is kept here and the list is
                        # not read in this cell; this can use a lot of memory for long contexts.
                        result_ot.append(output_test_np)
                        device = "cpu"
                        model = TF_model.TF_linear_att(incontext_len_test, input_dim, device=device)
                        # NOTE(review): no map_location is passed; a checkpoint saved on GPU
                        # may need torch.load(..., map_location=device) -- confirm.
                        model.load_state_dict(torch.load(best_model_path))
                        P = np.array(model.params[0].detach())
                        Q = np.array(model.params[1].detach())
                        
                        # Drop the last row along axis 1 and swap axes 1 and 2.
                        Y_temp = input_test_np[:,:-1,:].transpose((0,2,1))
                        # Batched Y_temp @ Y_temp^T, divided by the context length.
                        Y_n = np.einsum("abc,acd->abd", Y_temp, Y_temp.transpose((0,2,1)))
                        Y_n = Y_n / incontext_len_test #Y_temp.shape[1]
                        # pred[d] = P @ A_d^{-1} @ Y_n[d] @ Q @ (last input row of sample d)
                        input_ = np.einsum("abc,acd->abd", A_list_inv_np, Y_n)
                        pred = np.einsum("ab,dbe->dae", P, input_)
                        pred = np.einsum("dab,bc->dac", pred, Q)
                        pred = np.einsum("dab,db->da", pred, input_test_np[:,-1,:])
                        # Mean over samples of the squared Euclidean error against the last output row.
                        err_temp = np.mean(np.sum((pred - output_test_np[:,-1,:])**2,axis=1))
                        
                        # Average the four values returned by the project helper over all samples.
                        err_l2 = 0
                        err_l2_relative = 0
                        err_h1 = 0
                        err_h1_relative = 0
                        for item in range(pred.shape[0]):

                            err_l2_temp, err_l2_r_temp, err_h1_temp, err_h1_r_temp = TF_tool.l2_H1_err_l2_H1_relative_G(pred[item,:].reshape(1,-1), output_test_np[item,-1,:].reshape(1,-1))
                            err_l2 += err_l2_temp
                            err_l2_relative += err_l2_r_temp
                            err_h1 += err_h1_temp
                            err_h1_relative += err_h1_r_temp

                        err_l2 = err_l2 / pred.shape[0]
                        err_l2_relative = err_l2_relative / pred.shape[0]
                        err_h1 = err_h1 / pred.shape[0]
                        err_h1_relative = err_h1_relative / pred.shape[0]
                        print(err_temp)
                        print(err_h1)
                        print(err_h1_relative)
                        print(err_l2)
                        print(err_l2_relative)
            
                        # Row order must match the column list of the DataFrame below.
                        # NOTE(review): beta_y and c_y are not stored; the shifted values
                        # can only be recovered as beta_c_y_pairs[index].
                        data_temp = [index, ss, alpha_temp, tau_temp, left_, right_, input_dim, incontext_len, total_num_train, total_num_test, incontext_len_test, err_temp, err_h1, err_h1_relative, err_l2, err_l2_relative]
                        data_list_y_at.append(data_temp)

# err_temp is stored under the column name "err_l2_coeff".
data_shift_Y_at = pd.DataFrame(data=data_list_y_at, columns=["index", "seed", "alpha", "tau", "v_l", "v_r", "d", "n", "N_train", "N_test", "m", "err_l2_coeff", "err_h1", "err_h1_relative", "err_l2", "err_l2_relative"])
data_shift_Y_at.to_csv("testdistribution_Y_at.csv")
