In [1]:
# import torch

# # Define the dimensions
# state_dim = 4  # Dimension of the state vector x_n
# control_dim = 2  # Dimension of the control vector u_n
# num_trajectories = 50  # Number of different initial states x_0
# trajectory_length = 100  # Number of steps for each trajectory

# # Initialize the matrices A and B
# A = torch.randn(state_dim, state_dim)
# B = torch.randn(state_dim, control_dim)

# # Initialize the datasets
# x_dataset = torch.zeros(num_trajectories, trajectory_length, state_dim)
# u_dataset = torch.zeros(num_trajectories, trajectory_length, control_dim)

# # Generate the trajectories
# for i in range(num_trajectories):
#     x_n = torch.randn(state_dim)  # Initial state x_0
#     for t in range(trajectory_length):
#         u_n = torch.randn(control_dim)  # Random control input u_n
#         x_n = A @ x_n + B @ u_n  # Compute the next state x_{n+1}
#         x_dataset[i, t] = x_n
#         u_dataset[i, t] = u_n

# # Reshape x_dataset to the desired shape
# x_dataset = x_dataset.view(num_trajectories * trajectory_length, state_dim)
# u_dataset = u_dataset.view(num_trajectories * trajectory_length, control_dim)
# nu_dataset = torch.ones(num_trajectories * trajectory_length, 1)

# # Save the datasets if needed
# torch.save(x_dataset, 'x_dataset.pt')
# torch.save(u_dataset, 'u_dataset.pt')
# torch.save(nu_dataset, 'nu_dataset.pt')
# torch.save(A, 'A.pt')
# torch.save(B, 'B.pt')

# # Print the shape of the datasets to verify
# print(f"x_dataset shape: {x_dataset.shape}")
# print(f"u_dataset shape: {u_dataset.shape}")


In [2]:
import yaml
def read_config_file(config_path):
    """Parse the YAML file at ``config_path`` and return its contents.

    Args:
        config_path: Path of the YAML file to open (read in text mode).

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.
    """
    with open(config_path, 'r') as handle:
        return yaml.safe_load(handle)

In [3]:
# Location of the experiment's YAML configuration.
# NOTE(review): this is an absolute, machine-specific path to a single file (despite the
# `_dir` suffix); the notebook only runs where this exact file exists.
config_dir = "/home/shiqi/code/Project2-sensor-case/model_combination_Argos/pca_from_formulation/output/experiment_17/config.yaml"
# Parsed configuration; later cells read 'seed', 'experiment', 'window_size',
# 'predict_num' and 'save_dir' from it.
config = read_config_file(config_dir)

In [4]:
import torch
import numpy as np
import pca_koopman_dir as km
import sys
sys.path.append('/home/shiqi/code/Project2-sensor-case/model_combination_Argos/utils')
from load_dataset import *


# Device: use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

# nu_list = config['nu_list']

## -------------Cookup Data----------------
# Set the random seed for reproducibility.
# Both global RNGs are seeded: torch's (used by torch.randn in this cell) and
# NumPy's (used by np.random.shuffle further down).
seed = config['seed']
torch.manual_seed(seed)
np.random.seed(seed)

# Sizes of the synthetic linear system x_{n+1} = A x_n + B u_n.
state_dim = 4            # length of each state vector x_n
control_dim = 2          # length of each control vector u_n
num_trajectories = 5000  # number of independently drawn initial states x_0
trajectory_length = 2    # number of states recorded per trajectory

# Random system matrices; A is drawn before B so the RNG stream is consumed
# in a fixed order.
A = torch.randn(state_dim, state_dim)
B = torch.randn(state_dim, control_dim)

# Per-trajectory arrays: states, controls and the constant nu channel.
x_dataset, u_dataset, nu_dataset = [], [], []

for traj_idx in range(num_trajectories):
    state = torch.randn(state_dim)  # initial state x_0
    states, controls = [], []
    for step in range(trajectory_length):
        # Record the current state first, then draw the control that drives
        # the system to the next state.
        states.append(state.numpy())
        control = torch.randn(control_dim)
        controls.append(control.numpy())
        state = A @ state + B @ control
    x_dataset.append(np.array(states))
    u_dataset.append(np.array(controls))
    nu_dataset.append(np.ones((trajectory_length, 1)))  # nu is all ones

# Stack all trajectories back to back along the time axis.
x_data, u_data, nu_data = (
    np.concatenate(chunks, axis=0) for chunks in (x_dataset, u_dataset, nu_dataset)
)

# Params
params = km.Params(state_dim, control_dim, config)

# Model
# Exactly one builder is selected by config['experiment']. An unrecognised value
# is rejected here with a clear message instead of surfacing later as a
# NameError on `model`.
experiment = config['experiment']
if experiment == 'linear':
    model, x_pca_scaled = km.build_model_linear_multi_nu(params, [x_data], [u_data])
elif experiment == 'DicWithInputs':
    model, x_pca_scaled = km.build_model_DicWithInputs_multi_nu(params, [x_data], [u_data])
elif experiment == 'MatrixWithInputs':
    model, x_pca_scaled = km.build_model_MatrixWithInputs_multi_nu(params, [x_data], [u_data])
else:
    raise ValueError(
        f"Unsupported config['experiment'] value {experiment!r}; "
        "expected 'linear', 'DicWithInputs' or 'MatrixWithInputs'"
    )
model = model.to(device)

# Rescale and Slices
# From here on x_data holds the x_pca_scaled values returned by the model
# builder; u_data and nu_data are carried over unchanged.
x_data = x_pca_scaled.cpu().numpy()
print(x_data.shape)
print(u_data.shape)
print(nu_data.shape)

window_size = config['window_size']
predict_num = config['predict_num']
print(window_size)
print(predict_num)

# Cut each series with the same window settings (x first, then u, then nu).
x_data_slices, u_data_slices, nu_data_slices = (
    cut_slides(series, window_size, predict_num)
    for series in (x_data, u_data, nu_data)
)

# Merge the slices of each series along the first axis.
x_data, u_data, nu_data = (
    np.concatenate(slices, axis=0)
    for slices in (x_data_slices, u_data_slices, nu_data_slices)
)

# Apply one shared random permutation so the three arrays stay aligned.
shuffled_indices = np.arange(len(x_data))
np.random.shuffle(shuffled_indices)
x_data, u_data, nu_data = (
    series[shuffled_indices] for series in (x_data, u_data, nu_data)
)
print(x_data.shape)

Device: cuda
(10000, 4)
(10000, 2)
(10000, 1)
2
2
(5000, 2, 4)


In [5]:
print(x_data_slices[0])

[[[ 0.25926355 -0.23081726  0.66031116  1.7292594 ]
  [ 1.2887409  -2.1077466  -0.814873   -1.297368  ]]]


In [6]:
print(x_dataset[0])
print(x_dataset[0].shape)

[[ 0.6658208 -0.8064082  0.9090227  1.4859948]
 [-3.9207363 -1.167361  -1.3645289  3.3975472]]
(2, 4)


In [7]:
# One-step sanity check on the first trajectory: xx is its first state, yy its
# second state and uu the control applied in between.
xx, yy = (torch.tensor(row).to(device) for row in x_dataset[0][:2])
uu = torch.tensor(u_dataset[0][0]).to(device)
A, B = A.to(device), B.to(device)
# Column-vector form and row-vector form of the same update; both are printed
# so they can be compared against yy.
print(A @ xx + B @ uu)
print(xx @ A.T + uu @ B.T)

tensor([-3.9207, -1.1674, -1.3645,  3.3975], device='cuda:0')
tensor([-3.9207, -1.1674, -1.3645,  3.3975], device='cuda:0')


In [8]:
yy

tensor([-3.9207, -1.1674, -1.3645,  3.3975], device='cuda:0')

In [9]:
# import torch
# import numpy as np
# import pca_koopman_dir as km
# import sys
# sys.path.append('/home/shiqi/code/Project2-sensor-case/model_combination_Argos/utils')
# from load_dataset import *

# # Device
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# print('Device:', device)

# # nu_list = config['nu_list']

# ## -------------Cookup Data----------------
# # Set the random seed for reproducibility
# seed = config['seed']
# torch.manual_seed(seed)
# np.random.seed(seed)

# # Define the dimensions
# state_dim = 4  # Dimension of the state vector x_n
# control_dim = 2  # Dimension of the control vector u_n
# num_trajectories = 1  # Number of different initial states x_0
# trajectory_length = 2  # Number of steps for each trajectory

# # Initialize the matrices A and B
# A = torch.randn(state_dim, state_dim)
# B = torch.randn(state_dim, control_dim)

# # Initialize the datasets
# x_dataset = []
# u_dataset = []
# nu_dataset = []

# # Generate the trajectories
# for i in range(num_trajectories):
#     x_n = torch.randn(state_dim)  # Initial state x_0
#     x_traj = []
#     u_traj = []
#     nu_traj = np.ones((trajectory_length, 1))  # nu_dataset with all ones
#     for t in range(trajectory_length):
#         x_traj.append(x_n.numpy())
#         print("x_n:", x_n)
#         u_n = torch.randn(control_dim)  # Random control input u_n
#         print("u_n", u_n)
#         x_n = A @ x_n + B @ u_n  # Compute the next state x_{n+1}
#         print("x_n+1:", x_n)
#         u_traj.append(u_n.numpy())
#     x_dataset.append(np.array(x_traj))
#     u_dataset.append(np.array(u_traj))
#     nu_dataset.append(nu_traj)
# x_data = np.concatenate(x_dataset, axis=0)
# u_data = np.concatenate(u_dataset, axis=0)
# nu_data = np.concatenate(nu_dataset, axis=0)


# print(x_data.shape)
# print(u_data.shape)
# print(nu_data.shape)

# xx = torch.tensor(x_dataset[0][0, :]).to(device)
# yy = torch.tensor(x_dataset[0][1, :]).to(device)
# uu = torch.tensor(u_dataset[0][0, :]).to(device)
# print(xx, yy, uu)
# A = A.to(device)
# B = B.to(device)
# yy_pred = xx @ A.T + uu @ B.T

# print("Actual yy:", yy)
# print("Predicted yy:", yy_pred)
# print("Difference:", yy - yy_pred)


In [10]:
print('Model:', model)

Model: PCAKoopmanWithInputsInStd(
  (std_layer_1): StdScalerLayerSet(
    (StdScalerLayers): ModuleList(
      (0): StdScalerLayer()
    )
  )
  (std_layer_u): StdScalerLayer()
  (pca_transformer): PCALayer(
    (transform): Linear(in_features=4, out_features=4, bias=False)
    (inverse_transform): Linear(in_features=4, out_features=4, bias=False)
  )
  (std_layer_2): StdScalerLayer()
  (state_dic): State_Encoder()
  (control_encoder): Control_Encoder()
  (state_matrix): StateMatrix_sum(
    (k_matrices): ModuleList(
      (0): Linear(in_features=5, out_features=5, bias=False)
    )
  )
  (control_matrix): ControlMatrix_sum(
    (k_matrices): ModuleList(
      (0): Linear(in_features=3, out_features=5, bias=False)
    )
  )
)


In [11]:
model.pca_transformer.transform.weight

Parameter containing:
tensor([[-0.6455,  0.3412,  0.4742,  0.4920],
        [ 0.2595,  0.6200,  0.4864, -0.5583],
        [-0.0438, -0.7054,  0.6733, -0.2171],
        [ 0.7170,  0.0397,  0.2919,  0.6318]], device='cuda:0')

In [12]:
model.pca_transformer.inverse_transform.weight

Parameter containing:
tensor([[-0.6455,  0.2595, -0.0438,  0.7170],
        [ 0.3412,  0.6200, -0.7054,  0.0397],
        [ 0.4742,  0.4864,  0.6733,  0.2919],
        [ 0.4920, -0.5583, -0.2171,  0.6318]], device='cuda:0')

In [13]:
model.std_layer_1.StdScalerLayers[0].mean

Parameter containing:
tensor([-1.3984e-05,  2.4910e-02,  4.0107e-03, -9.2681e-03], device='cuda:0')

In [14]:
import os
# Output directory named by the experiment config. In the cells visible here it is
# only referenced by the commented-out checkpoint load below.
save_dir = config['save_dir']
# model.load_state_dict(torch.load(os.path.join(save_dir, 'model.pth'), map_location=device))

In [15]:
model.control_matrix.k_matrices[0].weight

Parameter containing:
tensor([[-0.1025,  0.4148,  0.2425],
        [ 0.3446, -0.3895,  0.5730],
        [ 0.1525, -0.0062, -0.3782],
        [-0.0782,  0.3320,  0.3893],
        [-0.2750, -0.4066,  0.0402]], device='cuda:0', requires_grad=True)

In [16]:
model.control_matrix.k_matrices[0].bias

In [17]:
model.state_matrix.k_matrices[0].weight

Parameter containing:
tensor([[-0.1386,  0.2002,  0.0684,  0.3576, -0.1546],
        [ 0.4294,  0.1595, -0.2953,  0.0965, -0.2490],
        [-0.3114, -0.1887, -0.3796, -0.0645, -0.2949],
        [ 0.2527,  0.0955, -0.1496,  0.1237, -0.2634],
        [-0.4048,  0.1500, -0.0121,  0.1450, -0.3768]], device='cuda:0',
       requires_grad=True)

In [18]:
model.state_matrix.k_matrices[0].bias

In [19]:
A

tensor([[-1.5405, -0.3515, -0.3186, -1.2221],
        [ 2.1723, -0.5020,  0.9029, -1.6458],
        [ 0.7199,  0.9037,  0.2337, -0.1412],
        [ 1.4302,  0.6298, -0.0863,  1.3132]], device='cuda:0')

In [20]:
B

tensor([[-0.6189, -0.7804],
        [-0.9217,  0.4807],
        [-0.7770,  0.8658],
        [ 0.7492, -0.8138]], device='cuda:0')

In [21]:
print(x_dataset)

[array([[ 0.6658208, -0.8064082,  0.9090227,  1.4859948],
       [-3.9207363, -1.167361 , -1.3645289,  3.3975472]], dtype=float32), array([[-0.20906651,  0.08947694,  0.6861308 ,  0.4243313 ],
       [ 0.02165264, -1.1923257 , -0.85489833,  1.0925571 ]],
      dtype=float32), array([[ 0.22516009, -0.05350342,  0.89305973, -0.16804563],
       [-0.15004501,  2.1418695 ,  0.8487265 , -0.49203357]],
      dtype=float32), array([[-0.0671364 , -1.594493  ,  1.0787683 , -0.43366045],
       [ 2.0029995 ,  2.3869975 , -1.6071364 , -1.3684404 ]],
      dtype=float32), array([[ 1.0163435 , -1.1245362 , -0.26397872,  0.4904136 ],
       [-2.7418392 ,  2.8965209 ,  1.3393388 , -0.24494743]],
      dtype=float32), array([[-0.79236364,  0.71944284, -0.26497057, -0.90946424],
       [ 1.2205241 , -1.3640374 , -0.06559238, -1.6362094 ]],
      dtype=float32), array([[-0.5655101 ,  0.7290081 ,  0.8674291 ,  0.73515135],
       [-0.5026118 , -1.7838514 ,  0.59325844,  0.3087715 ]],
      dtype=float32)

In [22]:
# Detached copy of the PCA projection weight. `.detach().clone()` is the
# recommended way to copy an existing tensor; calling torch.tensor() on a
# tensor emits a copy-construct UserWarning.
Q = model.pca_transformer.transform.weight.detach().clone()

In [23]:
print(model.std_layer_1.StdScalerLayers[0].mean)

Parameter containing:
tensor([-1.3984e-05,  2.4910e-02,  4.0107e-03, -9.2681e-03], device='cuda:0')


In [24]:
# Detached copies of the `mean` tensors of the three StdScaler layers
# (std_layer_1[0], std_layer_2 and std_layer_u). `.detach().clone()` avoids the
# UserWarning raised when torch.tensor() is called on an existing tensor.
M1 = model.std_layer_1.StdScalerLayers[0].mean.detach().clone()
M2 = model.std_layer_2.mean.detach().clone()
Mu = model.std_layer_u.mean.detach().clone()

In [25]:
print(M1, M2, Mu)

tensor([-1.3984e-05,  2.4910e-02,  4.0107e-03, -9.2681e-03], device='cuda:0') tensor([ 9.1553e-09, -1.1826e-08, -6.4850e-09,  1.6403e-08], device='cuda:0') tensor([0.0052, 0.0147], device='cuda:0')


In [26]:
# Diagonal matrices built from the `std` tensors of the three StdScaler layers.
# torch.diag already allocates a new tensor, so `.detach()` is enough here and
# avoids the UserWarning raised by torch.tensor() on an existing tensor.
S1 = torch.diag(model.std_layer_1.StdScalerLayers[0].std.detach())
S2 = torch.diag(model.std_layer_2.std.detach())
Su = torch.diag(model.std_layer_u.std.detach())

In [27]:
print(S1, S2, Su)

tensor([[1.7508, 0.0000, 0.0000, 0.0000],
        [0.0000, 2.3246, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.3873, 0.0000],
        [0.0000, 0.0000, 0.0000, 1.8141]], device='cuda:0') tensor([[1.3398, 0.0000, 0.0000, 0.0000],
        [0.0000, 1.1517, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.7511, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.5607]], device='cuda:0') tensor([[0.9982, 0.0000],
        [0.0000, 1.0014]], device='cuda:0')


In [28]:
# Inverses of the two state scaling matrices, plus A and B moved to the
# selected device for the matrix products that follow.
S2_inv, S1_inv = torch.inverse(S2), torch.inverse(S1)
A, B = A.to(device), B.to(device)

# Print every factor of the change-of-basis product, one labelled block each.
print("\n".join(
    f"{label}:\n {matrix}"
    for label, matrix in (
        ("S2_inv", S2_inv),
        ("Q", Q),
        ("S1_inv", S1_inv),
        ("S1", S1),
        ("Q.T", Q.T),
        ("S2", S2),
    )
))

S2_inv:
 tensor([[0.7464, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.8683, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.3314, 0.0000],
        [0.0000, 0.0000, 0.0000, 1.7834]], device='cuda:0')
Q:
 tensor([[-0.6455,  0.3412,  0.4742,  0.4920],
        [ 0.2595,  0.6200,  0.4864, -0.5583],
        [-0.0438, -0.7054,  0.6733, -0.2171],
        [ 0.7170,  0.0397,  0.2919,  0.6318]], device='cuda:0')
S1_inv:
 tensor([[0.5712, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.4302, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.7208, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.5512]], device='cuda:0')
S1:
 tensor([[1.7508, 0.0000, 0.0000, 0.0000],
        [0.0000, 2.3246, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.3873, 0.0000],
        [0.0000, 0.0000, 0.0000, 1.8141]], device='cuda:0')
Q.T:
 tensor([[-0.6455,  0.2595, -0.0438,  0.7170],
        [ 0.3412,  0.6200, -0.7054,  0.0397],
        [ 0.4742,  0.4864,  0.6733,  0.2919],
        [ 0.4920, -0.5583, -0.2171,  0.6318]], device='cuda:0')
S2:
 

In [29]:
# State matrix expressed in the scaled PCA coordinates:
#   A_tilde = S2^-1 Q S1^-1 A S1 Q^T S2
# `@` is left-associative, so the products are evaluated in the same order as
# the nested torch.matmul calls this replaces.
A_tilde = S2_inv @ Q @ S1_inv @ A @ S1 @ Q.T @ S2
print(A_tilde)

tensor([[-0.6310,  0.9853, -0.5113,  1.1195],
        [-1.1565,  1.2748,  0.5189, -0.4426],
        [ 1.9515,  0.2305, -1.0787, -0.1612],
        [-0.0298,  0.8897, -0.6370, -0.0608]], device='cuda:0')


In [30]:
# Control matrix expressed in the scaled coordinates:
#   B_tilde = S2^-1 Q S1^-1 B Su
# Evaluated left to right, as in the nested torch.matmul form.
B_tilde = S2_inv @ Q @ S1_inv @ B @ Su

print("B_tilde:\n", B_tilde)

B_tilde:
 tensor([[ 0.0227,  0.3240],
        [-0.7286,  0.4926],
        [-0.2280,  0.5216],
        [-0.3058, -0.7370]], device='cuda:0')


In [31]:
# Constant offset produced by the scaler means:
#   C_tilde = S2^-1 Q S1^-1 (A S1 Q^T M2 + A M1 + B Mu - S1 Q^T M2 - M1)
# Each product is evaluated left to right, as in the nested torch.matmul form.
term1 = A @ S1 @ Q.T @ M2
term2 = A @ M1
term3 = B @ Mu
term4 = S1 @ Q.T @ M2
right_terms = term1 + term2 + term3 - term4 - M1
C_tilde = S2_inv @ Q @ S1_inv @ right_terms

In [32]:
C_tilde

tensor([0.0103, 0.0023, 0.0254, 0.0035], device='cuda:0')

In [33]:
A_tilde

tensor([[-0.6310,  0.9853, -0.5113,  1.1195],
        [-1.1565,  1.2748,  0.5189, -0.4426],
        [ 1.9515,  0.2305, -1.0787, -0.1612],
        [-0.0298,  0.8897, -0.6370, -0.0608]], device='cuda:0')

In [34]:
# Build the augmented state matrix
#     [[1,       0      ],
#      [C_tilde, A_tilde]]
# Size, device and dtype are taken from A_tilde instead of the hard-coded (5, 5)
# and 'cuda:0', so the cell also runs on CPU-only machines and for other state
# dimensions.
latent_dim = A_tilde.shape[0]
combined_matrix_A = torch.zeros(
    (latent_dim + 1, latent_dim + 1), device=A_tilde.device, dtype=A_tilde.dtype
)

# Fill in the entries; the rest of the first row is already zero.
combined_matrix_A[0, 0] = 1
combined_matrix_A[1:, 0] = C_tilde
combined_matrix_A[1:, 1:] = A_tilde

print("Combined Matrix A:\n", combined_matrix_A)

Combined Matrix A:
 tensor([[ 1.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0103, -0.6310,  0.9853, -0.5113,  1.1195],
        [ 0.0023, -1.1565,  1.2748,  0.5189, -0.4426],
        [ 0.0254,  1.9515,  0.2305, -1.0787, -0.1612],
        [ 0.0035, -0.0298,  0.8897, -0.6370, -0.0608]], device='cuda:0')


In [35]:
B_tilde

tensor([[ 0.0227,  0.3240],
        [-0.7286,  0.4926],
        [-0.2280,  0.5216],
        [-0.3058, -0.7370]], device='cuda:0')

In [36]:
# Build the augmented control matrix
#     [[1, 0      ],
#      [0, B_tilde]]
# Size, device and dtype are taken from B_tilde instead of the hard-coded (5, 3)
# and 'cuda:0', so the cell also runs on CPU-only machines and for other
# state/control dimensions.
n_rows, n_cols = B_tilde.shape
combined_matrix_B = torch.zeros(
    (n_rows + 1, n_cols + 1), device=B_tilde.device, dtype=B_tilde.dtype
)

# Fill in the entries; the rest of the first row and first column is already zero.
combined_matrix_B[0, 0] = 1
combined_matrix_B[1:, 1:] = B_tilde

print("Combined Matrix B:\n", combined_matrix_B)

Combined Matrix B:
 tensor([[ 1.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0227,  0.3240],
        [ 0.0000, -0.7286,  0.4926],
        [ 0.0000, -0.2280,  0.5216],
        [ 0.0000, -0.3058, -0.7370]], device='cuda:0')


In [37]:
# # Set the params as non-PCA data
# # Create a (5, 5) zero matrix
# combined_matrix_A_2 = torch.zeros((5, 5), device='cuda:0')

# # Set the matrix entries
# combined_matrix_A_2[0, 0] = 1
# combined_matrix_A_2[0, 1:] = 0
# combined_matrix_A_2[1:, 0] = 0
# combined_matrix_A_2[1:, 1:] = A

# print("Combined Matrix A:\n", combined_matrix_A_2)

# combined_matrix_B_2 = torch.zeros((5, 3), device='cuda:0')

# combined_matrix_B_2[0, 0] = 1
# combined_matrix_B_2[0, 1:] = 0
# combined_matrix_B_2[1:, 0] = 0
# combined_matrix_B_2[1:, 1:] = B

# print("Combined Matrix B:\n", combined_matrix_B_2)

In [38]:
# Overwrite the model's state and control matrices with the analytically derived
# ones. Tensor.copy_ broadcasts its source, so a wrongly shaped matrix could be
# accepted silently; both shapes are checked before either weight is touched.
with torch.no_grad():
    state_weight = model.state_matrix.k_matrices[0].weight
    control_weight = model.control_matrix.k_matrices[0].weight
    if combined_matrix_A.shape != state_weight.shape:
        raise ValueError(
            f"combined_matrix_A has shape {tuple(combined_matrix_A.shape)}, "
            f"expected {tuple(state_weight.shape)}"
        )
    if combined_matrix_B.shape != control_weight.shape:
        raise ValueError(
            f"combined_matrix_B has shape {tuple(combined_matrix_B.shape)}, "
            f"expected {tuple(control_weight.shape)}"
        )
    state_weight.copy_(combined_matrix_A)
    control_weight.copy_(combined_matrix_B)

In [39]:
print(x_data.shape)
print(u_data.shape)
print(nu_data.shape)

(5000, 2, 4)
(5000, 2, 2)
(5000, 2, 1)


In [40]:
# Model prediction from the first step of every slice. The three inputs
# (x, u, nu) are converted to tensors on `device` in that order and passed
# positionally.
y_pred = model.latent_to_latent_forward(
    *(torch.tensor(series[:, 0, :]).to(device) for series in (x_data, u_data, nu_data))
)

In [41]:
# Reference target: the second step (index 1) of every slice in x_data, on `device`.
y_true = torch.tensor(x_data[:, 1, :]).to(device)

In [42]:
y_pred - y_true

tensor([[ 1.3448e-06,  2.0862e-07, -1.0431e-07,  3.9861e-07],
        [-7.1526e-07, -1.3411e-07,  5.0664e-07,  4.7684e-07],
        [ 1.1176e-07,  1.4901e-07, -4.4703e-07,  5.5134e-07],
        ...,
        [ 1.1921e-07, -2.3842e-07, -4.4703e-07,  7.4506e-09],
        [ 1.2219e-06, -2.9802e-08, -5.9605e-07,  3.7253e-07],
        [-1.7881e-07, -3.8743e-07,  2.9802e-07,  2.0862e-07]], device='cuda:0',
       dtype=torch.float64, grad_fn=<SubBackward0>)

In [43]:
# Apply the raw system matrices A and B (row-vector form) to the first step of every slice.
# NOTE(review): x_data was reassigned earlier from x_pca_scaled and then sliced and
# shuffled, while A and B are the matrices that generated the unscaled data. The
# displayed y_pred_2 differs from y_true; presumably this contrast with the
# model-based y_pred is the point of the cell. TODO confirm.
y_pred_2 = torch.tensor(x_data[:, 0, :]).to(device) @ A.T + torch.tensor(u_data[:, 0, :]).to(device) @ B.T

In [44]:
y_pred_2

tensor([[-3.0109, -7.2037, -1.3151,  3.8977],
        [ 4.4122,  1.5925,  0.1637, -2.9771],
        [ 0.0678,  1.8300,  1.6636, -0.7128],
        ...,
        [-1.4546, -1.6935,  0.8250, -1.5286],
        [-1.9458, -0.3932, -0.1352,  3.5244],
        [ 0.8218, -1.2450, -1.5157, -2.0002]], device='cuda:0')

In [45]:
y_true

tensor([[ 4.1010, -1.2643, -1.0951,  0.2564],
        [-1.3047,  2.1692, -0.3987,  1.7998],
        [-0.0481,  1.7354,  0.7561,  0.4645],
        ...,
        [ 1.7145,  0.6829,  0.3132, -1.4819],
        [ 0.5684, -1.8073,  1.4107,  0.8936],
        [-0.3860, -1.5849,  1.0007, -0.9598]], device='cuda:0')

In [46]:
y_pred

tensor([[ 4.1010, -1.2643, -1.0951,  0.2564],
        [-1.3047,  2.1692, -0.3987,  1.7998],
        [-0.0481,  1.7354,  0.7561,  0.4645],
        ...,
        [ 1.7145,  0.6829,  0.3132, -1.4819],
        [ 0.5684, -1.8073,  1.4107,  0.8936],
        [-0.3860, -1.5849,  1.0007, -0.9598]], device='cuda:0',
       dtype=torch.float64, grad_fn=<SliceBackward0>)