In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import copy
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import networkx as nx
from torch.nn.utils import parameters_to_vector, vector_to_parameters

In [2]:
# Graph implementation
def generate_graph(cluster_sizes=[100,100], pin=0.5, pout=0.01, seed=0):
    """Generate a random connected graph"""
    probs = np.array([[pin, pout],[pout, pin]])
    while True:
        g = nx.stochastic_block_model(cluster_sizes, probs)
        if nx.algorithms.components.is_connected(g):
            return g


cluster_sizes = [10, 10]
pin = 0.5
pout = 0.01
seed = 0
alpha = 1e-3
lamda = 1e-3
eta = 1e-3
no_users = sum(cluster_sizes)
batch_size = 20
epochs = 1
it = 1000
G = generate_graph(cluster_sizes, pin, pout, seed)

#nx.draw(G, with_labels=True, node_size=100, alpha=1, linewidths=10)
#plt.show()

In [3]:
# Metropolis weights 
number_nodes = G.number_of_nodes()
weights = np.zeros([number_nodes, number_nodes])
for edge in G.edges():
  i, j = edge[0], edge[1]
  weights[i - 1][j - 1] = 1 / (1 + np.max([G.degree(i), G.degree(j)]))
  weights[j - 1][i - 1] = weights[i - 1][j - 1]

print(weights)

weights = weights + np.diag(1 - np.sum(weights, axis=0))

metropolis_weights = weights
print(metropolis_weights)


[[0.         0.125      0.         0.125      0.         0.125
  0.125      0.125      0.125      0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.09090909]
 [0.125      0.         0.125      0.         0.14285714 0.125
  0.14285714 0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.09090909]
 [0.         0.125      0.         0.         0.125      0.125
  0.         0.125      0.125      0.         0.125      0.
  0.         0.         0.         0.         0.         0.
  0.         0.09090909]
 [0.125      0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.125
  0.         0.         0.         0.         0.         0.
  0.         0.09090909]
 [0.         0.14285714 0.125      0.         0.         0.125
  0.         0.         0.16666667 0.         0.         0.
  0.         0.         0.         0.        

In [4]:
def load_dataset():
    transforms_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))])
    mnist_data_train = datasets.MNIST('./data/mnist', train=True, download=True, transform=transforms_mnist)
    mnist_data_test = datasets.MNIST('./data/mnist', train=False, download=True, transform=transforms_mnist)

    return mnist_data_train, mnist_data_test

In [5]:
def degrees(A):
    """Return the degrees of each node of a graph from its adjacency matrix"""
    return np.sum(A, axis=0).reshape(A.shape[0], 1)

def node_degree(n, G):
    cnt = 0
    for i in G.neighbors(n):
        cnt += 1
    return cnt

def get_neighbors(n, G):
    neighbors_list = []
    for i in G.neighbors(n):
        neighbors_list.append(int(i))
    return neighbors_list

In [6]:
datapoints = {}
count = 0
W1 = np.array([2, 2])
W2 = np.array([-2, 2])
W = [W1, W2]
m = 200
n = 2
noise_sd = 0.001
for i, cluster_size in enumerate(cluster_sizes):
    for j in range(cluster_size):
        features = np.random.normal(loc=0.0, scale=1.0, size=(m, n))
        label = np.dot(features, W[i]) + np.random.normal(0,noise_sd)
        datapoints[count] = {
                'features': features,
                'degree': node_degree(count, G),
                'label': label,
                'neighbors': get_neighbors(count, G)
            }
        count += 1

In [7]:
class MyDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = torch.FloatTensor(data)
        self.targets = torch.FloatTensor(targets).unsqueeze(-1)
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        return x, y
    
    def __len__(self):
        return len(self.data)


In [8]:
class MLP_Net(nn.Module):
    def __init__(self, user_id):
        super(MLP_Net, self).__init__()
        self.fc1 = nn.Linear(2, 1, bias=False)
        #self.fc2 = nn.Linear(4, 1, bias=False)
        #self.fc3 = nn.Linear(200, 10)
        self.user_id = user_id

    def forward(self, x):
        x = torch.flatten(x, 1)
        #x = F.relu(self.fc1(x))
        output = self.fc1(x)
        #output = self.fc3(x)
        return output

In [9]:
from typing import Iterable, Optional

def grads_to_vector(parameters: Iterable[torch.Tensor]) -> torch.Tensor:
    r"""Convert parameters to one vector

    Args:
        parameters (Iterable[Tensor]): an iterator of Tensors that are the
            parameters of a model.

    Returns:
        The parameters represented by a single vector
    """
    # Flag for the device where the parameter is located
    param_device = None

    vec = []
    for param in parameters:
        # Ensure the parameters are located in the same device
        param_device = param.grad

        vec.append(param_device.view(-1))
    return torch.cat(vec)

In [10]:
model = MLP_Net(user_id=0)

lr = 0.01

dataloader = DataLoader(MyDataset(datapoints[19]["features"], datapoints[19]["label"]), batch_size=50, shuffle=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for i in range(100):
    for (x, y) in dataloader:
        criterion = nn.MSELoss()
        optimizer.zero_grad()
        yhat = model(x)
        print(y.size())
        print(yhat.size())
        loss = criterion(yhat, y)
        
        loss.backward()
        print(i, loss, grads_to_vector(model.parameters()), parameters_to_vector(model.parameters()))
        #optimizer.step()
        new_model = parameters_to_vector(model.parameters()) - lr * grads_to_vector(model.parameters())
        vector_to_parameters(parameters=model.parameters(), vec=new_model)
        #if i % 50 ==0:
            #lr *= 0.9
            

#parameters_to_vector(model.parameters())

torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(12.1887, grad_fn=<MseLossBackward0>) tensor([ 5.8857, -5.5772]) tensor([0.5882, 0.3605], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(6.8434, grad_fn=<MseLossBackward0>) tensor([ 4.1358, -2.0365]) tensor([0.5293, 0.4163], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(7.0839, grad_fn=<MseLossBackward0>) tensor([ 4.0237, -2.6592]) tensor([0.4880, 0.4367], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
0 tensor(6.8289, grad_fn=<MseLossBackward0>) tensor([ 3.8863, -2.6983]) tensor([0.4477, 0.4633], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
1 tensor(10.4750, grad_fn=<MseLossBackward0>) tensor([ 5.4738, -5.1424]) tensor([0.4089, 0.4903], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
1 tensor(5.8988, grad_fn=<MseLossBackward0>) tensor([ 3.8544, -1.8674]) tensor([0.3541, 0.5417], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([

13 tensor(0.9989, grad_fn=<MseLossBackward0>) tensor([ 1.6488, -0.6609]) tensor([-1.0068,  1.4555], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
13 tensor(1.0183, grad_fn=<MseLossBackward0>) tensor([ 1.5945, -0.8911]) tensor([-1.0233,  1.4622], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
13 tensor(0.9783, grad_fn=<MseLossBackward0>) tensor([ 1.5296, -0.9216]) tensor([-1.0392,  1.4711], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
14 tensor(1.4748, grad_fn=<MseLossBackward0>) tensor([ 2.1303, -1.7995]) tensor([-1.0545,  1.4803], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
14 tensor(0.8619, grad_fn=<MseLossBackward0>) tensor([ 1.5358, -0.6062]) tensor([-1.0758,  1.4983], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
14 tensor(0.8777, grad_fn=<MseLossBackward0>) tensor([ 1.4846, -0.8195]) tensor([-1.0912,  1.5043], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
14 tensor(0.8430

26 tensor(0.2446, grad_fn=<MseLossBackward0>) tensor([ 0.8905, -0.6888]) tensor([-1.6022,  1.8041], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
26 tensor(0.1474, grad_fn=<MseLossBackward0>) tensor([ 0.6534, -0.2154]) tensor([-1.6111,  1.8110], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
26 tensor(0.1483, grad_fn=<MseLossBackward0>) tensor([ 0.6287, -0.3012]) tensor([-1.6177,  1.8132], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
26 tensor(0.1419, grad_fn=<MseLossBackward0>) tensor([ 0.5999, -0.3175]) tensor([-1.6240,  1.8162], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
27 tensor(0.2107, grad_fn=<MseLossBackward0>) tensor([ 0.8281, -0.6361]) tensor([-1.6300,  1.8193], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
27 tensor(0.1273, grad_fn=<MseLossBackward0>) tensor([ 0.6083, -0.1977]) tensor([-1.6382,  1.8257], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
27 tensor(0.1279

42 tensor(0.0141, grad_fn=<MseLossBackward0>) tensor([ 0.2079, -0.0546]) tensor([-1.8778,  1.9479], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
42 tensor(0.0140, grad_fn=<MseLossBackward0>) tensor([ 0.1989, -0.0804]) tensor([-1.8799,  1.9484], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
42 tensor(0.0133, grad_fn=<MseLossBackward0>) tensor([ 0.1888, -0.0863]) tensor([-1.8818,  1.9492], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
43 tensor(0.0195, grad_fn=<MseLossBackward0>) tensor([ 0.2585, -0.1794]) tensor([-1.8837,  1.9501], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
43 tensor(0.0122, grad_fn=<MseLossBackward0>) tensor([ 0.1935, -0.0501]) tensor([-1.8863,  1.9519], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
43 tensor(0.0121, grad_fn=<MseLossBackward0>) tensor([ 0.1850, -0.0740]) tensor([-1.8883,  1.9524], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
43 tensor(0.0115

56 tensor(0.0018, grad_fn=<MseLossBackward0>) tensor([ 0.0723, -0.0257]) tensor([-1.9565,  1.9831], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
56 tensor(0.0017, grad_fn=<MseLossBackward0>) tensor([ 0.0684, -0.0277]) tensor([-1.9572,  1.9833], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(0.0025, grad_fn=<MseLossBackward0>) tensor([ 0.0933, -0.0601]) tensor([-1.9579,  1.9836], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(0.0016, grad_fn=<MseLossBackward0>) tensor([ 0.0708, -0.0152]) tensor([-1.9588,  1.9842], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(0.0015, grad_fn=<MseLossBackward0>) tensor([ 0.0672, -0.0237]) tensor([-1.9595,  1.9844], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
57 tensor(0.0015, grad_fn=<MseLossBackward0>) tensor([ 0.0637, -0.0255]) tensor([-1.9602,  1.9846], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
58 tensor(0.0021

torch.Size([50, 1])
torch.Size([50, 1])
73 tensor(0.0001, grad_fn=<MseLossBackward0>) tensor([ 0.0210, -0.0067]) tensor([-1.9873,  1.9955], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
73 tensor(0.0001, grad_fn=<MseLossBackward0>) tensor([ 0.0199, -0.0067]) tensor([-1.9876,  1.9956], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
74 tensor(0.0002, grad_fn=<MseLossBackward0>) tensor([ 0.0270, -0.0163]) tensor([-1.9878,  1.9957], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
74 tensor(0.0001, grad_fn=<MseLossBackward0>) tensor([ 0.0209, -0.0036]) tensor([-1.9880,  1.9958], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
74 tensor(0.0001, grad_fn=<MseLossBackward0>) tensor([ 0.0195, -0.0062]) tensor([-1.9882,  1.9959], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
74 tensor(0.0001, grad_fn=<MseLossBackward0>) tensor([ 0.0185, -0.0061]) tensor([-1.9884,  1.9959], grad_fn=<CatBackward0>)
torch.Size([50, 

torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.7598e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0079, -0.0047]) tensor([-1.9965,  1.9988], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.2541e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0063, -0.0009]) tensor([-1.9965,  1.9989], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(1.0946e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0056, -0.0018]) tensor([-1.9966,  1.9989], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
91 tensor(9.6182e-06, grad_fn=<MseLossBackward0>) tensor([ 0.0053, -0.0011]) tensor([-1.9967,  1.9989], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
92 tensor(1.5319e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0073, -0.0044]) tensor([-1.9967,  1.9989], grad_fn=<CatBackward0>)
torch.Size([50, 1])
torch.Size([50, 1])
92 tensor(1.0986e-05, grad_fn=<MseLossBackward0>) tensor([ 0.0059, -0.0008]) tensor([-1.9968,  1.9990], grad_fn=<CatBack

In [11]:
parameters_to_vector(model.parameters())

tensor([-1.9982,  1.9994], grad_fn=<CatBackward0>)

In [12]:
class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.pool = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

In [13]:
class ClientUpdate(object):
    def __init__(self, dataset, batchSize, alpha, lamda, epochs, projection_list, projected_weights):
        self.train_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=batchSize, shuffle=True)
        #self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchSize = batchSize

    def train(self, model):
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5)

        e_loss = []
        for epoch in range(1, self.epochs+1):
            train_loss = 0
            model.train()
            for i, (data, labels) in zip(range(1), self.train_loader):
                data, labels = data, labels
                optimizer.zero_grad() 
                output = model(data)  
                loss = criterion(output, labels)
                #loss += mu/2 * torch.norm(client_param.data - server_param.data)**2
                loss.backward()
                grads = grads_to_vector(model.parameters())
                #optimizer.step()
                train_loss += loss.item()*data.size(0)
                weights = parameters_to_vector(model.parameters())
                mat_vec_sum = torch.zeros_like(weights)
                for j in G.neighbors(model.user_id):
                    mat_vec_sum = torch.add(mat_vec_sum, torch.matmul(torch.transpose(projection_list[model.user_id][j], 0, 1), 
                                                         projected_weights[j][model.user_id] - projected_weights[model.user_id][j]))
                
                model_update = parameters_to_vector(model.parameters()) - alpha * (grads + lamda * mat_vec_sum)
                
            vector_to_parameters(parameters=model.parameters(), vec=model_update)
                

            train_loss = train_loss/self.batchSize#len(self.train_loader.dataset) 
            e_loss.append(train_loss)

        total_loss = e_loss#sum(e_loss)/len(e_loss)

        return model.state_dict(), total_loss

In [14]:
# Preparing projection matrices
models = [MLP_Net(user_id=i) for i in range(no_users)]
#temp = MLP_Net()
projection_list = []
projected_weights = []

def update_ProjWeight(projection_list, projected_weights, first_run=True):
    #projected_weights = []
    for i in range(no_users):
        neighbors_mat = []
        neighbors_weights = []
        for j in range(no_users):
            if j in G.neighbors(i):
                with torch.no_grad():
                    if first_run == True:
                        row, column = parameters_to_vector(models[j].parameters()).size()[0], parameters_to_vector(models[i].parameters()).size()[0]
                        mat = torch.zeros((row, column))
                        mat.fill_diagonal_(1.0)
                        neighbors_mat.append(mat)
                        neighbors_weights.append(torch.matmul(mat, parameters_to_vector(models[j].parameters())))
                    else:
                        neighbors_weights.append(torch.matmul(projection_list[j][i], parameters_to_vector(models[j].parameters())))
            else:
                neighbors_mat.append(0)
                neighbors_weights.append(0)
        if first_run == True:
            projection_list.append(neighbors_mat)
        projected_weights.append(neighbors_weights)

update_ProjWeight(projection_list, projected_weights)



In [15]:
def testing(model, dataset, bs, criterion): 
    test_loss = 0
    correct = 0
    test_loader = DataLoader(MyDataset(dataset["features"], dataset["label"]), batch_size=bs)
    l = len(test_loader)
    model.eval()
    for data, labels in test_loader:
        data, labels = data, labels
        output = model(data)
        loss = criterion(output, labels)
        test_loss += loss.item()*data.size(0)
        #_, pred = torch.max(output, 1)
        #correct += pred.eq(labels.data.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    
    return test_loss

In [16]:
model = MLP_Net(user_id=0)

from torch.nn.utils import parameters_to_vector, vector_to_parameters

with torch.no_grad():    
    params = parameters_to_vector(model.parameters())

    print(params)

params *= 2.

vector_to_parameters(parameters=model.parameters(), vec=params)

parameters_to_vector(model.parameters())





tensor([-0.1371, -0.4231])


tensor([-0.2743, -0.8462], grad_fn=<CatBackward0>)

In [47]:
#global_model = CNN_Net().cuda()
models = [MLP_Net(user_id=i) for i in range(no_users)]
dummy_models = [MLP_Net(user_id=i) for i in range(no_users)]

#model.load_state_dict(global_model.state_dict())

criterion = nn.MSELoss()


train_loss = []
test_loss = []
test_accuracy = []


for curr_round in tqdm(range(1, it+1)):
    w, local_loss = [], []

    
    for i in range(no_users):
        dummy_models[i].load_state_dict(models[i].state_dict())
        local_update = ClientUpdate(dataset=datapoints[i], batchSize=batch_size, alpha=alpha, lamda=lamda, epochs=1, projection_list=projection_list, projected_weights=projected_weights)
        weights, loss = local_update.train(dummy_models[i])
        w.append(weights)
        local_loss.append(loss)
        models[i].load_state_dict(w[i])
        
    
    
    # Update prjection matrix
    
    #print(projection_list[0], projected_weights[0])
    
    for i in range(no_users):
        weights = parameters_to_vector(models[i].parameters())
        for j in G.neighbors(i):
            weights = parameters_to_vector(model.parameters())
            mat_vec_sum = torch.zeros_like(weights)
            for k in G.neighbors(i):
                mat_vec_sum = torch.add(mat_vec_sum, projected_weights[k][i] - projected_weights[i][k])
            #print(torch.outer(mat_vec_sum, weights))

            projection_list[i][j] = torch.add(projection_list[i][j], -1 * eta * lamda * mat_vec_sum)
                                         
    projected_weights = []                                          
    update_ProjWeight(projection_list, projected_weights, first_run=False)
        
        
        
    
    




          
            

    local_test_acc = []
    local_test_loss = []
    for k in range(no_users):
      
      g_loss = testing(models[i], datapoints[i], 50, criterion)
      local_test_loss.append(g_loss)
    
        

    g_loss = sum(local_test_loss) / len(local_test_loss)
    #g_accuracy = sum(local_test_acc) / len(local_test_acc)
    
    

    test_loss.append(g_loss)
    #test_accuracy.append(g_accuracy)
    print("Training_loss %2.5f"% (test_loss[-1]))

  0%|          | 1/1000 [00:00<02:21,  7.04it/s]

tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.8571, -2.6442],
        [-0.8933, -2.7559]], grad_fn=<MulBackward0>)
tensor([[-0.5122, -1.5802],
        [-0.5769, -1.7800]], grad_fn=<MulBackward0>)
tensor([[-0.5122, -1.5802],
        [-0.5769, -1.7800]], grad_fn=<MulBackward0>)
tensor([[-0.5122, -1.5802],


  0%|          | 2/1000 [00:00<02:34,  6.46it/s]

tensor([[-0.6361, -1.9626],
        [ 0.9054,  2.7933]], grad_fn=<MulBackward0>)
tensor([[0.7238, 2.2331],
        [1.0670, 3.2919]], grad_fn=<MulBackward0>)
tensor([[0.7238, 2.2331],
        [1.0670, 3.2919]], grad_fn=<MulBackward0>)
tensor([[0.7238, 2.2331],
        [1.0670, 3.2919]], grad_fn=<MulBackward0>)
tensor([[0.7238, 2.2331],
        [1.0670, 3.2919]], grad_fn=<MulBackward0>)
tensor([[0.7238, 2.2331],
        [1.0670, 3.2919]], grad_fn=<MulBackward0>)
tensor([[0.7238, 2.2331],
        [1.0670, 3.2919]], grad_fn=<MulBackward0>)
tensor([[-0.2074, -0.6399],
        [-0.3523, -1.0870]], grad_fn=<MulBackward0>)
tensor([[-0.2074, -0.6399],
        [-0.3523, -1.0870]], grad_fn=<MulBackward0>)
tensor([[-0.2074, -0.6399],
        [-0.3523, -1.0870]], grad_fn=<MulBackward0>)
tensor([[ 0.4286,  1.3224],
        [-1.0918, -3.3684]], grad_fn=<MulBackward0>)
tensor([[ 0.4286,  1.3224],
        [-1.0918, -3.3684]], grad_fn=<MulBackward0>)
tensor([[ 0.4286,  1.3224],
        [-1.0918, -3.368

  0%|          | 4/1000 [00:00<02:14,  7.38it/s]

tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[-1.0333, -3.1880],
        [-0.8392, -2.5889]], grad_fn=<MulBackward0>)
tensor([[ 0.6517,  2.0107],
        [-0.2744, -0.8465]], grad_fn=<MulBackward0>)
tensor([[ 0.6517,  2.0107],
        [-0.2744, -0.8465]], grad_fn=<MulBackward0>)
tensor([[ 0.6517,  2.0107],


  0%|          | 5/1000 [00:00<02:16,  7.30it/s]

tensor([[-0.6391, -1.9718],
        [ 0.8921,  2.7523]], grad_fn=<MulBackward0>)
tensor([[-0.6391, -1.9718],
        [ 0.8921,  2.7523]], grad_fn=<MulBackward0>)
tensor([[-0.6391, -1.9718],
        [ 0.8921,  2.7523]], grad_fn=<MulBackward0>)
tensor([[-0.6391, -1.9718],
        [ 0.8921,  2.7523]], grad_fn=<MulBackward0>)
tensor([[0.7168, 2.2115],
        [1.0685, 3.2963]], grad_fn=<MulBackward0>)
tensor([[0.7168, 2.2115],
        [1.0685, 3.2963]], grad_fn=<MulBackward0>)
tensor([[0.7168, 2.2115],
        [1.0685, 3.2963]], grad_fn=<MulBackward0>)
tensor([[0.7168, 2.2115],
        [1.0685, 3.2963]], grad_fn=<MulBackward0>)
tensor([[0.7168, 2.2115],
        [1.0685, 3.2963]], grad_fn=<MulBackward0>)
tensor([[0.7168, 2.2115],
        [1.0685, 3.2963]], grad_fn=<MulBackward0>)
tensor([[-0.2090, -0.6448],
        [-0.3476, -1.0723]], grad_fn=<MulBackward0>)
tensor([[-0.2090, -0.6448],
        [-0.3476, -1.0723]], grad_fn=<MulBackward0>)
tensor([[-0.2090, -0.6448],
        [-0.3476, -1.072

  1%|          | 7/1000 [00:00<02:08,  7.71it/s]

tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[-1.0502, -3.2402],
        [-0.8395, -2.5899]], grad_fn=<MulBackward0>)
tensor([[ 0.6431,  1.9840],
        [-0.2819, -0.8698]], grad_fn=<MulBackward0>)
tensor([[ 0.6431,  1.9840],
        [-0.2819, -0.8698]], grad_fn=<MulBackward0>)
tensor([[ 0.6431,  1.9840],


  1%|          | 9/1000 [00:01<02:11,  7.54it/s]

tensor([[-0.0761, -0.2347],
        [-0.5780, -1.7832]], grad_fn=<MulBackward0>)
tensor([[-0.0761, -0.2347],
        [-0.5780, -1.7832]], grad_fn=<MulBackward0>)
tensor([[-0.0761, -0.2347],
        [-0.5780, -1.7832]], grad_fn=<MulBackward0>)
tensor([[-0.0761, -0.2347],
        [-0.5780, -1.7832]], grad_fn=<MulBackward0>)
tensor([[-0.6400, -1.9744],
        [ 0.8821,  2.7215]], grad_fn=<MulBackward0>)
tensor([[-0.6400, -1.9744],
        [ 0.8821,  2.7215]], grad_fn=<MulBackward0>)
tensor([[-0.6400, -1.9744],
        [ 0.8821,  2.7215]], grad_fn=<MulBackward0>)
tensor([[-0.6400, -1.9744],
        [ 0.8821,  2.7215]], grad_fn=<MulBackward0>)
tensor([[-0.6400, -1.9744],
        [ 0.8821,  2.7215]], grad_fn=<MulBackward0>)
tensor([[-0.6400, -1.9744],
        [ 0.8821,  2.7215]], grad_fn=<MulBackward0>)
tensor([[0.7116, 2.1955],
        [1.0617, 3.2754]], grad_fn=<MulBackward0>)
tensor([[0.7116, 2.1955],
        [1.0617, 3.2754]], grad_fn=<MulBackward0>)
tensor([[0.7116, 2.1955],
        [1

  1%|          | 10/1000 [00:01<02:07,  7.74it/s]

tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[-1.0517, -3.2446],
        [-0.8438, -2.6033]], grad_fn=<MulBackward0>)
tensor([[ 0.6355,  1.9606],
        [-0.2869, -0.8852]], grad_fn=<MulBackward0>)
tensor([[ 0.6355,  1.9606],
        [-0.2869, -0.8852]], grad_fn=<MulBackward0>)
tensor([[ 0.6355,  1.9606],
        [-0.2869, -0.8852]], grad_fn=<MulBackward0>)
tensor([[ 0.6355,  1.9606],


  1%|          | 12/1000 [00:01<02:06,  7.78it/s]

tensor([[1.1990, 3.6990],
        [0.7173, 2.2130]], grad_fn=<MulBackward0>)
tensor([[1.1990, 3.6990],
        [0.7173, 2.2130]], grad_fn=<MulBackward0>)
tensor([[1.1990, 3.6990],
        [0.7173, 2.2130]], grad_fn=<MulBackward0>)
tensor([[1.1990, 3.6990],
        [0.7173, 2.2130]], grad_fn=<MulBackward0>)
tensor([[-0.0775, -0.2390],
        [-0.5740, -1.7709]], grad_fn=<MulBackward0>)
tensor([[-0.0775, -0.2390],
        [-0.5740, -1.7709]], grad_fn=<MulBackward0>)
tensor([[-0.0775, -0.2390],
        [-0.5740, -1.7709]], grad_fn=<MulBackward0>)
tensor([[-0.0775, -0.2390],
        [-0.5740, -1.7709]], grad_fn=<MulBackward0>)
tensor([[-0.0775, -0.2390],
        [-0.5740, -1.7709]], grad_fn=<MulBackward0>)
tensor([[-0.6393, -1.9723],
        [ 0.8726,  2.6921]], grad_fn=<MulBackward0>)
tensor([[-0.6393, -1.9723],
        [ 0.8726,  2.6921]], grad_fn=<MulBackward0>)
tensor([[-0.6393, -1.9723],
        [ 0.8726,  2.6921]], grad_fn=<MulBackward0>)
tensor([[-0.6393, -1.9723],
        [ 0.8726

  1%|▏         | 13/1000 [00:01<02:13,  7.37it/s]

tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[-1.0556, -3.2566],
        [-0.8471, -2.6134]], grad_fn=<MulBackward0>)
tensor([[ 0.6282,  1.9379],
        [-0.2991, -0.9227]], grad_fn=<MulBackward0>)
tensor([[ 0.6282,  1.9379],
        [-0.2991, -0.9227]], grad_fn=<MulBackward0>)
tensor([[ 0.6282,  1.9379],
        [-0.2991, -0.9227]], grad_fn=<MulBackward0>)
tensor([[ 0.6282,  1.9379],


  1%|▏         | 14/1000 [00:01<02:23,  6.86it/s]

tensor([[1.0119, 3.1218],
        [0.0341, 0.1053]], grad_fn=<MulBackward0>)
tensor([[1.0119, 3.1218],
        [0.0341, 0.1053]], grad_fn=<MulBackward0>)
tensor([[1.0119, 3.1218],
        [0.0341, 0.1053]], grad_fn=<MulBackward0>)
tensor([[1.0119, 3.1218],
        [0.0341, 0.1053]], grad_fn=<MulBackward0>)
tensor([[1.0119, 3.1218],
        [0.0341, 0.1053]], grad_fn=<MulBackward0>)
tensor([[1.0119, 3.1218],
        [0.0341, 0.1053]], grad_fn=<MulBackward0>)
tensor([[1.0119, 3.1218],
        [0.0341, 0.1053]], grad_fn=<MulBackward0>)
tensor([[-0.6264, -1.9325],
        [-0.3483, -1.0746]], grad_fn=<MulBackward0>)
tensor([[-0.6264, -1.9325],
        [-0.3483, -1.0746]], grad_fn=<MulBackward0>)
tensor([[-0.6264, -1.9325],
        [-0.3483, -1.0746]], grad_fn=<MulBackward0>)
tensor([[-0.6264, -1.9325],
        [-0.3483, -1.0746]], grad_fn=<MulBackward0>)
tensor([[-0.6264, -1.9325],
        [-0.3483, -1.0746]], grad_fn=<MulBackward0>)
tensor([[-0.6264, -1.9325],
        [-0.3483, -1.0746]],

  2%|▏         | 16/1000 [00:02<02:12,  7.40it/s]

tensor([[-0.6291, -1.9408],
        [ 0.8771,  2.7061]], grad_fn=<MulBackward0>)
tensor([[-0.6291, -1.9408],
        [ 0.8771,  2.7061]], grad_fn=<MulBackward0>)
tensor([[-0.6291, -1.9408],
        [ 0.8771,  2.7061]], grad_fn=<MulBackward0>)
tensor([[0.6923, 2.1358],
        [1.0363, 3.1971]], grad_fn=<MulBackward0>)
tensor([[0.6923, 2.1358],
        [1.0363, 3.1971]], grad_fn=<MulBackward0>)
tensor([[0.6923, 2.1358],
        [1.0363, 3.1971]], grad_fn=<MulBackward0>)
tensor([[0.6923, 2.1358],
        [1.0363, 3.1971]], grad_fn=<MulBackward0>)
tensor([[0.6923, 2.1358],
        [1.0363, 3.1971]], grad_fn=<MulBackward0>)
tensor([[0.6923, 2.1358],
        [1.0363, 3.1971]], grad_fn=<MulBackward0>)
tensor([[-0.2097, -0.6470],
        [-0.3411, -1.0522]], grad_fn=<MulBackward0>)
tensor([[-0.2097, -0.6470],
        [-0.3411, -1.0522]], grad_fn=<MulBackward0>)
tensor([[-0.2097, -0.6470],
        [-0.3411, -1.0522]], grad_fn=<MulBackward0>)
tensor([[ 0.4247,  1.3102],
        [-1.0581, -3.264

  2%|▏         | 17/1000 [00:02<02:10,  7.51it/s]

tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[-1.0711, -3.3045],
        [-0.8431, -2.6011]], grad_fn=<MulBackward0>)
tensor([[ 0.6301,  1.9439],
        [-0.2996, -0.9243]], grad_fn=<MulBackward0>)
tensor([[ 0.6301,  1.9439],
        [-0.2996, -0.9243]], grad_fn=<MulBackward0>)
tensor([[ 0.6301,  1.9439],
        [-0.2996, -0.9243]], grad_fn=<MulBackward0>)
tensor([[ 0.6301,  1.9439],


  2%|▏         | 19/1000 [00:02<02:16,  7.20it/s]

tensor([[-0.6200, -1.9128],
        [ 0.8594,  2.6513]], grad_fn=<MulBackward0>)
tensor([[-0.6200, -1.9128],
        [ 0.8594,  2.6513]], grad_fn=<MulBackward0>)
tensor([[-0.6200, -1.9128],
        [ 0.8594,  2.6513]], grad_fn=<MulBackward0>)
tensor([[0.6870, 2.1195],
        [1.0318, 3.1833]], grad_fn=<MulBackward0>)
tensor([[0.6870, 2.1195],
        [1.0318, 3.1833]], grad_fn=<MulBackward0>)
tensor([[0.6870, 2.1195],
        [1.0318, 3.1833]], grad_fn=<MulBackward0>)
tensor([[0.6870, 2.1195],
        [1.0318, 3.1833]], grad_fn=<MulBackward0>)
tensor([[0.6870, 2.1195],
        [1.0318, 3.1833]], grad_fn=<MulBackward0>)
tensor([[0.6870, 2.1195],
        [1.0318, 3.1833]], grad_fn=<MulBackward0>)
tensor([[-0.2117, -0.6532],
        [-0.3356, -1.0355]], grad_fn=<MulBackward0>)
tensor([[-0.2117, -0.6532],
        [-0.3356, -1.0355]], grad_fn=<MulBackward0>)
tensor([[-0.2117, -0.6532],
        [-0.3356, -1.0355]], grad_fn=<MulBackward0>)
tensor([[ 0.4204,  1.2970],
        [-1.0487, -3.235

  2%|▏         | 20/1000 [00:02<02:15,  7.24it/s]

tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[-1.0771, -3.3230],
        [-0.8441, -2.6040]], grad_fn=<MulBackward0>)
tensor([[ 0.6248,  1.9275],
        [-0.3016, -0.9303]], grad_fn=<MulBackward0>)
tensor([[ 0.6248,  1.9275],
        [-0.3016, -0.9303]], grad_fn=<MulBackward0>)
tensor([[ 0.6248,  1.9275],


  2%|▏         | 22/1000 [00:03<02:12,  7.36it/s]

tensor([[-0.6962, -2.1477],
        [-0.3274, -1.0102]], grad_fn=<MulBackward0>)
tensor([[-0.6962, -2.1477],
        [-0.3274, -1.0102]], grad_fn=<MulBackward0>)
tensor([[-0.6962, -2.1477],
        [-0.3274, -1.0102]], grad_fn=<MulBackward0>)
tensor([[-0.6962, -2.1477],
        [-0.3274, -1.0102]], grad_fn=<MulBackward0>)
tensor([[ 0.0586,  0.1809],
        [-0.5200, -1.6044]], grad_fn=<MulBackward0>)
tensor([[ 0.0586,  0.1809],
        [-0.5200, -1.6044]], grad_fn=<MulBackward0>)
tensor([[ 0.0586,  0.1809],
        [-0.5200, -1.6044]], grad_fn=<MulBackward0>)
tensor([[ 0.0586,  0.1809],
        [-0.5200, -1.6044]], grad_fn=<MulBackward0>)
tensor([[ 0.0586,  0.1809],
        [-0.5200, -1.6044]], grad_fn=<MulBackward0>)
tensor([[ 0.0586,  0.1809],
        [-0.5200, -1.6044]], grad_fn=<MulBackward0>)
tensor([[ 0.0586,  0.1809],
        [-0.5200, -1.6044]], grad_fn=<MulBackward0>)
tensor([[1.1807, 3.6426],
        [0.7081, 2.1846]], grad_fn=<MulBackward0>)
tensor([[1.1807, 3.6426],
      

  2%|▏         | 23/1000 [00:03<02:11,  7.42it/s]

tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[-1.0948, -3.3776],
        [-0.8475, -2.6146]], grad_fn=<MulBackward0>)
tensor([[ 0.6242,  1.9258],
        [-0.3101, -0.9566]], grad_fn=<MulBackward0>)
tensor([[ 0.6242,  1.9258],
        [-0.3101, -0.9566]], grad_fn=<MulBackward0>)
tensor([[ 0.6242,  1.9258],


  2%|▏         | 24/1000 [00:03<02:20,  6.92it/s]

tensor([[0.6732, 2.0770],
        [1.0168, 3.1368]], grad_fn=<MulBackward0>)
tensor([[0.6732, 2.0770],
        [1.0168, 3.1368]], grad_fn=<MulBackward0>)
tensor([[0.6732, 2.0770],
        [1.0168, 3.1368]], grad_fn=<MulBackward0>)
tensor([[-0.2189, -0.6753],
        [-0.3283, -1.0129]], grad_fn=<MulBackward0>)
tensor([[-0.2189, -0.6753],
        [-0.3283, -1.0129]], grad_fn=<MulBackward0>)
tensor([[-0.2189, -0.6753],
        [-0.3283, -1.0129]], grad_fn=<MulBackward0>)
tensor([[ 0.4161,  1.2837],
        [-1.0339, -3.1898]], grad_fn=<MulBackward0>)
tensor([[ 0.4161,  1.2837],
        [-1.0339, -3.1898]], grad_fn=<MulBackward0>)
tensor([[ 0.4161,  1.2837],
        [-1.0339, -3.1898]], grad_fn=<MulBackward0>)
tensor([[ 0.4161,  1.2837],
        [-1.0339, -3.1898]], grad_fn=<MulBackward0>)
tensor([[-0.3296, -1.0168],
        [-0.8909, -2.7484]], grad_fn=<MulBackward0>)
tensor([[-0.3296, -1.0168],
        [-0.8909, -2.7484]], grad_fn=<MulBackward0>)
tensor([[-0.3296, -1.0168],
        [-0.

  2%|▎         | 25/1000 [00:03<02:27,  6.62it/s]

tensor([[ 0.4177,  1.2886],
        [-1.0327, -3.1860]], grad_fn=<MulBackward0>)
tensor([[ 0.4177,  1.2886],
        [-1.0327, -3.1860]], grad_fn=<MulBackward0>)
tensor([[ 0.4177,  1.2886],
        [-1.0327, -3.1860]], grad_fn=<MulBackward0>)
tensor([[-0.3264, -1.0071],
        [-0.8884, -2.7409]], grad_fn=<MulBackward0>)
tensor([[-0.3264, -1.0071],
        [-0.8884, -2.7409]], grad_fn=<MulBackward0>)
tensor([[-0.3264, -1.0071],
        [-0.8884, -2.7409]], grad_fn=<MulBackward0>)
tensor([[-0.3264, -1.0071],
        [-0.8884, -2.7409]], grad_fn=<MulBackward0>)
tensor([[-0.1999, -0.6168],
        [ 0.8839,  2.7269]], grad_fn=<MulBackward0>)
tensor([[-0.1999, -0.6168],
        [ 0.8839,  2.7269]], grad_fn=<MulBackward0>)
tensor([[-0.1999, -0.6168],
        [ 0.8839,  2.7269]], grad_fn=<MulBackward0>)
tensor([[-0.1999, -0.6168],
        [ 0.8839,  2.7269]], grad_fn=<MulBackward0>)
tensor([[-0.1999, -0.6168],
        [ 0.8839,  2.7269]], grad_fn=<MulBackward0>)
tensor([[-0.1999, -0.6168],


  3%|▎         | 26/1000 [00:03<02:31,  6.42it/s]

Training_loss 9.04862
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[-1.0986, -3.3895],
        [-0.8487, -2.6185]], grad_fn=<MulBackward0>)
tensor([[ 0.6251,  1.9286],
        [-0.3185, -0.9826]], grad_fn=<MulBackward0>)
tensor([[ 0.6251,  1.9286],
        [-0.3185, -0.9826]], grad_fn=<MulBackward0>)
tensor

  3%|▎         | 27/1000 [00:03<03:11,  5.09it/s]

tensor([[0.6661, 2.0551],
        [1.0105, 3.1175]], grad_fn=<MulBackward0>)
tensor([[0.6661, 2.0551],
        [1.0105, 3.1175]], grad_fn=<MulBackward0>)
tensor([[0.6661, 2.0551],
        [1.0105, 3.1175]], grad_fn=<MulBackward0>)
tensor([[0.6661, 2.0551],
        [1.0105, 3.1175]], grad_fn=<MulBackward0>)
tensor([[-0.2182, -0.6730],
        [-0.3200, -0.9872]], grad_fn=<MulBackward0>)
tensor([[-0.2182, -0.6730],
        [-0.3200, -0.9872]], grad_fn=<MulBackward0>)
tensor([[-0.2182, -0.6730],
        [-0.3200, -0.9872]], grad_fn=<MulBackward0>)
tensor([[ 0.4207,  1.2978],
        [-1.0300, -3.1776]], grad_fn=<MulBackward0>)
tensor([[ 0.4207,  1.2978],
        [-1.0300, -3.1776]], grad_fn=<MulBackward0>)
tensor([[ 0.4207,  1.2978],
        [-1.0300, -3.1776]], grad_fn=<MulBackward0>)
tensor([[ 0.4207,  1.2978],
        [-1.0300, -3.1776]], grad_fn=<MulBackward0>)
tensor([[-0.3199, -0.9869],
        [-0.8822, -2.7217]], grad_fn=<MulBackward0>)
tensor([[-0.3199, -0.9869],
        [-0.8822

  3%|▎         | 28/1000 [00:04<03:26,  4.70it/s]

tensor([[ 0.6237,  1.9241],
        [-0.3174, -0.9793]], grad_fn=<MulBackward0>)
tensor([[ 0.6237,  1.9241],
        [-0.3174, -0.9793]], grad_fn=<MulBackward0>)
tensor([[ 0.6237,  1.9241],
        [-0.3174, -0.9793]], grad_fn=<MulBackward0>)
tensor([[ 0.6237,  1.9241],
        [-0.3174, -0.9793]], grad_fn=<MulBackward0>)
tensor([[ 0.6237,  1.9241],
        [-0.3174, -0.9793]], grad_fn=<MulBackward0>)
tensor([[ 0.6237,  1.9241],
        [-0.3174, -0.9793]], grad_fn=<MulBackward0>)
tensor([[-0.8911, -2.7491],
        [ 0.6948,  2.1434]], grad_fn=<MulBackward0>)
tensor([[-0.8911, -2.7491],
        [ 0.6948,  2.1434]], grad_fn=<MulBackward0>)
tensor([[-0.8911, -2.7491],
        [ 0.6948,  2.1434]], grad_fn=<MulBackward0>)
tensor([[-0.8911, -2.7491],
        [ 0.6948,  2.1434]], grad_fn=<MulBackward0>)
tensor([[-0.8911, -2.7491],
        [ 0.6948,  2.1434]], grad_fn=<MulBackward0>)
tensor([[-0.8911, -2.7491],
        [ 0.6948,  2.1434]], grad_fn=<MulBackward0>)
tensor([[-0.2048, -0.6318],


  3%|▎         | 29/1000 [00:04<03:40,  4.40it/s]

tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[-1.0990, -3.3907],
        [-0.8453, -2.6079]], grad_fn=<MulBackward0>)
tensor([[ 0.6272,  1.9349],
        [-0.3156, -0.9735]], grad_fn=<MulBackward0>)
tensor([[ 0.6272,  1.9349],
        [-0.3156, -0.9735]], grad_fn=<MulBackward0>)
tensor([[ 0.6272,  1.9349],


  3%|▎         | 30/1000 [00:04<03:33,  4.54it/s]

tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[-1.0985, -3.3891],
        [-0.8420, -2.5978]], grad_fn=<MulBackward0>)
tensor([[ 0.6213,  1.9169],
        [-0.3172, -0.9787]], grad_fn=<MulBackward0>)
tensor([[ 0.6213,  1.9169],
        [-0.3172, -0.9787]], grad_fn=<MulBackward0>)
tensor([[ 0.6213,  1.9169],


  3%|▎         | 31/1000 [00:04<03:28,  4.66it/s]

tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[-1.0956, -3.3800],
        [-0.8408, -2.5939]], grad_fn=<MulBackward0>)
tensor([[ 0.6210,  1.9158],
        [-0.3232, -0.9970]], grad_fn=<MulBackward0>)
tensor([[ 0.6210,  1.9158],
        [-0.3232, -0.9970]], grad_fn=<MulBackward0>)
tensor([[ 0.6210,  1.9158],


  3%|▎         | 32/1000 [00:04<03:07,  5.17it/s]

tensor([[-1.0935, -3.3735],
        [-0.8359, -2.5789]], grad_fn=<MulBackward0>)
tensor([[-1.0935, -3.3735],
        [-0.8359, -2.5789]], grad_fn=<MulBackward0>)
tensor([[-1.0935, -3.3735],
        [-0.8359, -2.5789]], grad_fn=<MulBackward0>)
tensor([[ 0.6176,  1.9054],
        [-0.3291, -1.0152]], grad_fn=<MulBackward0>)
tensor([[ 0.6176,  1.9054],
        [-0.3291, -1.0152]], grad_fn=<MulBackward0>)
tensor([[ 0.6176,  1.9054],
        [-0.3291, -1.0152]], grad_fn=<MulBackward0>)
tensor([[ 0.6176,  1.9054],
        [-0.3291, -1.0152]], grad_fn=<MulBackward0>)
tensor([[ 0.6176,  1.9054],
        [-0.3291, -1.0152]], grad_fn=<MulBackward0>)
tensor([[ 0.6176,  1.9054],
        [-0.3291, -1.0152]], grad_fn=<MulBackward0>)
tensor([[ 0.6176,  1.9054],
        [-0.3291, -1.0152]], grad_fn=<MulBackward0>)
tensor([[-0.8868, -2.7358],
        [ 0.6908,  2.1313]], grad_fn=<MulBackward0>)
tensor([[-0.8868, -2.7358],
        [ 0.6908,  2.1313]], grad_fn=<MulBackward0>)
tensor([[-0.8868, -2.7358],


  3%|▎         | 33/1000 [00:05<02:48,  5.73it/s]

tensor([[1.1696, 3.6084],
        [0.7023, 2.1667]], grad_fn=<MulBackward0>)
tensor([[1.1696, 3.6084],
        [0.7023, 2.1667]], grad_fn=<MulBackward0>)
tensor([[1.1696, 3.6084],
        [0.7023, 2.1667]], grad_fn=<MulBackward0>)
tensor([[-0.0711, -0.2194],
        [-0.5354, -1.6518]], grad_fn=<MulBackward0>)
tensor([[-0.0711, -0.2194],
        [-0.5354, -1.6518]], grad_fn=<MulBackward0>)
tensor([[-0.0711, -0.2194],
        [-0.5354, -1.6518]], grad_fn=<MulBackward0>)
tensor([[-0.0711, -0.2194],
        [-0.5354, -1.6518]], grad_fn=<MulBackward0>)
tensor([[-0.0711, -0.2194],
        [-0.5354, -1.6518]], grad_fn=<MulBackward0>)
tensor([[-0.5801, -1.7896],
        [ 0.7848,  2.4211]], grad_fn=<MulBackward0>)
tensor([[-0.5801, -1.7896],
        [ 0.7848,  2.4211]], grad_fn=<MulBackward0>)
tensor([[-0.5801, -1.7896],
        [ 0.7848,  2.4211]], grad_fn=<MulBackward0>)
tensor([[-0.5801, -1.7896],
        [ 0.7848,  2.4211]], grad_fn=<MulBackward0>)
tensor([[-0.5801, -1.7896],
        [ 0.

  4%|▎         | 35/1000 [00:05<02:26,  6.61it/s]

tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[-1.1041, -3.4061],
        [-0.8360, -2.5792]], grad_fn=<MulBackward0>)
tensor([[ 0.6224,  1.9202],
        [-0.3261, -1.0060]], grad_fn=<MulBackward0>)
tensor([[ 0.6224,  1.9202],
        [-0.3261, -1.0060]], grad_fn=<MulBackward0>)
tensor([[ 0.6224,  1.9202],


  4%|▎         | 36/1000 [00:05<02:29,  6.44it/s]

tensor([[1.1701, 3.6100],
        [0.6975, 2.1518]], grad_fn=<MulBackward0>)
tensor([[1.1701, 3.6100],
        [0.6975, 2.1518]], grad_fn=<MulBackward0>)
tensor([[1.1701, 3.6100],
        [0.6975, 2.1518]], grad_fn=<MulBackward0>)
tensor([[1.1701, 3.6100],
        [0.6975, 2.1518]], grad_fn=<MulBackward0>)
tensor([[1.1701, 3.6100],
        [0.6975, 2.1518]], grad_fn=<MulBackward0>)
tensor([[-0.0708, -0.2186],
        [-0.5293, -1.6330]], grad_fn=<MulBackward0>)
tensor([[-0.0708, -0.2186],
        [-0.5293, -1.6330]], grad_fn=<MulBackward0>)
tensor([[-0.0708, -0.2186],
        [-0.5293, -1.6330]], grad_fn=<MulBackward0>)
tensor([[-0.0708, -0.2186],
        [-0.5293, -1.6330]], grad_fn=<MulBackward0>)
tensor([[-0.0708, -0.2186],
        [-0.5293, -1.6330]], grad_fn=<MulBackward0>)
tensor([[-0.5750, -1.7739],
        [ 0.7806,  2.4084]], grad_fn=<MulBackward0>)
tensor([[-0.5750, -1.7739],
        [ 0.7806,  2.4084]], grad_fn=<MulBackward0>)
tensor([[-0.5750, -1.7739],
        [ 0.7806,  2

  4%|▎         | 37/1000 [00:05<02:38,  6.08it/s]

tensor([[ 0.1373,  0.4235],
        [-0.5312, -1.6390]], grad_fn=<MulBackward0>)
tensor([[ 0.1373,  0.4235],
        [-0.5312, -1.6390]], grad_fn=<MulBackward0>)
tensor([[ 0.1373,  0.4235],
        [-0.5312, -1.6390]], grad_fn=<MulBackward0>)
tensor([[ 0.1373,  0.4235],
        [-0.5312, -1.6390]], grad_fn=<MulBackward0>)
tensor([[1.1685, 3.6050],
        [0.6999, 2.1592]], grad_fn=<MulBackward0>)
tensor([[1.1685, 3.6050],
        [0.6999, 2.1592]], grad_fn=<MulBackward0>)
tensor([[1.1685, 3.6050],
        [0.6999, 2.1592]], grad_fn=<MulBackward0>)
tensor([[1.1685, 3.6050],
        [0.6999, 2.1592]], grad_fn=<MulBackward0>)
tensor([[1.1685, 3.6050],
        [0.6999, 2.1592]], grad_fn=<MulBackward0>)
tensor([[1.1685, 3.6050],
        [0.6999, 2.1592]], grad_fn=<MulBackward0>)
tensor([[1.1685, 3.6050],
        [0.6999, 2.1592]], grad_fn=<MulBackward0>)
tensor([[-0.0687, -0.2120],
        [-0.5291, -1.6323]], grad_fn=<MulBackward0>)
tensor([[-0.0687, -0.2120],
        [-0.5291, -1.6323]],

  4%|▍         | 39/1000 [00:06<02:25,  6.59it/s]

tensor([[-0.2095, -0.6462],
        [ 0.8617,  2.6586]], grad_fn=<MulBackward0>)
tensor([[-0.2095, -0.6462],
        [ 0.8617,  2.6586]], grad_fn=<MulBackward0>)
tensor([[-0.2095, -0.6462],
        [ 0.8617,  2.6586]], grad_fn=<MulBackward0>)
tensor([[-0.2095, -0.6462],
        [ 0.8617,  2.6586]], grad_fn=<MulBackward0>)
tensor([[-0.2095, -0.6462],
        [ 0.8617,  2.6586]], grad_fn=<MulBackward0>)
tensor([[-0.2095, -0.6462],
        [ 0.8617,  2.6586]], grad_fn=<MulBackward0>)
Training_loss 8.63903
tensor([[-1.1165, -3.4446],
        [-0.8354, -2.5774]], grad_fn=<MulBackward0>)
tensor([[-1.1165, -3.4446],
        [-0.8354, -2.5774]], grad_fn=<MulBackward0>)
tensor([[-1.1165, -3.4446],
        [-0.8354, -2.5774]], grad_fn=<MulBackward0>)
tensor([[-1.1165, -3.4446],
        [-0.8354, -2.5774]], grad_fn=<MulBackward0>)
tensor([[-1.1165, -3.4446],
        [-0.8354, -2.5774]], grad_fn=<MulBackward0>)
tensor([[-1.1165, -3.4446],
        [-0.8354, -2.5774]], grad_fn=<MulBackward0>)
tensor

  4%|▍         | 40/1000 [00:06<02:22,  6.72it/s]

tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[-1.1181, -3.4495],
        [-0.8347, -2.5751]], grad_fn=<MulBackward0>)
tensor([[ 0.6251,  1.9286],
        [-0.3337, -1.0296]], grad_fn=<MulBackward0>)
tensor([[ 0.6251,  1.9286],
        [-0.3337, -1.0296]], grad_fn=<MulBackward0>)
tensor([[ 0.6251,  1.9286],
        [-0.3337, -1.0296]], grad_fn=<MulBackward0>)
tensor([[ 0.6251,  1.9286],


  4%|▍         | 41/1000 [00:06<02:17,  6.97it/s]

tensor([[-0.7619, -2.3505],
        [-0.6591, -2.0336]], grad_fn=<MulBackward0>)
tensor([[-0.7619, -2.3505],
        [-0.6591, -2.0336]], grad_fn=<MulBackward0>)
tensor([[-0.7619, -2.3505],
        [-0.6591, -2.0336]], grad_fn=<MulBackward0>)
tensor([[-0.7619, -2.3505],
        [-0.6591, -2.0336]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],
        [-0.2921, -0.9011]], grad_fn=<MulBackward0>)
tensor([[-0.5991, -1.8483],


  4%|▍         | 43/1000 [00:06<02:17,  6.96it/s]

tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[-1.1520, -3.5541],
        [-0.8539, -2.6345]], grad_fn=<MulBackward0>)
tensor([[ 0.6317,  1.9490],
        [-0.3299, -1.0178]], grad_fn=<MulBackward0>)
tensor([[ 0.6317,  1.9490],
        [-0.3299, -1.0178]], grad_fn=<MulBackward0>)
tensor([[ 0.6317,  1.9490],


  4%|▍         | 45/1000 [00:06<02:06,  7.57it/s]

tensor([[-0.5757, -1.7762],
        [-0.2811, -0.8673]], grad_fn=<MulBackward0>)
tensor([[-0.5757, -1.7762],
        [-0.2811, -0.8673]], grad_fn=<MulBackward0>)
tensor([[-0.5757, -1.7762],
        [-0.2811, -0.8673]], grad_fn=<MulBackward0>)
tensor([[-0.5757, -1.7762],
        [-0.2811, -0.8673]], grad_fn=<MulBackward0>)
tensor([[-0.5757, -1.7762],
        [-0.2811, -0.8673]], grad_fn=<MulBackward0>)
tensor([[ 0.1685,  0.5197],
        [-0.5233, -1.6145]], grad_fn=<MulBackward0>)
tensor([[ 0.1685,  0.5197],
        [-0.5233, -1.6145]], grad_fn=<MulBackward0>)
tensor([[ 0.1685,  0.5197],
        [-0.5233, -1.6145]], grad_fn=<MulBackward0>)
tensor([[ 0.1685,  0.5197],
        [-0.5233, -1.6145]], grad_fn=<MulBackward0>)
tensor([[ 0.1685,  0.5197],
        [-0.5233, -1.6145]], grad_fn=<MulBackward0>)
tensor([[ 0.1685,  0.5197],
        [-0.5233, -1.6145]], grad_fn=<MulBackward0>)
tensor([[ 0.1685,  0.5197],
        [-0.5233, -1.6145]], grad_fn=<MulBackward0>)
tensor([[1.1611, 3.5821],
  

  5%|▍         | 46/1000 [00:06<02:05,  7.61it/s]

tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[-1.1579, -3.5722],
        [-0.8542, -2.6355]], grad_fn=<MulBackward0>)
tensor([[ 0.6231,  1.9225],
        [-0.3349, -1.0332]], grad_fn=<MulBackward0>)
tensor([[ 0.6231,  1.9225],
        [-0.3349, -1.0332]], grad_fn=<MulBackward0>)
tensor([[ 0.6231,  1.9225],


  5%|▍         | 48/1000 [00:07<02:03,  7.69it/s]

tensor([[1.1656, 3.5961],
        [0.6827, 2.1061]], grad_fn=<MulBackward0>)
tensor([[-0.0647, -0.1997],
        [-0.5198, -1.6036]], grad_fn=<MulBackward0>)
tensor([[-0.0647, -0.1997],
        [-0.5198, -1.6036]], grad_fn=<MulBackward0>)
tensor([[-0.0647, -0.1997],
        [-0.5198, -1.6036]], grad_fn=<MulBackward0>)
tensor([[-0.0647, -0.1997],
        [-0.5198, -1.6036]], grad_fn=<MulBackward0>)
tensor([[-0.0647, -0.1997],
        [-0.5198, -1.6036]], grad_fn=<MulBackward0>)
tensor([[-0.5576, -1.7203],
        [ 0.7425,  2.2906]], grad_fn=<MulBackward0>)
tensor([[-0.5576, -1.7203],
        [ 0.7425,  2.2906]], grad_fn=<MulBackward0>)
tensor([[-0.5576, -1.7203],
        [ 0.7425,  2.2906]], grad_fn=<MulBackward0>)
tensor([[-0.5576, -1.7203],
        [ 0.7425,  2.2906]], grad_fn=<MulBackward0>)
tensor([[-0.5576, -1.7203],
        [ 0.7425,  2.2906]], grad_fn=<MulBackward0>)
tensor([[-0.5576, -1.7203],
        [ 0.7425,  2.2906]], grad_fn=<MulBackward0>)
tensor([[0.6235, 1.9234],
      

  5%|▍         | 49/1000 [00:07<02:03,  7.70it/s]

tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[-1.1707, -3.6117],
        [-0.8599, -2.6528]], grad_fn=<MulBackward0>)
tensor([[ 0.6268,  1.9338],
        [-0.3315, -1.0227]], grad_fn=<MulBackward0>)
tensor([[ 0.6268,  1.9338],
        [-0.3315, -1.0227]], grad_fn=<MulBackward0>)
tensor([[ 0.6268,  1.9338],
        [-0.3315, -1.0227]], grad_fn=<MulBackward0>)
tensor([[ 0.6268,  1.9338],


  5%|▌         | 51/1000 [00:07<02:07,  7.46it/s]

tensor([[1.1610, 3.5818],
        [0.6861, 2.1168]], grad_fn=<MulBackward0>)
tensor([[1.1610, 3.5818],
        [0.6861, 2.1168]], grad_fn=<MulBackward0>)
tensor([[1.1610, 3.5818],
        [0.6861, 2.1168]], grad_fn=<MulBackward0>)
tensor([[1.1610, 3.5818],
        [0.6861, 2.1168]], grad_fn=<MulBackward0>)
tensor([[1.1610, 3.5818],
        [0.6861, 2.1168]], grad_fn=<MulBackward0>)
tensor([[1.1610, 3.5818],
        [0.6861, 2.1168]], grad_fn=<MulBackward0>)
tensor([[-0.0649, -0.2003],
        [-0.5173, -1.5959]], grad_fn=<MulBackward0>)
tensor([[-0.0649, -0.2003],
        [-0.5173, -1.5959]], grad_fn=<MulBackward0>)
tensor([[-0.0649, -0.2003],
        [-0.5173, -1.5959]], grad_fn=<MulBackward0>)
tensor([[-0.0649, -0.2003],
        [-0.5173, -1.5959]], grad_fn=<MulBackward0>)
tensor([[-0.0649, -0.2003],
        [-0.5173, -1.5959]], grad_fn=<MulBackward0>)
tensor([[-0.5535, -1.7076],
        [ 0.7338,  2.2639]], grad_fn=<MulBackward0>)
tensor([[-0.5535, -1.7076],
        [ 0.7338,  2.263

  5%|▌         | 52/1000 [00:07<02:04,  7.64it/s]

tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[-1.1767, -3.6303],
        [-0.8600, -2.6532]], grad_fn=<MulBackward0>)
tensor([[ 0.6236,  1.9238],
        [-0.3382, -1.0435]], grad_fn=<MulBackward0>)
tensor([[ 0.6236,  1.9238],
        [-0.3382, -1.0435]], grad_fn=<MulBackward0>)
tensor([[ 0.6236,  1.9238],


  5%|▌         | 54/1000 [00:07<02:04,  7.63it/s]

tensor([[1.1582, 3.5733],
        [0.6879, 2.1221]], grad_fn=<MulBackward0>)
tensor([[-0.0597, -0.1841],
        [-0.5159, -1.5915]], grad_fn=<MulBackward0>)
tensor([[-0.0597, -0.1841],
        [-0.5159, -1.5915]], grad_fn=<MulBackward0>)
tensor([[-0.0597, -0.1841],
        [-0.5159, -1.5915]], grad_fn=<MulBackward0>)
tensor([[-0.0597, -0.1841],
        [-0.5159, -1.5915]], grad_fn=<MulBackward0>)
tensor([[-0.0597, -0.1841],
        [-0.5159, -1.5915]], grad_fn=<MulBackward0>)
tensor([[-0.5551, -1.7127],
        [ 0.7305,  2.2537]], grad_fn=<MulBackward0>)
tensor([[-0.5551, -1.7127],
        [ 0.7305,  2.2537]], grad_fn=<MulBackward0>)
tensor([[-0.5551, -1.7127],
        [ 0.7305,  2.2537]], grad_fn=<MulBackward0>)
tensor([[-0.5551, -1.7127],
        [ 0.7305,  2.2537]], grad_fn=<MulBackward0>)
tensor([[-0.5551, -1.7127],
        [ 0.7305,  2.2537]], grad_fn=<MulBackward0>)
tensor([[-0.5551, -1.7127],
        [ 0.7305,  2.2537]], grad_fn=<MulBackward0>)
tensor([[0.6113, 1.8861],
      

  6%|▌         | 55/1000 [00:08<02:05,  7.54it/s]

tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[-1.1926, -3.6793],
        [-0.8698, -2.6833]], grad_fn=<MulBackward0>)
tensor([[ 0.6178,  1.9061],
        [-0.3450, -1.0643]], grad_fn=<MulBackward0>)
tensor([[ 0.6178,  1.9061],
        [-0.3450, -1.0643]], grad_fn=<MulBackward0>)
tensor([[ 0.6178,  1.9061],
        [-0.3450, -1.0643]], grad_fn=<MulBackward0>)
tensor([[ 0.6178,  1.9061],


  6%|▌         | 56/1000 [00:08<02:12,  7.10it/s]

tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[-0.5201, -1.6045],
        [-0.2732, -0.8430]], grad_fn=<MulBackward0>)
tensor([[ 0.2123,  0.6549],
        [-0.5152, -1.5896]], grad_fn=<MulBackward0>)
tensor([[ 0.2123,  0.6549],
        [-0.5152, -1.5896]], grad_fn=<MulBackward0>)
tensor([[ 0.2123,  0.6549],
        [-0.5152, -1.5896]], grad_fn=<MulBackward0>)
tensor([[ 0.2123,  0.6549],


  6%|▌         | 57/1000 [00:08<02:28,  6.36it/s]

tensor([[ 0.2141,  0.6605],
        [-0.5158, -1.5914]], grad_fn=<MulBackward0>)
tensor([[ 0.2141,  0.6605],
        [-0.5158, -1.5914]], grad_fn=<MulBackward0>)
tensor([[ 0.2141,  0.6605],
        [-0.5158, -1.5914]], grad_fn=<MulBackward0>)
tensor([[ 0.2141,  0.6605],
        [-0.5158, -1.5914]], grad_fn=<MulBackward0>)
tensor([[ 0.2141,  0.6605],
        [-0.5158, -1.5914]], grad_fn=<MulBackward0>)
tensor([[ 0.2141,  0.6605],
        [-0.5158, -1.5914]], grad_fn=<MulBackward0>)
tensor([[1.1620, 3.5848],
        [0.6775, 2.0903]], grad_fn=<MulBackward0>)
tensor([[1.1620, 3.5848],
        [0.6775, 2.0903]], grad_fn=<MulBackward0>)
tensor([[1.1620, 3.5848],
        [0.6775, 2.0903]], grad_fn=<MulBackward0>)
tensor([[1.1620, 3.5848],
        [0.6775, 2.0903]], grad_fn=<MulBackward0>)
tensor([[1.1620, 3.5848],
        [0.6775, 2.0903]], grad_fn=<MulBackward0>)
tensor([[1.1620, 3.5848],
        [0.6775, 2.0903]], grad_fn=<MulBackward0>)
tensor([[1.1620, 3.5848],
        [0.6775, 2.0903]],

  6%|▌         | 58/1000 [00:08<02:35,  6.06it/s]

tensor([[-0.5119, -1.5793],
        [-0.2681, -0.8270]], grad_fn=<MulBackward0>)
tensor([[ 0.2185,  0.6742],
        [-0.5113, -1.5774]], grad_fn=<MulBackward0>)
tensor([[ 0.2185,  0.6742],
        [-0.5113, -1.5774]], grad_fn=<MulBackward0>)
tensor([[ 0.2185,  0.6742],
        [-0.5113, -1.5774]], grad_fn=<MulBackward0>)
tensor([[ 0.2185,  0.6742],
        [-0.5113, -1.5774]], grad_fn=<MulBackward0>)
tensor([[ 0.2185,  0.6742],
        [-0.5113, -1.5774]], grad_fn=<MulBackward0>)
tensor([[ 0.2185,  0.6742],
        [-0.5113, -1.5774]], grad_fn=<MulBackward0>)
tensor([[ 0.2185,  0.6742],
        [-0.5113, -1.5774]], grad_fn=<MulBackward0>)
tensor([[1.1596, 3.5776],
        [0.6804, 2.0992]], grad_fn=<MulBackward0>)
tensor([[1.1596, 3.5776],
        [0.6804, 2.0992]], grad_fn=<MulBackward0>)
tensor([[1.1596, 3.5776],
        [0.6804, 2.0992]], grad_fn=<MulBackward0>)
tensor([[1.1596, 3.5776],
        [0.6804, 2.0992]], grad_fn=<MulBackward0>)
tensor([[1.1596, 3.5776],
        [0.6804, 2

  6%|▌         | 59/1000 [00:08<02:30,  6.24it/s]

tensor([[ 0.3721,  1.1481],
        [-0.9606, -2.9635]], grad_fn=<MulBackward0>)
tensor([[ 0.3721,  1.1481],
        [-0.9606, -2.9635]], grad_fn=<MulBackward0>)
tensor([[ 0.3721,  1.1481],
        [-0.9606, -2.9635]], grad_fn=<MulBackward0>)
tensor([[-0.2838, -0.8757],
        [-0.8238, -2.5414]], grad_fn=<MulBackward0>)
tensor([[-0.2838, -0.8757],
        [-0.8238, -2.5414]], grad_fn=<MulBackward0>)
tensor([[-0.2838, -0.8757],
        [-0.8238, -2.5414]], grad_fn=<MulBackward0>)
tensor([[-0.2838, -0.8757],
        [-0.8238, -2.5414]], grad_fn=<MulBackward0>)
tensor([[-0.2068, -0.6381],
        [ 0.8434,  2.6019]], grad_fn=<MulBackward0>)
tensor([[-0.2068, -0.6381],
        [ 0.8434,  2.6019]], grad_fn=<MulBackward0>)
tensor([[-0.2068, -0.6381],
        [ 0.8434,  2.6019]], grad_fn=<MulBackward0>)
tensor([[-0.2068, -0.6381],
        [ 0.8434,  2.6019]], grad_fn=<MulBackward0>)
tensor([[-0.2068, -0.6381],
        [ 0.8434,  2.6019]], grad_fn=<MulBackward0>)
tensor([[-0.2068, -0.6381],


  6%|▌         | 61/1000 [00:09<02:32,  6.17it/s]

tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[-1.2070, -3.7237],
        [-0.8662, -2.6723]], grad_fn=<MulBackward0>)
tensor([[ 0.6151,  1.8976],
        [-0.3582, -1.1052]], grad_fn=<MulBackward0>)
tensor([[ 0.6151,  1.8976],
        [-0.3582, -1.1052]], grad_fn=<MulBackward0>)
tensor([[ 0.6151,  1.8976],


  6%|▌         | 62/1000 [00:09<02:37,  5.96it/s]

tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[-1.2076, -3.7256],
        [-0.8715, -2.6886]], grad_fn=<MulBackward0>)
tensor([[ 0.6169,  1.9031],
        [-0.3607, -1.1128]], grad_fn=<MulBackward0>)
tensor([[ 0.6169,  1.9031],
        [-0.3607, -1.1128]], grad_fn=<MulBackward0>)
tensor([[ 0.6169,  1.9031],


  6%|▋         | 63/1000 [00:09<02:26,  6.41it/s]

tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[-1.2128, -3.7415],
        [-0.8762, -2.7033]], grad_fn=<MulBackward0>)
tensor([[ 0.6167,  1.9025],
        [-0.3605, -1.1122]], grad_fn=<MulBackward0>)
tensor([[ 0.6167,  1.9025],
        [-0.3605, -1.1122]], grad_fn=<MulBackward0>)
tensor([[ 0.6167,  1.9025],


  6%|▋         | 64/1000 [00:09<02:26,  6.39it/s]

tensor([[-0.4774, -1.4727],
        [-0.2597, -0.8011]], grad_fn=<MulBackward0>)
tensor([[-0.4774, -1.4727],
        [-0.2597, -0.8011]], grad_fn=<MulBackward0>)
tensor([[-0.4774, -1.4727],
        [-0.2597, -0.8011]], grad_fn=<MulBackward0>)
tensor([[-0.4774, -1.4727],
        [-0.2597, -0.8011]], grad_fn=<MulBackward0>)
tensor([[-0.4774, -1.4727],
        [-0.2597, -0.8011]], grad_fn=<MulBackward0>)
tensor([[ 0.2468,  0.7613],
        [-0.5120, -1.5796]], grad_fn=<MulBackward0>)
tensor([[ 0.2468,  0.7613],
        [-0.5120, -1.5796]], grad_fn=<MulBackward0>)
tensor([[ 0.2468,  0.7613],
        [-0.5120, -1.5796]], grad_fn=<MulBackward0>)
tensor([[ 0.2468,  0.7613],
        [-0.5120, -1.5796]], grad_fn=<MulBackward0>)
tensor([[ 0.2468,  0.7613],
        [-0.5120, -1.5796]], grad_fn=<MulBackward0>)
tensor([[ 0.2468,  0.7613],
        [-0.5120, -1.5796]], grad_fn=<MulBackward0>)
tensor([[ 0.2468,  0.7613],
        [-0.5120, -1.5796]], grad_fn=<MulBackward0>)
tensor([[1.1478, 3.5412],
  

  6%|▋         | 65/1000 [00:09<02:34,  6.07it/s]

tensor([[-0.0549, -0.1694],
        [-0.5022, -1.5495]], grad_fn=<MulBackward0>)
tensor([[-0.5280, -1.6291],
        [ 0.6852,  2.1139]], grad_fn=<MulBackward0>)
tensor([[-0.5280, -1.6291],
        [ 0.6852,  2.1139]], grad_fn=<MulBackward0>)
tensor([[-0.5280, -1.6291],
        [ 0.6852,  2.1139]], grad_fn=<MulBackward0>)
tensor([[-0.5280, -1.6291],
        [ 0.6852,  2.1139]], grad_fn=<MulBackward0>)
tensor([[-0.5280, -1.6291],
        [ 0.6852,  2.1139]], grad_fn=<MulBackward0>)
tensor([[-0.5280, -1.6291],
        [ 0.6852,  2.1139]], grad_fn=<MulBackward0>)
tensor([[0.5881, 1.8145],
        [0.9062, 2.7959]], grad_fn=<MulBackward0>)
tensor([[0.5881, 1.8145],
        [0.9062, 2.7959]], grad_fn=<MulBackward0>)
tensor([[0.5881, 1.8145],
        [0.9062, 2.7959]], grad_fn=<MulBackward0>)
tensor([[0.5881, 1.8145],
        [0.9062, 2.7959]], grad_fn=<MulBackward0>)
tensor([[0.5881, 1.8145],
        [0.9062, 2.7959]], grad_fn=<MulBackward0>)
tensor([[0.5881, 1.8145],
        [0.9062, 2.795

  7%|▋         | 67/1000 [00:10<02:25,  6.40it/s]

tensor([[0.5856, 1.8066],
        [0.9004, 2.7777]], grad_fn=<MulBackward0>)
tensor([[0.5856, 1.8066],
        [0.9004, 2.7777]], grad_fn=<MulBackward0>)
tensor([[-0.2342, -0.7224],
        [-0.2710, -0.8362]], grad_fn=<MulBackward0>)
tensor([[-0.2342, -0.7224],
        [-0.2710, -0.8362]], grad_fn=<MulBackward0>)
tensor([[-0.2342, -0.7224],
        [-0.2710, -0.8362]], grad_fn=<MulBackward0>)
tensor([[ 0.3686,  1.1372],
        [-0.9507, -2.9331]], grad_fn=<MulBackward0>)
tensor([[ 0.3686,  1.1372],
        [-0.9507, -2.9331]], grad_fn=<MulBackward0>)
tensor([[ 0.3686,  1.1372],
        [-0.9507, -2.9331]], grad_fn=<MulBackward0>)
tensor([[ 0.3686,  1.1372],
        [-0.9507, -2.9331]], grad_fn=<MulBackward0>)
tensor([[-0.2743, -0.8461],
        [-0.8109, -2.5018]], grad_fn=<MulBackward0>)
tensor([[-0.2743, -0.8461],
        [-0.8109, -2.5018]], grad_fn=<MulBackward0>)
tensor([[-0.2743, -0.8461],
        [-0.8109, -2.5018]], grad_fn=<MulBackward0>)
tensor([[-0.2743, -0.8461],
        

  7%|▋         | 68/1000 [00:10<02:36,  5.94it/s]

tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[-1.2210, -3.7671],
        [-0.8690, -2.6811]], grad_fn=<MulBackward0>)
tensor([[ 0.6150,  1.8973],
        [-0.3661, -1.1296]], grad_fn=<MulBackward0>)
tensor([[ 0.6150,  1.8973],
        [-0.3661, -1.1296]], grad_fn=<MulBackward0>)
tensor([[ 0.6150,  1.8973],


  7%|▋         | 69/1000 [00:10<02:45,  5.63it/s]

tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[-1.2160, -3.7516],
        [-0.8661, -2.6720]], grad_fn=<MulBackward0>)
tensor([[ 0.6131,  1.8914],
        [-0.3625, -1.1182]], grad_fn=<MulBackward0>)
tensor([[ 0.6131,  1.8914],
        [-0.3625, -1.1182]], grad_fn=<MulBackward0>)
tensor([[ 0.6131,  1.8914],
        [-0.3625, -1.1182]], grad_fn=<MulBackward0>)
tensor([[ 0.6131,  1.8914],


  7%|▋         | 70/1000 [00:10<02:49,  5.48it/s]

tensor([[-1.2162, -3.7522],
        [-0.8634, -2.6637]], grad_fn=<MulBackward0>)
tensor([[-1.2162, -3.7522],
        [-0.8634, -2.6637]], grad_fn=<MulBackward0>)
tensor([[-1.2162, -3.7522],
        [-0.8634, -2.6637]], grad_fn=<MulBackward0>)
tensor([[-1.2162, -3.7522],
        [-0.8634, -2.6637]], grad_fn=<MulBackward0>)
tensor([[-1.2162, -3.7522],
        [-0.8634, -2.6637]], grad_fn=<MulBackward0>)
tensor([[-1.2162, -3.7522],
        [-0.8634, -2.6637]], grad_fn=<MulBackward0>)
tensor([[-1.2162, -3.7522],
        [-0.8634, -2.6637]], grad_fn=<MulBackward0>)
tensor([[ 0.6119,  1.8877],
        [-0.3616, -1.1157]], grad_fn=<MulBackward0>)
tensor([[ 0.6119,  1.8877],
        [-0.3616, -1.1157]], grad_fn=<MulBackward0>)
tensor([[ 0.6119,  1.8877],
        [-0.3616, -1.1157]], grad_fn=<MulBackward0>)
tensor([[ 0.6119,  1.8877],
        [-0.3616, -1.1157]], grad_fn=<MulBackward0>)
tensor([[ 0.6119,  1.8877],
        [-0.3616, -1.1157]], grad_fn=<MulBackward0>)
tensor([[ 0.6119,  1.8877],


  7%|▋         | 71/1000 [00:10<02:51,  5.43it/s]

tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[-1.2123, -3.7401],
        [-0.8572, -2.6447]], grad_fn=<MulBackward0>)
tensor([[ 0.6092,  1.8796],
        [-0.3641, -1.1234]], grad_fn=<MulBackward0>)
tensor([[ 0.6092,  1.8796],
        [-0.3641, -1.1234]], grad_fn=<MulBackward0>)
tensor([[ 0.6092,  1.8796],
        [-0.3641, -1.1234]], grad_fn=<MulBackward0>)
tensor([[ 0.6092,  1.8796],
        [-0.3641, -1.1234]], grad_fn=<MulBackward0>)
tensor([[ 0.6092,  1.8796],


  7%|▋         | 72/1000 [00:11<02:46,  5.57it/s]

tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[-1.2191, -3.7610],
        [-0.8619, -2.6591]], grad_fn=<MulBackward0>)
tensor([[ 0.6110,  1.8850],
        [-0.3632, -1.1204]], grad_fn=<MulBackward0>)
tensor([[ 0.6110,  1.8850],
        [-0.3632, -1.1204]], grad_fn=<MulBackward0>)
tensor([[ 0.6110,  1.8850],
        [-0.3632, -1.1204]], grad_fn=<MulBackward0>)
tensor([[ 0.6110,  1.8850],


  7%|▋         | 73/1000 [00:11<02:32,  6.07it/s]

tensor([[ 1.1536,  3.5589],
        [-0.1276, -0.3938]], grad_fn=<MulBackward0>)
tensor([[0.8420, 2.5977],
        [0.1144, 0.3531]], grad_fn=<MulBackward0>)
tensor([[0.8420, 2.5977],
        [0.1144, 0.3531]], grad_fn=<MulBackward0>)
tensor([[0.8420, 2.5977],
        [0.1144, 0.3531]], grad_fn=<MulBackward0>)
tensor([[0.8420, 2.5977],
        [0.1144, 0.3531]], grad_fn=<MulBackward0>)
tensor([[0.8420, 2.5977],
        [0.1144, 0.3531]], grad_fn=<MulBackward0>)
tensor([[0.8420, 2.5977],
        [0.1144, 0.3531]], grad_fn=<MulBackward0>)
tensor([[0.8420, 2.5977],
        [0.1144, 0.3531]], grad_fn=<MulBackward0>)
tensor([[-0.5465, -1.6862],
        [-0.2501, -0.7715]], grad_fn=<MulBackward0>)
tensor([[-0.5465, -1.6862],
        [-0.2501, -0.7715]], grad_fn=<MulBackward0>)
tensor([[-0.5465, -1.6862],
        [-0.2501, -0.7715]], grad_fn=<MulBackward0>)
tensor([[-0.5465, -1.6862],
        [-0.2501, -0.7715]], grad_fn=<MulBackward0>)
tensor([[-0.5465, -1.6862],
        [-0.2501, -0.7715]],

  7%|▋         | 74/1000 [00:11<02:41,  5.73it/s]

tensor([[0.5714, 1.7629],
        [0.8814, 2.7193]], grad_fn=<MulBackward0>)
tensor([[-0.2377, -0.7332],
        [-0.2658, -0.8199]], grad_fn=<MulBackward0>)
tensor([[-0.2377, -0.7332],
        [-0.2658, -0.8199]], grad_fn=<MulBackward0>)
tensor([[-0.2377, -0.7332],
        [-0.2658, -0.8199]], grad_fn=<MulBackward0>)
tensor([[ 0.3578,  1.1038],
        [-0.9278, -2.8623]], grad_fn=<MulBackward0>)
tensor([[ 0.3578,  1.1038],
        [-0.9278, -2.8623]], grad_fn=<MulBackward0>)
tensor([[ 0.3578,  1.1038],
        [-0.9278, -2.8623]], grad_fn=<MulBackward0>)
tensor([[ 0.3578,  1.1038],
        [-0.9278, -2.8623]], grad_fn=<MulBackward0>)
tensor([[-0.2646, -0.8163],
        [-0.7985, -2.4636]], grad_fn=<MulBackward0>)
tensor([[-0.2646, -0.8163],
        [-0.7985, -2.4636]], grad_fn=<MulBackward0>)
tensor([[-0.2646, -0.8163],
        [-0.7985, -2.4636]], grad_fn=<MulBackward0>)
tensor([[-0.2646, -0.8163],
        [-0.7985, -2.4636]], grad_fn=<MulBackward0>)
tensor([[-0.2136, -0.6590],
    

  8%|▊         | 75/1000 [00:11<02:46,  5.55it/s]

tensor([[ 0.2946,  0.9088],
        [-0.5115, -1.5780]], grad_fn=<MulBackward0>)
tensor([[ 0.2946,  0.9088],
        [-0.5115, -1.5780]], grad_fn=<MulBackward0>)
tensor([[ 0.2946,  0.9088],
        [-0.5115, -1.5780]], grad_fn=<MulBackward0>)
tensor([[1.1406, 3.5190],
        [0.6865, 2.1179]], grad_fn=<MulBackward0>)
tensor([[1.1406, 3.5190],
        [0.6865, 2.1179]], grad_fn=<MulBackward0>)
tensor([[1.1406, 3.5190],
        [0.6865, 2.1179]], grad_fn=<MulBackward0>)
tensor([[1.1406, 3.5190],
        [0.6865, 2.1179]], grad_fn=<MulBackward0>)
tensor([[1.1406, 3.5190],
        [0.6865, 2.1179]], grad_fn=<MulBackward0>)
tensor([[1.1406, 3.5190],
        [0.6865, 2.1179]], grad_fn=<MulBackward0>)
tensor([[1.1406, 3.5190],
        [0.6865, 2.1179]], grad_fn=<MulBackward0>)
tensor([[-0.0623, -0.1923],
        [-0.4965, -1.5319]], grad_fn=<MulBackward0>)
tensor([[-0.0623, -0.1923],
        [-0.4965, -1.5319]], grad_fn=<MulBackward0>)
tensor([[-0.0623, -0.1923],
        [-0.4965, -1.5319]],

  8%|▊         | 76/1000 [00:11<02:48,  5.48it/s]

tensor([[-0.4275, -1.3189],
        [-0.2311, -0.7130]], grad_fn=<MulBackward0>)
tensor([[-0.4275, -1.3189],
        [-0.2311, -0.7130]], grad_fn=<MulBackward0>)
tensor([[-0.4275, -1.3189],
        [-0.2311, -0.7130]], grad_fn=<MulBackward0>)
tensor([[-0.4275, -1.3189],
        [-0.2311, -0.7130]], grad_fn=<MulBackward0>)
tensor([[-0.4275, -1.3189],
        [-0.2311, -0.7130]], grad_fn=<MulBackward0>)
tensor([[-0.4275, -1.3189],
        [-0.2311, -0.7130]], grad_fn=<MulBackward0>)
tensor([[-0.4275, -1.3189],
        [-0.2311, -0.7130]], grad_fn=<MulBackward0>)
tensor([[ 0.2960,  0.9133],
        [-0.5100, -1.5734]], grad_fn=<MulBackward0>)
tensor([[ 0.2960,  0.9133],
        [-0.5100, -1.5734]], grad_fn=<MulBackward0>)
tensor([[ 0.2960,  0.9133],
        [-0.5100, -1.5734]], grad_fn=<MulBackward0>)
tensor([[ 0.2960,  0.9133],
        [-0.5100, -1.5734]], grad_fn=<MulBackward0>)
tensor([[ 0.2960,  0.9133],
        [-0.5100, -1.5734]], grad_fn=<MulBackward0>)
tensor([[ 0.2960,  0.9133],


  8%|▊         | 78/1000 [00:12<02:28,  6.20it/s]

tensor([[-0.2649, -0.8173],
        [-0.7919, -2.4431]], grad_fn=<MulBackward0>)
tensor([[-0.2649, -0.8173],
        [-0.7919, -2.4431]], grad_fn=<MulBackward0>)
tensor([[-0.2110, -0.6511],
        [ 0.8072,  2.4903]], grad_fn=<MulBackward0>)
tensor([[-0.2110, -0.6511],
        [ 0.8072,  2.4903]], grad_fn=<MulBackward0>)
tensor([[-0.2110, -0.6511],
        [ 0.8072,  2.4903]], grad_fn=<MulBackward0>)
tensor([[-0.2110, -0.6511],
        [ 0.8072,  2.4903]], grad_fn=<MulBackward0>)
tensor([[-0.2110, -0.6511],
        [ 0.8072,  2.4903]], grad_fn=<MulBackward0>)
tensor([[-0.2110, -0.6511],
        [ 0.8072,  2.4903]], grad_fn=<MulBackward0>)
tensor([[-0.2110, -0.6511],
        [ 0.8072,  2.4903]], grad_fn=<MulBackward0>)
Training_loss 7.43509
tensor([[-1.2215, -3.7686],
        [-0.8648, -2.6680]], grad_fn=<MulBackward0>)
tensor([[-1.2215, -3.7686],
        [-0.8648, -2.6680]], grad_fn=<MulBackward0>)
tensor([[-1.2215, -3.7686],
        [-0.8648, -2.6680]], grad_fn=<MulBackward0>)
tensor

  8%|▊         | 79/1000 [00:12<02:27,  6.25it/s]

tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[-1.2264, -3.7836],
        [-0.8640, -2.6656]], grad_fn=<MulBackward0>)
tensor([[ 0.6060,  1.8697],
        [-0.3523, -1.0868]], grad_fn=<MulBackward0>)
tensor([[ 0.6060,  1.8697],
        [-0.3523, -1.0868]], grad_fn=<MulBackward0>)
tensor([[ 0.6060,  1.8697],


  8%|▊         | 80/1000 [00:12<02:30,  6.12it/s]

tensor([[ 1.1423,  3.5242],
        [-0.1318, -0.4065]], grad_fn=<MulBackward0>)
tensor([[ 1.1423,  3.5242],
        [-0.1318, -0.4065]], grad_fn=<MulBackward0>)
tensor([[ 1.1423,  3.5242],
        [-0.1318, -0.4065]], grad_fn=<MulBackward0>)
tensor([[ 1.1423,  3.5242],
        [-0.1318, -0.4065]], grad_fn=<MulBackward0>)
tensor([[0.8155, 2.5159],
        [0.1305, 0.4026]], grad_fn=<MulBackward0>)
tensor([[0.8155, 2.5159],
        [0.1305, 0.4026]], grad_fn=<MulBackward0>)
tensor([[0.8155, 2.5159],
        [0.1305, 0.4026]], grad_fn=<MulBackward0>)
tensor([[0.8155, 2.5159],
        [0.1305, 0.4026]], grad_fn=<MulBackward0>)
tensor([[0.8155, 2.5159],
        [0.1305, 0.4026]], grad_fn=<MulBackward0>)
tensor([[0.8155, 2.5159],
        [0.1305, 0.4026]], grad_fn=<MulBackward0>)
tensor([[0.8155, 2.5159],
        [0.1305, 0.4026]], grad_fn=<MulBackward0>)
tensor([[-0.5317, -1.6403],
        [-0.2463, -0.7598]], grad_fn=<MulBackward0>)
tensor([[-0.5317, -1.6403],
        [-0.2463, -0.7598]],

  8%|▊         | 82/1000 [00:12<02:20,  6.52it/s]

tensor([[ 0.4422,  1.3644],
        [-0.3925, -1.2109]], grad_fn=<MulBackward0>)
tensor([[-0.6910, -2.1317],
        [-0.5929, -1.8293]], grad_fn=<MulBackward0>)
tensor([[-0.6910, -2.1317],
        [-0.5929, -1.8293]], grad_fn=<MulBackward0>)
tensor([[-0.6910, -2.1317],
        [-0.5929, -1.8293]], grad_fn=<MulBackward0>)
tensor([[-0.6910, -2.1317],
        [-0.5929, -1.8293]], grad_fn=<MulBackward0>)
tensor([[-0.6910, -2.1317],
        [-0.5929, -1.8293]], grad_fn=<MulBackward0>)
tensor([[-0.4069, -1.2554],
        [-0.2232, -0.6885]], grad_fn=<MulBackward0>)
tensor([[-0.4069, -1.2554],
        [-0.2232, -0.6885]], grad_fn=<MulBackward0>)
tensor([[-0.4069, -1.2554],
        [-0.2232, -0.6885]], grad_fn=<MulBackward0>)
tensor([[-0.4069, -1.2554],
        [-0.2232, -0.6885]], grad_fn=<MulBackward0>)
tensor([[-0.4069, -1.2554],
        [-0.2232, -0.6885]], grad_fn=<MulBackward0>)
tensor([[-0.4069, -1.2554],
        [-0.2232, -0.6885]], grad_fn=<MulBackward0>)
tensor([[-0.4069, -1.2554],


  8%|▊         | 83/1000 [00:12<02:12,  6.90it/s]

tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[-1.2317, -3.8000],
        [-0.8566, -2.6428]], grad_fn=<MulBackward0>)
tensor([[ 0.6026,  1.8592],
        [-0.3632, -1.1204]], grad_fn=<MulBackward0>)
tensor([[ 0.6026,  1.8592],
        [-0.3632, -1.1204]], grad_fn=<MulBackward0>)
tensor([[ 0.6026,  1.8592],


  8%|▊         | 85/1000 [00:13<02:06,  7.24it/s]

tensor([[-0.2310, -0.7128],
        [-0.2570, -0.7929]], grad_fn=<MulBackward0>)
tensor([[-0.2310, -0.7128],
        [-0.2570, -0.7929]], grad_fn=<MulBackward0>)
tensor([[ 0.3542,  1.0928],
        [-0.9091, -2.8047]], grad_fn=<MulBackward0>)
tensor([[ 0.3542,  1.0928],
        [-0.9091, -2.8047]], grad_fn=<MulBackward0>)
tensor([[ 0.3542,  1.0928],
        [-0.9091, -2.8047]], grad_fn=<MulBackward0>)
tensor([[ 0.3542,  1.0928],
        [-0.9091, -2.8047]], grad_fn=<MulBackward0>)
tensor([[-0.2575, -0.7945],
        [-0.7744, -2.3891]], grad_fn=<MulBackward0>)
tensor([[-0.2575, -0.7945],
        [-0.7744, -2.3891]], grad_fn=<MulBackward0>)
tensor([[-0.2575, -0.7945],
        [-0.7744, -2.3891]], grad_fn=<MulBackward0>)
tensor([[-0.2575, -0.7945],
        [-0.7744, -2.3891]], grad_fn=<MulBackward0>)
tensor([[-0.2182, -0.6731],
        [ 0.8001,  2.4683]], grad_fn=<MulBackward0>)
tensor([[-0.2182, -0.6731],
        [ 0.8001,  2.4683]], grad_fn=<MulBackward0>)
tensor([[-0.2182, -0.6731],


  9%|▊         | 86/1000 [00:13<02:03,  7.41it/s]

tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[-1.2361, -3.8135],
        [-0.8678, -2.6773]], grad_fn=<MulBackward0>)
tensor([[ 0.6082,  1.8763],
        [-0.3604, -1.1119]], grad_fn=<MulBackward0>)
tensor([[ 0.6082,  1.8763],
        [-0.3604, -1.1119]], grad_fn=<MulBackward0>)
tensor([[ 0.6082,  1.8763],
        [-0.3604, -1.1119]], grad_fn=<MulBackward0>)
tensor([[ 0.6082,  1.8763],


  9%|▊         | 87/1000 [00:13<02:08,  7.09it/s]

tensor([[1.1225, 3.4632],
        [0.6797, 2.0968]], grad_fn=<MulBackward0>)
tensor([[1.1225, 3.4632],
        [0.6797, 2.0968]], grad_fn=<MulBackward0>)
tensor([[1.1225, 3.4632],
        [0.6797, 2.0968]], grad_fn=<MulBackward0>)
tensor([[1.1225, 3.4632],
        [0.6797, 2.0968]], grad_fn=<MulBackward0>)
tensor([[-0.0540, -0.1667],
        [-0.4836, -1.4920]], grad_fn=<MulBackward0>)
tensor([[-0.0540, -0.1667],
        [-0.4836, -1.4920]], grad_fn=<MulBackward0>)
tensor([[-0.0540, -0.1667],
        [-0.4836, -1.4920]], grad_fn=<MulBackward0>)
tensor([[-0.0540, -0.1667],
        [-0.4836, -1.4920]], grad_fn=<MulBackward0>)
tensor([[-0.0540, -0.1667],
        [-0.4836, -1.4920]], grad_fn=<MulBackward0>)
tensor([[-0.5070, -1.5642],
        [ 0.6359,  1.9620]], grad_fn=<MulBackward0>)
tensor([[-0.5070, -1.5642],
        [ 0.6359,  1.9620]], grad_fn=<MulBackward0>)
tensor([[-0.5070, -1.5642],
        [ 0.6359,  1.9620]], grad_fn=<MulBackward0>)
tensor([[-0.5070, -1.5642],
        [ 0.6359

  9%|▉         | 89/1000 [00:13<02:05,  7.26it/s]

tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[-1.2345, -3.8085],
        [-0.8693, -2.6819]], grad_fn=<MulBackward0>)
tensor([[ 0.5991,  1.8482],
        [-0.3629, -1.1196]], grad_fn=<MulBackward0>)
tensor([[ 0.5991,  1.8482],
        [-0.3629, -1.1196]], grad_fn=<MulBackward0>)
tensor([[ 0.5991,  1.8482],


  9%|▉         | 89/1000 [00:13<02:20,  6.48it/s]

tensor([[-0.0520, -0.1606],
        [-0.4802, -1.4815]], grad_fn=<MulBackward0>)
tensor([[-0.0520, -0.1606],
        [-0.4802, -1.4815]], grad_fn=<MulBackward0>)
tensor([[-0.0520, -0.1606],
        [-0.4802, -1.4815]], grad_fn=<MulBackward0>)
tensor([[-0.0520, -0.1606],
        [-0.4802, -1.4815]], grad_fn=<MulBackward0>)
tensor([[-0.0520, -0.1606],
        [-0.4802, -1.4815]], grad_fn=<MulBackward0>)
tensor([[-0.4993, -1.5405],
        [ 0.6241,  1.9253]], grad_fn=<MulBackward0>)
tensor([[-0.4993, -1.5405],
        [ 0.6241,  1.9253]], grad_fn=<MulBackward0>)
tensor([[-0.4993, -1.5405],
        [ 0.6241,  1.9253]], grad_fn=<MulBackward0>)
tensor([[-0.4993, -1.5405],
        [ 0.6241,  1.9253]], grad_fn=<MulBackward0>)
tensor([[-0.4993, -1.5405],
        [ 0.6241,  1.9253]], grad_fn=<MulBackward0>)
tensor([[-0.4993, -1.5405],
        [ 0.6241,  1.9253]], grad_fn=<MulBackward0>)
tensor([[0.5472, 1.6883],
        [0.8205, 2.5313]], grad_fn=<MulBackward0>)
tensor([[0.5472, 1.6883],
      




KeyboardInterrupt: 

In [23]:
#plot.plot(test_loss)
parameters_to_vector(models[19].parameters())

tensor([-1.7433,  1.8266], grad_fn=<CatBackward0>)

In [29]:
for j in G.neighbors(3):
    print(j)

0
2
5
6
8
9
11


In [30]:
parameters_to_vector(models[19].parameters())

tensor([-1.7433,  1.8266], grad_fn=<CatBackward0>)

In [39]:
projection_list[1]

[tensor([[ 9.9958e-01, -4.1648e-04],
         [-4.1648e-04,  9.9958e-01]], grad_fn=<AddBackward0>),
 0,
 tensor([[ 9.9958e-01, -4.1648e-04],
         [-4.1648e-04,  9.9958e-01]], grad_fn=<AddBackward0>),
 0,
 tensor([[ 9.9958e-01, -4.1648e-04],
         [-4.1648e-04,  9.9958e-01]], grad_fn=<AddBackward0>),
 0,
 tensor([[ 9.9958e-01, -4.1648e-04],
         [-4.1648e-04,  9.9958e-01]], grad_fn=<AddBackward0>),
 tensor([[ 9.9958e-01, -4.1648e-04],
         [-4.1648e-04,  9.9958e-01]], grad_fn=<AddBackward0>),
 tensor([[ 9.9958e-01, -4.1648e-04],
         [-4.1648e-04,  9.9958e-01]], grad_fn=<AddBackward0>),
 tensor([[ 9.9958e-01, -4.1648e-04],
         [-4.1648e-04,  9.9958e-01]], grad_fn=<AddBackward0>),
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [40]:
projected_weights[0]

[0,
 tensor([1.6737, 1.8006]),
 tensor([1.8963, 1.6652]),
 tensor([1.7272, 1.7652]),
 tensor([1.6959, 1.8277]),
 tensor([1.7328, 1.7924]),
 tensor([1.5966, 1.6365]),
 tensor([1.7904, 1.7272]),
 tensor([1.6778, 1.6466]),
 tensor([1.6821, 1.7800]),
 tensor([-1.6982,  1.5879]),
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]