In [1]:
import numpy as np
import pandas as pd
np.random.seed(0)

In [2]:
# Downloading Movielens-100k
!curl -O http://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip ml-100k.zip
!cd ml-100k/

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0 4808k    0  7021    0     0   8646      0  0:09:29 --:--:--  0:09:29  8635
  9 4808k    9  453k    0     0   268k      0  0:00:17  0:00:01  0:00:16  268k
 89 4808k   89 4308k    0     0  1569k      0  0:00:03  0:00:02  0:00:01 1568k
100 4808k  100 4808k    0     0  1651k      0  0:00:02  0:00:02 --:--:-- 1651k
"unzip" ­Ґ пў«пҐвбп ў­гваҐ­­Ґ© Ё«Ё ў­Ґи­Ґ©
Є®¬ ­¤®©, ЁбЇ®«­пҐ¬®© Їа®Ја ¬¬®© Ё«Ё Ї ЄҐв­л¬ д ©«®¬.
‘ЁбвҐ¬Ґ ­Ґ г¤ Ґвбп ­ ©вЁ гЄ § ­­л© Їгвм.


In [3]:
import zipfile
with zipfile.ZipFile('ml-100k.zip', 'r') as zip_ref:
    zip_ref.extractall('./')

In [4]:
names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('./ml-100k/u.data', sep='\t', names=names)
df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
n_users = df.user_id.unique().shape[0]
n_items = df.item_id.unique().shape[0]
ratings = np.zeros((n_users, n_items))
for row in df.itertuples():
    ratings[row[1]-1, row[2]-1] = row[3]
ratings

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

In [6]:
print (str(n_users) + ' users')
print (str(n_items) + ' items')
sparsity = float(len(ratings.nonzero()[0]))
sparsity /= (ratings.shape[0] * ratings.shape[1])
sparsity *= 100
print ('Sparsity: {:4.2f}%'.format(sparsity))

943 users
1682 items
Sparsity: 6.30%


In [8]:
def train_test_split(ratings):
    test = np.zeros(ratings.shape)
    train = ratings.copy()
    for user in range(ratings.shape[0]):
        test_ratings = np.random.choice(ratings[user, :].nonzero()[0], size=10, replace=False)
        train[user, test_ratings] = 0.
        test[user, test_ratings] = ratings[user, test_ratings]
        
    # Test and training are truly disjoint
    assert(np.all((train * test) == 0)) 
    return train, test

In [9]:
train, test = train_test_split(ratings)

In [10]:
from sklearn.metrics import mean_squared_error
from numpy.linalg import solve

def get_mse(pred, actual):
    # Ignore nonzero terms.
    pred = pred[actual.nonzero()].flatten()
    actual = actual[actual.nonzero()].flatten()
    return mean_squared_error(pred, actual)

In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from numpy.linalg import solve
sns.set()

def plot_learning_curve(iter_array, model):
    plt.plot(iter_array, model.train_mse, \
             label='Training', linewidth=5)
    plt.plot(iter_array, model.test_mse, \
             label='Test', linewidth=5)


    plt.xticks(fontsize=16);
    plt.yticks(fontsize=16);
    plt.xlabel('iterations', fontsize=30);
    plt.ylabel('MSE', fontsize=30);
    plt.legend(loc='best', fontsize=20);

In [13]:
# TODO - TO CHange with 
class ExplicitMF():
    def __init__(self, 
                 ratings,
                 n_factors=40,
                 learning='sgd',
                 item_fact_reg=0.0, 
                 user_fact_reg=0.0,
                 item_bias_reg=0.0,
                 user_bias_reg=0.0,
                 verbose=False):
        """
        Train a matrix factorization model to predict empty 
        entries in a matrix. The terminology assumes a 
        ratings matrix which is ~ user x item
        
        Params
        ======
        ratings : (ndarray)
            User x Item matrix with corresponding ratings
        
        n_factors : (int)
            Number of latent factors to use in matrix 
            factorization model
        learning : (str)
            Method of optimization. Options include 
            'sgd' or 'als'.
        
        item_fact_reg : (float)
            Regularization term for item latent factors
        
        user_fact_reg : (float)
            Regularization term for user latent factors
            
        item_bias_reg : (float)
            Regularization term for item biases
        
        user_bias_reg : (float)
            Regularization term for user biases
        
        verbose : (bool)
            Whether or not to printout training progress
        """
        
        self.ratings = ratings
        self.n_users, self.n_items = ratings.shape
        self.n_factors = n_factors
        self.item_fact_reg = item_fact_reg
        self.user_fact_reg = user_fact_reg
        self.item_bias_reg = item_bias_reg
        self.user_bias_reg = user_bias_reg
        self.learning = learning
        if self.learning == 'sgd':
            self.sample_row, self.sample_col = self.ratings.nonzero()
            self.n_samples = len(self.sample_row)
        self._v = verbose

    def als_step(self,
                 latent_vectors,
                 fixed_vecs,
                 ratings,
                 _lambda,
                 type='user'):
        """
        One of the two ALS steps. Solve for the latent vectors
        specified by type.
        """
        if type == 'user':
            # Precompute
            YTY = fixed_vecs.T.dot(fixed_vecs)
            lambdaI = np.eye(YTY.shape[0]) * _lambda

            for u in range(latent_vectors.shape[0]):
                latent_vectors[u, :] = solve((YTY + lambdaI), 
                                             ratings[u, :].dot(fixed_vecs))
        elif type == 'item':
            # Precompute
            XTX = fixed_vecs.T.dot(fixed_vecs)
            lambdaI = np.eye(XTX.shape[0]) * _lambda
            
            for i in range(latent_vectors.shape[0]):
                latent_vectors[i, :] = solve((XTX + lambdaI), 
                                             ratings[:, i].T.dot(fixed_vecs))
        return latent_vectors

    def train(self, n_iter=10, learning_rate=0.1):
        """ Train model for n_iter iterations from scratch."""
        # initialize latent vectors        
        self.user_vecs = np.random.normal(scale=1./self.n_factors,\
                                          size=(self.n_users, self.n_factors))
        self.item_vecs = np.random.normal(scale=1./self.n_factors,
                                          size=(self.n_items, self.n_factors))
        
        if self.learning == 'als':
            self.partial_train(n_iter)
        elif self.learning == 'sgd':
            self.learning_rate = learning_rate
            self.user_bias = np.zeros(self.n_users)
            self.item_bias = np.zeros(self.n_items)
            self.global_bias = np.mean(self.ratings[np.where(self.ratings != 0)])
            self.partial_train(n_iter)
    
    
    def partial_train(self, n_iter):
        """ 
        Train model for n_iter iterations. Can be 
        called multiple times for further training.
        """
        ctr = 1
        while ctr <= n_iter:
            if ctr % 10 == 0 and self._v:
                print ('\tcurrent iteration: {}'.format(ctr))
            if self.learning == 'als':
                self.user_vecs = self.als_step(self.user_vecs, 
                                               self.item_vecs, 
                                               self.ratings, 
                                               self.user_fact_reg, 
                                               type='user')
                self.item_vecs = self.als_step(self.item_vecs, 
                                               self.user_vecs, 
                                               self.ratings, 
                                               self.item_fact_reg, 
                                               type='item')
            elif self.learning == 'sgd':
                self.training_indices = np.arange(self.n_samples)
                np.random.shuffle(self.training_indices)
                self.sgd()
            ctr += 1

    def sgd(self):
        for idx in self.training_indices:
            u = self.sample_row[idx]
            i = self.sample_col[idx]
            prediction = self.predict(u, i)
            e = (self.ratings[u,i] - prediction) # error
            
            # Update biases
            self.user_bias[u] += self.learning_rate * \
                                (e - self.user_bias_reg * self.user_bias[u])
            self.item_bias[i] += self.learning_rate * \
                                (e - self.item_bias_reg * self.item_bias[i])
            
            #Update latent factors
            self.user_vecs[u, :] += self.learning_rate * \
                                    (e * self.item_vecs[i, :] - \
                                     self.user_fact_reg * self.user_vecs[u,:])
            self.item_vecs[i, :] += self.learning_rate * \
                                    (e * self.user_vecs[u, :] - \
                                     self.item_fact_reg * self.item_vecs[i,:])
    def predict(self, u, i):
        """ Single user and item prediction."""
        if self.learning == 'als':
            return self.user_vecs[u, :].dot(self.item_vecs[i, :].T)
        elif self.learning == 'sgd':
            prediction = self.global_bias + self.user_bias[u] + self.item_bias[i]
            prediction += self.user_vecs[u, :].dot(self.item_vecs[i, :].T)
            return prediction
    
    def predict_all(self):
        """ Predict ratings for every user and item."""
        predictions = np.zeros((self.user_vecs.shape[0], 
                                self.item_vecs.shape[0]))
        for u in range(self.user_vecs.shape[0]):
            for i in range(self.item_vecs.shape[0]):
                predictions[u, i] = self.predict(u, i)
                
        return predictions
    
    def calculate_learning_curve(self, iter_array, test, learning_rate=0.1):
        """
        Keep track of MSE as a function of training iterations.
        
        Params
        ======
        iter_array : (list)
            List of numbers of iterations to train for each step of 
            the learning curve. e.g. [1, 5, 10, 20]
        test : (2D ndarray)
            Testing dataset (assumed to be user x item).
        
        The function creates two new class attributes:
        
        train_mse : (list)
            Training data MSE values for each value of iter_array
        test_mse : (list)
            Test data MSE values for each value of iter_array
        """
        iter_array.sort()
        self.train_mse =[]
        self.test_mse = []
        iter_diff = 0
        for (i, n_iter) in enumerate(iter_array):
            if self._v:
                print ('Iteration: {}'.format(n_iter))
            if i == 0:
                self.train(n_iter - iter_diff, learning_rate)
            else:
                self.partial_train(n_iter - iter_diff)

            predictions = self.predict_all()

            self.train_mse += [get_mse(predictions, self.ratings)]
            self.test_mse += [get_mse(predictions, test)]
            if self._v:
                print ('Train mse: ' + str(self.train_mse[-1]))
                print ('Test mse: ' + str(self.test_mse[-1]))
            iter_diff = n_iter

In [15]:
MF_SGD = ExplicitMF(train, 40, learning='sgd', verbose=True)
# iter_array = [1, 2, 5, 10, 25, 50, 100, 200]

iter_array = [200]
MF_SGD.calculate_learning_curve(iter_array, test, learning_rate=0.001)

Iteration: 200
	current iteration: 10
	current iteration: 20
	current iteration: 30
	current iteration: 40
	current iteration: 50
	current iteration: 60
	current iteration: 70
	current iteration: 80
	current iteration: 90
	current iteration: 100
	current iteration: 110
	current iteration: 120
	current iteration: 130
	current iteration: 140
	current iteration: 150
	current iteration: 160
	current iteration: 170
	current iteration: 180
	current iteration: 190
	current iteration: 200
Train mse: 0.400792173674417
Test mse: 0.921740000016754


##################################################################################################################

In [17]:
train.shape

(943, 1682)

In [18]:
test.shape

(943, 1682)

In [19]:
ratings

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

In [20]:
def get_sparsity(mat):
    print (str(n_users) + ' users')
    print (str(n_items) + ' items')
    sparsity = float(len(mat.nonzero()[0]))
    sparsity /= (mat.shape[0] * mat.shape[1])
    sparsity *= 100
    print ('Sparsity: {:4.2f}%'.format(sparsity))
    return sparsity

In [21]:
get_sparsity(ratings)

943 users
1682 items
Sparsity: 6.30%


6.304669364224531

In [22]:
get_sparsity(train)

943 users
1682 items
Sparsity: 5.71%


5.710139043178159

In [23]:
get_sparsity(test)

943 users
1682 items
Sparsity: 0.59%


0.5945303210463734

# ####################################### GANS ###################################################

In [24]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils import data as t_data
import torchvision.datasets as datasets
from torchvision import transforms

In [25]:
batch_size = 16

In [26]:
def make_some_noise():
    return torch.rand(batch_size,100)

In [27]:
make_some_noise()

tensor([[0.4573, 0.6817, 0.3278,  ..., 0.7098, 0.2105, 0.4574],
        [0.2063, 0.2336, 0.3870,  ..., 0.2125, 0.5887, 0.3218],
        [0.2768, 0.9493, 0.5432,  ..., 0.3495, 0.7503, 0.1425],
        ...,
        [0.0093, 0.2231, 0.4584,  ..., 0.0626, 0.4817, 0.6860],
        [0.0913, 0.9948, 0.6718,  ..., 0.8972, 0.9647, 0.5612],
        [0.5974, 0.5792, 0.1227,  ..., 0.7180, 0.6887, 0.6740]])

In [28]:
# defining generator class

class generator(nn.Module):
    
    def __init__(self, inp, out):
        
        super(generator, self).__init__()
        
        self.net = nn.Sequential(
                                 nn.Linear(inp,300),
                                 nn.ReLU(inplace=True),
                                 nn.Linear(300,1000),
                                 nn.ReLU(inplace=True),
                                 nn.Linear(1000,800),
                                 nn.ReLU(inplace=True),
                                 nn.Linear(800,out)
                                    )
        
    def forward(self, x):
        x = self.net(x)
        return x

In [29]:
# defining discriminator class

class discriminator(nn.Module):
    
    def __init__(self, inp, out):
        
        super(discriminator, self).__init__()
        
        self.net = nn.Sequential(
                                 nn.Linear(inp,300),
                                 nn.ReLU(inplace=True),
                                 nn.Linear(300,300),
                                 nn.ReLU(inplace=True),
                                 nn.Linear(300,200),
                                 nn.ReLU(inplace=True),
                                 nn.Linear(200,out),
                                 nn.Sigmoid()
                                    )
        
    def forward(self, x):
        x = self.net(x)
        return x

In [30]:
dis = discriminator(ratings.shape[1], 1)
gen = generator(100, ratings.shape[1])

In [31]:
dis

discriminator(
  (net): Sequential(
    (0): Linear(in_features=1682, out_features=300, bias=True)
    (1): ReLU(inplace)
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU(inplace)
    (4): Linear(in_features=300, out_features=200, bias=True)
    (5): ReLU(inplace)
    (6): Linear(in_features=200, out_features=1, bias=True)
    (7): Sigmoid()
  )
)

In [32]:
gen

generator(
  (net): Sequential(
    (0): Linear(in_features=100, out_features=300, bias=True)
    (1): ReLU(inplace)
    (2): Linear(in_features=300, out_features=1000, bias=True)
    (3): ReLU(inplace)
    (4): Linear(in_features=1000, out_features=800, bias=True)
    (5): ReLU(inplace)
    (6): Linear(in_features=800, out_features=1682, bias=True)
  )
)

In [33]:
d_steps = 100
g_steps = 100

criteriond1 = nn.BCELoss()
optimizerd1 = optim.SGD(dis.parameters(), lr=0.001, momentum=0.9)

criteriond2 = nn.BCELoss()
optimizerd2 = optim.SGD(gen.parameters(), lr=0.001, momentum=0.9)

printing_steps = 200

epochs = 50

In [34]:
def get_random_batch(mat, batch_size=16):
    rand_rows = np.random.randint(mat.shape[0], size=batch_size)
#     print(mat.shape, rand_rows)
#     print(mat[rand_rows].shape)
    return mat[rand_rows]
    
get_random_batch(train)

array([[4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [2., 4., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [5., 4., 3., ..., 0., 0., 0.],
       [5., 4., 0., ..., 0., 0., 0.]])

In [35]:
train = torch.autograd.Variable(torch.Tensor(train))

In [93]:
for epoch in range(epochs):
    
    print (epoch)

    # training discriminator
    for d_step in range(d_steps):
        dis.zero_grad()
        
        # training discriminator on real data
        real_rows = get_random_batch(train)
        discriminator_real_outputs = dis(real_rows)
   
        dis_real_loss = criteriond1(discriminator_real_outputs, Variable(torch.ones(batch_size,1)))
    
        dis_real_loss.backward()

        # training discriminator on data produced by generator
        z_vector = make_some_noise()
        #output from generator is generated        
        fake_rows = gen(z_vector).detach()
#         print(fake_rows[:20])
        dis_fake_out = dis(fake_rows)
        dis_fake_loss = criteriond1(dis_fake_out, Variable(torch.zeros(batch_size,1)))
        dis_fake_loss.backward()

        optimizerd1.step()
        
    # training generator
    for g_step in range(g_steps):
        gen.zero_grad()
        
        #generating data for input for generator
        z_vector = make_some_noise()
        
        fake_rows = gen(z_vector)
        print(fake_rows[:20])
        dis_out_gen_training = dis(fake_rows)
        gen_loss = criteriond2(dis_out_gen_training, Variable(torch.ones(batch_size,1)))
        gen_loss.backward()
        
        optimizerd2.step()
        

0
tensor([[ 0.0036, -0.0345,  0.0234,  ...,  0.0061, -0.0089,  0.0043],
        [ 0.0130, -0.0471,  0.0196,  ..., -0.0147, -0.0094, -0.0107],
        [ 0.0043, -0.0552,  0.0249,  ...,  0.0243, -0.0039, -0.0061],
        ...,
        [-0.0092, -0.0419,  0.0165,  ..., -0.0304, -0.0197, -0.0203],
        [ 0.0089, -0.0534,  0.0179,  ..., -0.0010, -0.0135, -0.0036],
        [-0.0037, -0.0374,  0.0163,  ..., -0.0018,  0.0049, -0.0043]],
       grad_fn=<SliceBackward>)
tensor([[-0.0027, -0.0253,  0.0181,  ...,  0.0011, -0.0131,  0.0037],
        [ 0.0055, -0.0409,  0.0121,  ...,  0.0006, -0.0085, -0.0271],
        [ 0.0068, -0.0423,  0.0132,  ...,  0.0046, -0.0076,  0.0017],
        ...,
        [ 0.0083, -0.0517,  0.0215,  ...,  0.0056, -0.0021, -0.0245],
        [ 0.0025, -0.0391,  0.0209,  ..., -0.0113, -0.0196, -0.0069],
        [ 0.0181, -0.0339,  0.0180,  ..., -0.0090, -0.0193, -0.0047]],
       grad_fn=<SliceBackward>)
tensor([[ 5.1557e-03, -4.6190e-02,  1.5709e-02,  ...,  1.4301e-02,

       grad_fn=<SliceBackward>)
tensor([[ 0.0331, -0.0567,  0.0313,  ..., -0.0064, -0.0148, -0.0216],
        [ 0.0229, -0.0551,  0.0309,  ...,  0.0005, -0.0092, -0.0230],
        [ 0.0322, -0.0417,  0.0209,  ..., -0.0312, -0.0154, -0.0075],
        ...,
        [ 0.0276, -0.0579,  0.0224,  ..., -0.0131,  0.0017, -0.0109],
        [ 0.0125, -0.0507,  0.0145,  ..., -0.0140, -0.0112, -0.0036],
        [ 0.0307, -0.0448,  0.0157,  ..., -0.0096, -0.0206, -0.0148]],
       grad_fn=<SliceBackward>)
tensor([[ 0.0087, -0.0560,  0.0057,  ..., -0.0030,  0.0031, -0.0040],
        [ 0.0233, -0.0401,  0.0257,  ...,  0.0059, -0.0085, -0.0062],
        [ 0.0385, -0.0564,  0.0284,  ..., -0.0321, -0.0191, -0.0204],
        ...,
        [ 0.0168, -0.0496,  0.0225,  ...,  0.0042, -0.0175, -0.0129],
        [ 0.0210, -0.0721,  0.0095,  ..., -0.0108, -0.0151, -0.0172],
        [ 0.0276, -0.0643,  0.0247,  ..., -0.0201, -0.0103, -0.0251]],
       grad_fn=<SliceBackward>)
tensor([[ 0.0150, -0.0550,  0.0115, 

       grad_fn=<SliceBackward>)
tensor([[ 0.0746, -0.0616,  0.0318,  ..., -0.0378, -0.0165, -0.0092],
        [ 0.0767, -0.0618,  0.0389,  ..., -0.0218, -0.0029, -0.0019],
        [ 0.0546, -0.0587,  0.0132,  ..., -0.0277, -0.0213, -0.0224],
        ...,
        [ 0.0636, -0.0631,  0.0273,  ..., -0.0322, -0.0264, -0.0109],
        [ 0.0659, -0.0587,  0.0084,  ..., -0.0288, -0.0107, -0.0198],
        [ 0.0630, -0.0744,  0.0215,  ..., -0.0179, -0.0180, -0.0225]],
       grad_fn=<SliceBackward>)
tensor([[ 0.0599, -0.0596,  0.0328,  ..., -0.0339, -0.0287, -0.0175],
        [ 0.0780, -0.0806,  0.0362,  ..., -0.0332, -0.0073, -0.0059],
        [ 0.0767, -0.0703,  0.0255,  ..., -0.0138, -0.0248, -0.0092],
        ...,
        [ 0.0852, -0.0800,  0.0230,  ..., -0.0520, -0.0192, -0.0034],
        [ 0.0905, -0.0660,  0.0272,  ..., -0.0422, -0.0349, -0.0147],
        [ 0.0838, -0.0648,  0.0172,  ..., -0.0542, -0.0346, -0.0137]],
       grad_fn=<SliceBackward>)
tensor([[ 0.0889, -0.0834,  0.0267, 

       grad_fn=<SliceBackward>)
tensor([[ 0.1600, -0.0871,  0.0329,  ..., -0.0699, -0.0532, -0.0030],
        [ 0.1600, -0.0872,  0.0152,  ..., -0.0652, -0.0551, -0.0170],
        [ 0.1709, -0.0942,  0.0270,  ..., -0.0749, -0.0530, -0.0023],
        ...,
        [ 0.1620, -0.0892,  0.0165,  ..., -0.0786, -0.0263, -0.0031],
        [ 0.1594, -0.0789,  0.0448,  ..., -0.0698, -0.0415, -0.0013],
        [ 0.1770, -0.0978,  0.0234,  ..., -0.0815, -0.0364, -0.0012]],
       grad_fn=<SliceBackward>)
tensor([[ 0.1640, -0.0987,  0.0382,  ..., -0.0725, -0.0474, -0.0081],
        [ 0.1733, -0.0931,  0.0158,  ..., -0.0827, -0.0470, -0.0195],
        [ 0.1635, -0.0835,  0.0408,  ..., -0.0723, -0.0584, -0.0060],
        ...,
        [ 0.1490, -0.0842,  0.0212,  ..., -0.0488, -0.0112,  0.0025],
        [ 0.1875, -0.0969,  0.0389,  ..., -0.0735, -0.0621, -0.0074],
        [ 0.1702, -0.1125,  0.0315,  ..., -0.0790, -0.0477, -0.0077]],
       grad_fn=<SliceBackward>)
tensor([[ 0.1712, -0.0814,  0.0227, 

       grad_fn=<SliceBackward>)
tensor([[ 2.8046e-01, -1.0635e-01,  6.1545e-02,  ..., -1.1043e-01,
         -5.2427e-02,  1.1965e-02],
        [ 2.9268e-01, -1.1711e-01,  3.1985e-02,  ..., -1.1693e-01,
         -6.5933e-02, -1.9256e-02],
        [ 3.1242e-01, -1.3343e-01,  2.6502e-02,  ..., -1.3278e-01,
         -7.6139e-02,  4.0078e-03],
        ...,
        [ 3.2150e-01, -1.2608e-01,  5.5418e-02,  ..., -1.3901e-01,
         -7.1338e-02,  2.8817e-04],
        [ 3.1615e-01, -1.3541e-01,  3.0440e-02,  ..., -1.4894e-01,
         -1.0302e-01, -2.6356e-03],
        [ 3.0843e-01, -1.1916e-01,  2.9131e-02,  ..., -1.5536e-01,
         -8.2406e-02, -3.5232e-03]], grad_fn=<SliceBackward>)
tensor([[ 0.3009, -0.1181,  0.0449,  ..., -0.1197, -0.0810,  0.0028],
        [ 0.3087, -0.1036,  0.0405,  ..., -0.1086, -0.0741, -0.0135],
        [ 0.3398, -0.1339,  0.0375,  ..., -0.1643, -0.1045,  0.0089],
        ...,
        [ 0.3076, -0.1213,  0.0585,  ..., -0.1196, -0.0715, -0.0064],
        [ 0.2914, 

       grad_fn=<SliceBackward>)
tensor([[ 0.3758, -0.1279,  0.0475,  ..., -0.1575, -0.1135,  0.0099],
        [ 0.4025, -0.1429,  0.0484,  ..., -0.1648, -0.0930,  0.0055],
        [ 0.4117, -0.1375,  0.0260,  ..., -0.1724, -0.1080,  0.0188],
        ...,
        [ 0.3772, -0.1312,  0.0537,  ..., -0.1378, -0.0860, -0.0006],
        [ 0.3922, -0.1265,  0.0683,  ..., -0.1538, -0.0879,  0.0051],
        [ 0.4129, -0.1286,  0.0503,  ..., -0.1696, -0.1067,  0.0224]],
       grad_fn=<SliceBackward>)
tensor([[ 3.9372e-01, -1.2907e-01,  4.8914e-02,  ..., -1.4616e-01,
         -9.0399e-02,  1.0825e-02],
        [ 4.0251e-01, -1.4759e-01,  4.3844e-02,  ..., -1.6604e-01,
         -1.0072e-01,  1.9537e-03],
        [ 3.8999e-01, -1.2805e-01,  5.6418e-02,  ..., -1.6790e-01,
         -9.7963e-02,  1.3059e-02],
        ...,
        [ 3.9506e-01, -1.3433e-01,  4.2885e-02,  ..., -1.5826e-01,
         -1.0265e-01, -2.6084e-04],
        [ 4.2709e-01, -1.4623e-01,  6.1748e-02,  ..., -1.7020e-01,
         -

       grad_fn=<SliceBackward>)
tensor([[ 0.4639, -0.1439,  0.0495,  ..., -0.1691, -0.1168,  0.0105],
        [ 0.4666, -0.1262,  0.0489,  ..., -0.1781, -0.1096, -0.0039],
        [ 0.4671, -0.1454,  0.0723,  ..., -0.1826, -0.1090,  0.0141],
        ...,
        [ 0.4296, -0.1321,  0.0616,  ..., -0.1665, -0.1001,  0.0227],
        [ 0.4641, -0.1370,  0.0469,  ..., -0.1835, -0.1158,  0.0115],
        [ 0.4901, -0.1511,  0.0576,  ..., -0.1987, -0.1020,  0.0191]],
       grad_fn=<SliceBackward>)
tensor([[ 0.4702, -0.1353,  0.0571,  ..., -0.1864, -0.1073,  0.0170],
        [ 0.4874, -0.1515,  0.0497,  ..., -0.1850, -0.1049,  0.0126],
        [ 0.4855, -0.1514,  0.0627,  ..., -0.1787, -0.1241,  0.0176],
        ...,
        [ 0.4659, -0.1471,  0.0498,  ..., -0.1974, -0.1067,  0.0288],
        [ 0.4190, -0.1255,  0.0418,  ..., -0.1528, -0.0815,  0.0055],
        [ 0.3925, -0.1294,  0.0512,  ..., -0.1565, -0.0882,  0.0234]],
       grad_fn=<SliceBackward>)
tensor([[ 0.4325, -0.1316,  0.0379, 

       grad_fn=<SliceBackward>)
tensor([[ 0.6775, -0.0688,  0.0433,  ..., -0.1238, -0.0462,  0.0510],
        [ 0.6972, -0.0773,  0.0431,  ..., -0.1331, -0.0390,  0.0592],
        [ 0.7630, -0.0827,  0.0539,  ..., -0.1597, -0.0607,  0.0594],
        ...,
        [ 0.7934, -0.0800,  0.0481,  ..., -0.1661, -0.0713,  0.0594],
        [ 0.8052, -0.0765,  0.0484,  ..., -0.1602, -0.0728,  0.0613],
        [ 0.6618, -0.0781,  0.0359,  ..., -0.1448, -0.0661,  0.0542]],
       grad_fn=<SliceBackward>)
tensor([[ 0.7200, -0.0733,  0.0434,  ..., -0.1420, -0.0543,  0.0628],
        [ 0.7031, -0.0764,  0.0422,  ..., -0.1276, -0.0541,  0.0528],
        [ 0.6895, -0.0589,  0.0411,  ..., -0.1238, -0.0452,  0.0638],
        ...,
        [ 0.8047, -0.0681,  0.0568,  ..., -0.1601, -0.0685,  0.0661],
        [ 0.7098, -0.0699,  0.0379,  ..., -0.1368, -0.0491,  0.0431],
        [ 0.7000, -0.0617,  0.0451,  ..., -0.1398, -0.0624,  0.0570]],
       grad_fn=<SliceBackward>)
tensor([[ 0.7827, -0.0680,  0.0581, 

       grad_fn=<SliceBackward>)
tensor([[ 1.0897e+00, -1.0902e-03,  4.7440e-02,  ..., -1.4185e-01,
         -2.1550e-02,  9.7181e-02],
        [ 1.0402e+00, -1.0521e-02,  3.8866e-02,  ..., -1.4053e-01,
         -2.7414e-02,  9.3044e-02],
        [ 9.7022e-01, -1.3982e-02,  4.9211e-02,  ..., -1.1395e-01,
         -2.6104e-02,  1.0781e-01],
        ...,
        [ 1.0620e+00, -5.1744e-06,  4.2424e-02,  ..., -1.1410e-01,
         -3.6111e-02,  9.7310e-02],
        [ 1.0461e+00, -1.3200e-02,  4.6198e-02,  ..., -1.2770e-01,
         -3.2300e-02,  9.6993e-02],
        [ 9.5671e-01, -5.0588e-03,  3.7844e-02,  ..., -1.1505e-01,
         -2.5714e-02,  8.6218e-02]], grad_fn=<SliceBackward>)
tensor([[ 1.0955, -0.0099,  0.0336,  ..., -0.1324, -0.0202,  0.0983],
        [ 0.9712, -0.0053,  0.0450,  ..., -0.1126, -0.0151,  0.1008],
        [ 1.0530, -0.0090,  0.0379,  ..., -0.1352, -0.0288,  0.0899],
        ...,
        [ 1.0478, -0.0087,  0.0516,  ..., -0.1286, -0.0171,  0.0964],
        [ 1.1427, 

         -2.4921e-02,  9.0330e-02]], grad_fn=<SliceBackward>)
tensor([[ 1.1752e+00,  3.1920e-02,  4.4709e-02,  ..., -1.2168e-01,
         -1.8014e-02,  1.2407e-01],
        [ 9.9503e-01,  4.8466e-03,  4.3335e-02,  ..., -8.4068e-02,
         -1.6361e-02,  7.7213e-02],
        [ 1.1321e+00,  2.0252e-02,  2.8607e-02,  ..., -1.0535e-01,
         -2.8387e-04,  1.0435e-01],
        ...,
        [ 1.1671e+00,  1.8145e-02,  5.2150e-02,  ..., -1.1179e-01,
         -2.1394e-02,  1.0301e-01],
        [ 1.1721e+00, -7.8238e-03,  3.5726e-02,  ..., -1.1544e-01,
         -1.5530e-02,  1.0477e-01],
        [ 1.1979e+00,  1.0425e-03,  3.7889e-02,  ..., -1.1990e-01,
         -2.2383e-02,  9.5747e-02]], grad_fn=<SliceBackward>)
tensor([[ 1.1836,  0.0261,  0.0488,  ..., -0.1441, -0.0243,  0.1141],
        [ 1.1910,  0.0037,  0.0464,  ..., -0.1036,  0.0090,  0.1149],
        [ 1.1057,  0.0141,  0.0473,  ..., -0.1194, -0.0152,  0.1008],
        ...,
        [ 1.1764,  0.0111,  0.0501,  ..., -0.1138,  0.0107

       grad_fn=<SliceBackward>)
tensor([[ 1.3526e+00,  2.3901e-02,  4.4986e-02,  ..., -1.3332e-01,
         -2.1467e-02,  1.3927e-01],
        [ 1.1243e+00,  3.1706e-02,  3.5026e-02,  ..., -1.0596e-01,
          4.6248e-04,  1.0971e-01],
        [ 1.2301e+00,  2.8743e-02,  4.4420e-02,  ..., -1.1044e-01,
         -3.3467e-03,  9.7062e-02],
        ...,
        [ 1.2285e+00,  2.3489e-02,  4.2752e-02,  ..., -1.2626e-01,
         -2.3873e-02,  1.4761e-01],
        [ 1.0688e+00,  8.4112e-03,  4.7822e-02,  ..., -1.0713e-01,
         -9.1805e-03,  1.1547e-01],
        [ 1.2045e+00,  1.7605e-02,  4.9656e-02,  ..., -1.1043e-01,
         -2.1478e-02,  9.5749e-02]], grad_fn=<SliceBackward>)
tensor([[ 1.2123e+00,  9.8553e-03,  6.0913e-02,  ..., -1.1290e-01,
         -2.4083e-02,  1.1336e-01],
        [ 1.2028e+00,  2.2476e-02,  4.8833e-02,  ..., -1.0474e-01,
         -2.0086e-02,  1.2306e-01],
        [ 1.1616e+00,  3.1615e-02,  3.0415e-02,  ..., -1.1520e-01,
         -1.2001e-02,  1.0190e-01],
  

       grad_fn=<SliceBackward>)
tensor([[ 1.4031e+00,  2.4923e-02,  4.4681e-02,  ..., -1.2787e-01,
         -2.9357e-03,  1.4951e-01],
        [ 1.3047e+00,  1.7502e-02,  4.5721e-02,  ..., -1.1178e-01,
         -6.0248e-04,  1.3343e-01],
        [ 1.2733e+00,  2.7584e-02,  5.5686e-02,  ..., -1.1036e-01,
          3.1522e-03,  1.2327e-01],
        ...,
        [ 1.4966e+00,  3.6126e-02,  5.4457e-02,  ..., -1.3187e-01,
         -1.0600e-02,  1.2927e-01],
        [ 1.2584e+00,  2.9731e-02,  5.2568e-02,  ..., -9.8643e-02,
         -7.2884e-03,  1.2140e-01],
        [ 1.4083e+00,  3.9719e-02,  4.2899e-02,  ..., -1.4637e-01,
          3.3675e-03,  1.4913e-01]], grad_fn=<SliceBackward>)
tensor([[ 1.2802e+00,  2.6729e-02,  3.3212e-02,  ..., -1.1407e-01,
          1.6352e-03,  1.1048e-01],
        [ 1.1674e+00,  2.3422e-02,  3.5713e-02,  ..., -1.1048e-01,
         -1.3891e-02,  1.0469e-01],
        [ 1.2755e+00,  8.2822e-03,  4.2327e-02,  ..., -1.0657e-01,
         -1.4316e-02,  1.2038e-01],
  

       grad_fn=<SliceBackward>)
tensor([[ 1.2377e+00,  2.0811e-02,  4.9885e-02,  ..., -1.0900e-01,
          7.1824e-04,  1.1327e-01],
        [ 1.3074e+00,  3.8030e-02,  4.1793e-02,  ..., -1.2034e-01,
         -2.4003e-02,  1.3219e-01],
        [ 1.3364e+00,  3.5242e-02,  5.3979e-02,  ..., -1.1257e-01,
          1.2123e-02,  1.2421e-01],
        ...,
        [ 1.4103e+00,  3.9093e-02,  4.5065e-02,  ..., -1.2391e-01,
         -5.9184e-03,  1.3939e-01],
        [ 1.2951e+00,  2.9722e-02,  4.1007e-02,  ..., -9.5871e-02,
         -1.0767e-02,  1.2243e-01],
        [ 1.2598e+00,  4.0571e-02,  4.5153e-02,  ..., -9.9009e-02,
         -1.4120e-02,  1.0212e-01]], grad_fn=<SliceBackward>)
tensor([[ 1.3545,  0.0322,  0.0223,  ..., -0.1145,  0.0072,  0.1297],
        [ 1.5097,  0.0332,  0.0472,  ..., -0.1460, -0.0068,  0.1519],
        [ 1.3617,  0.0359,  0.0470,  ..., -0.1283,  0.0019,  0.1304],
        ...,
        [ 1.2782,  0.0284,  0.0395,  ..., -0.1061,  0.0098,  0.1236],
        [ 1.3012, 

       grad_fn=<SliceBackward>)
tensor([[1.7213, 0.2234, 0.0404,  ..., 0.0838, 0.1514, 0.1057],
        [1.6596, 0.1958, 0.0449,  ..., 0.0591, 0.1470, 0.1144],
        [1.6608, 0.1945, 0.0335,  ..., 0.0519, 0.1422, 0.1040],
        ...,
        [1.8111, 0.2343, 0.0560,  ..., 0.0877, 0.1706, 0.1356],
        [1.6870, 0.2245, 0.0353,  ..., 0.0614, 0.1521, 0.1144],
        [1.7066, 0.2107, 0.0291,  ..., 0.0737, 0.1240, 0.1118]],
       grad_fn=<SliceBackward>)
tensor([[1.6998, 0.2010, 0.0332,  ..., 0.0816, 0.1442, 0.1167],
        [1.7605, 0.2108, 0.0386,  ..., 0.0798, 0.1525, 0.0971],
        [1.7058, 0.2101, 0.0442,  ..., 0.0730, 0.1499, 0.1184],
        ...,
        [1.6323, 0.2043, 0.0506,  ..., 0.0818, 0.1378, 0.0959],
        [1.7832, 0.2158, 0.0433,  ..., 0.0846, 0.1597, 0.1075],
        [1.6770, 0.2007, 0.0258,  ..., 0.0794, 0.1596, 0.0853]],
       grad_fn=<SliceBackward>)
tensor([[1.6645, 0.2175, 0.0467,  ..., 0.0776, 0.1530, 0.0981],
        [1.8165, 0.2263, 0.0582,  ..., 0.082

       grad_fn=<SliceBackward>)
tensor([[1.9342, 0.2983, 0.0429,  ..., 0.1964, 0.2245, 0.1000],
        [1.8489, 0.2888, 0.0678,  ..., 0.1737, 0.2144, 0.0934],
        [2.0313, 0.3276, 0.0664,  ..., 0.1832, 0.2723, 0.1068],
        ...,
        [1.8296, 0.2908, 0.0554,  ..., 0.1648, 0.2250, 0.1095],
        [1.7188, 0.2670, 0.0468,  ..., 0.1477, 0.2010, 0.1098],
        [1.8286, 0.2954, 0.0609,  ..., 0.1618, 0.2106, 0.0933]],
       grad_fn=<SliceBackward>)
tensor([[1.8412, 0.2942, 0.0519,  ..., 0.1704, 0.2189, 0.1085],
        [1.6203, 0.2490, 0.0409,  ..., 0.1588, 0.2031, 0.0557],
        [1.7245, 0.2670, 0.0597,  ..., 0.1756, 0.2012, 0.0870],
        ...,
        [1.9881, 0.3173, 0.0533,  ..., 0.1649, 0.2161, 0.0988],
        [1.8624, 0.3028, 0.0579,  ..., 0.1788, 0.2181, 0.0886],
        [1.8373, 0.2996, 0.0510,  ..., 0.1653, 0.2196, 0.0996]],
       grad_fn=<SliceBackward>)
tensor([[2.0169, 0.3250, 0.0548,  ..., 0.1861, 0.2372, 0.0882],
        [2.0171, 0.3356, 0.0685,  ..., 0.193

       grad_fn=<SliceBackward>)
tensor([[1.8050, 0.2941, 0.0567,  ..., 0.1923, 0.2130, 0.0838],
        [1.9159, 0.2994, 0.0480,  ..., 0.1900, 0.2405, 0.0778],
        [1.9661, 0.3160, 0.0450,  ..., 0.1923, 0.2334, 0.0855],
        ...,
        [1.7051, 0.2596, 0.0502,  ..., 0.1644, 0.2001, 0.0903],
        [1.9251, 0.3080, 0.0398,  ..., 0.1803, 0.2194, 0.0906],
        [2.0351, 0.3321, 0.0439,  ..., 0.1813, 0.2486, 0.1185]],
       grad_fn=<SliceBackward>)
tensor([[2.1366, 0.3471, 0.0511,  ..., 0.2073, 0.2396, 0.1129],
        [1.7450, 0.2882, 0.0470,  ..., 0.1608, 0.2102, 0.0931],
        [1.9704, 0.3099, 0.0393,  ..., 0.1752, 0.2380, 0.0936],
        ...,
        [2.1096, 0.3617, 0.0779,  ..., 0.2007, 0.2579, 0.1046],
        [1.8531, 0.2904, 0.0565,  ..., 0.1756, 0.2243, 0.0802],
        [1.9296, 0.2899, 0.0695,  ..., 0.1729, 0.2308, 0.1109]],
       grad_fn=<SliceBackward>)
tensor([[1.8010, 0.2868, 0.0438,  ..., 0.1755, 0.2244, 0.0956],
        [2.1018, 0.3511, 0.0553,  ..., 0.195

       grad_fn=<SliceBackward>)
tensor([[ 1.8752,  0.6963,  0.0772,  ...,  0.0652,  0.1769, -0.0599],
        [ 2.0204,  0.7751,  0.0822,  ...,  0.0668,  0.2164, -0.0427],
        [ 1.9313,  0.7345,  0.0590,  ...,  0.0768,  0.1843, -0.0643],
        ...,
        [ 1.9810,  0.7650,  0.0668,  ...,  0.0797,  0.1921, -0.0547],
        [ 2.0181,  0.7766,  0.0646,  ...,  0.0807,  0.2145, -0.0626],
        [ 1.9310,  0.7419,  0.0679,  ...,  0.0770,  0.1904, -0.0545]],
       grad_fn=<SliceBackward>)
tensor([[ 1.6859,  0.6317,  0.0525,  ...,  0.0520,  0.1775, -0.0451],
        [ 1.8190,  0.6936,  0.0749,  ...,  0.0745,  0.1941, -0.0454],
        [ 1.7813,  0.6639,  0.0514,  ...,  0.0645,  0.1724, -0.0356],
        ...,
        [ 1.8090,  0.6915,  0.0767,  ...,  0.0672,  0.1878, -0.0493],
        [ 2.0096,  0.7748,  0.0675,  ...,  0.0541,  0.1820, -0.0415],
        [ 1.9337,  0.7204,  0.0707,  ...,  0.0512,  0.1815, -0.0496]],
       grad_fn=<SliceBackward>)
tensor([[ 1.8816,  0.7177,  0.0556, 

KeyboardInterrupt: 