### 0. Imports

In [2]:
import torch
import torch.nn as nn
import pickle
import numpy as np
from torch.utils.data import Dataset , DataLoader
import sys
#sys.path.append("../snpe/")
#print(sys.path)
#from snpe.simulations import simulator_class, marketplace_simulator_class

### A. Load the data: using simulational output

In [4]:
'''
Loading an example marketplace simulation. 
The idea is to replace this with built in methods from the snpe repo's simulation classes.
'''

with open('sample_simulational_output.pkl', 'rb') as f:
    raw_data = pickle.load(f)
    
'''
Using th function below to find the proudct with the most ratings for training. By now we'll be using just
one product. 
'''
    
def find_it(sim_output):
    '''
    Returns the timeseries histogram of the product with the most reviews within
    the output of a marketplace simulation (timeseries)
    '''
    max_length = []
    for i in range(len(sim_output.get('simulations'))):
        for a in range(len(sim_output.get('simulations')[0])):
            if len(sim_output.get('simulations')[i][a]) > len(max_length):
                max_length = sim_output.get('simulations')[i][a]
    return max_length 

'''
Applying the function and taking a look at the first rows of the result
'''

raw_prod = find_it(raw_data)
print("Sample product has received of a total of " + str(len(raw_prod)) + " reviews")
print("Displaying the ten first elements of the time series:")
print(raw_prod[0:10])
print("Displaying the last ten elements of the time series:")
print(raw_prod[-10:])

Sample product has received of a total of 521 reviews
Displaying the ten first elements of the time series:
[[1. 1. 1. 1. 1.]
 [2. 1. 1. 1. 1.]
 [3. 1. 1. 1. 1.]
 [3. 1. 2. 1. 1.]
 [3. 1. 3. 1. 1.]
 [4. 1. 3. 1. 1.]
 [4. 1. 4. 1. 1.]
 [5. 1. 4. 1. 1.]
 [5. 1. 5. 1. 1.]
 [5. 1. 6. 1. 1.]]
Displaying the last ten elements of the time series:
[[ 52.   6. 430.   1.  27.]
 [ 52.   6. 431.   1.  27.]
 [ 53.   6. 431.   1.  27.]
 [ 53.   6. 432.   1.  27.]
 [ 53.   6. 433.   1.  27.]
 [ 53.   6. 434.   1.  27.]
 [ 53.   6. 435.   1.  27.]
 [ 53.   6. 436.   1.  27.]
 [ 53.   6. 437.   1.  27.]
 [ 54.   6. 437.   1.  27.]]


Representing the number of ratings for each value as a proportion of the total (approx): [0.1, 0.01, 0.83, 0.001, 0.05]

### B. Data pre-processing, Dataset and DataLoader

In [3]:
'''
Taking the first differences of the raw data.  
'''

raw_input = [raw_prod[i] - raw_prod[i-1] for i in range(len(raw_prod))]
raw_input = raw_input[1:]
raw_input = np.stack(raw_input, axis=0 )
raw_input

array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       ...,
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.]])

A preliminary comment on the foloowing cells below:
In pytorch you can either import pre existing datasets or use your own. To use your own you have to define it as aninstance of the dataset class, including three functions: `__init__, __len__` and `__getitem__` as the guy does below.

- Dataset stores the samples and their corresponding labels, and DataLoader wraps an iterable around the Dataset to enable easy access to the samples.

- read more about this in: 
https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
- Sliding help: https://discuss.pytorch.org/t/dataloader-for-a-lstm-model-with-a-sliding-window/22235 . The SlidingDataset has been defined according to the code proposed by the user in this previous link.

In [4]:
class SlidingDataset(Dataset):
    '''
    This SlidingDataset class is conformed so each input is comprised of the last N observations (from t-n to t) 
    of the series, where n  is the size of the sliding window, and the target is the corresponding observation in 
    t+1.
    '''
    def __init__(self, data, window):
        self.data = data
        #self.data = torch.from_numpy(data)
        self.window = window
        
    def __len__(self): 
        '''
        The __len__ function returns the number of samples in our dataset. In this case intuitively we have
        a number of samples equal to the total number of elements in the time series minus the size of the 
        sliding window. (i.e. How many previous observations conform each input)
        '''
        return len(self.data)-self.window

    def __getitem__(self, index): 
        return  self.data[index : index+self.window], self.data[index+self.window]

In [5]:
'''
Dividing the data into train and test set (Validation is to be added ASAP) and transforming in into tensors
'''
training_size=int(len(raw_input)*0.6)
validation_size=int(len(raw_input)*0.2)
#test_size=int(len(raw_input)*0.2)
train_data, validation, test_data= torch.from_numpy(raw_input[0:training_size,:]),torch.from_numpy(raw_input[training_size:training_size+validation_size,:]), torch.from_numpy(raw_input[training_size+validation_size:len(raw_input),:])

'''
Feeding the tensors to the sliding dataset class. Besides the actual data, the other provided argument is the size
of the sliding windows which in this case is ten.
'''    
train_dataset = SlidingDataset(train_data, 10) # sliding window = 10
test_dataset = SlidingDataset(test_data, 10) # sliding window = 10

'''
The next step is to feed the SlidingDataset objects to the DataLoader class so these can be finally 
provided to the model. In this case these are shuffled and a batch size of 32 observations is defined.
'''
batch_size = 32 
train_dataloader = DataLoader(train_dataset,batch_size, shuffle = True) #,drop_last=True)
test_dataloader = DataLoader(test_dataset,batch_size, shuffle = True)  #,drop_last=True)

### C. A first version of the LSTM model

Definitions of the parametors below taken from pytorch doc. See: https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
- __Input_dim__: The number of expected features in the input x 
- __Hidden_dim__: The number of features in the hidden state h 
- __Layer_dim__: Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results. Default: 1



In [6]:
class LSTM_0(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim):
        super(LSTM_0, self).__init__()
        
        self.input_dim = input_dim # n of expected features in the input
        self.hidden_dim = hidden_dim # Hidden dimensions - n of features in hidden state
        self.layer_dim = layer_dim  # Number of hidden layers 
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim) #LSTM
        self.readout = nn.Softmax(dim=0) # Readout layer - translating from hidden space to rating space, dim = dimension that has to add up to 1
        #self.output_layer = nn.Linear(hidden_dim,input_dim)

    def forward(self, x):
        # Initialize hidden state & cell states with zeros 
        # theoretically not 100% necessary - made by default for zeros
        h0 = torch.zeros(self.layer_dim, 10, self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.layer_dim, 10, self.hidden_dim).requires_grad_()

        out, (hn, cn) = self.lstm(x.double(), (h0.double(),c0.double()))
        #out, (hn, cn) = self.lstm(x)
        final_out = self.readout(out[-1][-1])
 
        return out, hn, cn, final_out

### D. Training loop

In [22]:
def LSTM_train(dataloader):    
    model.train()
    for batch , item in enumerate(dataloader):
        x , y = item
        x = x.to(device)
        y = y.to(device)
        out, hn, cn, final_out = model(x)
        checkme = (out, (hn, cn), final_out)
        ''' Prints for closer inspection '''
        #print("Softmax-processed output -------------")
        #print(checkme[2])
        #print("Last tensor from last cell state (= unprocessed output) -------------")
        #print(out[-1][-1])
        #print("###")
        loss = criterion(out[-1][-1], torch.argmax(y[-1]))
        #loss = criterion(hn[-1][-1], torch.argmax(y[-1]))
        hn = hn.detach() # given the nature of RNN we detach the cn and hn as the are not parameters to be updated (Remove from computational graph)
        cn = cn.detach() # given the nature of RNN we detach the cn and hn as the are not parameters to be updated (Remove from computational graph)
        optimizer.zero_grad() #clears the gradient to 0s
        loss.backward() #calculates the partial derivatives (updates the gradient)
        optimizer.step() #updates the parameters from the gradient
    
        if batch == len(dataloader)-1:
            loss = loss.item()
            print(f"train loss: {loss:>7f} ")

def LSTM_test(dataloader):
    model.eval()
    for batch , item in enumerate(dataloader):
        x , y = item
        x = x.to(device)
        y = y.to(device)
        out, hn, cn, final_out = model(x)
        loss = criterion(out[-1][-1], torch.argmax(y[-1]))
        #loss = criterion(hn[-1][-1], torch.argmax(y[-1]))
        
        if batch == len(dataloader)-1:
            loss = loss.item()
            print(f"test loss: {loss:>7f}")

D.1 -> Brief

In [9]:
'''
Look here -> https://cnvrg.io/pytorch-lstm/
'''
input_dim = 5 
hidden_size = 5
num_layers = 1

device = "cuda" if torch.cuda.is_available() else "cpu"
model = LSTM_0(input_dim , hidden_size , num_layers).to(device)
learning_rate=1e-3

criterion = torch.nn.CrossEntropyLoss()    
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate) 
model = model.double()


num_epochs = 200
for epoch in range(num_epochs): 
    print("epoch " + str(epoch))
    LSTM_train(train_dataloader)
    LSTM_test(test_dataloader)
    print('#####################')    

epoch 0
train loss: 1.310431 
test loss: 1.210353
#####################
epoch 1
train loss: 1.748020 
test loss: 1.157299
#####################
epoch 2
train loss: 1.954507 
test loss: 1.087948
#####################
epoch 3
train loss: 1.048563 
test loss: 1.027377
#####################
epoch 4
train loss: 0.928779 
test loss: 0.917326
#####################
epoch 5
train loss: 1.060510 
test loss: 0.871691
#####################
epoch 6
train loss: 0.831037 
test loss: 0.821833
#####################
epoch 7
train loss: 0.820170 
test loss: 0.775457
#####################
epoch 8
train loss: 0.751845 
test loss: 0.743559
#####################
epoch 9
train loss: 0.716814 
test loss: 0.708380
#####################
epoch 10
train loss: 0.732037 
test loss: 0.694288
#####################
epoch 11
train loss: 0.674424 
test loss: 0.670393
#####################
epoch 12
train loss: 0.681278 
test loss: 0.663717
#####################
epoch 13
train loss: 1.994375 
test loss: 1.923478
##########

train loss: 0.458784 
test loss: 0.457197
#####################
epoch 115
train loss: 0.458569 
test loss: 0.457283
#####################
epoch 116
train loss: 0.456751 
test loss: 0.457214
#####################
epoch 117
train loss: 0.457391 
test loss: 0.457108
#####################
epoch 118
train loss: 0.456739 
test loss: 0.456399
#####################
epoch 119
train loss: 2.303066 
test loss: 0.456748
#####################
epoch 120
train loss: 0.465311 
test loss: 2.375871
#####################
epoch 121
train loss: 2.376911 
test loss: 0.453637
#####################
epoch 122
train loss: 0.455881 
test loss: 0.453774
#####################
epoch 123
train loss: 0.455881 
test loss: 2.306045
#####################
epoch 124
train loss: 2.297686 
test loss: 0.456214
#####################
epoch 125
train loss: 0.456154 
test loss: 0.455710
#####################
epoch 126
train loss: 2.298769 
test loss: 0.455915
#####################
epoch 127
train loss: 0.455390 
test loss: 0.455

D.2 -> Long

In [23]:
'''
Look here -> https://cnvrg.io/pytorch-lstm/
'''
input_dim = 5 
hidden_size = 5
num_layers = 1

device = "cuda" if torch.cuda.is_available() else "cpu"
model = LSTM_0(input_dim , hidden_size , num_layers).to(device)
learning_rate=1e-3

criterion = torch.nn.CrossEntropyLoss()    
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate) 
model = model.double()


num_epochs = 200
for epoch in range(num_epochs): 
    print("epoch " + str(epoch))
    LSTM_train(train_dataloader)
    LSTM_test(test_dataloader)
    print('#####################')

epoch 0
Softmax-processed output -------------
tensor([0.2697, 0.1410, 0.2195, 0.1877, 0.1821], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([ 0.1942, -0.4543, -0.0120, -0.1683, -0.1985], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.2480, 0.1578, 0.2327, 0.1946, 0.1668], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([ 0.0935, -0.3587,  0.0299, -0.1488, -0.3029], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.2537, 0.1311, 0.2229, 0.1917, 0.2006], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([ 0.0943, -0.5665, -0.0353, -0.1862, -0.1407], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-pro

Softmax-processed output -------------
tensor([0.2403, 0.1391, 0.3165, 0.1434, 0.1608], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.0606, -0.6073,  0.2148, -0.5770, -0.4626], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.2374, 0.1391, 0.3216, 0.1418, 0.1602], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.0772, -0.6121,  0.2262, -0.5929, -0.4709], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.2351, 0.1390, 0.3244, 0.1417, 0.1598], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.0893, -0.6149,  0.2328, -0.5956, -0.4756], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

tensor([-0.1979, -0.6715,  0.3710, -0.6820, -0.6109], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.2127, 0.1330, 0.3832, 0.1312, 0.1399], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.2059, -0.6751,  0.3828, -0.6890, -0.6244], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.2099, 0.1331, 0.3854, 0.1313, 0.1403], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.2215, -0.6765,  0.3863, -0.6902, -0.6243], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.2062, 0.1330, 0.3915, 0.1307, 0.1386], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.2404, -0

Softmax-processed output -------------
tensor([0.1654, 0.1271, 0.4589, 0.1235, 0.1252], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.4781, -0.7415,  0.5425, -0.7703, -0.7560], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1666, 0.1270, 0.4575, 0.1236, 0.1253], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.4707, -0.7427,  0.5392, -0.7691, -0.7562], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.781975 
test loss: 0.770479
#####################
epoch 9
Softmax-processed output -------------
tensor([0.1585, 0.1451, 0.4439, 0.1245, 0.1280], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.5220, -0.6107,  0.5078, -0.7635, -0.7362], dtype=

Softmax-processed output -------------
tensor([0.1532, 0.1224, 0.4869, 0.1189, 0.1187], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.5523, -0.7767,  0.6043, -0.8052, -0.8070], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1505, 0.1225, 0.4899, 0.1188, 0.1184], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.5702, -0.7764,  0.6100, -0.8070, -0.8103], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1499, 0.1223, 0.4911, 0.1186, 0.1182], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.5741, -0.7778,  0.6127, -0.8083, -0.8119], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1396, 0.1188, 0.5126, 0.1148, 0.1142], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.6393, -0.8004,  0.6614, -0.8347, -0.8400], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1393, 0.1186, 0.5134, 0.1147, 0.1141], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.6412, -0.8014,  0.6634, -0.8356, -0.8407], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1317, 0.1119, 0.5159, 0.1168, 0.1237], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.6623, -0.8256,  0.7029, -0.7826, -0.7248], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

tensor([0.1196, 0.1169, 0.5342, 0.1152, 0.1141], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7538, -0.7763,  0.7433, -0.7909, -0.8003], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1255, 0.1088, 0.5372, 0.1118, 0.1168], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7085, -0.8513,  0.7457, -0.8239, -0.7804], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1324, 0.1141, 0.5328, 0.1108, 0.1099], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.6799, -0.8292,  0.7123, -0.8580, -0.8659], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1328, 0.1

Softmax-processed output -------------
tensor([0.1302, 0.1121, 0.5423, 0.1082, 0.1072], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.6936, -0.8426,  0.7336, -0.8784, -0.8880], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1310, 0.1136, 0.5393, 0.1085, 0.1076], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.6883, -0.8305,  0.7269, -0.8762, -0.8853], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1300, 0.1119, 0.5431, 0.1080, 0.1070], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.6933, -0.8439,  0.7361, -0.8792, -0.8881], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1273, 0.1103, 0.5504, 0.1062, 0.1058], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7100, -0.8531,  0.7542, -0.8910, -0.8948], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1277, 0.1102, 0.5495, 0.1065, 0.1061], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7054, -0.8523,  0.7541, -0.8872, -0.8905], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1279, 0.1102, 0.5500, 0.1061, 0.1059], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7044, -0.8539,  0.7540, -0.8913, -0.8937], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.59791

test loss: 0.579763
#####################
epoch 26
Softmax-processed output -------------
tensor([0.1249, 0.1077, 0.5585, 0.1042, 0.1047], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7208, -0.8683,  0.7774, -0.9016, -0.8970], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1239, 0.1076, 0.5606, 0.1038, 0.1042], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7290, -0.8705,  0.7804, -0.9060, -0.9026], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1244, 0.1075, 0.5596, 0.1040, 0.1045], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7234, -0.8695,  0.7801, -0.9027, -0.8984], dtype=torch.float64,
      

Softmax-processed output -------------
tensor([0.1218, 0.1057, 0.5677, 0.1021, 0.1027], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7378, -0.8800,  0.8013, -0.9139, -0.9087], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1217, 0.1056, 0.5685, 0.1019, 0.1024], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7394, -0.8817,  0.8020, -0.9170, -0.9123], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 2.106206 
test loss: 0.564502
#####################
epoch 29
Softmax-processed output -------------
tensor([0.1223, 0.1058, 0.5679, 0.1019, 0.1021], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7318, -0.8768,  0.8036, -0.9147, -0.9127], dtype

Softmax-processed output -------------
tensor([0.1187, 0.1037, 0.5767, 0.1003, 0.1007], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7578, -0.8926,  0.8231, -0.9262, -0.9224], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1186, 0.1037, 0.5768, 0.1003, 0.1006], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7587, -0.8922,  0.8234, -0.9262, -0.9228], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1185, 0.1037, 0.5770, 0.1002, 0.1006], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7587, -0.8927,  0.8242, -0.9265, -0.9227], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

#####################
epoch 34
Softmax-processed output -------------
tensor([0.1161, 0.1020, 0.5834, 0.0989, 0.0996], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7734, -0.9028,  0.8407, -0.9344, -0.9271], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1160, 0.1020, 0.5837, 0.0988, 0.0995], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7747, -0.9033,  0.8416, -0.9348, -0.9274], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1162, 0.1020, 0.5832, 0.0989, 0.0996], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7717, -0.9023,  0.8414, -0.9328, -0.9256], dtype=torch.float64,
       grad_fn=<SelectBack

Softmax-processed output -------------
tensor([0.1141, 0.1008, 0.5886, 0.0978, 0.0986], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7849, -0.9087,  0.8557, -0.9395, -0.9308], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1141, 0.1008, 0.5888, 0.0977, 0.0986], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7850, -0.9090,  0.8562, -0.9397, -0.9307], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1144, 0.1014, 0.5874, 0.0979, 0.0989], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7835, -0.9046,  0.8521, -0.9399, -0.9294], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.53207

Softmax-processed output -------------
tensor([0.1085, 0.0973, 0.5948, 0.0973, 0.1020], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8191, -0.9280,  0.8821, -0.9279, -0.8813], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1120, 0.0993, 0.5950, 0.0964, 0.0973], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7986, -0.9189,  0.8717, -0.9481, -0.9392], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1119, 0.0992, 0.5953, 0.0964, 0.0972], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.7995, -0.9191,  0.8723, -0.9483, -0.9397], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1106, 0.0981, 0.5994, 0.0955, 0.0963], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8055, -0.9254,  0.8841, -0.9523, -0.9438], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1071, 0.0963, 0.5999, 0.0962, 0.1005], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8288, -0.9350,  0.8945, -0.9358, -0.8920], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1101, 0.0980, 0.6004, 0.0953, 0.0962], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8099, -0.9270,  0.8860, -0.9542, -0.9455], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1107, 0.0974, 0.6011, 0.0948, 0.0960], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8032, -0.9307,  0.8891, -0.9578, -0.9456], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.508929 
test loss: 0.507290
#####################
epoch 45
Softmax-processed output -------------
tensor([0.1100, 0.0973, 0.6022, 0.0947, 0.0958], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8094, -0.9313,  0.8910, -0.9585, -0.9474], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1099, 0.0973, 0.6024, 0.0947, 0.0957], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8099, -0.9316,  0.8916, -0.9586, -0.9476], dtype

Softmax-processed output -------------
tensor([0.1089, 0.0970, 0.6045, 0.0941, 0.0956], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8173, -0.9328,  0.8971, -0.9633, -0.9474], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1088, 0.0969, 0.6047, 0.0940, 0.0956], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8176, -0.9331,  0.8976, -0.9635, -0.9473], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1059, 0.0957, 0.6033, 0.0949, 0.1002], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8366, -0.9380,  0.9029, -0.9470, -0.8924], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1077, 0.0961, 0.6080, 0.0934, 0.0948], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8236, -0.9383,  0.9068, -0.9669, -0.9516], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1077, 0.0960, 0.6082, 0.0933, 0.0948], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8236, -0.9387,  0.9074, -0.9672, -0.9513], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1075, 0.0960, 0.6085, 0.0933, 0.0947], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8251, -0.9390,  0.9081, -0.9674, -0.9521], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1065, 0.0954, 0.6111, 0.0928, 0.0942], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8326, -0.9420,  0.9150, -0.9703, -0.9546], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1064, 0.0954, 0.6112, 0.0927, 0.0942], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8325, -0.9422,  0.9153, -0.9704, -0.9543], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1063, 0.0954, 0.6114, 0.0927, 0.0942], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8336, -0.9424,  0.9158, -0.9705, -0.9548], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

test loss: 0.487341
#####################
epoch 56
Softmax-processed output -------------
tensor([0.1049, 0.0950, 0.6142, 0.0922, 0.0936], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8441, -0.9436,  0.9229, -0.9732, -0.9579], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1049, 0.0950, 0.6144, 0.0922, 0.0936], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8445, -0.9438,  0.9233, -0.9733, -0.9580], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1066, 0.1005, 0.6040, 0.0935, 0.0954], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8377, -0.8957,  0.8973, -0.9687, -0.9483], dtype=torch.float64,
      

Softmax-processed output -------------
tensor([0.1038, 0.0943, 0.6169, 0.0918, 0.0932], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8528, -0.9480,  0.9297, -0.9759, -0.9598], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1037, 0.0943, 0.6171, 0.0917, 0.0932], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8536, -0.9482,  0.9300, -0.9760, -0.9601], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1052, 0.0994, 0.6075, 0.0929, 0.0950], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8479, -0.9045,  0.9055, -0.9724, -0.9500], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0984, 0.0956, 0.6171, 0.0928, 0.0960], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9014, -0.9294,  0.9350, -0.9593, -0.9252], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1027, 0.0937, 0.6193, 0.0913, 0.0929], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8606, -0.9524,  0.9357, -0.9783, -0.9614], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1041, 0.0984, 0.6106, 0.0924, 0.0945], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8552, -0.9116,  0.9136, -0.9749, -0.9526], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1033, 0.0933, 0.6197, 0.0910, 0.0927], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8541, -0.9558,  0.9378, -0.9806, -0.9619], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1032, 0.0933, 0.6199, 0.0910, 0.0927], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8548, -0.9559,  0.9382, -0.9807, -0.9621], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1037, 0.0933, 0.6193, 0.0910, 0.0928], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8501, -0.9559,  0.9371, -0.9806, -0.9614], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 2.37219

Softmax-processed output -------------
tensor([0.1035, 0.0931, 0.6199, 0.0908, 0.0927], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8514, -0.9567,  0.9389, -0.9824, -0.9616], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1037, 0.0934, 0.6192, 0.0909, 0.0928], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8499, -0.9547,  0.9372, -0.9819, -0.9605], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1036, 0.0932, 0.6196, 0.0908, 0.0928], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8503, -0.9562,  0.9384, -0.9820, -0.9604], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

test loss: 0.475024
#####################
epoch 70
Softmax-processed output -------------
tensor([0.1027, 0.0927, 0.6218, 0.0904, 0.0924], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8574, -0.9599,  0.9439, -0.9841, -0.9626], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1030, 0.0927, 0.6215, 0.0904, 0.0924], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8543, -0.9599,  0.9433, -0.9841, -0.9625], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1040, 0.0967, 0.6140, 0.0914, 0.0939], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8520, -0.9248,  0.9236, -0.9811, -0.9545], dtype=torch.float64,
      

tensor([-0.9010, -0.9445,  0.9477, -0.9721, -0.9326], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1025, 0.0921, 0.6233, 0.0901, 0.0919], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8568, -0.9635,  0.9484, -0.9857, -0.9656], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1026, 0.0921, 0.6230, 0.0901, 0.0921], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8556, -0.9635,  0.9477, -0.9857, -0.9645], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.473169 
test loss: 0.472704
#####################
epoch 73
Softmax-processed output -------------
tensor([0.1025, 0.0921, 0.6232, 0.0901, 0.0920], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from

Softmax-processed output -------------
tensor([0.1018, 0.0918, 0.6248, 0.0899, 0.0918], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8624, -0.9662,  0.9520, -0.9870, -0.9658], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1018, 0.0917, 0.6248, 0.0899, 0.0918], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8626, -0.9663,  0.9522, -0.9871, -0.9658], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1017, 0.0917, 0.6250, 0.0898, 0.0918], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8632, -0.9664,  0.9524, -0.9871, -0.9660], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

test loss: 2.300383
#####################
epoch 79
Softmax-processed output -------------
tensor([0.0966, 0.0926, 0.6264, 0.0904, 0.0940], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9119, -0.9545,  0.9574, -0.9781, -0.9394], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0983, 0.0906, 0.6275, 0.0897, 0.0940], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8922, -0.9741,  0.9617, -0.9834, -0.9366], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1005, 0.0911, 0.6277, 0.0895, 0.0913], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8731, -0.9705,  0.9592, -0.9890, -0.9689], dtype=torch.float64,
      

Softmax-processed output -------------
tensor([0.1010, 0.0909, 0.6276, 0.0893, 0.0911], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8669, -0.9717,  0.9600, -0.9898, -0.9701], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1010, 0.0909, 0.6277, 0.0893, 0.0911], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8672, -0.9718,  0.9602, -0.9899, -0.9702], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1009, 0.0909, 0.6278, 0.0893, 0.0911], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8675, -0.9719,  0.9604, -0.9899, -0.9702], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1011, 0.0906, 0.6282, 0.0891, 0.0909], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8642, -0.9741,  0.9622, -0.9907, -0.9714], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1011, 0.0906, 0.6281, 0.0891, 0.0909], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8643, -0.9742,  0.9619, -0.9907, -0.9707], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1011, 0.0906, 0.6282, 0.0891, 0.0909], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8647, -0.9742,  0.9620, -0.9907, -0.9708], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.1008, 0.0903, 0.6294, 0.0889, 0.0906], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8665, -0.9764,  0.9653, -0.9916, -0.9727], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.463026 
test loss: 0.462745
#####################
epoch 87
Softmax-processed output -------------
tensor([0.1006, 0.0903, 0.6296, 0.0889, 0.0906], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8677, -0.9765,  0.9657, -0.9917, -0.9730], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1014, 0.0929, 0.6244, 0.0896, 0.0917], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8658, -0.9540,  0.9517, -0.9897, -0.9668], dtype

Softmax-processed output -------------
tensor([0.0983, 0.0896, 0.6305, 0.0890, 0.0926], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8883, -0.9809,  0.9698, -0.9883, -0.9482], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.1002, 0.0900, 0.6308, 0.0887, 0.0903], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8707, -0.9786,  0.9688, -0.9925, -0.9753], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0984, 0.0896, 0.6305, 0.0890, 0.0925], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8877, -0.9809,  0.9701, -0.9883, -0.9488], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0996, 0.0897, 0.6321, 0.0886, 0.0901], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8764, -0.9805,  0.9718, -0.9933, -0.9768], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0995, 0.0897, 0.6321, 0.0886, 0.0900], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8767, -0.9806,  0.9719, -0.9933, -0.9768], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0977, 0.0894, 0.6319, 0.0888, 0.0922], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8941, -0.9827,  0.9730, -0.9896, -0.9518], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

test loss: 0.457294
#####################
epoch 95
Softmax-processed output -------------
tensor([0.0993, 0.0895, 0.6329, 0.0884, 0.0898], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8775, -0.9813,  0.9744, -0.9939, -0.9788], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0992, 0.0895, 0.6331, 0.0884, 0.0898], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8793, -0.9817,  0.9743, -0.9940, -0.9787], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0991, 0.0895, 0.6331, 0.0884, 0.0898], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8796, -0.9818,  0.9744, -0.9940, -0.9788], dtype=torch.float64,
      

Softmax-processed output -------------
tensor([0.0992, 0.0913, 0.6301, 0.0889, 0.0906], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8834, -0.9659,  0.9656, -0.9931, -0.9740], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0954, 0.0904, 0.6334, 0.0890, 0.0918], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9193, -0.9727,  0.9741, -0.9885, -0.9576], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.456574 
test loss: 0.455451
#####################
epoch 98
Softmax-processed output -------------
tensor([0.0991, 0.0913, 0.6303, 0.0888, 0.0905], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8840, -0.9664,  0.9661, -0.9932, -0.9743], dtype

tensor([-0.8951, -0.9839,  0.9786, -0.9951, -0.9801], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0975, 0.0893, 0.6354, 0.0883, 0.0896], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8954, -0.9840,  0.9787, -0.9952, -0.9802], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0975, 0.0892, 0.6354, 0.0883, 0.0896], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8957, -0.9840,  0.9788, -0.9952, -0.9802], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0978, 0.0892, 0.6352, 0.0882, 0.0896], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8926, -0

Softmax-processed output -------------
tensor([0.0980, 0.0891, 0.6353, 0.0882, 0.0894], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8893, -0.9852,  0.9795, -0.9956, -0.9810], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0980, 0.0891, 0.6353, 0.0881, 0.0894], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8895, -0.9853,  0.9796, -0.9956, -0.9810], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0980, 0.0891, 0.6353, 0.0882, 0.0895], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8897, -0.9853,  0.9794, -0.9956, -0.9806], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0965, 0.0887, 0.6352, 0.0882, 0.0913], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9036, -0.9877,  0.9806, -0.9934, -0.9597], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0983, 0.0889, 0.6353, 0.0881, 0.0894], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8864, -0.9864,  0.9800, -0.9960, -0.9804], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0951, 0.0899, 0.6351, 0.0886, 0.0913], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9211, -0.9776,  0.9780, -0.9914, -0.9612], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0981, 0.0888, 0.6359, 0.0880, 0.0893], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8879, -0.9871,  0.9815, -0.9963, -0.9817], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0981, 0.0888, 0.6358, 0.0880, 0.0893], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8880, -0.9871,  0.9814, -0.9963, -0.9812], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0980, 0.0888, 0.6359, 0.0880, 0.0893], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8883, -0.9872,  0.9815, -0.9964, -0.9813], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

tensor([-0.8868, -0.9886,  0.9829, -0.9968, -0.9820], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0981, 0.0886, 0.6363, 0.0879, 0.0892], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8870, -0.9886,  0.9829, -0.9968, -0.9820], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0948, 0.0894, 0.6365, 0.0884, 0.0909], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9226, -0.9816,  0.9814, -0.9930, -0.9646], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0980, 0.0886, 0.6364, 0.0879, 0.0892], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8875, -0

Softmax-processed output -------------
tensor([0.0982, 0.0885, 0.6363, 0.0878, 0.0891], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8859, -0.9888,  0.9834, -0.9971, -0.9822], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0981, 0.0885, 0.6364, 0.0878, 0.0891], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8862, -0.9888,  0.9835, -0.9971, -0.9823], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0981, 0.0886, 0.6363, 0.0878, 0.0891], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8859, -0.9887,  0.9833, -0.9970, -0.9821], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0976, 0.0897, 0.6351, 0.0881, 0.0895], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8945, -0.9785,  0.9787, -0.9966, -0.9803], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0972, 0.0885, 0.6377, 0.0878, 0.0889], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8955, -0.9895,  0.9858, -0.9975, -0.9842], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0972, 0.0885, 0.6377, 0.0877, 0.0889], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8952, -0.9894,  0.9859, -0.9975, -0.9843], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0951, 0.0883, 0.6384, 0.0879, 0.0903], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9166, -0.9909,  0.9870, -0.9961, -0.9692], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0967, 0.0884, 0.6383, 0.0877, 0.0888], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8999, -0.9900,  0.9871, -0.9977, -0.9854], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0967, 0.0884, 0.6383, 0.0877, 0.0888], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8997, -0.9900,  0.9871, -0.9977, -0.9853], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.44888

Softmax-processed output -------------
tensor([0.0964, 0.0884, 0.6388, 0.0877, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9035, -0.9900,  0.9878, -0.9979, -0.9860], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0967, 0.0895, 0.6366, 0.0880, 0.0893], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9030, -0.9803,  0.9820, -0.9972, -0.9827], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0963, 0.0884, 0.6388, 0.0877, 0.0888], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9039, -0.9901,  0.9878, -0.9979, -0.9857], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0970, 0.0894, 0.6363, 0.0879, 0.0893], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8988, -0.9810,  0.9818, -0.9974, -0.9819], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0967, 0.0883, 0.6386, 0.0876, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9001, -0.9905,  0.9879, -0.9981, -0.9856], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0969, 0.0883, 0.6384, 0.0876, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8977, -0.9905,  0.9878, -0.9981, -0.9855], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 2.33418

Softmax-processed output -------------
tensor([0.0967, 0.0882, 0.6388, 0.0876, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8993, -0.9909,  0.9886, -0.9983, -0.9861], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0967, 0.0882, 0.6388, 0.0876, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8995, -0.9909,  0.9885, -0.9983, -0.9858], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0967, 0.0882, 0.6388, 0.0876, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8997, -0.9909,  0.9886, -0.9983, -0.9858], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0950, 0.0881, 0.6394, 0.0877, 0.0899], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9174, -0.9924,  0.9895, -0.9973, -0.9727], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0968, 0.0881, 0.6390, 0.0875, 0.0885], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8977, -0.9915,  0.9895, -0.9985, -0.9870], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0970, 0.0891, 0.6371, 0.0878, 0.0890], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.8978, -0.9833,  0.9842, -0.9979, -0.9836], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.45084

Softmax-processed output -------------
tensor([0.0965, 0.0881, 0.6395, 0.0875, 0.0885], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9011, -0.9921,  0.9904, -0.9986, -0.9876], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0965, 0.0881, 0.6395, 0.0875, 0.0885], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9010, -0.9921,  0.9906, -0.9986, -0.9877], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0964, 0.0881, 0.6396, 0.0875, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9012, -0.9922,  0.9906, -0.9986, -0.9877], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0961, 0.0880, 0.6399, 0.0875, 0.0885], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9047, -0.9925,  0.9909, -0.9987, -0.9875], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0962, 0.0880, 0.6399, 0.0875, 0.0885], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9043, -0.9925,  0.9911, -0.9987, -0.9876], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.446497 
test loss: 0.446408
#####################
epoch 137
Softmax-processed output -------------
tensor([0.0961, 0.0880, 0.6399, 0.0875, 0.0885], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9052, -0.9925,  0.9910, -0.9988, -0.9876], dtyp

Softmax-processed output -------------
tensor([0.0959, 0.0880, 0.6402, 0.0875, 0.0885], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9070, -0.9932,  0.9916, -0.9989, -0.9876], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0941, 0.0880, 0.6406, 0.0876, 0.0897], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9262, -0.9939,  0.9917, -0.9980, -0.9746], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.445295 
test loss: 0.445874
#####################
epoch 140
Softmax-processed output -------------
tensor([0.0960, 0.0887, 0.6387, 0.0877, 0.0889], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9071, -0.9864,  0.9876, -0.9984, -0.9848], dtyp

Softmax-processed output -------------
tensor([0.0957, 0.0879, 0.6405, 0.0875, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9086, -0.9933,  0.9921, -0.9990, -0.9882], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0957, 0.0879, 0.6405, 0.0875, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9088, -0.9933,  0.9922, -0.9990, -0.9882], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0957, 0.0879, 0.6405, 0.0875, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9090, -0.9934,  0.9922, -0.9990, -0.9883], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

#####################
epoch 145
Softmax-processed output -------------
tensor([0.0929, 0.0884, 0.6413, 0.0877, 0.0896], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9403, -0.9896,  0.9915, -0.9978, -0.9765], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0954, 0.0879, 0.6409, 0.0874, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9121, -0.9938,  0.9929, -0.9991, -0.9888], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0954, 0.0879, 0.6409, 0.0874, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9123, -0.9938,  0.9929, -0.9991, -0.9888], dtype=torch.float64,
       grad_fn=<SelectBac

Softmax-processed output -------------
tensor([0.0942, 0.0878, 0.6410, 0.0875, 0.0894], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9250, -0.9949,  0.9928, -0.9985, -0.9768], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0960, 0.0878, 0.6404, 0.0874, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9046, -0.9942,  0.9928, -0.9992, -0.9884], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0960, 0.0878, 0.6405, 0.0874, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9048, -0.9942,  0.9928, -0.9992, -0.9884], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0960, 0.0878, 0.6405, 0.0874, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9053, -0.9945,  0.9930, -0.9992, -0.9878], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0959, 0.0878, 0.6405, 0.0874, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9055, -0.9945,  0.9930, -0.9992, -0.9878], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0959, 0.0878, 0.6405, 0.0874, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9057, -0.9945,  0.9930, -0.9992, -0.9876], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

test loss: 0.445426
#####################
epoch 153
Softmax-processed output -------------
tensor([0.0960, 0.0877, 0.6406, 0.0873, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9049, -0.9949,  0.9932, -0.9993, -0.9872], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0960, 0.0877, 0.6406, 0.0873, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9051, -0.9950,  0.9933, -0.9993, -0.9872], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0940, 0.0878, 0.6412, 0.0875, 0.0896], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9265, -0.9955,  0.9933, -0.9987, -0.9746], dtype=torch.float64,
     

Softmax-processed output -------------
tensor([0.0957, 0.0877, 0.6410, 0.0873, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9084, -0.9954,  0.9938, -0.9994, -0.9875], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0957, 0.0877, 0.6410, 0.0873, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9083, -0.9954,  0.9940, -0.9994, -0.9876], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0958, 0.0877, 0.6408, 0.0873, 0.0884], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9065, -0.9954,  0.9938, -0.9994, -0.9874], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 2.34525

Softmax-processed output -------------
tensor([0.0957, 0.0876, 0.6410, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9072, -0.9957,  0.9942, -0.9994, -0.9876], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0958, 0.0882, 0.6398, 0.0875, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9078, -0.9912,  0.9909, -0.9992, -0.9848], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0957, 0.0876, 0.6410, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9077, -0.9958,  0.9942, -0.9994, -0.9876], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0956, 0.0876, 0.6412, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9088, -0.9961,  0.9946, -0.9995, -0.9878], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0956, 0.0876, 0.6412, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9090, -0.9961,  0.9947, -0.9995, -0.9879], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0955, 0.0876, 0.6413, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9093, -0.9961,  0.9947, -0.9995, -0.9879], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0952, 0.0876, 0.6416, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9127, -0.9965,  0.9952, -0.9995, -0.9884], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0934, 0.0876, 0.6422, 0.0874, 0.0893], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9332, -0.9968,  0.9951, -0.9991, -0.9773], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.442793 
test loss: 0.443657
#####################
epoch 164
Softmax-processed output -------------
tensor([0.0952, 0.0876, 0.6417, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9131, -0.9965,  0.9952, -0.9996, -0.9885], dtyp

tensor([-0.9181, -0.9966,  0.9956, -0.9996, -0.9889], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0947, 0.0876, 0.6421, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9183, -0.9966,  0.9956, -0.9996, -0.9890], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0947, 0.0876, 0.6422, 0.0873, 0.0883], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9186, -0.9967,  0.9957, -0.9996, -0.9890], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0947, 0.0876, 0.6422, 0.0873, 0.0882], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9188, -0

Softmax-processed output -------------
tensor([0.0950, 0.0875, 0.6419, 0.0873, 0.0882], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9148, -0.9967,  0.9957, -0.9996, -0.9890], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 2.353766 
test loss: 0.443445
#####################
epoch 169
Softmax-processed output -------------
tensor([0.0952, 0.0875, 0.6418, 0.0873, 0.0882], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9127, -0.9967,  0.9957, -0.9996, -0.9892], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0952, 0.0875, 0.6418, 0.0873, 0.0882], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9132, -0.9967,  0.9957, -0.9996, -0.9890], dtyp

Softmax-processed output -------------
tensor([0.0931, 0.0876, 0.6428, 0.0874, 0.0891], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9361, -0.9973,  0.9961, -0.9994, -0.9802], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0924, 0.0879, 0.6429, 0.0875, 0.0893], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9448, -0.9947,  0.9952, -0.9991, -0.9790], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0948, 0.0875, 0.6422, 0.0873, 0.0882], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9165, -0.9970,  0.9962, -0.9997, -0.9897], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0924, 0.0878, 0.6431, 0.0875, 0.0892], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9450, -0.9951,  0.9955, -0.9992, -0.9793], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0950, 0.0875, 0.6422, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9150, -0.9972,  0.9964, -0.9997, -0.9900], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0949, 0.0875, 0.6422, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9152, -0.9972,  0.9964, -0.9997, -0.9900], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0945, 0.0875, 0.6426, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9201, -0.9975,  0.9967, -0.9997, -0.9901], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0945, 0.0875, 0.6426, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9202, -0.9975,  0.9967, -0.9997, -0.9901], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.442187 
test loss: 0.441179
#####################
epoch 178
Softmax-processed output -------------
tensor([0.0945, 0.0875, 0.6427, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9203, -0.9975,  0.9968, -0.9997, -0.9903], dtyp

Softmax-processed output -------------
tensor([0.0944, 0.0875, 0.6428, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9211, -0.9977,  0.9970, -0.9998, -0.9907], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0944, 0.0875, 0.6428, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9214, -0.9977,  0.9971, -0.9998, -0.9908], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0944, 0.0875, 0.6428, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9216, -0.9977,  0.9971, -0.9998, -0.9908], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0940, 0.0875, 0.6432, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9264, -0.9979,  0.9973, -0.9998, -0.9909], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0939, 0.0875, 0.6432, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9266, -0.9979,  0.9973, -0.9998, -0.9910], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0939, 0.0875, 0.6432, 0.0873, 0.0881], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9268, -0.9978,  0.9973, -0.9998, -0.9910], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed o

Softmax-processed output -------------
tensor([0.0925, 0.0875, 0.6438, 0.0874, 0.0888], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9431, -0.9982,  0.9974, -0.9996, -0.9835], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 2.421305 
test loss: 0.441455
#####################
epoch 186
Softmax-processed output -------------
tensor([0.0942, 0.0874, 0.6431, 0.0873, 0.0880], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9237, -0.9980,  0.9975, -0.9998, -0.9913], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0942, 0.0874, 0.6431, 0.0873, 0.0880], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9239, -0.9980,  0.9975, -0.9998, -0.9913], dtyp

tensor([0.0946, 0.0874, 0.6429, 0.0872, 0.0879], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9190, -0.9980,  0.9975, -0.9998, -0.9916], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0945, 0.0874, 0.6429, 0.0872, 0.0880], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9194, -0.9980,  0.9975, -0.9998, -0.9915], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0945, 0.0874, 0.6429, 0.0872, 0.0880], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9196, -0.9980,  0.9975, -0.9998, -0.9916], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0945, 0.0

tensor([0.0943, 0.0874, 0.6431, 0.0873, 0.0880], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9223, -0.9981,  0.9977, -0.9998, -0.9918], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0943, 0.0874, 0.6431, 0.0873, 0.0880], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9225, -0.9981,  0.9977, -0.9998, -0.9919], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0943, 0.0874, 0.6431, 0.0872, 0.0879], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9225, -0.9981,  0.9978, -0.9998, -0.9920], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0943, 0.0

#####################
epoch 195
Softmax-processed output -------------
tensor([0.0942, 0.0874, 0.6432, 0.0872, 0.0879], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9235, -0.9982,  0.9979, -0.9999, -0.9920], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0941, 0.0874, 0.6433, 0.0873, 0.0879], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9239, -0.9982,  0.9979, -0.9999, -0.9919], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0941, 0.0874, 0.6433, 0.0873, 0.0879], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9241, -0.9982,  0.9979, -0.9999, -0.9919], dtype=torch.float64,
       grad_fn=<SelectBac

tensor([-0.9193, -0.9984,  0.9980, -0.9999, -0.9924], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0921, 0.0876, 0.6439, 0.0874, 0.0889], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9469, -0.9970,  0.9972, -0.9996, -0.9827], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
train loss: 0.440233 
test loss: 0.441696
#####################
epoch 198
Softmax-processed output -------------
tensor([0.0927, 0.0875, 0.6438, 0.0873, 0.0887], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor from last cell state (= unprocessed output) -------------
tensor([-0.9407, -0.9985,  0.9978, -0.9997, -0.9843], dtype=torch.float64,
       grad_fn=<SelectBackward0>)
###
Softmax-processed output -------------
tensor([0.0947, 0.0873, 0.6430, 0.0872, 0.0878], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)
Last tensor fro

### Y. Thoughts, questions and comments

__The output of the Pytorch LSTM layer is a tuple with two elements. The first element of the tuple is LSTM’s output corresponding to all timesteps (hᵗ : ∀t = 1,2…T) with shape (timesteps, batch, output_features). The second element of the tuple is another tuple with two elements. The first element of this second tuple is the output corresponding to the last timestep (hᵀ). It has the shape (1, batch, output_features). The second element of this second tuple is the cell state corresponding to the last timestep (cᵀ).__ 

### Z. Dirty code

#### About cross entropy loss

In [None]:
'''
La cross entropy toma un valor sin argmaxear y la etiqueta de la clase correcta 
(no one hot encoding, solo el valor denotado por el indice, i.e. [0-4]). Ahora te toca incorporar esto en el loop.
'''

loss = nn.CrossEntropyLoss()
loss(stacked_fcs[-1], torch.argmax(y[0]))

#### "Checkme" checker

In [11]:
input_size = 5 
hidden_size = 5
num_layers = 1

device = "cuda" if torch.cuda.is_available() else "cpu"
model = LSTM_0(input_size , hidden_size , num_layers).to(device)
learning_rate=1e-3

criterion = torch.nn.CrossEntropyLoss()    
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate) 
model = model.double()

model.train()
for batch , item in enumerate(train_dataloader):
    x , y = item
    x = x.to(device)
    y = y.to(device)
    out, hn, cn, final_out = model(x)
    checkme = (out, (hn, cn), final_out)
    break
    #print("FINAL OUT (Softmax) -------------")
    #print(checkme[2])
    #print("Last element of the last element of 'out' -------------")
    #print(out[-1][-1])
    #print("Last cell state -------------")
    #print(hn[-1][-1])
    #print("################################################" + str(batch))

In [27]:
x.size()

torch.Size([32, 10, 5])

In [28]:
y.size()

torch.Size([32, 5])

In [16]:
x.shape

torch.Size([32, 10, 5])

In [19]:
out.shape

torch.Size([32, 10, 5])

In [20]:
hn.shape

torch.Size([1, 10, 5])

In [21]:
hn

tensor([[[-0.1475,  0.0561,  0.0163, -0.0170, -0.0910],
         [-0.1475,  0.0561,  0.0163, -0.0170, -0.0910],
         [-0.1536,  0.0564,  0.0203, -0.0180, -0.0887],
         [-0.1752,  0.0568,  0.0621, -0.0321, -0.0833],
         [-0.1475,  0.0560,  0.0163, -0.0170, -0.0911],
         [-0.1508,  0.0522,  0.0269, -0.0187, -0.0890],
         [-0.1688,  0.0909,  0.1040, -0.0663, -0.0560],
         [-0.1475,  0.0561,  0.0163, -0.0170, -0.0910],
         [-0.1845,  0.0856,  0.0406, -0.0612, -0.0860],
         [-0.1497,  0.0525,  0.0406, -0.0200, -0.0848]]], dtype=torch.float64,
       grad_fn=<StackBackward0>)

In [13]:
# First batch: 32 inputs ("observations")
len(x)

32

In [18]:
checkme[1][0] # --> last hidden state 

tensor([[[-0.3794, -0.0908,  0.1568,  0.1983,  0.1075],
         [-0.3744, -0.0932,  0.1575,  0.2008,  0.1090],
         [-0.3791, -0.0910,  0.1571,  0.1990,  0.1077],
         [-0.2738, -0.1517,  0.2123,  0.1763,  0.0707],
         [-0.3794, -0.0908,  0.1568,  0.1980,  0.1075],
         [-0.3674, -0.0971,  0.1572,  0.2030,  0.1099],
         [-0.3774, -0.0918,  0.1575,  0.2003,  0.1084],
         [-0.3665, -0.0978,  0.1581,  0.2058,  0.1105],
         [-0.3535, -0.1106,  0.1560,  0.2099,  0.1099],
         [-0.3795, -0.0908,  0.1569,  0.1985,  0.1075]]], dtype=torch.float64,
       grad_fn=<StackBackward0>)

In [20]:
checkme[1][1] # --> last cell state

tensor([[[-0.8294, -0.1876,  0.2826,  0.5188,  0.2271],
         [-0.8136, -0.1928,  0.2838,  0.5255,  0.2300],
         [-0.8284, -0.1880,  0.2831,  0.5210,  0.2275],
         [-0.5729, -0.4017,  0.3055,  0.5759,  0.2020],
         [-0.8294, -0.1875,  0.2825,  0.5182,  0.2270],
         [-0.7919, -0.2015,  0.2829,  0.5304,  0.2319],
         [-0.8230, -0.1897,  0.2838,  0.5246,  0.2287],
         [-0.7894, -0.2031,  0.2843,  0.5383,  0.2330],
         [-0.7384, -0.2281,  0.2779,  0.5450,  0.2316],
         [-0.8296, -0.1876,  0.2827,  0.5195,  0.2271]]], dtype=torch.float64,
       grad_fn=<StackBackward0>)

In [30]:
checkme[0].size() # --> last output (No softmax) (equal to last hidden state by definition)

torch.Size([32, 10, 5])

In [21]:
checkme[2] # --> last output (softmax) -- result of applying softmax to last element of last output

tensor([0.1342, 0.1790, 0.2294, 0.2391, 0.2183], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)

In [13]:
# This is what happens when softmax is applied to tensor of shape (10, 5)
torch.softmax(hn[-1], dim=0)

tensor([[0.1010, 0.0994, 0.0980, 0.1011, 0.0994],
        [0.1010, 0.0994, 0.0980, 0.1011, 0.0994],
        [0.1004, 0.0994, 0.0984, 0.1010, 0.0996],
        [0.0982, 0.0995, 0.1026, 0.0996, 0.1002],
        [0.1010, 0.0994, 0.0980, 0.1011, 0.0994],
        [0.1006, 0.0990, 0.0991, 0.1010, 0.0996],
        [0.0988, 0.1029, 0.1070, 0.0963, 0.1030],
        [0.1010, 0.0994, 0.0980, 0.1011, 0.0994],
        [0.0973, 0.1024, 0.1004, 0.0968, 0.0999],
        [0.1007, 0.0991, 0.1004, 0.1008, 0.1000]], dtype=torch.float64,
       grad_fn=<SoftmaxBackward0>)

#### Exploring  the elements of a dataloader object

In [None]:
'''
Este bloque de código te mete los elementos del dataloader en una lista.
Esta lista está compuesta de tuplas que contienen el identificador del batch como primer elemento,
mientras que como segundo elemento contienen otra tupla con los 32 X e Y que conforman cada batch.
'''
#a = []
#for batch , (item, target) in enumerate(train_dataloader):
#    a.append((batch , (item, target)))

#### Attempt of using the ".load_simulation()" to load the data

In [2]:
params = {'previous_rating_measure': 'mode',
 'review_prior': [1,1,1,1,1],
 'tendency_to_rate': 0.05,
 'simulation_type': 'timeseries',
 'min_reviews_for_herding': 5,
 'num_products': 1400,
 'num_total_marketplace_reviews': 140000,
 'consideration_set_size': 5,
 'one_star_lowest_limit': -1.5,
 'five_star_highest_limit': 1.5,
 'max_bias_5_star': 0.5,
 'num_dist_samples': 10}

simpleloader = marketplace_simulator_class.MarketplaceSimulator(params = params)
simpleloader.load_simulator()

NameError: name 'marketplace_simulator_class' is not defined