In [None]:
#import libraries
import numpy as np
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchmetrics

In [None]:
# Select the first CUDA device and report the name of the currently active one
gpu = torch.device("cuda:0")
active_device_index = torch.cuda.current_device()
print(torch.cuda.get_device_name(active_device_index))

NVIDIA GeForce GTX 960


In [None]:
#load the dataset
import codecs
# Read the whole corpus with a single call. The previous loop iterated over
# *lines* (not characters) and re-built the string on every iteration.
with codecs.open('dataset/dataset_barbero_sarzana.txt', encoding='utf-8') as f:
    dataset_t = f.read()

In [None]:
# transform dataset from characters to integer codes, then to one-hot vectors
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
whole_dataset_list_of_chars = list(dataset_t)
dataset_int = pd.DataFrame(whole_dataset_list_of_chars, columns=['data'])

# integer-encode every character once, on the full dataset
label_enc = LabelEncoder()
dataset_int['int_encoding'] = label_enc.fit_transform(dataset_int['data'])

# create a dictionary: one row per distinct character with its integer code.
# .copy() makes it an independent frame (assigning a column on the result of
# drop_duplicates is what triggered pandas' SettingWithCopyWarning), and reusing
# the already-encoded column avoids fitting the encoder a second time.
dictionary = dataset_int.drop_duplicates(subset=['data']).copy()


# one hot encode
one_hot_enc = OneHotEncoder()
# lstm uses float32 instead of float64.
one_hot_encoded_dataset = one_hot_enc.fit_transform(dataset_int[['int_encoding']]).toarray().astype(np.float32)
one_hot_encoded_dataset = torch.from_numpy(one_hot_encoded_dataset)
one_hot_encoded_dataset = one_hot_encoded_dataset.to(gpu)
print(one_hot_encoded_dataset)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dictionary['int_encoding'] = label_enc.fit_transform(dictionary['data'])


tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')


In [None]:
# show the character -> integer code table (one row per distinct character)
print(dictionary)

        data  int_encoding
0          b            39
1          e            42
2          n            51
4                        1
6          u            58
...      ...           ...
1099082    -             4
1099618    U            33
1100751    È            64
1101278    N            26
1104174    V            34

[72 rows x 2 columns]


In [None]:
# sanity check: the one-hot tensor must report device index 0
device_index = one_hot_encoded_dataset.get_device()
assert device_index == 0

In [None]:
# ADD LABELS: slide a window of k characters over the encoded text; each sample is
# (inputs, targets) where targets is the same window shifted one character ahead.
k = 100
stride = k // 2  # consecutive windows overlap by half their length
dataset = []
for start in range(0, len(one_hot_encoded_dataset) - k, stride):
    inputs = [one_hot_encoded_dataset[start + offset] for offset in range(k)]
    targets = [one_hot_encoded_dataset[start + offset + 1] for offset in range(k)]
    dataset.append((inputs, targets))
print(len(dataset))

22736


In [None]:
# each sample is an (inputs, targets) pair, so this prints 2
print(len(dataset[0]))

2


In [None]:
# split the windows in order (no shuffling): first ~70% for training, the rest for test
train_set_threshold = int(len(dataset) * 0.7)
# note: the window at index train_set_threshold itself ends up in neither split
# NOTE(review): presumably a gap between the overlapping windows - confirm it is intended
training_set, test_set = dataset[:train_set_threshold], dataset[train_set_threshold+1:]

In [None]:
batch_size = 128
train_dataloader = DataLoader(training_set, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_set, batch_size=1, shuffle=True)
# pull the first training batch to validate its layout
iterator = iter(train_dataloader)
x, y = next(iterator)
# check that the data is in the form: L,N,H. L=sequence length, N=batch size, H=input size
for batch_part in (x, y):
    assert len(batch_part) == k and len(batch_part[0]) == batch_size and len(batch_part[0][0]) == 72

In [None]:
#build architecture
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # weights and biases are initialized using the uniform distribution:
        #      weights.uniform_(-sqrt(output_size), +sqrt(output_size))
        self.lstm_layer = torch.nn.LSTMCell(input_size=input_size, hidden_size=hidden_size)
        self.dense = torch.nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        outputs = []
        # we convert a list of tensors (given as such by the dataloader) to a tensor
        x = torch.stack(x)

        # the split function collapses the batch dimension, so now, instead of having (sequence_len,batch_size,input_size) inputs,
        # you have a number equal to batch_size of (batch_size,input_size) inputs; you will feed these inputs one after another to the lstm, using
        # the history h_t and short term memory c_t, along with the i-th input of type (batch_size,input_size)
        for time_step in torch.split(x, split_size_or_sections=1, dim=0):
            # this is needed to remove a dimension that wasn't removed by split
            time_step = torch.squeeze(time_step)
            h_t, c_t = self.lstm_layer(time_step)
            output = self.dense(h_t)
            outputs.append(output)
        # len(outputs) = batch_size. We convert outputs to a tensor by concatenating all tensors inside it
        #outputs = torch.stack(outputs)
        #print(outputs.size())
        return outputs
assert len(training_set[0][0][0]) == 72
input_size = len(training_set[0][0][0])
output_size = input_size
model = NeuralNetwork(input_size, 32, output_size)
model.to(gpu)
print(input_size)

72


In [None]:
# try the model
inp = next(iter(train_dataloader))[0]
output = model(inp)
print(len(output))
print(output[1].size())
#assert output.size() == (100,128,72)
print(torch.stack(output).size())

100
torch.Size([128, 72])
torch.Size([100, 128, 72])


In [None]:
#declare loss, optimizer
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
#train
def train_cycle(EPOCHS, train_dataloader, model, loss, optimizer):
    losses = []
    for i in range(EPOCHS):
        iterator = iter(train_dataloader)
        print("Epoch: " + str(i))
        for data, label in iterator:
            # pytorch accumulate gradients at every batch, doing this you reset these gradients
            optimizer.zero_grad()
            #forward step
            outputs = model(data)
            loss_acc = 0
            for i in range(len(outputs)):
                loss_value = loss(outputs[i],label[i])
                #backward step: compute gradients (apply automatic differentiation)
                loss_value.backward()
                loss_acc = loss_acc + loss_value
            loss_acc = loss_acc / len(outputs)
            losses.append(loss_acc)
            #update the parameters using the already computed gradients
            optimizer.step()
    return losses

In [None]:
import time

EPOCHS = 5

# wall-clock timing around the whole training run
start = time.time()
losses = train_cycle(EPOCHS, train_dataloader, model, loss, optimizer)
end = time.time()

elapsed_seconds = end - start
print("Time passed: " + str(elapsed_seconds))
print(losses)

Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Time passed: 46.121175050735474
[tensor(4.2936, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2880, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2848, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2821, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2792, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2738, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2704, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2692, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2651, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2619, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2595, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2549, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2525, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2505, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2428, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2419, device='cuda:0', grad_fn=<DivBackward0>), tensor(4.2

In [None]:
#test
from torchmetrics.classification import MulticlassAccuracy
iterator = iter(test_dataloader)
# NOTE(review): this metric object is created but not used by the loop below
accuracy = MulticlassAccuracy(num_classes=72).to(gpu)
# kept at module level because a later cell reuses `softmax`
softmax = torch.nn.Softmax(dim=1)
accuracies = []
# evaluation only: no_grad stops autograd from recording a graph for every test sequence
with torch.no_grad():
    for data, label in iterator:
        # the model returns one tensor per time step; flatten to (steps, num_classes)
        # using the actual sizes instead of hard-coded 100 and 72
        prediction = torch.stack(model(data))
        prediction = prediction.reshape(-1, prediction.shape[-1])
        label = torch.stack(label).reshape(prediction.shape)
        #compute accuracy: softmax is monotonic, so argmax on the raw outputs picks the same class
        pred_arg = torch.argmax(prediction, dim=1)
        label_arg = torch.argmax(label, dim=1)
        acc = torch.eq(pred_arg, label_arg).sum().item() / pred_arg.numel()
        accuracies.append(acc)
print(accuracies)

[0.29, 0.2, 0.2, 0.24, 0.28, 0.32, 0.27, 0.26, 0.34, 0.22, 0.28, 0.31, 0.31, 0.19, 0.23, 0.27, 0.18, 0.32, 0.23, 0.27, 0.33, 0.29, 0.31, 0.3, 0.23, 0.28, 0.27, 0.25, 0.35, 0.3, 0.25, 0.28, 0.26, 0.29, 0.27, 0.28, 0.34, 0.31, 0.25, 0.27, 0.29, 0.36, 0.31, 0.24, 0.2, 0.19, 0.26, 0.27, 0.28, 0.26, 0.26, 0.25, 0.28, 0.25, 0.28, 0.34, 0.25, 0.28, 0.33, 0.27, 0.31, 0.28, 0.27, 0.23, 0.19, 0.23, 0.18, 0.31, 0.26, 0.2, 0.27, 0.34, 0.29, 0.22, 0.35, 0.28, 0.14, 0.26, 0.26, 0.23, 0.28, 0.29, 0.24, 0.23, 0.28, 0.24, 0.28, 0.2, 0.34, 0.29, 0.29, 0.34, 0.26, 0.31, 0.24, 0.21, 0.3, 0.27, 0.31, 0.17, 0.23, 0.24, 0.2, 0.27, 0.28, 0.28, 0.29, 0.22, 0.24, 0.29, 0.25, 0.25, 0.26, 0.32, 0.31, 0.34, 0.26, 0.3, 0.29, 0.27, 0.31, 0.36, 0.27, 0.28, 0.25, 0.25, 0.3, 0.28, 0.24, 0.28, 0.24, 0.22, 0.26, 0.31, 0.21, 0.24, 0.26, 0.23, 0.29, 0.19, 0.33, 0.28, 0.33, 0.3, 0.22, 0.2, 0.25, 0.32, 0.26, 0.3, 0.24, 0.29, 0.2, 0.19, 0.24, 0.24, 0.33, 0.25, 0.23, 0.3, 0.32, 0.22, 0.21, 0.26, 0.32, 0.22, 0.24, 0.27, 0.23, 0

In [None]:
# average of the per-sequence accuracies collected by the test loop
print("Mean accuracy is: " + str(sum(accuracies)/len(accuracies)))

Mean accuracy is: 0.2651143695014595


In [None]:
# this function builds an encoder that turns a string into an input for the NN of size (SxN)
# * N = number of classes
# * S = length of the string (in terms of characters)
def generate_phrase_to_NN_input(num_classes, dictionary, device):
    """Return a function that one-hot encodes a string.

    Args:
        num_classes: width N of each one-hot row.
        dictionary: DataFrame with a 'data' column (characters) and an
            'int_encoding' column (their integer codes).
        device: torch device on which the encoded tensor is created.

    Returns:
        phrase_to_NN_input(phrase) -> float tensor of shape (len(phrase), num_classes).
    """
    # build the character -> index table once instead of filtering the DataFrame per character
    char_to_index = {
        char: int(index)
        for char, index in zip(dictionary['data'], dictionary['int_encoding'])
    }

    # input: a string
    # output: a SxN one-hot tensor (an empty string gives a 0xN tensor)
    def phrase_to_NN_input(phrase):
        indices = []
        for char in phrase:
            if char not in char_to_index:
                # same exception type as before, but with a message that names the character
                raise ValueError(f"character {char!r} is not in the dictionary")
            indices.append(char_to_index[char])
        result = torch.zeros((len(indices), num_classes), device=device)
        if indices:
            rows = torch.arange(len(indices), device=device)
            cols = torch.tensor(indices, device=device)
            result[rows, cols] = 1
        return result
    return phrase_to_NN_input
# build the encoder for the 72 distinct characters of the dictionary, producing tensors on the GPU
phrase_to_NN_input = generate_phrase_to_NN_input(72,dictionary, gpu)

In [None]:
# this function builds a decoder that turns a sequence of N-wide score rows (e.g. a tensor of shape SxN)
# into a string of length S by taking the arg-max class of every row
def generate_NN_output_to_phrase(num_classes, dictionary, device):
    """Return a function that decodes rows of class scores into a string.

    Args:
        num_classes: unused; kept so the signature mirrors generate_phrase_to_NN_input.
        dictionary: DataFrame with a 'data' column (characters) and an
            'int_encoding' column (their integer codes).
        device: unused; kept for the same reason as num_classes.

    Returns:
        NN_output_to_phrase(nn_output) -> str with one character per row.
    """
    # build the index -> character table once instead of filtering the DataFrame per row
    index_to_char = {
        int(index): char
        for char, index in zip(dictionary['data'], dictionary['int_encoding'])
    }

    # input: an iterable of 1-D tensors (a SxN tensor or a list of N-wide tensors)
    # output: a string of length S
    def NN_output_to_phrase(nn_output):
        decoded_chars = []
        for tensor_hot_enc in nn_output:
            idx_word = torch.argmax(tensor_hot_enc).item()
            if idx_word not in index_to_char:
                # same exception type as before, but with a message that names the index
                raise ValueError(f"class index {idx_word} is not in the dictionary")
            decoded_chars.append(index_to_char[idx_word])
        return "".join(decoded_chars)
    return NN_output_to_phrase
# build the decoder over the same dictionary used by the encoder
NN_output_to_phrase = generate_NN_output_to_phrase(72,dictionary, gpu)

In [None]:
# encoder check: both characters of "--" must set position 4
# ('-' has int_encoding 4 in the dictionary printed earlier)
wordminusminus_encoded = phrase_to_NN_input("--")
assert wordminusminus_encoded[0][4].item() == 1.0 and wordminusminus_encoded[1][4].item() == 1.0

In [None]:
# round-trip check: decoding the encoded "--" must give back the same string
wordminusminus_decoded = NN_output_to_phrase(wordminusminus_encoded)
assert wordminusminus_decoded == "--"

In [None]:
phrase = "eqweqweqwaaIII--------IIIIIaaaaaAAAAAAAAAAA"
phrase_encoded = phrase_to_NN_input(phrase)
# The model takes a list with one tensor per time step. Passing [phrase_encoded]
# made the whole phrase a single time step with the characters in the batch
# dimension; list(phrase_encoded) feeds one character per time step instead.
prediction = model(list(phrase_encoded))
# the model returns one output row per character, which the decoder iterates directly
prediction_decoded = NN_output_to_phrase(prediction)
print(prediction_decoded)

 ua ua ua                                  


In [None]:
# accuracy on one test sequence, computed step by step for inspection
iterator = iter(test_dataloader)
data, label = next(iterator)
prediction = torch.stack(model(data))
probabilities = softmax(prediction).reshape(100,72)
data_arg = torch.argmax(probabilities,dim=1)
stacked_label = torch.stack(label).reshape(100,72)
label_arg = torch.argmax(stacked_label,dim=1)
matches = torch.eq(data_arg, label_arg).tolist()
sum(matches)/100

0.08

In [None]:
# stray numeric literal with no effect
# NOTE(review): presumably a pasted timing from an earlier run - confirm or delete
37.65237474441528

In [None]:
# scratch: build and print a small 2x2 integer tensor
a = torch.tensor([[1,2],[2,3]])
print(a)

tensor([[1, 2],
        [2, 3]])


In [None]:
# scratch: autograd demo - build c = a + a + a + log(b) and differentiate it in the next cells
import torch
a = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([3.0], requires_grad=True)

c = 0
# add `a` three times, printing each partial sum
for i in range(3):
    c = c + a
    print(c)

c = c + torch.log(b)

In [None]:
# back-propagate from c to fill a.grad and b.grad
c.backward()

In [None]:
# dc/da = 3 (a was added three times) and dc/db = 1/b
print(a.grad)
print(b.grad)

tensor([3.])
tensor([0.3333])


In [None]:
# scratch: the same computation as the PyTorch autograd demo, recorded with a TensorFlow GradientTape
import numpy as np
import tensorflow as tf


a = tf.Variable(2.0)
b = tf.Variable(3.0)

# operations executed inside the tape context are recorded for differentiation
with tf.GradientTape() as tape:
    c = 0
    for i in range(3):
        c = c + a
        print(c)

    c = c + tf.math.log(b)

tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [None]:
# gradients of c with respect to a and b, returned as a list in that order
dy_da = tape.gradient(c, [a,b])
dy_da

[<tf.Tensor: shape=(), dtype=float32, numpy=3.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.33333334>]

In [None]:
# value of c = 3*a + log(b)
print(c)

tf.Tensor(7.0986123, shape=(), dtype=float32)


In [None]:
# scratch: LabelEncoder demo on a tiny example
# NOTE: this cell rebinds the global names `data` and `dictionary`
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
import pandas as pd

data = ('a','b','c','c','a')
dictionary = pd.DataFrame(data, columns=['data'])

# `labelencoder` was used without ever being defined (NameError on a fresh kernel)
labelencoder = LabelEncoder()
dictionary['int_encoding'] = labelencoder.fit_transform(dictionary['data'])
print(dictionary)

  data  int_encoding
0    a             0
1    b             1
2    c             2
3    c             2
4    a             0


In [None]:
# scratch: one-hot encode the integer codes of the tiny example and show both representations
one_hot_enc = OneHotEncoder()
# toarray() turns the encoder's output into the dense array printed below
one_hot = one_hot_enc.fit_transform(dictionary[['int_encoding']]).toarray()
print(dictionary[['int_encoding']])
print(one_hot)

   int_encoding
0             0
1             1
2             2
3             2
4             0
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]]


In [None]:
# scratch: one million repeated matrix products on the GPU
# NOTE(review): presumably a GPU stress/timing loop - confirm; it makes Run All slow
import torch
# torch.empty does not initialise its values, so the products are arbitrary numbers
mat = torch.empty(5,100).to(gpu)
for i in range(0, 1000000):
    # multiply the matrix by its own transpose and keep the result for the next iteration
    mat = torch.mm(mat, torch.transpose(mat,0,1))

In [None]:
# scratch: a 2x3 matrix, its 3x2 transpose and their 2x2 product
# torch.empty does not initialise its values, so the printed numbers are arbitrary
mat = torch.empty(2,3)
a = torch.transpose(mat,0,1)
print(mat)
print(a)
print(torch.mm(mat,a))

tensor([[6.6648e-10, 1.1040e-05, 2.6081e+20],
        [2.0975e-07, 2.1876e-04, 4.3921e-05]])
tensor([[6.6648e-10, 2.0975e-07],
        [1.1040e-05, 2.1876e-04],
        [2.6081e+20, 4.3921e-05]])
tensor([[       inf, 1.1455e+16],
        [1.1455e+16, 4.9784e-08]])


In [None]:
# scratch: reshape/split/size exploration; only the last expression is displayed by the notebook
a = torch.arange(10).reshape(5,2)
a
torch.split(a,1)
# size of dimension 1 of the 5x2 tensor
a.size(1)

2

In [None]:
# scratch: stacking two (1,2) tensors adds a new leading dimension -> (2,1,2)
a = torch.stack([torch.randn((1,2)),torch.randn((1,2))])
print(a.size())
a

torch.Size([2, 1, 2])


tensor([[[-0.1867, -0.5635]],

        [[-0.5525, -0.4179]]])

In [None]:
# scratch: inspect the layout of one training batch
a = next(iter(train_dataloader))
# a[0] is the input half of the batch: a list of tensors, stacked here into one tensor
b = torch.stack(a[0])
# the three dimensions: sequence length, batch size, input size
print(b.size(0))
print(b.size(1))
print(b.size(2))

100
128
72


IndexError: Dimension out of range (expected to be in range of [-3, 2], but got 3)

In [None]:
# scratch: CrossEntropyLoss with probability targets on a 2x2 example
# NOTE: this cell rebinds the global names `loss`, `softmax` and `label`
loss = torch.nn.CrossEntropyLoss()
# pred=(a,c),(a,a), label=(a,b),(c,c)
# the list was missing its closing bracket (syntax error) and has to be a tensor:
# stack the two rows into a (2, 2) tensor so .size() and the loss call work
pred = torch.stack([torch.FloatTensor([10,20]),torch.FloatTensor([20,10])])
softmax = torch.nn.Softmax(dim=0)
# same values the element-by-element construction produced
label = softmax(pred)
print(pred.size())
loss(pred,label)

AttributeError: 'list' object has no attribute 'softmax'

In [None]:
# scratch: gradients from separate backward() calls accumulate in the leaf's .grad
x = torch.ones(1, requires_grad=True)
y = x**2
z = x**3
w = x**3
# each cube contributes 3*x**2 = 3 at x = 1, so x.grad ends up at 6
for cube in (z, w):
    cube.backward()
x.grad

tensor([6.])

In [None]:
# scratch: torchmetrics accuracy on four hand-written predictions (two of the four match the target)
from torch import tensor
target = tensor([0, 1, 2, 3])
preds = tensor([0, 2, 1, 3])
# NOTE: rebinds the global name `accuracy`
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=4)
accuracy(preds, target)

AssertionError: 

In [None]:
# scratch: a (3, 2, 3) tensor of one-hot rows - 3 outer entries, each holding 2 one-hot vectors of width 3
y = torch.tensor([
     [
       [1, 0, 0],
       [1, 0, 0]
     ],
     [
       [0, 1, 0],
       [0, 0, 1]
     ],
     [
       [0, 0, 1],
       [0, 0, 1]
     ]
   ])

In [None]:
# shape of the one-hot example tensor
y.size()

torch.Size([3, 2, 3])

In [None]:
# arg-max over the last dimension recovers the class index of every one-hot row
torch.argmax(y.float(),dim=2)

tensor([[0, 0],
        [1, 2],
        [2, 2]])

In [None]:
# scratch: look up the integer code of the character 'b'
print(dictionary)
print(len(dictionary))
# select rows and column in a single .loc call instead of chained indexing
a = dictionary.loc[dictionary['data'] == 'b', 'int_encoding']
# a[0] is a lookup by index *label*, which only works when the matching row happens
# to carry label 0; .iloc[0] takes the first match by position
print(a.iloc[0])

        data  int_encoding
0          b            39
1          e            42
2          n            51
4                        1
6          u            58
...      ...           ...
1099082    -             4
1099618    U            33
1100751    È            64
1101278    N            26
1104174    V            34

[72 rows x 2 columns]
72
39


##

In [None]:
# (scratch cell: stray keyboard input removed - the bare name raised NameError on Run All)