In [1]:
import re
import math
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import nltk
from nltk.tokenize import word_tokenize

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt

In [2]:
train_path = "a3_data/wsd_train.txt"
test_path = "a3_data/wsd_test_blind.txt"

with open(train_path, encoding = "utf-8") as f:
    for d, line in enumerate(f):
        print(line.lower())
        break
        
with open(test_path, encoding = "utf-8") as f:
    for d, line in enumerate(f):
        print(line.lower())
        break

keep%2:42:07::	keep.v	15	action by the committee in pursuance of its mandate , the committee will continue to keep under review the situation relating to the question of palestine and participate in relevant meetings of the general assembly and the security council . the committee will also continue to monitor the situation on the ground and draw the attention of the international community to urgent developments in the occupied palestinian territory , including east jerusalem , requiring international action .

?	physical.a	58	iaea pointed out that training and education were fundamental to the agency 's approach to enhancing physical protection systems in states . training courses , workshops and seminars that had been held on six continents had raised awareness and had provided hands-on experience of various subjects including the physical protection of research facilities , the practical operation of physical protection systems , and the engineering safety aspects of physical prote

# Load the data

In [3]:
def load_data(file_path):
    
    sense_list = []
    lemma_list = []
    position_list = []
    text_list = []

    with open(file_path, encoding = "utf-8") as f:
        for d, line in enumerate(f):

            line = line.lower()

            ix = line.find("\t")
            sense_key = line[0:ix]
            line = line[ix+1:]

            ix = line.find("\t")
            lemma = line[0:ix]
            line = line[ix+1:]

            ix = line.find("\t")
            position = line[0:ix]
            text = line[ix+1:].split()

            #if d == 0:
            #    print("sense_key ", sense_key)
            #    print("lemma ", lemma)
            #    print("position ", position)
            #    print("text ", text)

            sense_list.append(sense_key)
            lemma_list.append(lemma)
            position_list.append(position)
            text_list.append(text)

            #if d == 10000:
            #    break

    #print(d)
    
    df = pd.DataFrame(sense_list, columns = ["Sense_key"])
    df["Lemma"] = lemma_list
    df["Position"] = position_list
    df["Text"] = text_list

    del sense_list, lemma_list, position_list
    
    return df

In [87]:
class Preprocessing:
    def __init__(self, df, num_words, seq_len):
        self.data = df
        self.num_words = num_words
        self.seq_len = seq_len  
        
        self.vocabulary = None
        self.x_tokenized = None
        self.x_embedded = None
        self.x_padded = None
        self.x_raw = None
        
        self.lemma = None
        self.n_outputs = None
        self.le = None
        self.y = None
        
        self.x_train = None
        self.x_test = None
        self.y_train = None
        self.y_test = None
    
    def load_data(self):
        # split into sentences (x) and sense key (y)
        df = self.data
        self.x_raw = df.Text.values
        self.lemma = df.Lemma.iloc[0]
        self.n_outputs = len(df.Sense_key.unique())
        
        labels = np.asarray(df.Sense_key.values)
        le = preprocessing.LabelEncoder()
        targets = le.fit_transform(labels)
        self.y = targets
        self.le = le
        
    def build_vocabulary(self):
        # Builds the vocabulary 
        self.vocabulary = dict()
        fdist = nltk.FreqDist()

        for sentence in self.x_raw:
            for word in sentence:
                fdist[word] += 1

        common_words = fdist.most_common(self.num_words)

        for idx, word in enumerate(common_words):
            self.vocabulary[word[0]] = (idx+1)
            
    def word_to_idx(self):
        # By using the dictionary (vocabulary), it is transformed
        # each token into its index based representatio
        self.x_tokenized = list() 

        for sentence in self.x_raw:
            temp_sentence = list()
            for word in sentence:
                if word in self.vocabulary.keys():
                    temp_sentence.append(self.vocabulary[word])
            self.x_tokenized.append(temp_sentence)
        
    def find_seq_len(self):
        
        max_len = 0
        for item in self.x_raw:
    
            if len(item) > max_len:
                max_len = len(item)
        
        self.seq_len = max_len
    
    def padding_sentences(self):
        # Each sentence which does not fulfill the required length is padded with the index 0
        pad_idx = 0
        self.x_padded = list()

        for sentence in self.x_tokenized:
            while len(sentence) < self.seq_len:
                sentence.insert(len(sentence), pad_idx)

            self.x_padded.append(sentence)
            
        self.x_padded = np.array(self.x_padded) 
    
    def split_data(self):
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(self.x_padded, self.y, test_size=0.25, random_state=42)

### Old



class Preprocessing:

    def __init__(self, df, num_words, seq_len):
        self.data = df
        self.num_words = num_words
        self.seq_len = seq_len
        self.vocabulary = None
        self.x_tokenized = None
        self.x_padded = None
        self.x_raw = None
        self.y = None

        self.x_train = None
        self.x_test = None
        self.y_train = None
        self.y_test = None

    def load_data(self):
        # Reads the raw df file and split into
        # sentences (x) and target (y)
        self.x_raw = df.Text.values
        self.y = df.Sense_key.values

    def clean_text(self):
        # Removes special symbols and just keep
        # words in lower or upper form

        #self.x_raw = [x.lower() for x in self.x_raw]
        self.x_raw = [re.sub(r'[^A-Za-z]+', ' ', x) for x in self.x_raw]

    #def text_tokenization(self):
        # Tokenizes each sentence by implementing the nltk tool
        #self.x_raw = [word_tokenize(x) for x in self.x_raw]

    def build_vocabulary(self):
        # Builds the vocabulary and keeps the "x" most frequent words
        self.vocabulary = dict()
        fdist = nltk.FreqDist()

        for sentence in self.x_raw:
            for word in sentence:
                fdist[word] += 1

        common_words = fdist.most_common(self.num_words)

        for idx, word in enumerate(common_words):
            self.vocabulary[word[0]] = (idx+1)
            
    def word_to_idx(self):
        # By using the dictionary (vocabulary), it is transformed
        # each token into its index based representation

        self.x_tokenized = list()

        for sentence in self.x_raw:
            temp_sentence = list()
        for word in sentence:
            if word in self.vocabulary.keys():
                temp_sentence.append(self.vocabulary[word])
        self.x_tokenized.append(temp_sentence)
   
    def padding_sentences(self):
        # Each sentence which does not fulfill the required len
        # it's padded with the index 0

        pad_idx = 0
        self.x_padded = list()

        for sentence in self.x_tokenized:
            while len(sentence) < self.seq_len:
                sentence.insert(len(sentence), pad_idx)
            self.x_padded.append(sentence)
        self.x_padded = np.array(self.x_padded)
   
    def split_data(self):
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(self.x_padded, self.y, test_size=0.25, random_state=42)

## Check that it works

In [83]:
df = load_data(train_path)

In [109]:
sense_list = ["positive%3:00:01::", "positive%5:00:00:advantageous:00"]

df_pos = df[df.Lemma == "positive.a"]
df_pos = df_pos.loc[df_pos.Sense_key.isin(sense_list)]

In [110]:
len(df_pos.Sense_key.unique())

2

In [111]:
len(df_pos)

723

In [97]:
data_pos = Preprocessing(df_pos, num_words = 6000, seq_len = 100)
data_pos.load_data()
data_pos.build_vocabulary()
data_pos.word_to_idx()
data_pos.find_seq_len()
data_pos.padding_sentences()
data_pos.split_data()

In [100]:
data_pos.y[0:30]

array([2, 2, 4, 0, 4, 1, 0, 1, 1, 2, 2, 4, 1, 1, 4, 0, 2, 4, 2, 1, 2, 2,
       4, 4, 0, 4, 0, 2, 4, 4])

In [101]:
data_pos.x_test[0]

array([1840, 4624, 1923,    1,  459,  134,   10,  718,   44,   80,   64,
        778,  109,   42,  333,  120,    5,   94,    2,   23,   20,   35,
       1216,    6,   26,    8,  964, 1808,    3, 1622,    2,    4,  311,
       4625,  335,    1,  459,  134,   45,  762, 3078,   53,    9,  585,
          7,  237,    6, 4626,   53,  855,   91,    5,   10,  480,    2,
          1,  125,  588,   11, 1857, 3461,   37,  301,    1, 4627,  957,
          3,   39,   33,   91,   55,   18,  243,    6,   11, 2462,   12,
         39,    2,  247,   67,   87, 1902, 1384,    1, 1331,    5,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

In [102]:
data_pos.x_train.shape

(912, 236)

# Embedding the data

Are stopwords able to change the sense of a word? I think so!

- standing in line - waiting for something
- standing in a line - they're just standing 

Based on this, I will not remove stopwords. I will also leave in punctuation, but it seems like a good idea to lowercase the entire text. We're not doing NER, and I don't want Line and line to end up having two meanings - the position alone should clarify the sense. CBoW seems like a terrible choice in this setting - the word senses will almost certainly get lost. Try representation with pre-trained GloVe vectors.

https://medium.com/analytics-vidhya/basics-of-using-pre-trained-glove-vectors-in-python-d38905f356db

idea: only embed the sentence containing the word in question (maybe later)
use word position in an attention model, or for determining weights in a CNN/RNN (think that is an attention model)
 
represent sentence/doc
one-hot encode labels

prediction: something with a softmax layer

CNNs seem promising, as they can model interactions between words (exactly what we want). They also have a local structure, which is great. (can steal code from demo notebook if I want to use this)

In [12]:
train_df = load_data(train_path)
train_df.head()

Unnamed: 0,Sense_key,Lemma,Position,Text
0,keep%2:42:07::,keep.v,15,"[action, by, the, committee, in, pursuance, of..."
1,national%3:01:00::,national.a,25,"[a, guard, of, honour, stood, in, formation, i..."
2,build%2:31:03::,build.v,38,"[the, principle, that, statistics, should, be,..."
3,place%1:04:00::,place.n,36,"[again, ,, he, appealed, for, additional, supp..."
4,position%1:04:01::,position.n,76,"[also, ,, the, iaea, has, the, lowest, number,..."


In [13]:
test_df = load_data(test_path)
test_df.head()

Unnamed: 0,Sense_key,Lemma,Position,Text
0,?,physical.a,58,"[iaea, pointed, out, that, training, and, educ..."
1,?,see.v,8,"[aid, official, development, assistance, (, od..."
2,?,line.n,39,"[she, would, appreciate, receiving, informatio..."
3,?,keep.v,42,"[we, look, forward, to, its, eventual, assessm..."
4,?,national.a,57,"[in, his, report, to, the, general, assembly, ..."


start out simple! ignore position, see it as a document classification problem

In [14]:
from collections import Counter

def count_word_frequencies(YOUR_FILE, ENCODING):
    
    freqs = Counter()
    with open(YOUR_FILE, encoding = ENCODING) as f:
        for line in f:
            tokens = line.lower().split()
            for token in tokens:
                freqs[token] += 1
                
    return freqs

In [15]:
def map_to_int(docs:list()) -> (list(), int, Counter()):
    
    '''
    Function from assignment 2.
    
    Create bag of words from cleaned and smaller corpus.
    Associate each word in bag with an unique integer,
    ranging from 0 (most common word) to length of bag of words.
    Map each token in docs to the respective int. Return this list of list of ints.
    '''
    
    freqs = Counter()
    for doc in docs:
        for token in doc:
            freqs[token] += 1
    most_common = freqs.most_common()
    
    token_to_int = []
    for i in range(len(most_common)):
        token_to_int.append(most_common[i][0])

    # Get pairs of elements    
    mapping = zip(token_to_int, range(0,len(token_to_int)))
    
    # Make pairs into a dictionary
    vocab = dict(mapping)
        
    # Match token to int
    docs_int = list()
    
    for doc in docs:
        docs_int.append(list([vocab.get(x) for x in doc]))
        
    return docs_int, len(vocab), vocab

## Start with just one lemma - positive

In [17]:
df_pos = df[df.Lemma == "positive.a"]
df_pos.shape

(1216, 4)

In [None]:
df_pos.Sense_key.value_counts()

In [None]:
df_pos.head()

Will onehot encode the sense key. This makes the most sense.

In [None]:
for i, val in enumerate(df_pos.Sense_key.unique()):

    col_name = "Onehot_sense_" + str(i)
    df_pos[col_name] = df.loc[:, "Sense_key"] == val
    df_pos[col_name] = df_pos[col_name].astype(int)

In [None]:
df_pos.head(2)

In [None]:
max_len = 0

for item in df_pos.Text_int:
    
    if len(item) > max_len:
        max_len = len(item)
        
max_len

In [None]:
for i in range(len(df_pos.Text_int)):
    
    diff = max_len - len(df_pos.Text_int.iloc[i]) 
    
    vec = df_pos.Text_int.iloc[i]
    
    
    
    df_pos.Text_int.iloc
    
    print(diff)
    
    break

In [None]:
max_len - len(df_pos.Text_int.iloc[0])

vec = df_pos.Text_int.iloc[0]

append = -1*np.ones(diff)

vec

In [None]:
# want a training accuracy score for each network

Could learn representation as I go, but there's not a lot of examples per unique sense_key, in some cases...
Also, this is a pain. Since the WSD texts appear to be generic enough, pretrained GloVe vectors should be ok. 
Do I use these as an initial guess or what? Also, GloVe only encodes one word at a time - so do I apply a context window myself as well?

for a 50-dim embedding of a 100-word document, we get a $100*50$ matrix. Seems to make sense to run a CNN over this! 

output layer size should depend on the number of distinct senses for each lemma, so this is a lemma-by-lemma approach

or try summing up all the vectors to create one representation for the entire document, then input it into a deep neural net of size 50. however this is silly and a RNN is better, can then have feedback in time if we input one word at a time.

# Classify using a CNN

## Parameters

In [134]:
from dataclasses import dataclass

@dataclass
class Parameters:

    # Preprocessing parameeters
    n_outputs: int = None
    seq_len: int = None
    num_words: int = 6000

    # Model parameters
    embedding_size: int = 64
    out_size: int = 32
    stride: int = 2

    # Training parameters
    epochs: int = 3
    batch_size: int = 12
    learning_rate: float = 0.001

## TextClassifier

In [131]:
class TextClassifier(nn.ModuleList):

    def __init__(self, params):
        super(TextClassifier, self).__init__()

        # Parameters regarding text preprocessing
        self.seq_len = params.seq_len
        self.num_words = params.num_words
        self.embedding_size = params.embedding_size

        # Dropout definition
        self.dropout = nn.Dropout(0.25)

        # CNN parameters definition
        # Kernel sizes
        self.kernel_1 = 2
        self.kernel_2 = 3
        self.kernel_3 = 4
        self.kernel_4 = 5

        # Output size for each convolution
        self.out_size = params.out_size
        # Number of strides for each convolution
        self.stride = params.stride

        # Embedding layer definition
        self.embedding = nn.Embedding(self.num_words + 1, self.embedding_size, padding_idx=0)

        # Convolution layers definition
        self.conv_1 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_1, self.stride)
        self.conv_2 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_2, self.stride)
        self.conv_3 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_3, self.stride)
        self.conv_4 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_4, self.stride)

        # Max pooling layers definition
        self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
        self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
        self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
        self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)

        # Fully connected layer definition
        self.fc = nn.Linear(self.in_features_fc(), 1)


    def in_features_fc(self):
        '''Calculates the number of output features after Convolution + Max pooling

        Convolved_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1
        Pooled_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1

        source: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
        '''
        
        # Calculate size of convolved/pooled features for convolution_1/max_pooling_1 features
        out_conv_1 = ((self.embedding_size - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
        out_conv_1 = math.floor(out_conv_1)
        out_pool_1 = ((out_conv_1 - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
        out_pool_1 = math.floor(out_pool_1)

        # Calcualte size of convolved/pooled features for convolution_2/max_pooling_2 features
        out_conv_2 = ((self.embedding_size - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
        out_conv_2 = math.floor(out_conv_2)
        out_pool_2 = ((out_conv_2 - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
        out_pool_2 = math.floor(out_pool_2)

        # Calcualte size of convolved/pooled features for convolution_3/max_pooling_3 features
        out_conv_3 = ((self.embedding_size - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
        out_conv_3 = math.floor(out_conv_3)
        out_pool_3 = ((out_conv_3 - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
        out_pool_3 = math.floor(out_pool_3)

        # Calcualte size of convolved/pooled features for convolution_4/max_pooling_4 features
        out_conv_4 = ((self.embedding_size - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
        out_conv_4 = math.floor(out_conv_4)
        out_pool_4 = ((out_conv_4 - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
        out_pool_4 = math.floor(out_pool_4)

        # Returns "flattened" vector (input for fully connected layer)
        return (out_pool_1 + out_pool_2 + out_pool_3 + out_pool_4) * self.out_size



    def forward(self, x):

        # Sequence of tokes is filtered through an embedding layer
        x = self.embedding(x)

        # Convolution layer 1 is applied
        x1 = self.conv_1(x)
        x1 = torch.relu(x1)
        x1 = self.pool_1(x1)

        # Convolution layer 2 is applied
        x2 = self.conv_2(x)
        x2 = torch.relu((x2))
        x2 = self.pool_2(x2)

        # Convolution layer 3 is applied
        x3 = self.conv_3(x)
        x3 = torch.relu(x3)
        x3 = self.pool_3(x3)

        # Convolution layer 4 is applied
        x4 = self.conv_4(x)
        x4 = torch.relu(x4)
        x4 = self.pool_4(x4)

        # The output of each convolutional layer is concatenated into a unique vector
        union = torch.cat((x1, x2, x3, x4), 2)
        union = union.reshape(union.size(0), -1)

        # The "flattened" vector is passed through a fully connected layer
        out = self.fc(union)
        # Dropout is applied
        out = self.dropout(out)
        # Activation function is applied
        out = torch.sigmoid(out)

        if len(out) > 1:
            return out.squeeze()
        else:
            return out

## Run

In [132]:
class DatasetMaper(Dataset):

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

class Run:
    '''Training, evaluation and metrics calculation'''

    @staticmethod
    def train(model, data, params):

        # Initialize dataset maper
        train = DatasetMaper(data['x_train'], data['y_train'])
        test = DatasetMaper(data['x_test'], data['y_test'])

        # Initialize loaders
        loader_train = DataLoader(train, batch_size=params.batch_size)
        loader_test = DataLoader(test, batch_size=params.batch_size)

        # Define optimizer
        optimizer = optim.RMSprop(model.parameters(), lr=params.learning_rate)

        # Starts training phase
        for epoch in range(params.epochs):
            # Set model in training model
            model.train()
            predictions = []
            # Starts batch training
            for x_batch, y_batch in loader_train:

                y_batch = y_batch.type(torch.FloatTensor)

                # Feed the model
                y_pred = model(x_batch.long())

                # Loss calculation
                loss = F.binary_cross_entropy(y_pred, y_batch)

                # Clean gradientes
                optimizer.zero_grad()

                # Gradients calculation
                loss.backward()

                # Gradients update
                optimizer.step()

                # Save predictions
                predictions += list(y_pred.detach().numpy())

            # Evaluation phase
            test_predictions = Run.evaluation(model, loader_test)
            
            # Metrics calculation
            train_accuary = Run.calculate_accuracy(data['y_train'], predictions)
            test_accuracy = Run.calculate_accuracy(data['y_test'], test_predictions)
            print("Epoch: %d, loss: %.3f, Train accuracy: %.3f, Test accuracy: %.3f" % (epoch+1, loss.item(), train_accuary, test_accuracy))

    @staticmethod
    def evaluation(model, loader_test):

        # Set the model in evaluation mode
        model.eval()
        predictions = []

        # Starst evaluation phase
        with torch.no_grad():
            for x_batch, y_batch in loader_test:
                y_pred = model(x_batch.long())
                predictions += list(y_pred.detach().numpy())
        return predictions
        
    @staticmethod
    def calculate_accuracy(grand_truth, predictions):
        # Metrics calculation - will need to change this! 
        true_positives = 0
        true_negatives = 0
        for true, pred in zip(grand_truth, predictions):
            if (pred >= 0.5) and (true == 1):
                true_positives += 1
            elif (pred < 0.5) and (true == 0):
                true_negatives += 1
            else:
                pass
        # Return accuracy
        return (true_positives+true_negatives) / len(grand_truth)

# Controller

In [135]:
class Controller(Parameters):

    def __init__(self):
        # Preprocessing pipeline
        self.data, seq_len, n_outputs = self.prepare_data(df_pos, Parameters.num_words, Parameters.seq_len)
        Parameters.seq_len = seq_len
        Parameters.n_outputs = n_outputs

        # Initialize the model
        self.model = TextClassifier(Parameters)

        # Training - Evaluation pipeline
        Run().train(self.model, self.data, Parameters)

    @staticmethod
    def prepare_data(df, num_words, seq_len):
        # Preprocessing pipeline
        pr = Preprocessing(df, num_words, seq_len)
        pr.load_data()
        pr.build_vocabulary()
        pr.word_to_idx()
        pr.find_seq_len()
        pr.padding_sentences()
        pr.split_data()

        return {'x_train': pr.x_train, 'y_train': pr.y_train, 'x_test': pr.x_test, 'y_test': pr.y_test}, pr.seq_len, pr.n_outputs

if __name__ == '__main__':
    controller = Controller()

Epoch: 1, loss: 0.694, Train accuracy: 0.572, Test accuracy: 0.602
Epoch: 2, loss: 0.224, Train accuracy: 0.777, Test accuracy: 0.602
Epoch: 3, loss: 0.002, Train accuracy: 0.851, Test accuracy: 0.652


## Troubleshooting

In [61]:
def prepare_data(df, num_words, seq_len):
    # Preprocessing pipeline
    pr = Preprocessing(df, num_words, seq_len)
    pr.load_data()
    pr.build_vocabulary()
    pr.word_to_idx()
    pr.find_seq_len()
    pr.padding_sentences()
    pr.split_data()

    return {'x_train': pr.x_train, 'y_train': pr.y_train, 'x_test': pr.x_test, 'y_test': pr.y_test}

data = prepare_data(df_pos, num_words = 6000, seq_len = 224)

In [62]:
data["x_train"].shape

(542, 224)

In [63]:
params = Parameters()
model = TextClassifier(Parameters)

# Initialize dataset maper
train = DatasetMaper(data['x_train'], data['y_train'])
test = DatasetMaper(data['x_test'], data['y_test'])

# Initialize loaders
loader_train = DataLoader(train, batch_size=16)
loader_test = DataLoader(test, batch_size=16)

# Define optimizer
optimizer = optim.RMSprop(model.parameters(), lr=params.learning_rate)

# Starts training phase
for epoch in range(params.epochs):
    # Set model in training model
    model.train()
    predictions = []
    # Starts batch training
    for x_batch, y_batch in loader_train:

        y_batch = y_batch.type(torch.FloatTensor)
        
        # Feed the model
        y_pred = model(x_batch.long())

        # Loss calculation
        loss = F.binary_cross_entropy(y_pred, y_batch)

        # Clean gradientes
        optimizer.zero_grad()

        # Gradients calculation
        loss.backward()

        # Gradients update
        optimizer.step()

        # Save predictions
        predictions += list(y_pred.detach().numpy())
        
    # Evaluation phase
    test_predictions = Run.evaluation(model, loader_test)

    # Metrics calculation
    train_accuracy = Run.calculate_accuracy(data['y_train'], predictions)
    test_accuracy = Run.calculate_accuracy(data['y_test'], test_predictions)
    print("Epoch: %d, loss: %.5f, Train accuracy: %.5f, Test accuracy: %.5f" % (epoch+1, loss.item(), train_accuracy, test_accuracy))
    

16
16
16
16
16
16
16
16
16
16
16
5
Epoch: 1, loss: 0.69262, Train accuracy: 0.52583, Test accuracy: 0.47514
16
16
16
16
16
16
16
16
16
16
16
5
Epoch: 2, loss: 0.46222, Train accuracy: 0.70295, Test accuracy: 0.59116
16
16
16
16
16
16
16
16
16
16
16
5
Epoch: 3, loss: 0.36723, Train accuracy: 0.82288, Test accuracy: 0.51381


In [27]:
len(test_predictions)

181