# Evaluation Method:
Since an example has no absolute label, only the *more probable* of two candidate hypotheses, simple binary accuracy is well suited for this task.

In [None]:
class Accuracy:
    """
    A class representing a simple accuracy metric.
    """
    def __init__(self):
        pass

    def binary_accuracy(self, gold_labels, predicted_labels):
        """
        Calculate the percentage of positions at which two label sequences agree.

        Args:
            gold_labels: ground truth labels
            predicted_labels: predictions from the model, aligned with gold_labels

        Returns:
            float: accuracy as a percentage between 0.0 and 100.0

        Raises:
            ValueError: if the sequences differ in length or are empty
        """
        total = len(gold_labels)
        if total != len(predicted_labels):
            raise ValueError(
                "gold_labels and predicted_labels must have the same length "
                "({} != {})".format(total, len(predicted_labels)))
        if total == 0:
            # Accuracy over zero examples is undefined; fail loudly instead of
            # surfacing a bare ZeroDivisionError.
            raise ValueError("cannot compute accuracy of empty label lists")

        correct = sum(1 for gold, pred in zip(gold_labels, predicted_labels) if gold == pred)
        return correct / float(total) * 100.0

Test the accuracy implementation.

In [None]:
# Sanity check for the metric: 8 of the 10 predictions below match the gold
# labels, so the printed value should be 80.0.
gold = [0] * 5 + [1] * 5
predicted = [0, 1, 0, 0, 0, 1, 0, 1, 1, 1]
accuracy_metric = Accuracy()
accuracy = accuracy_metric.binary_accuracy(gold, predicted)
print(accuracy)

80.0


# FileReader implemented below:


In [None]:
import pandas as pd
import csv
import json

class FileReader:
  """
  Class-method helpers that load TSV and JSON-lines files, either into a
  pandas DataFrame or into a plain Python list.
  """

  @classmethod
  def read_tsv_into_pandas(cls, file_path, column_names, header=None, index_col=False, delimiter="\t"):
    """
    Reads a delimited text file into a pandas dataframe. All columns are kept.

    Args:
        file_path: path for the train.tsv
        column_names: column names to be assigned to the dataframe
        header: row number to use as header - default: None
        index_col: column to use as row index - default: False
        delimiter: column separator - default: \\t

    Returns:
        pandas DataFrame
    """
    return pd.read_csv(file_path, header=header, index_col=index_col, delimiter=delimiter, names=column_names)

  @classmethod
  def read_tsv(cls, file_path, quotechar=None, delimiter="\t"):
    """
    Reads a delimited text file into a python list.

    Args:
        file_path: path for the train.tsv
        quotechar: character to quote fields containing special characters - default: None
        delimiter: column separator - default: \\t

    Returns:
        python list with one list of string fields per row
    """
    # newline='' lets the csv module do its own newline handling (needed for
    # quoted fields that contain line breaks); the encoding is pinned so the
    # result does not depend on the platform default.
    with open(file_path, 'r', newline='', encoding='utf-8') as f:
      return list(csv.reader(f, delimiter=delimiter, quotechar=quotechar))

  @classmethod
  def read_jsonl_into_pandas(cls, file_path, lines=True):
    """
    Reads jsonl into a pandas dataframe. All columns are kept.

    Args:
        file_path: path for train.jsonl
        lines: whether the file holds one json object per line - default: True

    Returns:
        pandas DataFrame
    """
    return pd.read_json(file_path, lines=lines)

  @classmethod
  def read_jsonl(cls, file_path, quotechar=None):
    """
    Reads jsonl into a python list.

    Args:
        file_path: path for the train.jsonl
        quotechar: unused; kept so existing callers keep working

    Returns:
        python list with one decoded json object per non-blank line
    """
    records = []
    with open(file_path, 'rb') as f:
      for raw_line in f:
        # A blank (e.g. trailing) line is not a json document; skip it
        # instead of letting json.loads raise.
        if not raw_line.strip():
          continue
        records.append(json.loads(raw_line))

    return records

# PrepareData class to linguistically preprocess the input data:

In [None]:
import re
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords') 

class PrepareData:
  """
  A class to preprocess the data.
  """
  def __init__(self, columns=None):
    # initialize stopwords, lemmatizer and stemmer once; they are reused by
    # every preprocess_text call
    self.stopwords = set(stopwords.words('english'))
    self.lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()
    self.stemmer = nltk.stem.porter.PorterStemmer()
    # set the columns if the input is pandas dataframe (first column is dropped);
    # the attribute always exists so later reads cannot raise AttributeError
    self.columns = columns[1:] if columns is not None else None

  def preprocess_text(self, text, flg_clean=False, flg_stemm=False, flg_lemm=False, stopwords=None):
    """
    Processes a text with the given conditions.

    Args:
        text: text to be preprocessed (converted with str())
        flg_clean: whether to remove non-word characters
        flg_stemm: whether to stem
        flg_lemm: whether to lemmatize
        stopwords: collection of stopwords to be removed, or None to keep all tokens

    Returns:
        list of preprocessed tokens
    """
    text = str(text)

    # remove anything that is not a word character or whitespace
    if flg_clean:
      text = re.sub(r'[^\w\s]', '', text)

    # strip, lowercase and whitespace-tokenize
    tokenized = text.strip().lower().split()

    # remove stopwords (tokens are already lowercased at this point)
    if stopwords is not None:
      tokenized = [word for word in tokenized if word not in stopwords]

    # Stemming (remove -ing, -ly, ...)
    if flg_stemm:
      tokenized = [self.stemmer.stem(word) for word in tokenized]

    # Lemmatisation (convert the word into root form)
    # NOTE(review): only the 'stopwords' corpus is downloaded next to this class;
    # the WordNet lemmatizer presumably needs the 'wordnet' corpus as well - confirm.
    if flg_lemm:
      tokenized = [self.lemmatizer.lemmatize(word) for word in tokenized]

    return tokenized

  def preprocess_data(self, lst_data, flg_clean, flg_stemm, flg_lemm):
    """
    Preprocesses a list containing text, always removing self.stopwords.

    Args:
        lst_data: list to be preprocessed
        flg_clean: whether to remove non-word characters
        flg_stemm: whether to stem
        flg_lemm: whether to lemmatize

    Returns:
        list containing lists of preprocessed tokens
    """
    return [
        self.preprocess_text(line, flg_clean, flg_stemm, flg_lemm, self.stopwords)
        for line in lst_data
    ]

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
import pandas as pd
import json

class Data:
  """
  A class that represents nli data: two observations, two candidate
  hypotheses and an optional label.
  """

  def __init__(self, story_id, obs1, hypes, obs2, label=None):
    self.story_id = story_id
    self.obs1 = obs1
    self.hypes = hypes
    self.obs2 = obs2
    self.label = label

  def __repr__(self):
    """Creates printable representation of an nli example."""
    exp = ["story_id:\t{}".format(self.story_id),
           "obs1:\t{}".format(self.obs1)]
    for i, hyp in enumerate(self.hypes, 1):
      exp.append("hyp{}:\t{}".format(i, hyp))
    exp.append("obs2:\t{}".format(self.obs2))

    # the label line is only shown for labelled examples
    if self.label is not None:
      exp.append("label:\t{}".format(self.label))

    return "\n ".join(exp)

  def hypothesis_only(self):
    """Represents the hypothesis only version."""
    return {"hyp1": self.hypes[0],
            "hyp2": self.hypes[1],
            "label": self.label}

  # not wired into DataProcessor.create_example
  def obs1_hyp(self):
    """Represents the first observation + hypothesis version."""
    # str.join takes ONE iterable; passing two positional strings raised TypeError.
    return [
        {"part1": " ".join([self.obs1, self.hypes[0]])},
        {"part2": " ".join([self.obs1, self.hypes[1]])},
    ]

  def obs1_hyp_obs2(self):
    """Represents the first observation + hypothesis + second observation version."""
    return {"hyp1": self.obs1 + " " + self.hypes[0] + " " + self.obs2,
            "hyp2": self.obs1 + " " + self.hypes[1] + " " + self.obs2,
            "label": self.label}
  

class DataProcessor(FileReader):
  """
  Builds examples from a jsonl file (read through FileReader.read_jsonl) and an
  optional label file with one label per line. Every record must provide the
  keys 'story_id', 'obs1', 'obs2', 'hyp1' and 'hyp2'.
  """

  def __init__(self):
    pass

  def create_example(self, exp_list, input_type=None, labels=None,):
    """
    Creates examples for the splits.

    Args:
        exp_list: list of dict records
        input_type: None for raw Data objects, 'hyp-only' for
            Data.hypothesis_only() dicts, 'full-seq' for Data.obs1_hyp_obs2() dicts
        labels: labels aligned with exp_list, or None for unlabelled data

    Returns:
        list of examples in the requested representation

    Raises:
        ValueError: on an unknown input_type or when labels and exp_list differ in length
    """
    # An unknown input_type used to drop every example silently.
    if input_type not in (None, 'hyp-only', 'full-seq'):
      raise ValueError("Unknown input_type: {!r}".format(input_type))

    if labels is None:
      labels = [None] * len(exp_list)
    elif len(labels) != len(exp_list):
      # zip() would silently truncate and hide a misaligned label file.
      raise ValueError("Got {} labels for {} examples".format(len(labels), len(exp_list)))

    examples = []
    for exp, label in zip(exp_list, labels):
      data_raw = Data(story_id="%s" % (exp['story_id']),
                      obs1=exp['obs1'],
                      hypes=[exp['hyp1'], exp['hyp2']],
                      obs2=exp['obs2'],
                      label=label)

      if input_type == 'hyp-only':
        examples.append(data_raw.hypothesis_only())
      elif input_type == 'full-seq':
        examples.append(data_raw.obs1_hyp_obs2())
      else:
        examples.append(data_raw)

    return examples

  def get_labels(self, input_file: str):
    """Reads one label per line as stripped strings; blank lines are skipped."""
    with open(input_file, "rb") as f:
      return [line.decode().strip() for line in f if line.strip()]

  def _load_examples(self, file_path, input_type, labels_path):
    """Reads file_path (and labels_path when given) and builds the examples."""
    labels = self.get_labels(labels_path) if labels_path is not None else None
    return self.create_example(self.read_jsonl(file_path), input_type, labels)

  def create_binary_examples(self, file_path, input_type, labels_path=None):
    """
    Turns every labelled example into two binary examples: the hypothesis
    selected by the label gets 1, the other one gets 0. Examples whose label
    is neither 1 nor 2 are skipped.

    Args:
        file_path: path of the jsonl file
        input_type: 'hyp-only' or 'full-seq'
        labels_path: path of the label file (required here)

    Returns:
        tuple (binary_examples, binary_labels) of equally long lists

    Raises:
        ValueError: if labels_path or input_type is None
    """
    if labels_path is None:
      raise ValueError("create_binary_examples needs labels_path: the labels decide which hypothesis is positive")
    if input_type is None:
      raise ValueError("create_binary_examples needs input_type 'hyp-only' or 'full-seq'")

    examples = self._load_examples(file_path, input_type, labels_path)

    binary_examples = []
    binary_labels = []

    for example in examples:
      label = int(example['label'])
      if label == 1:
        positive, negative = example['hyp1'], example['hyp2']
      elif label == 2:
        positive, negative = example['hyp2'], example['hyp1']
      else:
        continue
      # positive first, negative second
      binary_examples.extend([positive, negative])
      binary_labels.extend([1, 0])

    return binary_examples, binary_labels

  def get_train_examples(self, file_path, input_type, labels_path=None):
    """Returns the training examples in the representation selected by input_type."""
    return self._load_examples(file_path, input_type, labels_path)

  def get_dev_examples(self, file_path, labels_path=None, input_type=None):
    """
    Returns the dev examples. input_type was referenced but never defined
    here, so it is now an optional trailing parameter (None = raw Data objects).
    """
    return self._load_examples(file_path, input_type, labels_path)

In [None]:
import pandas as pd
import json

class Data:
  """
  A class that represents nli data: two observations, two candidate
  hypotheses and an optional label.
  """

  def __init__(self, story_id, obs1, hypes, obs2, label=None):
    self.story_id = story_id
    self.obs1 = obs1
    self.hypes = hypes
    self.obs2 = obs2
    self.label = label

  def __repr__(self):
    """Creates printable representation of an nli example."""
    exp = ["story_id:\t{}".format(self.story_id),
           "obs1:\t{}".format(self.obs1)]
    for i, hyp in enumerate(self.hypes, 1):
      exp.append("hyp{}:\t{}".format(i, hyp))
    exp.append("obs2:\t{}".format(self.obs2))

    # the label line is only shown for labelled examples
    if self.label is not None:
      exp.append("label:\t{}".format(self.label))

    return "\n ".join(exp)

  def hypothesis_only(self):
    """Represents the hypothesis only version."""
    return {"hyp1": self.hypes[0],
            "hyp2": self.hypes[1],
            "label": self.label}

  # not wired into DataProcessor.create_example
  def obs1_hyp(self):
    """Represents the first observation + hypothesis version."""
    # str.join takes ONE iterable; passing two positional strings raised TypeError.
    return {"hyp1": " ".join([self.obs1, self.hypes[0]]),
            "hyp2": " ".join([self.obs1, self.hypes[1]])}

  def obs1_hyp_obs2(self):
    """Represents the first observation + hypothesis + second observation version."""
    return {"hyp1": self.obs1 + " " + self.hypes[0] + " " + self.obs2,
            "hyp2": self.obs1 + " " + self.hypes[1] + " " + self.obs2,
            "label": self.label}
  

class DataProcessor(FileReader):
  """
  Builds examples from a jsonl file (read through FileReader.read_jsonl) and an
  optional label file with one label per line. Every record must provide the
  keys 'story_id', 'obs1', 'obs2', 'hyp1' and 'hyp2'.
  """

  def __init__(self):
    pass

  def create_example(self, exp_list, input_type=None, labels=None,):
    """
    Creates examples for the splits.

    Args:
        exp_list: list of dict records
        input_type: None for raw Data objects, 'hyp-only' for
            Data.hypothesis_only() dicts, 'full-seq' for Data.obs1_hyp_obs2() dicts
        labels: labels aligned with exp_list, or None for unlabelled data

    Returns:
        list of examples in the requested representation

    Raises:
        ValueError: on an unknown input_type or when labels and exp_list differ in length
    """
    # An unknown input_type used to drop every example silently.
    if input_type not in (None, 'hyp-only', 'full-seq'):
      raise ValueError("Unknown input_type: {!r}".format(input_type))

    if labels is None:
      labels = [None] * len(exp_list)
    elif len(labels) != len(exp_list):
      # zip() would silently truncate and hide a misaligned label file.
      raise ValueError("Got {} labels for {} examples".format(len(labels), len(exp_list)))

    examples = []
    for exp, label in zip(exp_list, labels):
      data_raw = Data(story_id="%s" % (exp['story_id']),
                      obs1=exp['obs1'],
                      hypes=[exp['hyp1'], exp['hyp2']],
                      obs2=exp['obs2'],
                      label=label)

      if input_type == 'hyp-only':
        examples.append(data_raw.hypothesis_only())
      elif input_type == 'full-seq':
        examples.append(data_raw.obs1_hyp_obs2())
      else:
        examples.append(data_raw)

    return examples

  def get_labels(self, input_file: str):
    """Reads one label per line as stripped strings; blank lines are skipped."""
    with open(input_file, "rb") as f:
      return [line.decode().strip() for line in f if line.strip()]

  def _load_examples(self, file_path, input_type, labels_path):
    """Reads file_path (and labels_path when given) and builds the examples."""
    labels = self.get_labels(labels_path) if labels_path is not None else None
    return self.create_example(self.read_jsonl(file_path), input_type, labels)

  def create_binary_examples(self, file_path, input_type, labels_path=None):
    """
    Turns every labelled example into two binary examples: the hypothesis
    selected by the label gets 1, the other one gets 0. Examples whose label
    is neither 1 nor 2 are skipped.

    Args:
        file_path: path of the jsonl file
        input_type: 'hyp-only' or 'full-seq'
        labels_path: path of the label file (required here)

    Returns:
        tuple (binary_examples, binary_labels) of equally long lists

    Raises:
        ValueError: if labels_path or input_type is None
    """
    if labels_path is None:
      raise ValueError("create_binary_examples needs labels_path: the labels decide which hypothesis is positive")
    if input_type is None:
      raise ValueError("create_binary_examples needs input_type 'hyp-only' or 'full-seq'")

    examples = self._load_examples(file_path, input_type, labels_path)

    binary_examples = []
    binary_labels = []

    for example in examples:
      label = int(example['label'])
      if label == 1:
        positive, negative = example['hyp1'], example['hyp2']
      elif label == 2:
        positive, negative = example['hyp2'], example['hyp1']
      else:
        continue
      # positive first, negative second
      binary_examples.extend([positive, negative])
      binary_labels.extend([1, 0])

    return binary_examples, binary_labels

  def get_train_examples(self, file_path, input_type, labels_path=None):
    """Returns the training examples in the representation selected by input_type."""
    return self._load_examples(file_path, input_type, labels_path)

  def get_dev_examples(self, file_path, labels_path=None, input_type=None):
    """
    Returns the dev examples. input_type was referenced but never defined
    here, so it is now an optional trailing parameter (None = raw Data objects).
    """
    return self._load_examples(file_path, input_type, labels_path)

# Class to create GloVe word embeddings for training:

In [None]:
import os
import numpy as np
class Features:
  """
  A class for feature extraction.
  ...
  Param:
      embedding_file: GloVe embedding file (text format: word followed by its coefficients)
      lst_corpus: list of tokenized examples (each a list of tokens)
      embedding_dim: embedding dimensions
  """

  def __init__(self, embedding_file, lst_corpus, embedding_dim):
    self.embedding_file = embedding_file
    self.lst_corpus = lst_corpus
    self.embedding_dim = embedding_dim

  def get_word_index(self):
    """
    Creates a word index from the corpus.

    Indices are assigned in first-seen order, so repeated calls (and repeated
    runs) produce the same mapping.
    """
    word_index = {}
    for sentence in self.lst_corpus:
      for token in sentence:
        if token not in word_index:
          word_index[token] = len(word_index)

    return word_index

  def create_embedding_index(self):
    """
    Creates an embedding index (word -> float32 vector) from the embedding file.
    """
    embeddings_index = {}
    # 'with' closes the file even if a line fails to parse.
    with open(os.path.join('./', self.embedding_file), encoding='utf-8') as f:
      for line in f:
        values = line.split()
        if not values:
          continue  # tolerate blank lines
        embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

    return embeddings_index

  def _build_embedding_matrix(self, word_index):
    """Fills one matrix row per indexed word; unknown words stay all-zero."""
    embeddings_index = self.create_embedding_index()
    # One extra row is kept for compatibility with the original shape; word
    # indices run from 0 to len(word_index) - 1, so the last row is never written.
    embedding_matrix = np.zeros((len(word_index) + 1, self.embedding_dim))
    for word, i in word_index.items():
      embedding_vector = embeddings_index.get(word)
      if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

    return embedding_matrix

  def create_embedding_matrix(self):
    """
    Creates an embedding matrix given the word index and the embedding index.
    """
    return self._build_embedding_matrix(self.get_word_index())

  def create_features(self):
    """
    Creates the GloVe word embeddings for the corpus and averages them per
    sentence to obtain an averaged bag of words.

    Returns:
        list with one vector of length embedding_dim per sentence; a sentence
        without tokens yields an all-zero vector instead of NaN (0/0).
    """
    # Build the index once and reuse it for both the matrix and the lookups.
    word_index = self.get_word_index()
    embedding_matrix = self._build_embedding_matrix(word_index)

    sentence_embeddings = []
    for sentence in self.lst_corpus:
      if not sentence:
        sentence_embeddings.append(np.zeros(self.embedding_dim))
        continue
      rows = [word_index[token] for token in sentence]
      sentence_embeddings.append(embedding_matrix[rows].mean(axis=0))

    return sentence_embeddings

Download GloVe embeddings...

Unzip the file...

In [None]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove*.zip

--2021-05-26 16:08:26--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2021-05-26 16:08:26--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2021-05-26 16:08:26--  http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


2021-0

Download the data...

Unzip the file...

In [None]:
!wget https://storage.googleapis.com/ai2-mosaic/public/alphanli/alphanli-train-dev.zip
!unzip -d alphanli alphanli-train-dev.zip

--2021-05-26 16:11:37--  https://storage.googleapis.com/ai2-mosaic/public/alphanli/alphanli-train-dev.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.193.128, 172.217.204.128, 172.217.203.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.193.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5118294 (4.9M) [application/zip]
Saving to: ‘alphanli-train-dev.zip’


2021-05-26 16:11:38 (67.8 MB/s) - ‘alphanli-train-dev.zip’ saved [5118294/5118294]

Archive:  alphanli-train-dev.zip
  inflating: alphanli/train.jsonl    
  inflating: alphanli/train-labels.lst  
  inflating: alphanli/dev.jsonl      
  inflating: alphanli/dev-labels.lst  


Set the paths to the data...

In [None]:
# (examples file, labels file) for each split
train_file, train_labels = './alphanli/train.jsonl', './alphanli/train-labels.lst'
dev_file, dev_labels = './alphanli/dev.jsonl', './alphanli/dev-labels.lst'
test_file, test_labels = './alphanli/test.jsonl', './alphanli/test-labels.lst'

Get the examples and the corresponding labels...

In [None]:
data_processor = DataProcessor()

# Training data for the hypothesis-only experiment.
# NOTE: `train_labels` is the label-file path going in and is rebound to the
# list of binary labels coming out, so this cell cannot simply be re-run.
train_exp, train_labels = data_processor.create_binary_examples(
    file_path=train_file,
    input_type='hyp-only',
    labels_path=train_labels,
)

# Training data for the full-sequence experiment (obs1 + hypothesis + obs2):
# train_exp, train_labels = data_processor.create_binary_examples(file_path=train_file, input_type='full-seq', labels_path=train_labels)

In [None]:
# Dev data for the hypothesis-only experiment.
# NOTE: `dev_labels` is the label-file path going in and is rebound to the
# list of binary labels coming out.
dev_exp, dev_labels = data_processor.create_binary_examples(
    file_path=dev_file,
    input_type='hyp-only',
    labels_path=dev_labels,
)

# Dev data for the full-sequence experiment (obs1 + hypothesis + obs2):
# dev_exp, dev_labels = data_processor.create_binary_examples(file_path=dev_file, input_type='full-seq', labels_path=dev_labels)

In [None]:
# get the test data for hypothesis-only experiment
# (fixed: the path variable is `test_file`, and the input type is 'hyp-only')
# NOTE(review): the archive listing above shows only train/dev files, so
# ./alphanli/test.jsonl and its labels presumably have to be supplied separately.
test_exp, test_labels = data_processor.create_binary_examples(file_path=test_file, input_type='hyp-only', labels_path=test_labels)

# get the test data for the full-sequence experiment
# test_exp, test_labels = data_processor.create_binary_examples(file_path=test_file, input_type='full-seq', labels_path=test_labels)

Normalize the training examples...

In [None]:
# Tokenize the training hypotheses: strip punctuation, lowercase, drop
# stopwords; no stemming or lemmatization.
processor = PrepareData()
normalized_train = processor.preprocess_data(
    train_exp,
    flg_clean=True,
    flg_stemm=False,
    flg_lemm=False,
)

In [None]:
# Same normalization settings as for the training split.
normalized_dev = processor.preprocess_data(
    dev_exp,
    flg_clean=True,
    flg_stemm=False,
    flg_lemm=False,
)

In [None]:
# Same normalization settings as for the training split.
normalized_test = processor.preprocess_data(
    test_exp,
    flg_clean=True,
    flg_stemm=False,
    flg_lemm=False,
)

Create the averaged glove sentence embeddings...



*  Encode words
*  Average the words in the sequence to create final embeddings



In [None]:
# One extractor per split, all backed by the 100-dimensional GloVe vectors.
feature_extractor_train = Features(
    embedding_file='glove.6B.100d.txt',
    lst_corpus=normalized_train,
    embedding_dim=100,
)
feature_extractor_dev = Features(
    embedding_file='glove.6B.100d.txt',
    lst_corpus=normalized_dev,
    embedding_dim=100,
)
feature_extractor_test = Features(
    embedding_file='glove.6B.100d.txt',
    lst_corpus=normalized_test,
    embedding_dim=100,
)

In [None]:
# Build one averaged GloVe vector per sentence for every split.
# NOTE: each create_features() call reads the embedding file from disk again.
sentence_vectors_train = feature_extractor_train.create_features()
sentence_vectors_dev = feature_extractor_dev.create_features()
sentence_vectors_test = feature_extractor_test.create_features()



# Training Data:
* X_train
* y_train

# Development Data:
* X_dev
* y_dev

# Test Data:
* X_test
* y_test

In [None]:
# The label lists were bound to train_labels / dev_labels / test_labels by the
# create_binary_examples calls; labels_train / labels_dev / labels_test never existed.
X_train = sentence_vectors_train
y_train = train_labels

X_dev = sentence_vectors_dev
y_dev = dev_labels

X_test = sentence_vectors_test
y_test = test_labels

The training data is ordered.

Shuffle sentence_vectors and labels before training.


In [None]:
from sklearn.utils import shuffle
# Features and labels are shuffled together in one call so that every vector
# keeps its own label. No random_state is passed, so the order differs per run.
X_train, y_train = shuffle(X_train, y_train)
X_dev, y_dev = shuffle(X_dev, y_dev)

# Till here it's almost good to go. I only need to do one last thing tomorrow.

In [None]:
import numpy as np


class BaseLayer:
    """Identity layer that defines the forward / backward / update interface."""

    def __init__(self):
        """Nothing to set up: the base layer has no parameters."""

    def forward(self, input):
        """Return the input unchanged."""
        return input

    def backward(self, input, gradients):
        """Propagate `gradients` through the identity map.

        d(loss)/d(x) = gradients . d(layer)/d(x), where the Jacobian of the
        identity is the identity matrix sized by input.shape[1].
        """
        n_features = input.shape[1]
        identity_jacobian = np.identity(n_features)
        return np.dot(gradients, identity_jacobian)

    def update(self, weights_grads, bias_grads):
        """No parameters, so there is nothing to update."""
        return None


class Dense(BaseLayer):
    """Affine layer `input . weights + bias` that applies an SGD step inside backward().

    Args:
        shape: (n_inputs, n_outputs) of the weight matrix
        lr: learning rate used by update()
    """

    def __init__(self, shape, lr=0.01):
        n_outputs = shape[1]
        # Standard-normal draws scaled by sqrt(2 / shape[1]); the scaling
        # divides by the second (output) dimension of the weight matrix.
        self.weights = np.random.normal(loc=0, scale=1, size=shape)*np.sqrt(2/n_outputs)
        self.bias = np.zeros(n_outputs)
        self.lr = lr

    def forward(self, input):
        """Return the affine transform of `input`."""
        projected = np.dot(input, self.weights)
        return projected + self.bias

    def backward(self, input, gradients):
        """Update the parameters and return the gradient w.r.t. `input`.

        Chain rule: d f / d x = (d f / d layer) * (d layer / d x).
        """
        # Parameter gradients; mean * batch size is the column sum over the batch.
        grad_w = np.dot(input.T, gradients)
        grad_b = gradients.mean(axis=0)*input.shape[0]
        # Must be computed with the weights as they were during the forward
        # pass, i.e. before update() changes them.
        grad_input = np.dot(gradients, self.weights.T)
        self.update(grad_w, grad_b)
        return grad_input

    def update(self, weights_grads, bias_grads):
        """Plain gradient-descent step with learning rate self.lr."""
        self.weights = self.weights - self.lr * weights_grads
        self.bias = self.bias - self.lr * bias_grads


class ReLU(BaseLayer):
    """Element-wise rectified linear activation, max(0, x)."""

    def __init__(self):
        pass

    def forward(self, input):
        """Return `input` with negative entries clamped to zero."""
        return np.maximum(0, input)

    def backward(self, input, gradients):
        """Return the gradient of the loss w.r.t. `input`.

        The derivative of ReLU is 1 where input > 0 and 0 elsewhere, so the
        upstream `gradients` pass through only at positive inputs. The
        previous version returned `input * (input > 0)` and dropped
        `gradients`, so the layers before the activation never received the
        loss signal.
        """
        return gradients * (input > 0)


class MultiLayerPerceptron:
    """Sequential stack of layers trained with softmax cross-entropy."""

    def __init__(self):
        self.layers = list()

    def add_layer(self, shape, lr=0.01):
        """Append a Dense layer with the given (n_inputs, n_outputs) shape and learning rate."""
        self.layers.append(Dense(shape, lr=lr))

    def add_activation_func(self):
        """Append a ReLU activation."""
        self.layers.append(ReLU())

    def forward(self, input):
        """Run `input` through every layer.

        Returns:
            list with the output of each layer, in order (last item = logits)

        Raises:
            ValueError: if no layer has been added
        """
        if not self.layers:
            raise ValueError("Must have at least one layer!")

        intermediate_outputs = []
        this_input = input
        for layer in self.layers:
            this_input = layer.forward(this_input)
            intermediate_outputs.append(this_input)
        return intermediate_outputs

    def loss_n_grads(self, init_grads, y):
        """Softmax cross-entropy loss and its gradient w.r.t. the logits.

        Args:
            init_grads: logits of the last layer, indexed as [example, class]
            y: integer class index per example

        Returns:
            (loss, loss_grads): per-example loss and the gradient averaged
            over the batch (same shape as init_grads)
        """
        x_id = np.arange(len(init_grads))

        # Subtracting the row maximum leaves softmax and the loss unchanged
        # but keeps np.exp from overflowing on large logits.
        shifted = init_grads - np.max(init_grads, axis=-1, keepdims=True)
        exp_shifted = np.exp(shifted)
        # keepdims=True is required so the row sums broadcast against the rows.
        sum_exp = exp_shifted.sum(axis=-1, keepdims=True)

        # loss = -[golden logit] + log(SUM(exp(logits)))
        loss = - shifted[x_id, y] + np.log(sum_exp[:, 0])

        golden_labels = np.zeros(init_grads.shape, dtype=exp_shifted.dtype)
        golden_labels[x_id, y] = 1
        softmax = exp_shifted / sum_exp
        loss_grads = (softmax - golden_labels) / init_grads.shape[0]

        return loss, loss_grads

    def train(self, x, y):
        """Run one forward/backward pass on a batch.

        Each layer's backward() receives the input it saw in the forward pass
        and is expected to apply its own parameter update.

        Returns:
            float: mean loss of the batch (previously nothing was returned)
        """
        intermediate_outputs = self.forward(x)
        logits = intermediate_outputs[-1]
        loss, loss_grads = self.loss_n_grads(logits, y)

        # intermediate_inputs[i] is the input of layer i
        intermediate_inputs = [x] + intermediate_outputs
        for layer_id in reversed(range(len(self.layers))):
            loss_grads = self.layers[layer_id].backward(intermediate_inputs[layer_id], loss_grads)

        return float(np.mean(loss))

    def predict(self, x):
        """Return the index of the highest-scoring class per example."""
        return self.forward(x)[-1].argmax(axis=-1)

    def validate(self, x, y):
        """Return the fraction of examples whose prediction equals y."""
        return np.mean(self.predict(x) == y)

0.51953125
0.517578125
0.517578125
0.517578125
0.517578125
0.515625
0.515625
0.515625
0.515625
0.515625


# Training loop
##### The model is trained on the training set and evaluated on the dev set after every epoch; the dev accuracy drives early stopping.

In [None]:
from statistics import mean

# Generator function that yields the data one batch (numpy array) at a time
def make_batch(data, batch_size):
  """
  Groups the entries of `data` into arrays of `batch_size` items.

  Entries that are None are skipped. The final batch may be shorter than
  `batch_size`; an empty batch is never yielded.
  """
  pending = []
  for item in data:
    if item is None:
      continue
    pending.append(item)
    if len(pending) == batch_size:
      yield np.asarray(pending)
      pending = []
  # leftover entries form a last, shorter batch
  if pending:
    yield np.asarray(pending)

parameter_size = 150
max_epoch = 20
batch_size = 1024

# Patience: stop once the dev accuracy has not improved for this many consecutive epochs
early_stopping = 3


# Building the model: 2 dense layers with one ReLU activation in between
model = MultiLayerPerceptron()
model.add_layer((100, parameter_size))
model.add_activation_func()
model.add_layer((parameter_size, 2))

# The training loop
best_dev_acc = float("-inf")
epochs_without_improvement = 0
this_dev_acc = None
# range(1, max_epoch + 1) so that exactly max_epoch epochs can run
for e in range(1, max_epoch + 1):
  for X_train_batch, y_train_batch in zip(make_batch(X_train, batch_size), make_batch(y_train, batch_size)):
    model.train(X_train_batch, y_train_batch)

  # Weight every batch by its size: the last batch is usually smaller, so a
  # plain mean of per-batch accuracies would be biased.
  dev_correct = 0.0
  dev_seen = 0
  for X_dev_batch, y_dev_batch in zip(make_batch(X_dev, batch_size), make_batch(y_dev, batch_size)):
    dev_correct += model.validate(X_dev_batch, y_dev_batch) * len(y_dev_batch)
    dev_seen += len(y_dev_batch)
  this_dev_acc = dev_correct / dev_seen

  print("dev acc: ", this_dev_acc)

  # Early stopping on a running best instead of a window that resets every 3 epochs
  if this_dev_acc > best_dev_acc:
    best_dev_acc = this_dev_acc
    epochs_without_improvement = 0
  else:
    epochs_without_improvement += 1
    if epochs_without_improvement >= early_stopping:
      break

predict = model.predict(np.asarray(X_test))
acc = np.mean(predict == np.asarray(y_test))

# report the test accuracy (previously the accuracy of the last dev batch was printed)
print("acc: ", acc, "acc on dev:", this_dev_acc, "e: ", e)


0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
