<a href="https://colab.research.google.com/github/TongleiChen/colab_notebook/blob/main/a4_surname_classifier_lstm_template_0403.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## A4 - Surname Classifier

Author: Austin Blodgett

Adaptation to colab: Nitin Venkateswaran


### Follow the steps to use this notebook for your A4. 
**NOTE**: It is best to use your Georgetown Google accounts.
##### 1. Save a copy of this notebook starter template in your Google Drive (File -> Save a copy in drive)
##### 2. Upload the data file **surnames.csv** from the **surname-data** directory (available in a4.zip) to your Google Drive at **A4/surname-data/surnames.csv**; you will need to create the folder 'A4' at the root of your Drive, and the subfolder 'surname-data' inside it.
##### 3. You are all set!



### Import libraries and mount Google Drive





In [None]:
# Mount Google Drive under /content/drive so files stored there can be opened by path.
from google.colab import drive
drive.mount('/content/drive')


import os, random
import tensorflow as tf

from collections import Counter
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dense
from keras.regularizers import l2

import numpy as np
from keras import backend as K



Mounted at /content/drive


In [None]:
# Path to the surname CSV inside the mounted Google Drive.
data_file = '/content/drive/My Drive/A4/surname-data/surnames.csv'
# Reserved tokens used by the helper functions in this notebook.
UNK = '[UNK]'  # stands in for tokens that are missing from the vocabulary
PAD = '[PAD]'  # fills shorter sequences so a batch has one common length
START = '<s>'  # placed before the first character of every surname
END = '</s>'  # placed after the last character of every surname

### Implement this function if you want to transform the input text, e.g. normalizing case

In [None]:
# TODO
def transform_text_sequence(seq):
    '''
    Hook for optional normalisation of a token sequence (e.g. case folding).

    The default implementation is the identity: `seq` is handed back
    untouched. Replace the body to experiment with other transformations.
    '''
    return seq

### Helper Functions (no need to implement)

In [None]:
def get_vocabulary_and_data(data_file, split, max_vocab_size=None):
    '''
    Read one split of the surname CSV and build its character vocabulary.

    Every line of `data_file` is split on commas and is expected to provide
    at least three columns: split name, surname, label. Only rows whose first
    column equals `split` contribute to the returned data, labels and
    vocabulary counts, so a vocabulary built for one split never contains
    tokens that occur only in another split.

    Args:
        data_file: path to the UTF-8 encoded CSV file.
        split: value of the first column to select (e.g. 'train').
        max_vocab_size: optional cap on the vocabulary size, counting the two
            reserved UNK and PAD entries. None (or 0) keeps every token.

    Returns:
        A 4-tuple (vocab, label_set, data, labels):
            vocab     - dict mapping token -> integer id; UNK is 0, PAD is 1,
                        remaining tokens follow by descending frequency.
            label_set - set of the distinct labels seen in the split.
            data      - list of token sequences, each wrapped in START/END and
                        passed through transform_text_sequence.
            labels    - list of labels, parallel to `data`.
    '''
    counts = Counter()
    data = []
    labels = []
    with open(data_file, 'r', encoding='utf8') as f:
        for line in f:
            cols = line.split(',')
            if len(cols) < 3:
                # Blank or truncated row: there is nothing usable to read.
                continue
            s, surname, label = cols[0].strip(), cols[1].strip(), cols[2].strip()
            if s != split:
                continue
            tokens = transform_text_sequence([START] + list(surname) + [END])
            data.append(tokens)
            labels.append(label)
            # Count the transformed tokens so the vocabulary matches exactly
            # what is stored in `data`.
            counts.update(tokens)

    # most_common() orders by descending count and keeps first-seen order for ties.
    vocab = [tok for tok, _ in counts.most_common()]
    if max_vocab_size:
        # Two slots are reserved for UNK and PAD; never let the bound go negative.
        vocab = vocab[:max(max_vocab_size - 2, 0)]
    vocab = [UNK, PAD] + vocab

    return {tok: idx for idx, tok in enumerate(vocab)}, set(labels), data, labels


def vectorize_sequence(seq, vocab):
    '''
    Convert a sequence of tokens into their integer vocabulary ids.

    Tokens that are not keys of `vocab` are looked up under UNK instead.
    '''
    ids = []
    for tok in seq:
        key = tok if tok in vocab else UNK
        ids.append(vocab[key])
    return ids


def unvectorize_sequence(seq, vocab):
    '''
    Convert a sequence of integer ids back into tokens.

    Tokens are ranked by their id in `vocab`; each entry of `seq` indexes
    into that ranking.
    '''
    ranked = sorted(vocab.items(), key=lambda pair: pair[1])
    id_to_token = [tok for tok, _ in ranked]
    tokens = []
    for idx in seq:
        tokens.append(id_to_token[idx])
    return tokens


def one_hot_encode_label(label, label_set):
    '''
    Build an indicator vector for `label` over the entries of `label_set`.

    The result has one float per entry of `label_set`, in iteration order:
    1.0 where the entry equals `label`, 0.0 elsewhere.
    '''
    indicator = []
    for candidate in label_set:
        indicator.append(1.0 if candidate == label else 0.0)
    return np.array(indicator)


def batch_generator(data, labels, vocab, label_set, batch_size=1):
    '''
    Endlessly yield (inputs, targets) batches, cycling over the data.

    Each pass walks `data` and `labels` in order. Documents are vectorized
    with `vocab`, labels are one-hot encoded over `label_set`, and every batch
    is padded with the PAD id to the length of its own longest sequence.
    When the number of examples is not a multiple of `batch_size`, the
    leftover examples are yielded as a final, smaller batch instead of being
    dropped.

    Args:
        data: list of token sequences.
        labels: list of labels, parallel to `data`.
        vocab: dict mapping token -> integer id; must contain PAD.
        label_set: iterable of all possible labels.
        batch_size: number of examples per full batch.

    Yields:
        (batch_x, batch_y) as numpy arrays.

    Raises:
        ValueError: if there are no examples, since the generator could
            otherwise loop forever without producing a batch.
    '''
    if len(data) == 0 or len(labels) == 0:
        raise ValueError('batch_generator needs at least one example')

    while True:
        batch_x = []
        batch_y = []
        for doc, label in zip(data, labels):
            batch_x.append(vectorize_sequence(doc, vocab))
            batch_y.append(one_hot_encode_label(label, label_set))
            if len(batch_x) >= batch_size:
                # Pad Sequences in batch to same length
                batch_x = pad_sequences(batch_x, vocab[PAD])
                yield np.array(batch_x), np.array(batch_y)
                batch_x = []
                batch_y = []
        if batch_x:
            # Flush the remainder so the tail of the data is not lost on every pass.
            batch_x = pad_sequences(batch_x, vocab[PAD])
            yield np.array(batch_x), np.array(batch_y)


def describe_data(data, gold_labels, label_set, generator):
    '''
    Print a short summary of a dataset and of the first batch of `generator`.

    Shows the first example and its label, the number of labels, the number
    of examples, and the shapes of the first (inputs, targets) batch.
    '''
    nothing = object()
    first = next(iter(generator), nothing)
    if first is nothing:
        inputs, targets = [], []
    else:
        inputs, targets = first

    print('Data example:', data[0])
    print('Label:', gold_labels[0])
    print('Label count:', len(label_set))
    print('Data size', len(data))
    print('Batch input shape:', inputs.shape)
    print('Batch output shape:', targets.shape)


def pad_sequences(batch_x, pad_value):
    ''' Bring every sequence in a batch up to the length of the longest one.

        Shorter sequences get `pad_value` appended until they match. The
        entries of `batch_x` are replaced in place and the same list object is
        returned. `batch_x` is assumed to be a list of lists.
    '''
    longest = len(max(batch_x, key=len))
    for idx in range(len(batch_x)):
        shortfall = longest - len(batch_x[idx])
        if shortfall > 0:
            batch_x[idx] = batch_x[idx] + [pad_value] * shortfall

    return batch_x

### Change these arguments for the main procedure call as needed for your experiments

In [None]:
# Hyperparameters read by the model cells further down; later cells reassign some of them.
epochs = 15 # number of training epochs
learning_rate = 0.01 # learning rate handed to the Adam optimizer
dropout = 0.3 # dropout rate -- NOTE(review): no visible cell reads this value
early_stopping = -1 # early stopping criteria -- NOTE(review): no visible cell reads this value
embedding_size = 100 # embedding dimension size
hidden_size = 40 # number of LSTM units in each direction of the bidirectional layer
batch_size = 50 # number of training examples per batch

### Check that a GPU is available

In [None]:
# Ask TensorFlow which GPU it sees; use the CPU device string when it is not GPU 0.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print ('GPU Device found: ' + device_name)
else:
    device_name = '/cpu:0'
    no_gpu_notice = (
        '\n\n This notebook is not '
        'configured to use a GPU.  You can change this in Notebook Settings. Defaulting to:')
    print(no_gpu_notice + device_name)

GPU Device found: /device:GPU:0


In [None]:
# Load the three splits. The vocabulary and label set are kept from the
# 'train' call only; the ones returned for 'dev' and 'test' are discarded.
vocab, labels, train_data, train_labels = get_vocabulary_and_data(data_file, 'train')
_, _, dev_data, dev_labels = get_vocabulary_and_data(data_file, 'dev')
_, _, test_data, test_labels = get_vocabulary_and_data(data_file, 'test')

# Print the first training example and the shapes of the first training batch.
describe_data(train_data, train_labels, labels,
                  batch_generator(train_data, train_labels, vocab, labels, batch_size))

Data example: ['<s>', 'H', 'a', 'd', 'a', 'd', '</s>']
Label: arabic
Label count: 19
Data size 15000
Batch input shape: (50, 14)
Batch output shape: (50, 19)


In [None]:
# Number of vocabulary entries, including the reserved UNK and PAD tokens.
len(vocab)

91

### Main procedure call: Implement the keras model here

##### Use the variables batch_size, hidden_size, embedding_size, dropout, epochs here.

In [None]:
# Baseline run: reload the splits, then build and train a bidirectional LSTM
# classifier with the hyperparameters currently in scope.
vocab, labels, train_data, train_labels = get_vocabulary_and_data(data_file, 'train')
_, _, dev_data, dev_labels = get_vocabulary_and_data(data_file, 'dev')
_, _, test_data, test_labels = get_vocabulary_and_data(data_file, 'test')

describe_data(train_data, train_labels, labels,
                  batch_generator(train_data, train_labels, vocab, labels, batch_size))

with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Data example: ['<s>', 'H', 'a', 'd', 'a', 'd', '</s>']
Label: arabic
Label count: 19
Data size 15000
Batch input shape: (50, 14)
Batch output shape: (50, 19)
Epoch 1 / 10
Dev Loss: 1.0513139963150024 Dev Acc: 0.6803921461105347
Epoch 2 / 10
Dev Loss: 0.9270397424697876 Dev Acc: 0.7120915055274963
Epoch 3 / 10
Dev Loss: 0.8985798954963684 Dev Acc: 0.7313725352287292
Epoch 4 / 10
Dev Loss: 0.8560300469398499 Dev Acc: 0.7424836754798889
Epoch 5 / 10
Dev Loss: 0.8485093712806702 Dev Acc: 0.7490196228027344
Epoch 6 / 10
Dev Loss: 0.9118237495422363 Dev Acc: 0.7464052438735962
Epoch 7 / 10
Dev Loss: 0.8927451372146606 Dev Acc: 0.7486928105354309
Epoch 8 / 10
Dev Loss: 0.9032886624336243 Dev Acc: 0.7464052438735962
Epoch 9 / 10
Dev Loss: 0.9667800664901733 Dev Acc: 0.73562091588974
Epoch 10 / 10
Dev Loss: 0.9550936818122864 Dev Acc: 0.7457516193389893


In [None]:
# Rebuild and retrain the classifier with whatever hyperparameter values are
# currently in scope (this cell assigns none of them itself).
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 15
Dev Loss: 1.0541378259658813 Dev Acc: 0.6807189583778381
Epoch 2 / 15
Dev Loss: 0.8359115123748779 Dev Acc: 0.758169949054718
Epoch 3 / 15
Dev Loss: 0.7668155431747437 Dev Acc: 0.7738562226295471
Epoch 4 / 15
Dev Loss: 0.7777050733566284 Dev Acc: 0.7741830348968506
Epoch 5 / 15
Dev Loss: 0.7815014719963074 Dev Acc: 0.7781046032905579
Epoch 6 / 15
Dev Loss: 0.8094638586044312 Dev Acc: 0.7732025980949402
Epoch 7 / 15
Dev Loss: 0.8133890628814697 Dev Acc: 0.7754902243614197
Epoch 8 / 15
Dev Loss: 0.825677216053009 Dev Acc: 0.7702614665031433
Epoch 9 / 15
Dev Loss: 0.9067528247833252 Dev Acc: 0.758169949054718
Epoch 10 / 15
Dev Loss: 0.9188896417617798 Dev Acc: 0.7539215683937073
Epoch 11 / 15
Dev Loss: 0.9048445224761963 Dev Acc: 0.7679738402366638
Epoch 12 / 15
Dev Loss: 0.876818835735321 Dev Acc: 0.7683006525039673
Epoch 13 / 15
Dev Loss: 0.93780916929245 Dev Acc: 0.7601307034492493
Epoch 14 / 15
Dev Loss: 1.0113099813461304 Dev Acc: 0.7447712421417236
Epoch 15 / 15
Dev Los

In [None]:
# Test evaluation of the most recently trained `classifier`.
# The generator uses its default batch size of 1, so steps equals the test set size.
loss, acc = classifier.evaluate(batch_generator(test_data, test_labels, vocab, labels),
                                                  steps=len(test_data))
print('Test Loss:', loss, 'Test Acc:', acc)

Test Loss: 0.8794943690299988 Test Acc: 0.7735849022865295


In [None]:
# Experiment: larger embeddings. Other hyperparameters keep the values that
# earlier cells last assigned.
embedding_size = 300
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 10
Dev Loss: 0.9762228727340698 Dev Acc: 0.7117646932601929
Epoch 2 / 10
Dev Loss: 0.8609779477119446 Dev Acc: 0.7503268122673035
Epoch 3 / 10
Dev Loss: 0.8259142637252808 Dev Acc: 0.7516340017318726
Epoch 4 / 10
Dev Loss: 0.8497036695480347 Dev Acc: 0.7434640526771545
Epoch 5 / 10
Dev Loss: 0.8201887607574463 Dev Acc: 0.7647058963775635
Epoch 6 / 10
Dev Loss: 0.8267378807067871 Dev Acc: 0.7607843279838562
Epoch 7 / 10
Dev Loss: 0.9876015782356262 Dev Acc: 0.7163398861885071
Epoch 8 / 10
Dev Loss: 0.8884518146514893 Dev Acc: 0.7549019455909729
Epoch 9 / 10
Dev Loss: 0.9328904747962952 Dev Acc: 0.7359477281570435
Epoch 10 / 10
Dev Loss: 0.9595970511436462 Dev Acc: 0.7173202633857727


In [None]:
# Experiment: smaller embeddings. Other hyperparameters keep the values that
# earlier cells last assigned.
embedding_size = 50
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 10
Dev Loss: 1.1234782934188843 Dev Acc: 0.6679738759994507
Epoch 2 / 10
Dev Loss: 0.8221744894981384 Dev Acc: 0.7503268122673035
Epoch 3 / 10
Dev Loss: 0.7468716502189636 Dev Acc: 0.7833333611488342
Epoch 4 / 10
Dev Loss: 0.7561596035957336 Dev Acc: 0.7761437892913818
Epoch 5 / 10
Dev Loss: 0.7774919867515564 Dev Acc: 0.7777777910232544
Epoch 6 / 10
Dev Loss: 0.8074012398719788 Dev Acc: 0.7754902243614197
Epoch 7 / 10
Dev Loss: 0.791312575340271 Dev Acc: 0.7790849804878235
Epoch 8 / 10
Dev Loss: 0.8509447574615479 Dev Acc: 0.7728758454322815
Epoch 9 / 10
Dev Loss: 0.8828542828559875 Dev Acc: 0.7669934630393982
Epoch 10 / 10
Dev Loss: 0.9409745931625366 Dev Acc: 0.7650327086448669


In [None]:
# Experiment: 30 LSTM units per direction. Other hyperparameters keep the
# values that earlier cells last assigned.
hidden_size = 30
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 15
Dev Loss: 1.1574530601501465 Dev Acc: 0.6598039269447327
Epoch 2 / 15
Dev Loss: 0.9291821718215942 Dev Acc: 0.7303921580314636
Epoch 3 / 15
Dev Loss: 0.8615626096725464 Dev Acc: 0.7503268122673035
Epoch 4 / 15
Dev Loss: 0.8531039953231812 Dev Acc: 0.7588235139846802
Epoch 5 / 15
Dev Loss: 0.8435284495353699 Dev Acc: 0.7594771385192871
Epoch 6 / 15
Dev Loss: 0.8440449833869934 Dev Acc: 0.7676470875740051
Epoch 7 / 15
Dev Loss: 0.8510923385620117 Dev Acc: 0.7607843279838562
Epoch 8 / 15
Dev Loss: 0.8395876288414001 Dev Acc: 0.7738562226295471
Epoch 9 / 15
Dev Loss: 0.8502305150032043 Dev Acc: 0.7748365998268127
Epoch 10 / 15
Dev Loss: 0.8694010376930237 Dev Acc: 0.7647058963775635
Epoch 11 / 15
Dev Loss: 0.9132871031761169 Dev Acc: 0.7620915174484253
Epoch 12 / 15
Dev Loss: 0.9041827321052551 Dev Acc: 0.7653594613075256
Epoch 13 / 15
Dev Loss: 0.9185845851898193 Dev Acc: 0.7601307034492493
Epoch 14 / 15
Dev Loss: 0.8839916586875916 Dev Acc: 0.7722222208976746
Epoch 15 / 15
D

In [None]:
# Experiment: 20 LSTM units per direction. Other hyperparameters keep the
# values that earlier cells last assigned.
hidden_size = 20
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 10
Dev Loss: 1.1121164560317993 Dev Acc: 0.6905228495597839
Epoch 2 / 10
Dev Loss: 0.9851279258728027 Dev Acc: 0.7137255072593689
Epoch 3 / 10
Dev Loss: 0.8797189593315125 Dev Acc: 0.7310457229614258
Epoch 4 / 10
Dev Loss: 0.8448327779769897 Dev Acc: 0.741830050945282
Epoch 5 / 10
Dev Loss: 0.8373358845710754 Dev Acc: 0.7470588088035583
Epoch 6 / 10
Dev Loss: 0.8354207873344421 Dev Acc: 0.7490196228027344
Epoch 7 / 10
Dev Loss: 0.8483107089996338 Dev Acc: 0.7473856210708618
Epoch 8 / 10
Dev Loss: 0.8585145473480225 Dev Acc: 0.7369281053543091
Epoch 9 / 10
Dev Loss: 0.8604316115379333 Dev Acc: 0.7513071894645691
Epoch 10 / 10
Dev Loss: 0.8955186605453491 Dev Acc: 0.73562091588974


In [None]:
# Experiment: 50 LSTM units per direction. Other hyperparameters keep the
# values that earlier cells last assigned.
hidden_size = 50
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 10
Dev Loss: 0.9867905378341675 Dev Acc: 0.7160130739212036
Epoch 2 / 10
Dev Loss: 0.8662500381469727 Dev Acc: 0.7405228614807129
Epoch 3 / 10
Dev Loss: 0.7619409561157227 Dev Acc: 0.780718982219696
Epoch 4 / 10
Dev Loss: 0.7713531255722046 Dev Acc: 0.7843137383460999
Epoch 5 / 10
Dev Loss: 0.7657081484794617 Dev Acc: 0.786928117275238
Epoch 6 / 10
Dev Loss: 0.8102098107337952 Dev Acc: 0.7741830348968506
Epoch 7 / 10
Dev Loss: 0.7914313077926636 Dev Acc: 0.7843137383460999
Epoch 8 / 10
Dev Loss: 0.7982662320137024 Dev Acc: 0.7813725471496582
Epoch 9 / 10
Dev Loss: 0.836898922920227 Dev Acc: 0.785620927810669
Epoch 10 / 10
Dev Loss: 0.8652098178863525 Dev Acc: 0.7839869260787964


In [None]:
# Experiment: 10 LSTM units per direction. Other hyperparameters keep the
# values that earlier cells last assigned.
hidden_size = 10
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 10
Dev Loss: 1.2415069341659546 Dev Acc: 0.6552287340164185
Epoch 2 / 10
Dev Loss: 1.1990325450897217 Dev Acc: 0.6503267884254456
Epoch 3 / 10
Dev Loss: 1.1019408702850342 Dev Acc: 0.6784313917160034
Epoch 4 / 10
Dev Loss: 1.0539747476577759 Dev Acc: 0.699999988079071
Epoch 5 / 10
Dev Loss: 1.011244535446167 Dev Acc: 0.719281017780304
Epoch 6 / 10
Dev Loss: 1.0510516166687012 Dev Acc: 0.7042483687400818
Epoch 7 / 10
Dev Loss: 1.1069016456604004 Dev Acc: 0.6970587968826294
Epoch 8 / 10
Dev Loss: 1.126950740814209 Dev Acc: 0.706209123134613
Epoch 9 / 10
Dev Loss: 1.0814073085784912 Dev Acc: 0.7058823704719543
Epoch 10 / 10
Dev Loss: 1.0511503219604492 Dev Acc: 0.706209123134613


In [None]:
# Experiment: 60 LSTM units per direction. Other hyperparameters keep the
# values that earlier cells last assigned.
hidden_size = 60
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 10
Dev Loss: 1.0102802515029907 Dev Acc: 0.6983659863471985
Epoch 2 / 10
Dev Loss: 0.7423787713050842 Dev Acc: 0.7764706015586853
Epoch 3 / 10
Dev Loss: 0.7139737010002136 Dev Acc: 0.7833333611488342
Epoch 4 / 10
Dev Loss: 0.7313960790634155 Dev Acc: 0.7885621190071106
Epoch 5 / 10
Dev Loss: 0.7374964356422424 Dev Acc: 0.7901960611343384
Epoch 6 / 10
Dev Loss: 0.8051390647888184 Dev Acc: 0.7797385454177856
Epoch 7 / 10
Dev Loss: 0.8218370079994202 Dev Acc: 0.7810457348823547
Epoch 8 / 10
Dev Loss: 0.865903913974762 Dev Acc: 0.7709150314331055
Epoch 9 / 10
Dev Loss: 0.8693708181381226 Dev Acc: 0.7797385454177856
Epoch 10 / 10
Dev Loss: 0.8779428601264954 Dev Acc: 0.7722222208976746


In [None]:
# Experiment: 50 LSTM units per direction, 300-dimensional embeddings, 8 epochs.
hidden_size = 50
embedding_size = 300
epochs = 8
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 8
Dev Loss: 1.0146058797836304 Dev Acc: 0.6839869022369385
Epoch 2 / 8
Dev Loss: 0.9734741449356079 Dev Acc: 0.6990196108818054
Epoch 3 / 8
Dev Loss: 0.895188570022583 Dev Acc: 0.7313725352287292
Epoch 4 / 8
Dev Loss: 0.8395711779594421 Dev Acc: 0.7526143789291382
Epoch 5 / 8
Dev Loss: 0.8439297676086426 Dev Acc: 0.7503268122673035
Epoch 6 / 8
Dev Loss: 0.860573947429657 Dev Acc: 0.7486928105354309
Epoch 7 / 8
Dev Loss: 0.8529453873634338 Dev Acc: 0.7408496737480164
Epoch 8 / 8
Dev Loss: 0.867561936378479 Dev Acc: 0.7421568632125854


In [None]:
# Experiment: 50 LSTM units per direction, 150-dimensional embeddings, 8 epochs.
hidden_size = 50
embedding_size = 150
epochs = 8
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 8
Dev Loss: 0.9532268643379211 Dev Acc: 0.70686274766922
Epoch 2 / 8
Dev Loss: 0.7966799139976501 Dev Acc: 0.7699346542358398
Epoch 3 / 8
Dev Loss: 0.7741916179656982 Dev Acc: 0.772549033164978
Epoch 4 / 8
Dev Loss: 0.7494708895683289 Dev Acc: 0.7774509787559509
Epoch 5 / 8
Dev Loss: 0.7674776911735535 Dev Acc: 0.7846405506134033
Epoch 6 / 8
Dev Loss: 0.7811238765716553 Dev Acc: 0.7790849804878235
Epoch 7 / 8
Dev Loss: 0.8074911236763 Dev Acc: 0.7810457348823547
Epoch 8 / 8
Dev Loss: 0.8151692748069763 Dev Acc: 0.7872549295425415


In [None]:
# Experiment: 50 LSTM units per direction, 50-dimensional embeddings, 8 epochs.
hidden_size = 50
embedding_size = 50
epochs = 8
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 8
Dev Loss: 1.000499963760376 Dev Acc: 0.7052287459373474
Epoch 2 / 8
Dev Loss: 0.8405497074127197 Dev Acc: 0.7486928105354309
Epoch 3 / 8
Dev Loss: 0.7820004820823669 Dev Acc: 0.7718954086303711
Epoch 4 / 8
Dev Loss: 0.769624650478363 Dev Acc: 0.7689542770385742
Epoch 5 / 8
Dev Loss: 0.7728553414344788 Dev Acc: 0.7797385454177856
Epoch 6 / 8
Dev Loss: 0.7979559302330017 Dev Acc: 0.7761437892913818
Epoch 7 / 8
Dev Loss: 0.7971059083938599 Dev Acc: 0.7846405506134033
Epoch 8 / 8
Dev Loss: 0.8424361944198608 Dev Acc: 0.7846405506134033


In [None]:
# Experiment: 50 LSTM units per direction, 100-dimensional embeddings, 5 epochs.
hidden_size = 50
embedding_size = 100
epochs = 5
with tf.device(device_name):
    # Implement your model here! ----------------------------------------------------------------------
    # Use the variables batch_size, hidden_size, embedding_size, dropout, epochs
    classifier = tf.keras.Sequential()
    input_size = len(vocab)
    output_size = len(labels)
    # input_length is omitted: it names the sequence length, not the batch size,
    # and each batch is padded only to its own longest sequence.
    classifier.add(tf.keras.layers.Embedding(input_size, embedding_size))
    # Take the recurrent layers from tf.keras so every layer comes from the same
    # Keras namespace as the Sequential container.
    classifier.add(tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_size, return_sequences=False)))
    classifier.add(tf.keras.layers.Dense(output_size, activation='softmax'))
    optimizer = tf.optimizers.Adam(learning_rate = learning_rate)

    # ------------------------------------------------------------------------------------------------

    classifier.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


    for i in range(epochs):
        print('Epoch',i+1,'/',epochs)
        # Training: a fresh generator per epoch; steps_per_epoch must be an integer.
        classifier.fit(batch_generator(train_data, train_labels, vocab, labels, batch_size=batch_size),
                                  epochs=1, steps_per_epoch=len(train_data) // batch_size)
        # Evaluation: one example per batch, so the step count equals the dev set size.
        loss, acc = classifier.evaluate(batch_generator(dev_data, dev_labels, vocab, labels),
                                                  steps=len(dev_data))
        print('Dev Loss:', loss, 'Dev Acc:', acc)


Epoch 1 / 5
Dev Loss: 1.004434585571289 Dev Acc: 0.7084967494010925
Epoch 2 / 5
Dev Loss: 0.8302643299102783 Dev Acc: 0.7539215683937073
Epoch 3 / 5
Dev Loss: 0.7627842426300049 Dev Acc: 0.7751634120941162
Epoch 4 / 5
Dev Loss: 0.7540210485458374 Dev Acc: 0.7866013050079346
Epoch 5 / 5
Dev Loss: 0.7621514797210693 Dev Acc: 0.7820261716842651


In [None]:
# Test evaluation of the most recently trained `classifier`.
# The generator uses its default batch size of 1, so steps equals the test set size.
loss, acc = classifier.evaluate(batch_generator(test_data, test_labels, vocab, labels),
                                                  steps=len(test_data))
print('Test Loss:', loss, 'Test Acc:', acc)

Test Loss: 0.7449893355369568 Test Acc: 0.7835153937339783
