In [2]:
import numpy as np 
import pandas as pd 
import os
import spacy
import string
import re
import numpy as np
from spacy.symbols import ORTH
from collections import Counter
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np
from collections import defaultdict
import re
from sklearn.metrics import confusion_matrix

## Read file into DataFrame

In [3]:
# read the file into DataFrame
df = pd.read_csv('./CAMEO_IDEA_labeled_data.csv')

# separate content and label
text = df['Content']
labels = df['Category Code']

## Tokenize the text

In [4]:
# funtion tokenize sentence
tokenizer = spacy.load("en_core_web_sm")
stopwords = tokenizer.Defaults.stop_words
# tokenize, lemmatize the text, drop punctuations and stopwords
tokenize = lambda t: [token.lemma_ for token in tokenizer(t) if (not token.is_punct) and (not token.is_stop)]

# only tokenize the text
# tokenize = lambda t: [token.text for token in tokenizer(t)]

In [5]:
# build dictionary <key=word : value=count>
cnt = Counter()
size = text.size
for idx in range(size):
    for word in tokenize(text[idx]):
        cnt[word] += 1

In [6]:
counter = dict(cnt)

In [18]:
counter
c = 0
for key in counter:
    if c < 10:
        print (key, counter[key])
    c += 1

Diehard 1
Croat 4
fighter 4
surrender 2
serbian 14
force 71
Monday 62
86-day 1
siege 7
bosnian 16


### Optional

In [20]:
# filter out low-frequency word
min_threshold = 1
count = {x: count for x, count in cnt.items() if count >= min_threshold}

In [21]:
# filter out high-frequency word
# max_threshold = 1
# count = {x: count for x, count in cnt.items() if count <= max_threshold}

In [22]:
count
c = 0
for key in count:
    if c < 10:
        print (key, count[key])
    c += 1

Diehard 1
Croat 4
fighter 4
surrender 2
serbian 14
force 71
Monday 62
86-day 1
siege 7
bosnian 16


## Split dataset into train set and test set

In [23]:
X = np.array(text)
y = np.array(labels)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y)

## Prepare for word embedding

In [24]:
# download glove dictionary
# def download_glove():
#     ! wget http://nlp.stanford.edu/data/glove.6B.zip
#     ! unzip glove.6B.zip -C data
    
# download_glove()
# ! unzip glove.6B.zip

In [25]:
# load word embedding dictionary (<key=word : value=vector>)
def load_embedding_dict():
    embeddings_dict = {}
    with open("glove.6B.50d.txt", 'r') as file:
        for line in file:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], "float32")
            embeddings_dict[word] = vector
    return embeddings_dict

glove_dic = load_embedding_dict()

In [35]:
glove_dic
c=0
for key in glove_dic:
    if c < 3:
        print (key,': ', glove_dic[key])
    c += 1

the :  [ 4.1800e-01  2.4968e-01 -4.1242e-01  1.2170e-01  3.4527e-01 -4.4457e-02
 -4.9688e-01 -1.7862e-01 -6.6023e-04 -6.5660e-01  2.7843e-01 -1.4767e-01
 -5.5677e-01  1.4658e-01 -9.5095e-03  1.1658e-02  1.0204e-01 -1.2792e-01
 -8.4430e-01 -1.2181e-01 -1.6801e-02 -3.3279e-01 -1.5520e-01 -2.3131e-01
 -1.9181e-01 -1.8823e+00 -7.6746e-01  9.9051e-02 -4.2125e-01 -1.9526e-01
  4.0071e+00 -1.8594e-01 -5.2287e-01 -3.1681e-01  5.9213e-04  7.4449e-03
  1.7778e-01 -1.5897e-01  1.2041e-02 -5.4223e-02 -2.9871e-01 -1.5749e-01
 -3.4758e-01 -4.5637e-02 -4.4251e-01  1.8785e-01  2.7849e-03 -1.8411e-01
 -1.1514e-01 -7.8581e-01]
, :  [ 0.013441  0.23682  -0.16899   0.40951   0.63812   0.47709  -0.42852
 -0.55641  -0.364    -0.23938   0.13001  -0.063734 -0.39575  -0.48162
  0.23291   0.090201 -0.13324   0.078639 -0.41634  -0.15428   0.10068
  0.48891   0.31226  -0.1252   -0.037512 -1.5179    0.12612  -0.02442
 -0.042961 -0.28351   3.5416   -0.11956  -0.014533 -0.1499    0.21864
 -0.33412  -0.13872   0.3180

In [37]:
# create dictionaries(<key=word : value=index number>) (<key=word : value=vector>)
def create_embedding_matrix(count,emb_size=50):
    size = len(count) + 2
    word_idx_dict = {}
    word_vec = np.zeros((size, emb_size), dtype="float32")
    
    # add padding and UNK keyword
    word_idx_dict[""] = 0
    word_vec[0] = np.zeros(emb_size, dtype='float32')
    word_idx_dict["UNK"] = 1
    word_vec[1] = np.random.uniform(-0.25, 0.25, emb_size)

    for i, word in enumerate(count.keys()):
        word_idx_dict[word] = i + 2

        if word in glove_dic:
            word_vec[i + 2] = glove_dic[word]
        else:
            word_vec[i + 2] = np.random.uniform(-0.25,0.25, emb_size)

    return word_idx_dict, word_vec
    
word_idx_dict, pretrained_weight = create_embedding_matrix(count)

In [39]:
word_idx_dict
c=0
for key in glove_dic:
    if c < 5:
        print (key,': ', glove_dic[key])
    c += 1

the :  [ 4.1800e-01  2.4968e-01 -4.1242e-01  1.2170e-01  3.4527e-01 -4.4457e-02
 -4.9688e-01 -1.7862e-01 -6.6023e-04 -6.5660e-01  2.7843e-01 -1.4767e-01
 -5.5677e-01  1.4658e-01 -9.5095e-03  1.1658e-02  1.0204e-01 -1.2792e-01
 -8.4430e-01 -1.2181e-01 -1.6801e-02 -3.3279e-01 -1.5520e-01 -2.3131e-01
 -1.9181e-01 -1.8823e+00 -7.6746e-01  9.9051e-02 -4.2125e-01 -1.9526e-01
  4.0071e+00 -1.8594e-01 -5.2287e-01 -3.1681e-01  5.9213e-04  7.4449e-03
  1.7778e-01 -1.5897e-01  1.2041e-02 -5.4223e-02 -2.9871e-01 -1.5749e-01
 -3.4758e-01 -4.5637e-02 -4.4251e-01  1.8785e-01  2.7849e-03 -1.8411e-01
 -1.1514e-01 -7.8581e-01]
, :  [ 0.013441  0.23682  -0.16899   0.40951   0.63812   0.47709  -0.42852
 -0.55641  -0.364    -0.23938   0.13001  -0.063734 -0.39575  -0.48162
  0.23291   0.090201 -0.13324   0.078639 -0.41634  -0.15428   0.10068
  0.48891   0.31226  -0.1252   -0.037512 -1.5179    0.12612  -0.02442
 -0.042961 -0.28351   3.5416   -0.11956  -0.014533 -0.1499    0.21864
 -0.33412  -0.13872   0.3180

In [43]:
pretrained_weight[:3]

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.07017878, -0.04507172, -0.14554831, -0.09171732,  0.13337933,
         0.02589959,  0.2437299 ,  0.06947749,  0.14554648, -0.01943418,
         0.22567232,  0.03857403,  0.05386337,  0.24910834, -0.16631441,
         0.03747147,  0.23714148,  0.09276155,  0.

## Prepare for encoding sentence

In [100]:
def encode_sentence(line, word_idx_dict, N=400, padding_start=True):
    tokens = tokenize(line)
    enc = np.zeros(N, dtype=np.int32)
    enc1 = np.array([word_idx_dict.get(word, word_idx_dict["UNK"]) for word in tokens])
    length = min(N, len(enc1))
    if padding_start:
        enc[:length] = enc1[:length]
    else:
        enc[N - length:] = enc1[:length]
    return enc, length

## Build DataSet and DataLoader for model

In [101]:
class EventDataset(Dataset):
    def __init__(self, X, y, N=40, padding_start=False):
        self.y = y
        self.X = [encode_sentence(line, word_idx_dict, N, padding_start) for line in X]
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        x, s = self.X[idx]
        return x, s, self.y[idx]

In [119]:
X_train[0]

"Kuwaiti resistance fighters staged a suicide bomb attack on Iraqi targets, Kuwait's ambassador to France said on Friday."

In [120]:
encode_sentence(X_train[0],word_idx_dict,400,False)

(array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0

In [102]:
train_ds = EventDataset(X_train, y_train)
valid_ds = EventDataset(X_val, y_val)
train_dl = DataLoader(train_ds, batch_size=30, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=30)

In [103]:
valid_ds[1]

(array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,  147,  113, 1080, 1081, 1082, 1083,  222,
         259,   69,  335,  193, 1084, 1085, 1086], dtype=int32), 14, 0)

In [104]:
next(iter(valid_dl))

[tensor([[   0,    0,    0,  ..., 1745, 1746, 1747],
         [   0,    0,    0,  ..., 1084, 1085, 1086],
         [   0,    0,    0,  ...,  646,   57,  349],
         ...,
         [   0,    0,    0,  ..., 1063, 1935, 1609],
         [   0,    0,    0,  ..., 1099,  259,   88],
         [   0,    0,    0,  ...,  110,   57,   88]], dtype=torch.int32),
 tensor([16, 14, 22, 15, 24, 16, 10, 12,  7, 15, 13,  6, 11, 14, 15, 15, 11,  5,
         16, 16, 20, 18, 13, 19, 12, 16,  7, 15, 20, 15]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0])]

## Training loop

In [105]:
def update_optimizer(optimizer, lr):
    for i, param_group in enumerate(optimizer.param_groups):
        param_group["lr"] = lr

In [106]:
def train_epocs(model, optimizer, train_dl, val_dl, epochs=10):
    global max_acc
    for i in range(epochs):
        model.train()
        sum_loss = 0.0
        total = 0
        for x, s, y in train_dl:
            # s is not used in this model
            x = x.long() 
            y = y.long() 
            y_pred = model(x)           
            optimizer.zero_grad()
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()*y.shape[0]
            total += y.shape[0]
        val_loss, val_acc = val_metrics(model, val_dl)
        if val_acc > max_acc:
            max_acc = val_acc
#         if i % 5 == 0:
        print("train loss %.3f val loss %.3f and val accuracy %.3f" % (sum_loss/total, val_loss, val_acc))

## Evaluation

In [107]:
from sklearn.metrics import *
f1 = False
def val_metrics(model, val_dl):
    global f1
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    for x, s, y in val_dl:
        x = x.long()  #.cuda()
        y = y.long()
        batch = y.shape[0]
#         print(y.size())
        out = model(x)
        
        loss = F.cross_entropy(out, y)
        sum_loss += batch*(loss.item())
        total += batch
        _, pred = torch.max(out, 1) 
#         print(pred.numpy())
#         print(y.data.numpy())
        if f1 == False:
            f1_weighted = f1_score(y.data.numpy(), pred.numpy(), average='weighted')
            print('f1', f1_weighted)
            f1 = True
        correct += (pred == y.data).float().sum().item()
    val_loss = sum_loss/total
    val_acc = correct/total
    return val_loss, val_acc

## Model



In [108]:
class GRUModel(torch.nn.Module) :
    def __init__(self, vocab_size, embedding_dim, hidden_dim, glove_weights=None) :
        super(GRUModel,self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        if glove_weights is not None:
            self.embeddings.weight.data.copy_(torch.from_numpy(glove_weights))
            self.embeddings.weight.requires_grad = False ## freeze embeddings
            
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 3)
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = self.embeddings(x)
        x = self.dropout(x)
        out_pack, ht = self.gru(x)
        x = self.linear(ht[-1])
        return x

In [118]:
x, s, y = next(iter(valid_dl))
embeddings = nn.Embedding(vocab_size, 50, padding_idx=0)
embeddings(x.long())


tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [-1.5964, -1.3766,  1.4420,  ..., -1.7100, -1.3228,  2.1490],
         [-0.1428, -1.4456,  0.1120,  ..., -0.3541, -1.7548, -0.3913],
         [ 2.2461, -2.0732, -0.7869,  ...,  0.2735, -2.7883, -0.0315]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 1.5526, -0.0541, -0.2870,  ...,  1.3877,  1.8290, -0.1891],
         [-0.3558, -1.4996, -1.7340,  ...,  0.4983, -1.2700,  0.3907],
         [-0.7816, -1.0165,  1.7426,  ..., -0.4021,  0.3671, -1.8707]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0

In [111]:
x, s, y = next(iter(valid_dl))
out = model(x.long())
_, pred = torch.max(out, 1) 
pred
pred == y.data

tensor([ True,  True,  True,  True,  True,  True,  True,  True, False, False,
         True,  True,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True])

In [110]:
vocab_size = len(word_idx_dict)
model = GRUModel(vocab_size, 50, 50*(i+1), glove_weights = pretrained_weight)
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.01)
max_acc = 0
train_epocs(model, optimizer, train_dl, valid_dl, epochs=60)
print(max_acc)

f1 0.908395061728395
train loss 0.643 val loss 0.543 and val accuracy 0.809
train loss 0.444 val loss 0.526 and val accuracy 0.829
train loss 0.432 val loss 0.599 and val accuracy 0.819
train loss 0.406 val loss 0.549 and val accuracy 0.824
train loss 0.396 val loss 0.543 and val accuracy 0.814
train loss 0.351 val loss 0.517 and val accuracy 0.829
train loss 0.348 val loss 0.526 and val accuracy 0.834
train loss 0.331 val loss 0.577 and val accuracy 0.824
train loss 0.283 val loss 0.571 and val accuracy 0.824
train loss 0.269 val loss 0.530 and val accuracy 0.809
train loss 0.272 val loss 0.644 and val accuracy 0.794
train loss 0.248 val loss 0.539 and val accuracy 0.819
train loss 0.266 val loss 0.668 and val accuracy 0.824
train loss 0.224 val loss 0.613 and val accuracy 0.809
train loss 0.221 val loss 0.769 and val accuracy 0.829
train loss 0.204 val loss 0.714 and val accuracy 0.799
train loss 0.207 val loss 0.691 and val accuracy 0.814
train loss 0.185 val loss 0.983 and val accu

### hyperparameter tuning

In [53]:
for i in range(4): 
    vocab_size = len(word_idx_dict)
    model = GRUModel(vocab_size, 50, 50*(i+1), glove_weights = pretrained_weight)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=0.01)
    max_acc = 0
    train_epocs(model, optimizer, train_dl, valid_dl, epochs=60)
    print(max_acc)
    
    vocab_size = len(word_idx_dict)
    model = GRUModel(vocab_size, 50, 50*(i+1), glove_weights = pretrained_weight)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=0.1)
    max_acc = 0
    train_epocs(model, optimizer, train_dl, valid_dl, epochs=60)
    print(max_acc)

f1 0.7493333333333332
train loss 0.653 val loss 0.470 and val accuracy 0.804
train loss 0.440 val loss 0.435 and val accuracy 0.819
train loss 0.348 val loss 0.511 and val accuracy 0.829
train loss 0.347 val loss 0.497 and val accuracy 0.814
train loss 0.281 val loss 0.551 and val accuracy 0.799
train loss 0.251 val loss 0.538 and val accuracy 0.824
train loss 0.257 val loss 0.531 and val accuracy 0.819
train loss 0.246 val loss 0.593 and val accuracy 0.819
train loss 0.234 val loss 0.632 and val accuracy 0.824
train loss 0.255 val loss 0.623 and val accuracy 0.799
train loss 0.222 val loss 0.602 and val accuracy 0.799
train loss 0.211 val loss 0.572 and val accuracy 0.804
0.8391959798994975
train loss 0.870 val loss 0.553 and val accuracy 0.739
train loss 0.893 val loss 0.805 and val accuracy 0.729
train loss 0.771 val loss 0.720 and val accuracy 0.744
train loss 0.705 val loss 0.735 and val accuracy 0.774
train loss 0.693 val loss 0.727 and val accuracy 0.734
train loss 0.661 val los

### decreasing learning rate

In [305]:
model = GRUModel1(vocab_size, 50, 50, 50, glove_weights = pretrained_weight)
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.1)
max_acc = 0
train_epocs(model, optimizer, train_dl, valid_dl, epochs=30)

update_optimizer(optimizer, 0.005)
train_epocs(model, optimizer, train_dl, valid_dl, epochs=30)



train loss 2.593 val loss 1.238 and val accuracy 0.719
train loss 0.598 val loss 0.486 and val accuracy 0.814
train loss 0.608 val loss 0.690 and val accuracy 0.724
train loss 0.653 val loss 0.570 and val accuracy 0.769
train loss 0.832 val loss 0.779 and val accuracy 0.729
train loss 0.807 val loss 0.647 and val accuracy 0.754
train loss 0.694 val loss 0.774 and val accuracy 0.744
train loss 0.547 val loss 0.667 and val accuracy 0.739
train loss 0.520 val loss 0.638 and val accuracy 0.754
train loss 0.502 val loss 0.668 and val accuracy 0.714
train loss 0.523 val loss 0.849 and val accuracy 0.648
train loss 0.516 val loss 0.762 and val accuracy 0.744


In [306]:
max_acc

0.8241206030150754

## Model with two linear layer

In [301]:
class GRUModel1(torch.nn.Module) :
    def __init__(self, vocab_size, embedding_dim, hidden_dim, hidden_dim1, glove_weights=None) :
        super(GRUModel1,self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        if glove_weights is not None:
            self.embeddings.weight.data.copy_(torch.from_numpy(glove_weights))
            self.embeddings.weight.requires_grad = False ## freeze embeddings
            
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, hidden_dim1)
        self.linear1 = nn.Linear(hidden_dim1, 3)
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = self.embeddings(x)
        x = self.dropout(x)
        out_pack, ht = self.gru(x)
        x = self.linear(ht[-1])
        x = self.linear1(x)
        return x

### hyperparameter tuning

In [302]:
for i in range(4):
    model1 = GRUModel1(vocab_size, 50, 50*(i+1), 50, glove_weights = pretrained_weight)
    parameters1 = filter(lambda p: p.requires_grad, model1.parameters())
    optimizer1 = torch.optim.Adam(parameters1, lr=0.01)
    max_acc = 0
    train_epocs(model1, optimizer1, train_dl, valid_dl, epochs=30)
    print(max_acc)
    
    model1 = GRUModel1(vocab_size, 50, 50*(i+1), 50, glove_weights = pretrained_weight)
    parameters1 = filter(lambda p: p.requires_grad, model1.parameters())
    optimizer1 = torch.optim.Adam(parameters1, lr=0.1)
    max_acc = 0
    train_epocs(model1, optimizer1, train_dl, valid_dl, epochs=30)
    print(max_acc)

train loss 0.704 val loss 0.526 and val accuracy 0.804
train loss 0.447 val loss 0.468 and val accuracy 0.814
train loss 0.376 val loss 0.480 and val accuracy 0.834
train loss 0.326 val loss 0.612 and val accuracy 0.819
train loss 0.309 val loss 0.519 and val accuracy 0.794
train loss 0.284 val loss 0.502 and val accuracy 0.839
0.8391959798994975
train loss 2.369 val loss 0.861 and val accuracy 0.744
train loss 0.680 val loss 0.678 and val accuracy 0.714
train loss 0.980 val loss 1.079 and val accuracy 0.653
train loss 3.440 val loss 1.982 and val accuracy 0.739
train loss 0.842 val loss 0.928 and val accuracy 0.749
train loss 0.767 val loss 0.737 and val accuracy 0.754
0.7587939698492462
train loss 0.738 val loss 0.568 and val accuracy 0.754
train loss 0.437 val loss 0.504 and val accuracy 0.794
train loss 0.360 val loss 0.509 and val accuracy 0.814
train loss 0.306 val loss 0.616 and val accuracy 0.779
train loss 0.271 val loss 0.666 and val accuracy 0.774
train loss 0.225 val loss 0