In [1]:
#from utils import *

import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
import re
from os import mkdir
from os.path import join, isfile, isdir, exists
import bcolz
import pickle 
import emoji
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from pattern.en import spelling
from tqdm import tqdm
import ast

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload

## Word Embedings : GloVe
This Class Loads the GloVe Embeding, processes it, and create a word embedding given the DataLoader.

In [46]:
class GloVe_embedding(object):
    def __init__(self,dim_vect = 25 ):
        ########## VARIABLES ##########
        self.dim_vect = dim_vect

        # Defining variables for GloVe: 
        self.words = []
        self.word2idx = {}
        self.glove_dict = {}
        
        ########## LOADING GLOVE DATA ##########
        
        # Defining path for GloVe Data : 
        self.path = join('..','data','glove') # Path of glove
        self.path_glove = join(self.path,'glove.twitter.27B.'+str(dim_vect))
        if not(isdir(self.path_glove)):
            mkdir(self.path_glove)
        self.path_vec_original = join(self.path,'glove.twitter.27B.'+str(dim_vect)+'d.txt') # Path of glove original vectors
        self.path_vec_save = join(self.path_glove,'glove.twitter.27B.'+str(dim_vect)+'d.vectors.dat')  # Path of glove saved vectors
        self.path_words = join(self.path_glove,'glove.twitter.27B.'+str(dim_vect)+'d.words.pkl')
        self.path_word2idx = join(self.path_glove,'glove.twitter.27B.'+str(dim_vect)+'d.word2idx.pkl')
                
        if not(isdir(self.path_vec_save) and isfile(self.path_words) and isfile(self.path_word2idx)) : 
            # If files are allready processed, just load them
            print('---- Processing the GloVe files : ',end='')
            self.process_GloVe()
            print('Done')
            
        # Load the wordvec files
        print('---- Loading the processed GloVe files : ',end='')
        self.load_GloVe()
        print('Done')
        
        ########## TORCH EMBEDDING ##########
        
        # Defining variables for our Embedding:
        self.size_vocab = len(self.words)
        
        # Creating the Pytorch Embedding Layer : 
        print('---- Creating the Pytorch Embedding Layer  : ',end='')
        self.emb_layer = nn.Embedding(self.size_vocab, self.dim_vect)
        self.create_emb_layer(non_trainable=True)
        print('Done')

               
    def process_GloVe(self):
        ''' Processes the GloVe Dataset - Saves files'''
        words = []
        word2idx = {}
        
        vectors = bcolz.carray(np.zeros(1) , rootdir=self.path_vec_save , mode='w' ) # defining vector saved
        
        # Adding Padding vector : 
        word2idx['<pad>'] = 0
        words.append('<pad>')
        #vect = np.random.normal(scale=0.6, size=(self.dim_vect , )) # random padding vect
        vect = np.zeros((self.dim_vect , )) # 0's padding vect. 
        vectors.append(vect)
        
        idx = 1
        with open(self.path_vec_original, 'rb') as f:
            for l in f:
                line = l.decode().split()
                word = line[0]
                words.append(word)
                word2idx[word] = idx
                idx += 1
                vect = np.array(line[1:]).astype(np.float)
                vectors.append(vect)
                

        vectors = bcolz.carray(vectors[:].reshape((-1, self.dim_vect)), rootdir=self.path_vec_save, mode='w')

        vectors.flush()
        pickle.dump(words, open(self.path_words, 'wb'))
        pickle.dump(word2idx, open(self.path_word2idx, 'wb'))
        
    def load_GloVe(self):
        ''' Loads previously processed dataset'''
        
        vectors = bcolz.open(self.path_vec_save)[:]
        
        self.words = pickle.load(open(self.path_words, 'rb'))
        self.word2idx = pickle.load(open(self.path_word2idx, 'rb'))
        
        self.glove_dict = {w: vectors[self.word2idx[w]] for w in self.words}
        self.emb_matrix = torch.Tensor(vectors)
    
    def create_emb_layer(self, non_trainable=True):
        self.emb_layer.load_state_dict({'weight': self.emb_matrix})
        if non_trainable:
            self.emb_layer.weight.requires_grad = False
        


In [69]:
myEmbedding = GloVe_embedding(dim_vect=25)

---- Loading the processed GloVe files : Done
---- Creating the Pytorch Embedding Layer  : Done


## Data Loader
This Class Loads the Tweet Dataset, Cleans it. It also enables the loading for the training and testing. 
TODO : Loading for training and testing

In [5]:
class TestTweetDataset(Dataset):
    def __init__(self,data, subtask):
        data_  = data
        
        self.id = data_.index.tolist()
        self.token = data_.token.tolist()
        self.token_id = data_.token_id.tolist()
        
        
    def __getitem__(self, index):
        return torch.LongTensor(self.token_id[index]), torch.FloatTensor([self.id[index]])

    def __len__(self):
        return len(self.token) 
    
class TrainTweetDataset(Dataset):
    def __init__(self,data, subtask ):
        
          # Adapt to each subtask : 
        if subtask == 'subtask_a' :
            label_id_n = 'labelA'
            self.classes_dict = {'NOT' : 0 ,'OFF' : 1}
            data_ = data
            
        elif subtask == 'subtask_b' :
            label_id_n = 'labelB'
            self.classes_dict = {'UNT' : 0 ,'TIN' : 1}
            data_ = data.loc[(data['subtask_b'] == 'UNT') | (data['subtask_b'] == 'TIN') ]
            
        elif subtask == 'subtask_c' :
            label_id_n = 'labelC'
            self.classes_dict = {'IND' : 0 ,'OTH' : 1, 'GRP' : 2}
            data_ = data.loc[(data['subtask_c'] == 'IND') | (data['subtask_c'] == 'OTH') | (data['subtask_c'] == 'GRP') ]
            
        data_[label_id_n] = data_[subtask].apply(lambda x : self.classes_dict[x])  
        self.id = data_.index.tolist()
        self.label = data_[subtask].tolist()
        self.label_id = data_[label_id_n].tolist()
        self.token = data_.token.tolist()
        self.token_id = data_.token_id.tolist()
        
    def __getitem__(self, index):
        return torch.LongTensor(self.token_id[index]), torch.FloatTensor([self.label_id[index]])

    def __len__(self):
        return len(self.token) 


    
class DataHandling(object):
    def __init__(self, embedding):
        
        self.embedding = embedding

        # All the Text Data path
        self.path = {}
        self.path_clean = {}
        
        self.path['train'] = join('..','data','start-kit','training-v1','offenseval-training-v1.tsv')
        self.path_clean['train'] = join('..','data','start-kit','training-v1','clean-offenseval-training-v1.tsv')
        
        self.path['subtask_a'] = join('..','data','Test A Release','testset-taska.tsv')
        self.path_clean['subtask_a'] = join('..','data','Test A Release','clean-testset-taska.tsv')
        
        self.path['subtask_b'] = join('..','data','Test B Release','testset-taskb.tsv')
        self.path_clean['subtask_b'] = join('..','data','Test B Release','clean-testset-taskb.tsv')
        
        self.path['subtask_c'] = join('..','data','Test C Release','test_set_taskc.tsv')
        self.path_clean['subtask_c'] = join('..','data','Test C Release','clean-test_set_taskc.tsv')
        
        self.data = {}
        
        if not(isfile(self.path_clean['train']) and isfile(self.path_clean['subtask_a']) and isfile(self.path_clean['subtask_b']) and isfile(self.path_clean['subtask_c']) ) : 
            ### PROCESSING OF THE ORIGINAL DATASET
            # Load, Clean and Tokenize the Datasets
            print('---- Load, Clean and Tokensize Dataset : ',end='')
            
            self.inital_dataload()
            print('Done')
            
            # Compute List of All words in the datasets
            print('---- Finalize tokenized words and translation to id : ',end='')
            self.all_words_freq = {}
            self.all_words = []
            self.compute_wordlist()
            self.token2id()
            print('Done')

            # Add Embedding and correct clean the words not in embedding : 
            print('---- Adapt Dataset for Embedding : ',end='')
            self.adaptDataset()
            print('Done')

            # Save the Cleaned Datasets
            print('---- Saving all tokenized words : ',end='')
            self.save_cleanDataset()
            print('Done')
        else : 
            # Save the Cleaned Datasets
            print('---- Load the Clean Adapted Dataset : ',end='')
            self.load_cleanDataset()
            print('Done')
            
            # Compute List of All words in the datasets
            print('---- Compute word list : ',end='')
            self.all_words_freq = {}
            self.all_words = []
            self.compute_wordlist()
            print('Done')
        
    def getDataset(self, dataT='train',subtask='subtask_a'):
        ''' Returns the pytorch Dataset
            - file : {'train','test'}
            - subtask : {'subtask_a','subtask_b','subtask_c'} '''
        
        if dataT == 'train':
            dataset = TrainTweetDataset(self.data[dataT], subtask)
        elif dataT == 'test':
            dataset = TestTweetDataset(self.data[subtask], subtask)
            
        return dataset
    
    def token2id(self):
        for f in self.path : 
            def token2id_x(x):
                
                return [self.embedding.word2idx[k] for k in x if k in self.embedding.words]
            self.data[f]['token_id'] = self.data[f]['token'].apply(lambda x : token2id_x(x))
            #print(self.data[f])

    def save_cleanDataset(self):
        for f in self.path : 
            self.data[f].to_csv(self.path_clean[f])
        
    def load_cleanDataset(self):
        for f in self.path : 
            self.data[f] = pd.read_csv(self.path_clean[f],index_col='id')
            self.data[f]['token'] = self.data[f]['token'].apply(lambda x : ast.literal_eval(x))
            self.data[f]['token_id'] = self.data[f]['token_id'].apply(lambda x : ast.literal_eval(x))
           
               
    def adaptDataset(self):
        # Find all words wich are not in the Embedding :
        missing_words = []
        for i, word in enumerate(self.all_words) :
            if self.embedding.word2idx.get(word) == None : 
                missing_words.append(word)
        
        # Correct if possible the missing_words : 
        ### We use theshold over which we correct the word. Under which we discard the word
        t = 0.5 # threshold
        rejected_words = []
        corrected_words = {}
        for word in tqdm(missing_words) : 
            suggestion, prob = spelling.suggest(word)[0]
            if prob < t : 
                rejected_words.append(word)
            else : 
                corrected_words[word] = suggestion
        
        # Modify the Original Datasets with those corrected_words : 
        for f in self.path : 
            self.data[f]['token'] = self.data[f]['token'].apply(lambda x : [corrected_words.get(k,k) for k in x])
            self.data[f]['token'] = self.data[f]['token'].apply(lambda x : [k for k in x if k not in rejected_words ])
        nb_rejected = len(rejected_words)
        nb_corrected = len(corrected_words)
        nb_vocab = len(self.embedding.glove_dict)
        p_rejected = 100* nb_rejected / nb_vocab
        p_corrected = 100* nb_corrected / nb_vocab
        print('---- Words removed   : {0:} / {1:.2f} - {2:} %'.format(nb_rejected,nb_vocab,p_rejected))
        print('---- Words corrected : {0:} / {1:.2f} - {2:} %'.format(nb_corrected,nb_vocab,p_corrected))
        
    def inital_dataload(self):
        for f in self.path : 
            self.data[f] = pd.read_table(self.path[f],index_col='id')
            self.data[f]['token'] = self.data[f]['tweet'].apply(lambda x : self.clean_tweet(x))
            
    def compute_wordlist(self):
        
        for f in self.data : 
            for i in range(len(self.data[f])):
                for e in self.data[f].iloc[i].token:
                    self.all_words_freq[e] = 1 + self.all_words_freq.get(e,0)
        self.all_words = list(self.all_words_freq.keys())
    
    def clean_tweet(self,text):
        ''' Function that is applied to every to tweet in the dataset '''
        
        # =========== TEXT ===========
        # Replace @USER by <user>
        text = re.compile(r'@USER').sub(r'<user>',text)

        # Replace URL by <url>
        text = re.compile(r'URL').sub(r'<url>',text)

        # Remove numbers :
        text = re.compile(r'[0-9]+').sub(r' ',text)

        # Remove some special characters
        text = re.compile(r'([\xa0_\{\}\[\]¬•$,:;/@#|\^*%().~`”"“-])').sub(r' ',text) 

        # Space the special characters with white spaces
        text = re.compile(r'([$&+,:;=?@#|\'.^*()%!"’“-])').sub(r' \1 ',text)
        
        # Replace some special characters : 
        replace_dict = {r'&' : 'and' , 
                        r'\+' : 'plus'}
        for cha in replace_dict:
            text = re.compile(str(cha)).sub(str(replace_dict[cha]),text)
            
        # Handle Emoji : translate some and delete the others
        text = self.handle_emoji(text)
        
        # Word delengthening : 
        text = re.compile(r'(.)\1{3,}').sub(r'\1\1',text)

        # Cut the words with caps in them : 
        text = re.compile(r'([a-z]+|[A-Z]+|[A-Z][a-z]+)([A-Z][a-z]+)').sub(r'\1 \2',text)
        text = re.compile(r'([a-z]+|[A-Z]+|[A-Z][a-z]+)([A-Z][a-z]+)').sub(r'\1 \2',text)        
        # =========== TOKENS ===========
        # TOKENIZE 
        text = text.split(' ')

        # Remove white spaces tokens
        text = [text[i] for i in range(len(text)) if text[i] != ' ']

        # Remove empty tokens
        text = [text[i] for i in range(len(text)) if text[i] != '']

        # Remove repetition in tokens (!!! => !)
        text = [text[i] for i in range(len(text)) if text[i] != text[i-1]]

        #  Handle the ALL CAPS Tweets 
        ### if ratio of caps in the word > 75% add allcaps tag <allcaps>
        caps_r = np.mean([text[i].isupper() for i in range(len(text))])
        if caps_r > 0.6 : 
            text.append('<allcaps>')

        # Lower Case : 
        text = [text[i].lower() for i in range(len(text))]

        return text

    def handle_emoji(self,text):
        # Dictionnary of "important" emojis : 
        emoji_dict =  {'♥️': ' love ',
                       '❤️' : ' love ',
                       '❤' : ' love ',
                       '😘' : ' kisses ',
                      '😭' : ' cry ',
                      '💪' : ' strong ',
                      '🌍' : ' earth ',
                      '💰' : ' money ',
                      '👍' : ' ok ',
                       '👌' : ' ok ',
                      '😡' : ' angry ',
                      '🍆' : ' dick ',
                      '🤣' : ' haha ',
                      '😂' : ' haha ',
                      '🖕' : ' fuck you '}

        for cha in emoji_dict:
            text = re.compile(str(cha)).sub(str(emoji_dict[cha]),text)
        # Remove ALL emojis
        text = emoji.get_emoji_regexp().sub(r' ',text) 
        text = re.compile("([\U0001f3fb-\U0001f3ff])").sub(r'',text) 
        text = re.compile("([\U00010000-\U0010ffff])").sub(r'',text) 
        text = re.compile("(\u00a9|\u00ae|[\u2000-\u3300]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff])").sub(r'',text)

        # Add Space between  the Emoji Expressions : 
        text = re.compile("([\U00010000-\U0010ffff])").sub(r' \1 ',text) 
        return text

In [6]:
mydata = DataHandling(myEmbedding)

---- Load the Clean Adapted Dataset : Done
---- Compute word list : Done


In [7]:
train = mydata.getDataset(dataT='train',subtask='subtask_c')
train[1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(tensor([     1,  30168,     71,     76, 666233,     10]), tensor([1.]))

In [8]:
words = np.array(list(mydata.all_words_freq.keys()))
list(words[list(np.argsort(list(mydata.all_words_freq.values())))[::-1][:20]])

['<user>',
 'the',
 'is',
 'to',
 'and',
 'a',
 'you',
 'of',
 "'",
 'i',
 'are',
 '!',
 'he',
 'that',
 'in',
 'it',
 'she',
 '?',
 's',
 'for']

In [9]:
mydata.data['train'].head(10)


Unnamed: 0_level_0,tweet,subtask_a,subtask_b,subtask_c,token,token_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
86426,@USER She should ask a few native Americans wh...,OFF,UNT,,"[<user>, she, should, ask, a, few, native, ame...","[1, 148, 277, 565, 12, 1087, 17411, 7437, 87, ..."
90194,@USER @USER Go home you’re drunk!!! @USER #MAG...,OFF,TIN,IND,"[<user>, go, home, you, re, drunk, !, <user>, ...","[1, 112, 336, 16, 797, 1511, 10, 1, 29896, 286..."
16820,Amazon is investigating Chinese employees who ...,NOT,,,"[amazon, is, investigating, chinese, employees...","[2547, 33, 39459, 3492, 16270, 128, 71, 5442, ..."
62688,"@USER Someone should'veTaken"" this piece of sh...",OFF,UNT,,"[<user>, someone, should, ', ve, taken, this, ...","[1, 239, 277, 49, 572, 1942, 54, 2479, 40, 186..."
43605,@USER @USER Obama wanted liberals &amp; illega...,NOT,,,"[<user>, obama, wanted, liberals, and, amp, il...","[1, 1382, 953, 30168, 27, 12801, 114750, 17, 9..."
97670,@USER Liberals are all Kookoo !!!,OFF,TIN,OTH,"[<user>, liberals, are, all, kookoo, !]","[1, 30168, 71, 76, 666233, 10]"
77444,@USER @USER Oh noes! Tough shit.,OFF,UNT,,"[<user>, oh, noes, !, tough, shit]","[1, 194, 73880, 10, 3832, 186]"
52415,@USER was literally just talking about this lo...,OFF,TIN,GRP,"[<user>, was, literally, just, talking, about,...","[1, 94, 1469, 60, 654, 122, 54, 89, 76, 6866, ..."
45157,@USER Buy more icecream!!!,NOT,,,"[<user>, buy, more, icecream, !]","[1, 873, 146, 18658, 10]"
13384,@USER Canada doesn’t need another CUCK! We alr...,OFF,TIN,IND,"[<user>, canada, doesn, t, need, another, cuck...","[1, 3160, 69393, 188, 172, 599, 407870, 10, 81..."


## Classifier 
Set of Classes used as classifier for the tweets. 

In [137]:
# Classification NN : 
class FFNN(nn.Module):
    
    def __init__(self, embedding, hidden_dim , num_classes ,embedding_dim):
        print('---- Creating FFNN : ',end='')
        
        super(FFNN, self).__init__()
        
        # Embedding
        self.embedding = embedding
        
        # Fully Connected Layers
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes) 
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)


        
        # Activation Layers
        self.relu1 = nn.ReLU()
        self.output = nn.Sigmoid()
        print('Done')
    
    def forward(self, x):
        
        embedded = self.embedding(x)
        # we average the embeddings of words in a sentence
        averaged = embedded.mean(1)
        # (batch size, max sent length, embedding dim) to (batch size, embedding dim)

        out = self.fc1(averaged)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.output(out)
        return out
    
    def loss_fn(self):
        ''' Returns the loss function best associated with the model'''
        return nn.BCELoss()


Pad Data Method : Used with the pytorch DataLoader in order to pad the length of the tweets by batch. 

In [138]:
def padding_tweet(batch):
    '''
    Pad Data Method : Used with the pytorch DataLoader in order to pad the length of the tweets by batch. 
    args: 
        batch - List of elements ( x , label )
    return 
        batch - Padded ( list(x) , list(label))
    
    '''
    batch = list(zip(*batch))
    max_len = max([len(t) for t in batch[0]])
    batch[0] = torch.stack([pad_tensor(vec=t, pad=max_len, dim=0) for t in batch[0]],dim=0)
    batch[1] = torch.stack(batch[1])
    return batch[0] , batch[1]

def pad_tensor(vec, pad, dim):
    """
    args:
        vec - tensor to pad
        pad - the size to pad to
        dim - dimension to pad

    return:
        a new tensor padded to 'pad' in dimension 'dim'
    """
    pad_size = list(vec.shape)
    pad_size[dim] = pad - vec.size(dim)
    return torch.cat([vec, torch.zeros(*pad_size,dtype=torch.long)], dim=dim)


## Main Class : Trainer
Main Class for the loading, training, testing etc ...

In [139]:
class OffensiveClassifier(object):
    ''' Main Class for the loading, training, testing etc ...'''
    def __init__(self,subtask='subtask_a', dim_vect=25,cType='FFNN'):
        
        self.dim_vect = dim_vect
        
        self.subtask = subtask

        # Loading the GloVe Embedding and Torch Formating of this Embedding
        self.GloVe = GloVe_embedding(dim_vect= dim_vect )
        self.embedding = self.GloVe.emb_layer
        
        # Loading the Tweet Data : 
        self.dataHandler = DataHandling(self.GloVe)
        
        # Retrieving Training DataSet (pytorch)
        self.train_set = self.dataHandler.getDataset('train',subtask)
        
        
        # Retrieving Test DataSet (pytorch)
        self.test_set = self.dataHandler.getDataset('test',subtask)
        
        # Classification : 
        if cType == 'FFNN':
            # Creating the Neuronal Network
            self.model = FFNN(self.embedding, 20, 1, self.dim_vect)
        if cType == 'logistic':
            pass
        

    def accuracy(self, output, target ):

        output = torch.round(torch.sigmoid(output))
        correct = (output == target).float()
        acc = correct.sum()/len(correct)
        return acc
    
    def train( self, nb_epochs, lr=0.001, batch_size = 1000 ):
        
        self.train_generator = DataLoader(self.train_set, batch_size=batch_size,collate_fn=padding_tweet, shuffle=True)

        # we use the stochastic gradient descent (SGD) optimizer
        optimizer = optim.Adam(self.model.parameters(),lr=lr)
        
        loss_fn = self.model.loss_fn()

        for epoch in range(nb_epochs):

            for tokens, target  in self.train_generator :
                target = target.float()
                tokens = tokens.long()
                #to ensure the dropout (exlained later) is "turned on" while training
                #good practice to include even if do not use here
                self.model.train()

                #we zero the gradients as they are not removed automatically
                optimizer.zero_grad()

                # queeze is needed as the predictions are initially size (batch size, 1) and we need to remove the dimension of size 1 
                predictions = self.model(tokens)
                #print(predictions,target)
                loss = loss_fn(predictions, target)
                acc = self.accuracy(predictions, target)
                
                #calculate the gradient of each parameter
                loss.backward()
                
                #update the parameters using the gradients and optimizer algorithm 
                optimizer.step()

                epoch_loss = loss.item()
                epoch_acc = acc

                print(f'| Epoch: {epoch:02} | Train Loss: {epoch_loss:.3f} | Train Acc: {epoch_acc*100:.2f}%')
        
    def test(self):
        ''' 
            Test Function : Tests the Network on the Test Data of the Subtask and Saves in a file
        '''
        self.test_generator = DataLoader(self.test_set,collate_fn= padding_tweet )




In [140]:
taskAclassifier = OffensiveClassifier(subtask='subtask_a', dim_vect=100, cType='FFNN')

---- Loading the processed GloVe files : Done
---- Creating the Pytorch Embedding Layer  : Done
---- Load the Clean Adapted Dataset : Done
---- Compute word list : Done
---- Creating FFNN : Done


In [142]:
taskAclassifier.train(500,lr=0.001,batch_size=2000)

| Epoch: 00 | Train Loss: 0.536 | Train Acc: 32.95%
| Epoch: 00 | Train Loss: 1.264 | Train Acc: 33.60%
| Epoch: 00 | Train Loss: 0.639 | Train Acc: 32.65%
| Epoch: 00 | Train Loss: 0.833 | Train Acc: 32.80%
| Epoch: 00 | Train Loss: 0.798 | Train Acc: 32.80%
| Epoch: 00 | Train Loss: 0.561 | Train Acc: 34.55%
| Epoch: 00 | Train Loss: 0.621 | Train Acc: 33.31%
| Epoch: 01 | Train Loss: 0.674 | Train Acc: 32.60%
| Epoch: 01 | Train Loss: 0.706 | Train Acc: 33.05%
| Epoch: 01 | Train Loss: 0.645 | Train Acc: 33.75%
| Epoch: 01 | Train Loss: 0.565 | Train Acc: 32.90%
| Epoch: 01 | Train Loss: 0.572 | Train Acc: 34.05%
| Epoch: 01 | Train Loss: 0.622 | Train Acc: 32.50%
| Epoch: 01 | Train Loss: 0.629 | Train Acc: 34.11%
| Epoch: 02 | Train Loss: 0.585 | Train Acc: 33.70%
| Epoch: 02 | Train Loss: 0.538 | Train Acc: 31.80%
| Epoch: 02 | Train Loss: 0.566 | Train Acc: 33.30%
| Epoch: 02 | Train Loss: 0.614 | Train Acc: 34.70%
| Epoch: 02 | Train Loss: 0.600 | Train Acc: 33.65%
| Epoch: 02 

KeyboardInterrupt: 