In [1]:
#from utils import *

import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
import re
from os import mkdir
from os.path import join, isfile, isdir, exists
import bcolz
import pickle 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
#from pattern.en import spelling
from tqdm import tqdm
import ast

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload

In [3]:
GPU = True
device_idx = 0
if GPU:
    device = torch.device("cuda:" + str(device_idx) if torch.cuda.is_available() else "cpu")
else:
    device = torch.device("cpu")
print(device)

cuda:0


## Word Embedings : GloVe
This Class Loads the GloVe Embeding, processes it, and create a word embedding given the DataLoader.

In [4]:
class GloVe_embedding(object):
    def __init__(self,dim_vect = 25 ):
        ########## VARIABLES ##########
        self.dim_vect = dim_vect

        # Defining variables for GloVe: 
        self.words = []
        self.word2idx = {}
        self.glove_dict = {}
        
        ########## LOADING GLOVE DATA ##########
        
        # Defining path for GloVe Data : 
        self.path = join('..','data','glove') # Path of glove
        self.path_glove = join(self.path,'glove.twitter.27B.'+str(dim_vect))
        if not(isdir(self.path_glove)):
            mkdir(self.path_glove)
        self.path_vec_original = join(self.path,'glove.twitter.27B.'+str(dim_vect)+'d.txt') # Path of glove original vectors
        self.path_vec_save = join(self.path_glove,'glove.twitter.27B.'+str(dim_vect)+'d.vectors.dat')  # Path of glove saved vectors
        self.path_words = join(self.path_glove,'glove.twitter.27B.'+str(dim_vect)+'d.words.pkl')
        self.path_word2idx = join(self.path_glove,'glove.twitter.27B.'+str(dim_vect)+'d.word2idx.pkl')
                
        if not(isdir(self.path_vec_save) and isfile(self.path_words) and isfile(self.path_word2idx)) : 
            # If files are allready processed, just load them
            print('---- Processing the GloVe files : ',end='')
            self.process_GloVe()
            print('Done')
            
        # Load the wordvec files
        print('---- Loading the processed GloVe files : ',end='')
        self.load_GloVe()
        print('Done')
        
        ########## TORCH EMBEDDING ##########
        
        # Defining variables for our Embedding:
        self.size_vocab = len(self.words)
        
        # Creating the Pytorch Embedding Layer : 
        print('---- Creating the Pytorch Embedding Layer  : ',end='')
        self.emb_layer = nn.Embedding(self.size_vocab, self.dim_vect)
        self.create_emb_layer(non_trainable=True)
        print('Done')

               
    def process_GloVe(self):
        ''' Processes the GloVe Dataset - Saves files'''
        words = []
        word2idx = {}
        
        vectors = bcolz.carray(np.zeros(1) , rootdir=self.path_vec_save , mode='w' ) # defining vector saved
        
        # Adding Padding vector : 
        word2idx['<pad>'] = 0
        words.append('<pad>')
        #vect = np.random.normal(scale=0.6, size=(self.dim_vect , )) # random padding vect
        vect = np.zeros((self.dim_vect , )) # 0's padding vect. 
        vectors.append(vect)
        
        idx = 1
        with open(self.path_vec_original, 'rb') as f:
            for l in f:
                line = l.decode().split()
                word = line[0]
                words.append(word)
                word2idx[word] = idx
                idx += 1
                vect = np.array(line[1:]).astype(np.float)
                vectors.append(vect)
                

        vectors = bcolz.carray(vectors[:].reshape((-1, self.dim_vect)), rootdir=self.path_vec_save, mode='w')

        vectors.flush()
        pickle.dump(words, open(self.path_words, 'wb'))
        pickle.dump(word2idx, open(self.path_word2idx, 'wb'))
        
    def load_GloVe(self):
        ''' Loads previously processed dataset'''
        
        vectors = bcolz.open(self.path_vec_save)[:]
        
        self.words = pickle.load(open(self.path_words, 'rb'))
        self.word2idx = pickle.load(open(self.path_word2idx, 'rb'))
        
        self.glove_dict = {w: vectors[self.word2idx[w]] for w in self.words}
        self.emb_matrix = torch.Tensor(vectors)
    
    def create_emb_layer(self, non_trainable=True):
        self.emb_layer.load_state_dict({'weight': self.emb_matrix})
        if non_trainable:
            self.emb_layer.weight.requires_grad = False
        


In [5]:
myEmbedding = GloVe_embedding(dim_vect=200)

---- Loading the processed GloVe files : Done
---- Creating the Pytorch Embedding Layer  : Done


## Data Loader
This Class Loads the Tweet Dataset, Cleans it. It also enables the loading for the training and testing. 


In [6]:
class TestTweetDataset(Dataset):
    ''' 
    Pytorch Dataset for the Test set. 
    initialisation : - data : training pandas dataframe
                     - subtask : subtask we are working on {'subtask_a', 'subtask_b', 'subtask_c', }
                     - balanced : if we balance the dataset by oversampling it in the smallest classes
    '''
    def __init__(self,data, subtask):
        self.id = data.index.tolist()
        self.token = data.token.tolist()
        self.token_id = data.token_id.tolist()
        
    def __getitem__(self, index):
        return torch.LongTensor(self.token_id[index]), torch.FloatTensor([self.id[index]])

    def __len__(self):
        return len(self.token) 

        
    
class TweetDataset(Dataset):
    ''' 
    Pytorch Dataset for the Training set. 
    initialisation : - data : training pandas dataframe
                     - subtask : subtask we are working on {'subtask_a', 'subtask_b', 'subtask_c', }
                     - balanced : if we balance the dataset by oversampling it in the smallest classes
    '''
    def __init__(self,data,subtask):        
        # Save in lists the ids, labels, label_id, token, and token_id . 
        self.id = data.index.tolist()
        self.label_id = data[subtask].tolist()
        self.token = data.token.tolist()
        self.token_id = data.token_id.tolist()
        
    def __getitem__(self, index):
        return torch.LongTensor(self.token_id[index]), torch.FloatTensor([self.label_id[index]])

    def __len__(self):
        return len(self.token) 
    
    

In [7]:
class DataHandling(object):
    def __init__(self, embedding, pValid):
        print('-- Data Handling : ')
        
        self.embedding = embedding
        
        self.defineClasses()

        # All the Text Data path
        self.definePath()
        
        self.data = {}
        
        processed_ = True
        for f in self.path:
            processed_ = processed_ and isfile(self.path_clean[f])
        
        if  not(processed_) : 
            ### PROCESSING OF THE ORIGINAL DATASET
            # Load, Clean and Tokenize the Datasets
            print('---- Load, Clean and Tokensize Dataset : ',end='')
            self.inital_dataload()
            print('Done')
            
            # Compute List of All words in the datasets
            print('---- Finalize tokenized words and translation to id : ',end='')
            self.compute_wordlist()
            self.token2id()
            print('Done')

            # Add Embedding and correct clean the words not in embedding : 
            print('---- Adapt Dataset for Embedding : ',end='')
            self.adaptDataset()
            print('Done')

            # Save the Cleaned Datasets
            print('---- Saving all tokenized words : ',end='')
            self.save_cleanDataset()
            print('Done')
        else : 
            # Save the Cleaned Datasets
            print('---- Load the Clean Adapted Dataset : ',end='')
            self.load_cleanDataset()
            
            # Compute List of All words in the datasets
            self.compute_wordlist()
            print('Done')
        
        # Create Validation Set (split the test dataset) for every subtask
        self.splitValidation(p=pValid)
        self.prepareLabels()
        
        
    def defineClasses(self):
        ''' Function that defines the classes labels and id per subtask '''
        self.classes_dict = {}
        self.classes_dict['subtask_a'] = {'NOT' : 0 ,'OFF' : 1}
        self.classes_dict['subtask_b'] = {'UNT' : 0 ,'TIN' : 1}
        self.classes_dict['subtask_c'] = {'IND' : 0 ,'OTH' : 1, 'GRP' : 2}
        
    def definePath(self):
        ''' Function that defines all the paths of the datasets. '''
        self.path = {}
        self.path_clean = {}
        
        self.path['train'] = join('..','data','start-kit','training-v1','offenseval-training-v1.tsv')
        self.path_clean['train'] = join('..','data','start-kit','training-v1','clean-offenseval-training-v1.tsv')
        
        self.path['subtask_a'] = join('..','data','Test A Release','testset-taska.tsv')
        self.path_clean['subtask_a'] = join('..','data','Test A Release','clean-testset-taska.tsv')
        
        self.path['subtask_b'] = join('..','data','Test B Release','testset-taskb.tsv')
        self.path_clean['subtask_b'] = join('..','data','Test B Release','clean-testset-taskb.tsv')
        
        self.path['subtask_c'] = join('..','data','Test C Release','test_set_taskc.tsv')
        self.path_clean['subtask_c'] = join('..','data','Test C Release','clean-test_set_taskc.tsv')
        
    def getDataset(self, dataT='train',subtask='subtask_a',balanced = True):
        ''' Returns the pytorch Dataset
            - file : {'train','test','validation'}
            - subtask : {'subtask_a','subtask_b','subtask_c'} '''
        
            
        if dataT == 'train':
            if balanced : 
                data_train = self.balanceData(self.data[dataT][subtask],subtask)
            else : 
                data_train = self.data[dataT][subtask]
            dataset = TweetDataset(data_train, subtask)
        elif dataT == 'validation':
            dataset = TweetDataset(self.data[dataT][subtask], subtask)
        elif dataT == 'test':
            dataset = TestTweetDataset(self.data[subtask], subtask)
            
        return dataset
    
    def token2id(self):
        ''' Function that translates the list of tokens into a list of token id of the embedding.
            Adds a new 'token_id' column to the dataframe '''
        for f in self.path : 
            def token2id_x(x):
                
                return [self.embedding.word2idx[k] for k in x if k in self.embedding.words]
            self.data[f]['token_id'] = self.data[f]['token'].apply(lambda x : token2id_x(x))

    def save_cleanDataset(self):
        ''' Saves at the defined path the cleaned dataset '''
        for f in self.path : 
            self.data[f].to_csv(self.path_clean[f])
        
    def load_cleanDataset(self):
        ''' Loads at the defined path the cleaned dataset '''
        for f in self.path : 
            self.data[f] = pd.read_csv(self.path_clean[f],index_col='id')
            self.data[f]['token'] = self.data[f]['token'].apply(lambda x : ast.literal_eval(x))
            self.data[f]['token_id'] = self.data[f]['token_id'].apply(lambda x : ast.literal_eval(x))
           
               
    def adaptDataset(self):
        ''' Function that finds all the words which are not in the embedding and tries to 
            correct them with the pattern.en package by taking the most probable replacement.
            If the suggested word in very unlikely, the word is removed from the tweets. 
        '''
        # Find all words wich are not in the Embedding :
        missing_words = []
        for i, word in enumerate(self.all_words) :
            if self.embedding.word2idx.get(word) == None : 
                missing_words.append(word)
        
        # Correct if possible the missing_words : 
        ### We use theshold over which we correct the word. Under which we discard the word
        t = 0.5 # threshold
        rejected_words = []
        corrected_words = {}
        for word in tqdm(missing_words) : 
            suggestion, prob = spelling.suggest(word)[0]
            if prob < t : 
                rejected_words.append(word)
            else : 
                corrected_words[word] = suggestion
        
        # Modify the Original Datasets with those corrected_words : 
        for f in self.path : 
            self.data[f]['token'] = self.data[f]['token'].apply(lambda x : [corrected_words.get(k,k) for k in x])
            self.data[f]['token'] = self.data[f]['token'].apply(lambda x : [k for k in x if k not in rejected_words ])
        nb_rejected = len(rejected_words)
        nb_corrected = len(corrected_words)
        nb_vocab = len(self.embedding.glove_dict)
        p_rejected = 100* nb_rejected / nb_vocab
        p_corrected = 100* nb_corrected / nb_vocab
        print('---- Words removed   : {0:} / {1:.2f} - {2:} %'.format(nb_rejected,nb_vocab,p_rejected))
        print('---- Words corrected : {0:} / {1:.2f} - {2:} %'.format(nb_corrected,nb_vocab,p_corrected))
        
    def inital_dataload(self):
        for f in self.path : 
            self.data[f] = pd.read_table(self.path[f],index_col='id')
            self.data[f]['token'] = self.data[f]['tweet'].apply(lambda x : self.clean_tweet(x))
            
    def compute_wordlist(self):
        self.all_words_freq = {}
        self.all_words = []
        
        for f in self.data : 
            for i in range(len(self.data[f])):
                for e in self.data[f].iloc[i].token:
                    self.all_words_freq[e] = 1 + self.all_words_freq.get(e,0)
        self.all_words = list(self.all_words_freq.keys())
        
    def splitValidation(self,p):
        ''' Creates the validation set by  taking p % of the train dataset '''
        data = self.data['train'].copy()
        self.data['train'] = {}
        self.data['validation'] = {}

        for subtask in self.classes_dict: # per subtask
            self.data['train'][subtask] = pd.DataFrame()
            self.data['validation'][subtask]= pd.DataFrame()
            for label in self.classes_dict[subtask]: #per label in this subtask 
                data_label =  data[data[subtask]==label]
                self.data['train'][subtask] = self.data['train'][subtask].append(data.loc[data_label.index])
                nb_valid = int(len(data_label)*p)
                # Select randmoly (without repetition) the indexes of the selected vaidation tweets
                index_valid = np.random.choice(data_label.index,(nb_valid,),replace=False)
                # Add the the selected validation tweets to the new dataframe
                self.data['validation'][subtask] = self.data['validation'][subtask].append(self.data['train'][subtask].loc[index_valid,:])
                # Drop the selected validation tweets from the training set
                self.data['train'][subtask] = self.data['train'][subtask].drop(index = index_valid)
                
    def prepareLabels(self) : 
        ''' Transform the labels into classes id '''
        for subtask in self.classes_dict: # per subtask
            self.data['validation'][subtask][subtask] =self.data['validation'][subtask][subtask].apply(lambda x : self.classes_dict[subtask][x])  
            self.data['train'][subtask][subtask] = self.data['train'][subtask][subtask].apply(lambda x : self.classes_dict[subtask][x])  

    def balanceData(self,data,subtask):
        ''' Augments the Data given in input in order to balance the dataset'''
        class_size = {}
        for label in self.classes_dict[subtask]:
            class_size[label] = len(data[data[subtask]==self.classes_dict[subtask][label]])
        largest_class = max(class_size, key=class_size.get)
        print('---- Augmenting the Data : ')
        print('Before Augmentation : ',class_size)

        for label in self.classes_dict[subtask]:  
            if label != largest_class:
                id_list = data[data[subtask]==self.classes_dict[subtask][label]].index
                nb_augmentation = class_size[largest_class] - class_size[label]
                id_augmentation = np.random.choice(id_list, (nb_augmentation,))
                data = data.append(data.loc[id_augmentation,:])
        # Check if it went well
        for label in self.classes_dict[subtask]:
            class_size[label] = len(data[data[subtask]==self.classes_dict[subtask][label]])
        
        print('After Augmentation : ',class_size)
        return data
    
    def clean_tweet(self,text):
        ''' Function that is applied to every to tweet in the dataset '''
        
        # =========== TEXT ===========
        # Replace @USER by <user>
        text = re.compile(r'@USER').sub(r'<user>',text)

        # Replace URL by <url>
        text = re.compile(r'URL').sub(r'<url>',text)

        # Remove numbers :
        text = re.compile(r'[0-9]+').sub(r' ',text)

        # Remove some special characters
        text = re.compile(r'([\xa0_\{\}\[\]¬•$,:;/@#|\^*%().~`”"“-])').sub(r' ',text) 

        # Space the special characters with white spaces
        text = re.compile(r'([$&+,:;=?@#|\'.^*()%!"’“-])').sub(r' \1 ',text)
        
        # Replace some special characters : 
        replace_dict = {r'&' : 'and' , 
                        r'\+' : 'plus'}
        for cha in replace_dict:
            text = re.compile(str(cha)).sub(str(replace_dict[cha]),text)
            
        # Handle Emoji : translate some and delete the others
        text = self.handle_emoji(text)
        
        # Word delengthening : 
        text = re.compile(r'(.)\1{3,}').sub(r'\1\1',text)

        # Cut the words with caps in them : 
        text = re.compile(r'([a-z]+|[A-Z]+|[A-Z][a-z]+)([A-Z][a-z]+)').sub(r'\1 \2',text)
        text = re.compile(r'([a-z]+|[A-Z]+|[A-Z][a-z]+)([A-Z][a-z]+)').sub(r'\1 \2',text)        
        # =========== TOKENS ===========
        # TOKENIZE 
        text = text.split(' ')

        # Remove white spaces tokens
        text = [text[i] for i in range(len(text)) if text[i] != ' ']

        # Remove empty tokens
        text = [text[i] for i in range(len(text)) if text[i] != '']

        # Remove repetition in tokens (!!! => !)
        text = [text[i] for i in range(len(text)) if text[i] != text[i-1]]

        #  Handle the ALL CAPS Tweets 
        ### if ratio of caps in the word > 75% add allcaps tag <allcaps>
        caps_r = np.mean([text[i].isupper() for i in range(len(text))])
        if caps_r > 0.6 : 
            text.append('<allcaps>')

        # Lower Case : 
        text = [text[i].lower() for i in range(len(text))]

        return text

    def handle_emoji(self,text):
        # Dictionnary of "important" emojis : 
        emoji_dict =  {'♥️': ' love ',
                       '❤️' : ' love ',
                       '❤' : ' love ',
                       '😘' : ' kisses ',
                      '😭' : ' cry ',
                      '💪' : ' strong ',
                      '🌍' : ' earth ',
                      '💰' : ' money ',
                      '👍' : ' ok ',
                       '👌' : ' ok ',
                      '😡' : ' angry ',
                      '🍆' : ' dick ',
                      '🤣' : ' haha ',
                      '😂' : ' haha ',
                      '🖕' : ' fuck you '}

        for cha in emoji_dict:
            text = re.compile(str(cha)).sub(str(emoji_dict[cha]),text)
        # Remove ALL emojis
        text = emoji.get_emoji_regexp().sub(r' ',text) 
        text = re.compile("([\U0001f3fb-\U0001f3ff])").sub(r'',text) 
        text = re.compile("([\U00010000-\U0010ffff])").sub(r'',text) 
        text = re.compile("(\u00a9|\u00ae|[\u2000-\u3300]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff])").sub(r'',text)

        # Add Space between  the Emoji Expressions : 
        text = re.compile("([\U00010000-\U0010ffff])").sub(r' \1 ',text) 
        return text

In [8]:
mydata = DataHandling(myEmbedding, pValid=0.15)

-- Data Handling : 
---- Load the Clean Adapted Dataset : Done


In [35]:
mydata.data['train']['subtask_a'].head(10)

Unnamed: 0_level_0,tweet,subtask_a,subtask_b,subtask_c,token,token_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
16820,Amazon is investigating Chinese employees who ...,0,,,"[amazon, is, investigating, chinese, employees...","[2547, 33, 39459, 3492, 16270, 128, 71, 5442, ..."
43605,@USER @USER Obama wanted liberals &amp; illega...,0,,,"[<user>, obama, wanted, liberals, and, amp, il...","[1, 1382, 953, 30168, 27, 12801, 114750, 17, 9..."
45157,@USER Buy more icecream!!!,0,,,"[<user>, buy, more, icecream, !]","[1, 873, 146, 18658, 10]"
42992,@USER What’s the difference between #Kavanaugh...,0,,,"[<user>, what, s, the, difference, between, an...","[1, 87, 138, 14, 2954, 1472, 27, 1, 97, 40, 37..."
54920,@USER @USER @USER It should scare every Americ...,0,,,"[<user>, it, should, scare, every, american, !...","[1, 34, 277, 9603, 382, 2034, 10, 148, 33, 701..."
56392,@USER @USER @USER @USER @USER @USER @USER @USE...,0,,,"[<user>, i, like, my, soda, like, i, like, my,...","[1, 11, 64, 30, 9477, 64, 11, 64, 30, 290103, ..."
86735,@USER you are also the king of taste,0,,,"[<user>, you, are, also, the, king, of, taste]","[1, 16, 71, 895, 14, 1697, 40, 3212]"
95686,#MAGA @USER 🎶 Sing like no one is listening ...,0,,,"[maga, <user>, sing, like, no, one, is, listen...","[29896, 1, 1547, 64, 31, 97, 33, 1484, 69, 69,..."
71446,5/5: @USER The time is right for this House to...,0,,,"[<user>, the, time, is, right, for, this, hous...","[1, 14, 136, 33, 209, 38, 54, 544, 17, 7094, 1..."
67757,@USER @USER You are correct.,0,,,"[<user>, you, are, correct]","[1, 16, 71, 5550]"


In [71]:
# Example of Tweet Processing
print(mydata.data['train']['subtask_a'].iloc[331]['tweet'])
print(mydata.data['train']['subtask_a'].iloc[331]['token'])
print(mydata.data['train']['subtask_a'].iloc[331]['token_id'])

@USER Get the hell out of my country that u hate @USER the u have trashed &amp; weaponized OUR GOVT for 8 yrs to destroy us &amp; quit telling our kids to hate the best county your a disgrace!
['<user>', 'get', 'the', 'hell', 'out', 'of', 'my', 'country', 'that', 'u', 'hate', '<user>', 'the', 'u', 'have', 'trashed', 'and', 'amp', 'weaponized', 'our', 'govt', 'for', 'yrs', 'to', 'destroy', 'us', 'and', 'amp', 'quit', 'telling', 'our', 'kids', 'to', 'hate', 'the', 'best', 'county', 'your', 'a', 'disgrace', '!']
[1, 88, 14, 674, 100, 40, 30, 1671, 46, 52, 258, 1, 14, 52, 65, 65748, 27, 12801, 610387, 278, 10666, 38, 6661, 17, 7994, 292, 27, 12801, 3269, 2013, 278, 885, 17, 258, 14, 210, 5741, 62, 12, 23234, 10]


## Classifier 
Set of Classes used as classifier for the tweets. 

In [75]:
# Classification NN : 
class FFNN(nn.Module):
    
    def __init__(self, embedding, hidden_dim , num_classes ,embedding_dim):
        print('------ Creating FFNN : ',end='')
        
        super(FFNN, self).__init__()
        
        # Embedding
        self.embedding = embedding
        
        # Fully Connected Layers
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes) 
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

        # Activation Layers
        self.relu1 = nn.ReLU()

        self.output = nn.LogSoftmax(dim=1)
        print('Done')
    
    def forward(self, x):
        
        embedded = self.embedding(x)
        # we average the embeddings of words in a sentence
        
        non_zero_nb = (x!=0).sum(1,keepdim=True)
        #print(x.shape, non_zero_nb,embedded.sum(1).shape)
        averaged = embedded.sum(1) / non_zero_nb.float()
        #averaged = embedded.mean(1)
        # (batch size, max sent length, embedding dim) to (batch size, embedding dim)

        out = self.fc1(averaged)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.output(out)
        return out
    
    def loss_fn(self):
        ''' Returns the loss function best associated with the model'''
        return nn.NLLLoss()


In [76]:
# CNN : 
class CNN(nn.Module):
    
    def __init__(self, embedding, embedding_dim, out_channels, window_size, output_dim, dropout):
        
        super(CNN, self).__init__()
        
        self.embedding = embedding
        
        #in_channels -- 1 text channel
        #out_channels -- the number of output channels
        #kernel_size is (window size x embedding dim)
        
        self.conv = nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(window_size,embedding_dim))
        
        #the dropout layer
        self.dropout = nn.Dropout(dropout)
    
        #the output layer
        self.fc = nn.Linear(out_channels, output_dim)
        
        self.out =  nn.LogSoftmax(dim=1)
        
    def forward(self, x):
                
        #(batch size, max sent length)
        
        embedded = self.embedding(x)
        #print('embedded',embedded.shape)
                
        #(batch size, max sent length, embedding dim)
        
        #images have 3 RGB channels 
        #for the text we add 1 channel
        embedded = embedded.unsqueeze(1)
        #print('embedded_unsqueeze',embedded.shape)
        
        #(batch size, 1, max sent length, embedding dim)
        
        feature_maps = self.conv(embedded)
        #print('feature_maps',feature_maps.shape)

        #??? what is the shape of the convolution output
        
        #(batch size, n filters, max input length - window size +1, 1)
        
        feature_maps = feature_maps.squeeze(3)
        #print('feature_maps_unsqueeze',feature_maps.shape)

        #??? why do we reduce 1 dimention here
        
        # we do need the 1 channel anymore
                
        feature_maps = F.relu(feature_maps)
        #print('feature_maps_relu',feature_maps.shape)

  
        #the max pooling layer
        pooled = F.max_pool1d(feature_maps, feature_maps.shape[2])
        #print('pooled',pooled.shape)

        pooled = pooled.squeeze(2)
        #print('pooled_squeeze',pooled.shape)

        #??? what is the shape of the pooling output
        #(batch size, n_filters)
        
        dropped = self.dropout(pooled)
        #print('dropped',dropped.shape)

        preds = self.fc(dropped)
        #print('preds', preds.shape)
        out = self.out(preds)
        #print('out',out.shape)
        
        return out
    
    def loss_fn(self):
        return nn.NLLLoss()

In [77]:
class CNN_paper(nn.Module):
    '''https://arxiv.org/pdf/1408.5882'''
    def __init__(self, embedding, embedding_dim, out_channels, window_size, output_dim, dropout):
        
        super(CNN_paper, self).__init__()
        
        self.embedding = embedding

        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(window_size[0],embedding_dim))
        self.conv2 = nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(window_size[1],embedding_dim))
        self.conv3 = nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(window_size[2],embedding_dim))
        #the dropout layer
        self.dropout = nn.Dropout(dropout)
    
        #the output layer
        self.fc = nn.Linear(out_channels*3, output_dim)
        
        self.out =  nn.LogSoftmax(dim=1)
    
    def conv_block(self,embedded,conv_layer):
        feature_maps = conv_layer(embedded)
        features_maps = feature_maps.squeeze(3)
        feature_maps = F.relu(feature_maps)
        return F.max_pool1d(feature_maps, feature_maps.shape[2]).squeeze(2)
    
    def forward(self, x):
                
        #(batch size, max sent length)
        
        embedded = self.embedding(x)
                        
        #images have 3 RGB channels 
        #for the text we add 1 channel
        embedded = embedded.unsqueeze(1)
         

        feature_maps1 = self.conv1(embedded)
        feature_maps2 = self.conv2(embedded)
        feature_maps3 = self.conv3(embedded)
        
        feature_maps1 = feature_maps1.squeeze(3)
        feature_maps2 = feature_maps2.squeeze(3)
        feature_maps3 = feature_maps3.squeeze(3)


                
        feature_maps1 = F.relu(feature_maps1)
        feature_maps2 = F.relu(feature_maps2)
        feature_maps3 = F.relu(feature_maps3)

  
        #the max pooling layer
        pooled1 = F.max_pool1d(feature_maps1, feature_maps1.shape[2])
        pooled2 = F.max_pool1d(feature_maps2, feature_maps2.shape[2])
        pooled3 = F.max_pool1d(feature_maps3, feature_maps3.shape[2])

        pooled1 = pooled1.squeeze(2)
        pooled2 = pooled2.squeeze(2)
        pooled3 = pooled3.squeeze(2)
        all_pooled =torch.cat((pooled1, pooled2, pooled3), 1)
        
        dropped = self.dropout(all_pooled)

        preds = self.fc(dropped)
        out = self.out(preds)
        
        return out
    
    def loss_fn(self):
        return nn.NLLLoss()

In [78]:
# RNN, GRU and LSTM
class RNN_base(nn.Module):
    def __init__(self, embedding, embedding_dim=25, hidden_size=25, output_size=2, num_layers=1, output_layer=None):
        super(RNN_base, self).__init__()
        
        self.hidden_size = hidden_size
        self.embedding = embedding
        
        if output_layer==None:
            self.i2o = nn.Linear(hidden_size, output_size)
        else:
            self.i2o = output_layer
        self.final_activation = nn.LogSoftmax(dim=1)
        self.final_activation = nn.LogSoftmax(dim=1)
        
        
    def forward(self, inputs):
        inputs_embedded = self.embedding(inputs).float()
 
        sentence_lengths = (inputs_embedded.sum(dim=2,keepdim=False)!=0).sum(dim=1)
        
        output, hidden = self.rnn(inputs_embedded)
        
        output_final = torch.empty(inputs.shape[0],self.hidden_size).float()
        output_final = output[torch.arange(inputs.shape[0]),sentence_lengths-1,:]

        output = self.i2o(output_final)
        output = self.final_activation(output)
        
        return output

    def loss_fn(self):
        ''' Returns the loss function best associated with the model'''
        return nn.NLLLoss()
    
class RNN(RNN_base):
    def __init__(self, embedding, embedding_dim=25, hidden_size=25, output_size=2, num_layers=1, output_layer=None):
        RNN_base.__init__(self, 
                          embedding=embedding, 
                          embedding_dim=embedding_dim, 
                          hidden_size=hidden_size, 
                          output_size=output_size, 
                          num_layers=num_layers, 
                          output_layer=output_layer)
        self.rnn = nn.RNN(
            input_size = embedding_dim, 
            hidden_size = hidden_size,
            num_layers = num_layers,
            nonlinearity = "relu",
            batch_first = True
        )

class LSTM(RNN_base):
    def __init__(self, embedding, embedding_dim=25, hidden_size=25, output_size=2, num_layers=1, output_layer=None):
        RNN_base.__init__(self, 
                          embedding=embedding, 
                          embedding_dim=embedding_dim, 
                          hidden_size=hidden_size, 
                          output_size=output_size, 
                          num_layers=num_layers, 
                          output_layer=output_layer)
        self.rnn = nn.LSTM(
            input_size = embedding_dim, 
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True
        )  
        
class GRU(RNN_base):
    def __init__(self, embedding, embedding_dim=25, hidden_size=25, output_size=2, num_layers=1, output_layer=None):
        RNN_base.__init__(self, 
                          embedding=embedding, 
                          embedding_dim=embedding_dim, 
                          hidden_size=hidden_size, 
                          output_size=output_size, 
                          num_layers=num_layers, 
                          output_layer=output_layer)
        self.rnn = nn.GRU(
            input_size = embedding_dim, 
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True
        ) 
        
def output_layer(input_size,output_size,middle_layers=[]):
    "Input and output size - scalars. Middle layers - list (ignore if just one layer wanted)"
    
    #For current implementation, the input_size should be equal to the size of the hidden layer for the RNNs.
    activation = nn.ReLU()

    component_layers=[]
    if middle_layers!=[]:

        component_layers.append(
            nn.Linear(
                in_features  = input_size,
                out_features  = middle_layers[0]))
        component_layers.append(activation)


        if len(middle_layers)>1:
            for layer in range(1,len(middle_layers)):
                component_layers.append(
                    nn.Linear(in_features  = middle_layers[layer-1],
                              out_features  = middle_layers[layer]))
                component_layers.append(activation)

        component_layers.append(
            nn.Linear(in_features  = middle_layers[-1],
                      out_features  = output_size))

    else:
        component_layers.append(
            nn.Linear(in_features  = input_size,
                      out_features  = output_size))

    i2o = nn.Sequential(*component_layers)
    
    return i2o

Pad Data Method : Used with the pytorch DataLoader in order to pad the length of the tweets by batch. 

In [79]:
class padding_tweet:
    '''
    Pad Data Method : Used with the pytorch DataLoader in order to pad the length of the tweets by batch. 
    args: 
        batch - List of elements ( x , label )
    return 
        batch - Padded ( list(x) , list(label))
    
    '''
    def __init__(self, max_len):
        self.max_len = max_len
    def __call__(self,batch):
        
        batch = list(zip(*batch))
       
        batch[0] = torch.stack([self.pad_tensor(vec=t, pad=self.max_len, dim=0) for t in batch[0]],dim=0)
        batch[1] = torch.stack(batch[1])
        return batch[0] , batch[1]

    def pad_tensor(self,vec, pad, dim):
        """
        args:
            vec - tensor to pad
            pad - the size to pad to
            dim - dimension to pad

        return:
            a new tensor padded to 'pad' in dimension 'dim'
        """
        pad_size = list(vec.shape)
        pad_size[dim] = pad - vec.size(dim)
        return torch.cat([vec, torch.zeros(*pad_size,dtype=torch.long)], dim=dim)


## Main Class : Trainer
Main Class for the loading, training, testing etc ...

In [104]:
class OffensiveClassifier(object):
    ''' Main Class for the loading, training, testing etc ...'''
    def __init__(self,subtask='subtask_a', dim_vect=25,pValid = 0.15):
        
        self.dim_vect = dim_vect
        
        self.subtask = subtask
        

        # Loading the GloVe Embedding and Torch Formating of this Embedding
        self.GloVe = GloVe_embedding(dim_vect= dim_vect )
        self.embedding = self.GloVe.emb_layer
        
        # Loading the Data Handler : 
        self.dataHandler = DataHandling(self.GloVe,pValid=pValid)
        
        # Retrieving Training DataSet (pytorch)
        self.train_set = self.dataHandler.getDataset('train',subtask,balanced=True)
        
        # Retrieving the Validation Set (pytorch)
        self.valid_set = self.dataHandler.getDataset('validation',subtask)

        # Retrieving Test DataSet (pytorch)
        self.test_set = self.dataHandler.getDataset('test',subtask)
        
        # Usefull info on classes : 
        self.class2id  = self.dataHandler.classes_dict[self.subtask]
        self.id2class = dict(zip(self.class2id.values(),self.class2id.keys()))
        self.nb_class = len(self.class2id)
        self.max_len  = max([ max([len(t) for t in self.train_set.token]),
                              max([len(t) for t in self.valid_set.token]),
                              max([len(t) for t in self.test_set.token])])
            
        
    
    def train( self, nb_epochs, optimizer , model , batch_size = 1000 ,  ):
        
        self.train_generator = DataLoader(self.train_set, batch_size=batch_size,collate_fn=padding_tweet(self.max_len), shuffle=True)

        self.model = model.to(device)
        loss_fn = self.model.loss_fn()

        for epoch in range(nb_epochs):
            i_batch = 0
            accuracy_average = 0
            self.model.train() 
            
            for tokens, target  in self.train_generator :
                i_batch += 1
                target = target.long().view((-1,)).to(device)
                tokens = tokens.long().to(device)
                #to ensure the dropout (exlained later) is "turned on" while training
                #good practice to include even if do not use here
                self.model.train()

                #we zero the gradients as they are not removed automatically
                optimizer.zero_grad()

                output = self.model(tokens)
                #print(output.shape)

                predictions = torch.argmax(output,dim=1).float()
                loss = loss_fn(output, target)
                acc, correct = self.accuracy(predictions, target)
                accuracy_average += acc
                #calculate the gradient of each parameter
                loss.backward()
                
                #update the parameters using the gradients and optimizer algorithm 
                optimizer.step()

                epoch_loss = loss.item()

                if i_batch % (nb_epochs -1 )  ==0:
                    pass
            print(f'| Epoch: {epoch:02} | Train Loss: {epoch_loss:.3f} | Train Acc: {accuracy_average*100/i_batch:.2f}%')
            self.validation(batch_size = 50)
            print('=============================================================================')
            print()
            
    def validation(self, batch_size = 1000):
        all_correct = 0
        self.validation_generator = DataLoader(self.valid_set, batch_size=batch_size, collate_fn=padding_tweet(self.max_len), shuffle=True)
        loss_fn = self.model.loss_fn()
        nb_valid = len(taskAclassifier.valid_set)
        self.model.eval()  # set model to evaluation mode
        all_prediction = []
        all_target = []
        
        with torch.no_grad(): 
            for tokens, target  in self.validation_generator :
                target = target.long().view((-1,)).to(device)
                tokens = tokens.long().to(device)
                
                output = self.model(tokens)
                predictions = torch.argmax(output,dim=1)
                
                loss = loss_fn(output, target)
                
                all_prediction.extend(predictions.view(-1,).tolist())
                all_target.extend(target.view(-1,).tolist())
                
        accuracy, CM, stats_df = self.results(all_prediction,all_target)
        print(f'| Stats on the Validation : ')
        print(stats_df)
        
        print(f'| Validation Accuracy : {100*accuracy:.2f} % - Macro F1 Score : {stats_df.loc["f1-measure"].mean():.4f} ')
        print(f'| CM on the Validation : ')
        print(CM)
        print()
        
    def test(self,folder):
        ''' 
            Test Function : Tests the Network on the Test Data of the Subtask and Saves in a file
        '''
        test_result = pd.DataFrame()
        self.test_generator = DataLoader(self.test_set,collate_fn= padding_tweet(self.max_len) )
        nb_valid = len(taskAclassifier.valid_set)
        self.model.eval()  # set model to evaluation mode
        all_prediction = []
        all_id = []
        with torch.no_grad(): 
            for tokens, id_tweet  in self.test_generator :
                id_tweet = id_tweet.long().view((-1,))
                tokens = tokens.long().to(device)
                
                output = self.model(tokens)
                predictions = torch.argmax(output,dim=1)
                
                all_prediction.extend(predictions.view(-1,).tolist())
                all_id.extend(id_tweet.view(-1,).tolist())
        results_df = pd.DataFrame(all_prediction, index=all_id)
              
        # Change id_labels to labels
        results_df[0] = results_df[0].apply(lambda x : self.id2class[x])
              
        # Saving the Results in the right format in the folder ../data/results/*folder*
        save_path = join('..','data','results')
        if not(exists(save_path)) : mkdir(save_path)
        save_path = join(save_path,folder)
        if not(exists(save_path)) : mkdir(save_path)
        
        path_file = join(save_path, self.subtask + '.csv')
        i = 0
        while exists(path_file):
              i += 1
              path_file = join(save_path, self.subtask + '_'+ str(i) +'.csv')
        results_df.to_csv(path_file, header=False)
        print('| Successfully save to : ', path_file)
        

    def accuracy(self, output, target):
        target = np.array(target.tolist()).astype(int)
        output = np.array(torch.round(output).tolist()).astype(int)
        correct = (output == target)
        accuracy = correct.sum()/len(correct)
        return accuracy, correct
        
    def results(self, output, target ):
        target = np.array(target).astype(int)
        output = np.round(output).astype(int)
        correct = (output == target)
        accuracy = correct.sum()/len(correct)
        
        CM = np.zeros((self.nb_class,self.nb_class))
        for i in range(len(output)): 
            CM[target[i], output[i]] += 1
        
        # compute the interesting stats on the classification : 
        stats_df = self.stats(CM)
        stats_df.columns = map(lambda x : self.id2class[x] ,stats_df.columns)
        return (accuracy, CM, stats_df)

    def stats(self,CM):   
        n_class = self.nb_class
    
        stats = {}
        stats['precision'] = {} 
        stats['recall'] = {} 
        stats['accuracy'] = {} 
        stats['f1-measure'] = {} 

        for t in range(self.nb_class):
            tp = CM[t,t]
            tn = np.sum([CM[i,i] for i in range(self.nb_class) if i != t])

            fp = np.sum([CM[i,t] for i in range(self.nb_class) if i != t])
            fn = np.sum([CM[t,i] for i in range(self.nb_class) if i != t])

            # compute accuracy per class :
            accuracy = (tp + tn) / (tp + tn + fp + fn)

            # compute recall per class
            recall = tp /(tp + fn)

            # compute precision per class 
            precision = tp / (tp + fp)

            # compute F1-measure per class 
            f1 = 2*tp / (2* tp + fp + fn)

            # saving stats : 

            stats['precision'][t] = precision
            stats['recall'][t] = recall
            stats['accuracy'][t] = accuracy
            stats['f1-measure'][t] = f1

        # Compute Average classification rate
        stats = pd.DataFrame(stats).transpose()
        return stats


## Evaludation of the Model 
Choice of the sub-task, the dimension of the embedding vectors, and the percentage of validation taken from train.

**Best choice** : `dim_vect = 200` and `pValid = 0.2`


In [114]:
# 1st step : Define Classifier for specific task
taskAclassifier = OffensiveClassifier(subtask='subtask_a', dim_vect=200 , pValid = 0.2)

---- Loading the processed GloVe files : Done
---- Creating the Pytorch Embedding Layer  : Done
-- Data Handling : 
---- Load the Clean Adapted Dataset : Done
---- Augmenting the Data : 
Before Augmentation :  {'NOT': 7072, 'OFF': 3520}
After Augmentation :  {'NOT': 7072, 'OFF': 7072}


### 1) FFNN
Best Parameters :

In [None]:
# 2nd Step : Define Model : 
model = FFNN(embedding= taskAclassifier.embedding, 
             hidden_dim= 100 ,
             num_classes = taskAclassifier.nb_class,
             embedding_dim= taskAclassifier.dim_vect)

In [None]:
# 3rd Step : Define Optinizer : 
optimizer = optim.Adam(model.parameters(), lr=0.00001,weight_decay=0.005)

In [None]:
# 4th Step : Train the Model
taskAclassifier.train( nb_epochs= 50,
                       optimizer= optimizer , 
                       model= model ,
                       batch_size= 100)

In [109]:
# 5th Step : Test the Model and save data 
taskAclassifier.test(folder='FFNN')

| Successfully save to :  ../data/results/LSTM/subtask_a_2.csv


### 2) CNN : 1 Layer
Best Parameters :

In [None]:
# 2nd Step : Define Model : 
model = CNN( embedding= taskAclassifier.embedding, 
             embedding_dim = taskAclassifier.dim_vect,
             out_channels= 150,
             window_size= 3,
             output_dim= taskAclassifier.nb_class,
             dropout = 0.5)

In [None]:
# 3rd Step : Define Optinizer : 
optimizer = optim.Adam(model.parameters(), lr=0.00001,weight_decay=0.005)

In [None]:
# 4th Step : Train the Model
taskAclassifier.train( nb_epochs= 50,
                       optimizer= optimizer , 
                       model= model ,
                       batch_size= 100)

In [109]:
# 5th Step : Test the Model and save data 
taskAclassifier.test(folder='CNN')

| Successfully save to :  ../data/results/LSTM/subtask_a_2.csv


### 3) CNN : 3 Layers
Best Parameters :

In [127]:
# 2nd Step : Define Model : 
model = CNN_paper(embedding= taskAclassifier.embedding, 
             embedding_dim = taskAclassifier.dim_vect,
             out_channels= 100,
             window_size= [3,4,5],
             output_dim= taskAclassifier.nb_class,
             dropout = 0.5)

In [128]:
# 3rd Step : Define Optinizer : 
optimizer = optim.Adam(model.parameters(), lr=0.001,weight_decay=0.005)

In [129]:
# 4th Step : Train the Model
taskAclassifier.train( nb_epochs= 50,
                       optimizer= optimizer , 
                       model= model ,
                       batch_size= 20)

| Epoch: 00 | Train Loss: 0.664 | Train Acc: 50.00%
| Stats on the Validation : 
                 NOT       OFF
precision   0.853746  0.537302
recall      0.670249  0.769318
accuracy    0.703172  0.703172
f1-measure  0.750951  0.632710
| Validation Accuracy : 70.32 % - Macro F1 Score : 0.6918 
| CM on the Validation : 
[[1185.  583.]
 [ 203.  677.]]


| Epoch: 01 | Train Loss: 0.883 | Train Acc: 75.00%
| Stats on the Validation : 
                 NOT       OFF
precision   0.844109  0.626385
recall      0.790158  0.706818
accuracy    0.762462  0.762462
f1-measure  0.816243  0.664175
| Validation Accuracy : 76.25 % - Macro F1 Score : 0.7402 
| CM on the Validation : 
[[1397.  371.]
 [ 258.  622.]]


| Epoch: 02 | Train Loss: 0.610 | Train Acc: 75.00%
| Stats on the Validation : 
                 NOT       OFF
precision   0.884185  0.526504
recall      0.626131  0.835227
accuracy    0.695619  0.695619
f1-measure  0.733113  0.645870
| Validation Accuracy : 69.56 % - Macro F1 Score : 0.689

KeyboardInterrupt: 

In [109]:
# 5th Step : Test the Model and save data 
taskAclassifier.test(folder='CNN_paper')

| Successfully save to :  ../data/results/LSTM/subtask_a_2.csv


### 4) RNN : 
Best Parameters :

In [121]:
# 2nd Step : Define Model : 
model = RNN(embedding= taskAclassifier.embedding,  
            embedding_dim = taskAclassifier.dim_vect,
            hidden_size=taskAclassifier.dim_vect, 
            output_size=taskAclassifier.nb_class,
            num_layers=2,
            output_layer = output_layer(taskAclassifier.dim_vect, taskAclassifier.nb_class,
                middle_layers = [50])) #Change this to add/remove middle layers from output


In [122]:
# 3rd Step : Define Optinizer : 
optimizer = optim.Adam(model.parameters(), lr=0.00001,weight_decay=0.005)

In [123]:
# 4th Step : Train the Model
taskAclassifier.train( nb_epochs= 50,
                       optimizer= optimizer , 
                       model= model ,
                       batch_size= 100)

| Epoch: 00 | Train Loss: 0.692 | Train Acc: 52.27%




| Stats on the Validation : 
                 NOT       OFF
precision        NaN  0.332326
recall      0.000000  1.000000
accuracy    0.332326  0.332326
f1-measure  0.000000  0.498866
| Validation Accuracy : 33.23 % - Macro F1 Score : 0.2494 
| CM on the Validation : 
[[   0. 1768.]
 [   0.  880.]]


| Epoch: 01 | Train Loss: 0.696 | Train Acc: 43.18%
| Stats on the Validation : 
                 NOT       OFF
precision   0.666667  0.332324
recall      0.002262  0.997727
accuracy    0.333082  0.333082
f1-measure  0.004510  0.498580
| Validation Accuracy : 33.31 % - Macro F1 Score : 0.2515 
| CM on the Validation : 
[[   4. 1764.]
 [   2.  878.]]


| Epoch: 02 | Train Loss: 0.701 | Train Acc: 36.36%
| Stats on the Validation : 
                 NOT       OFF
precision   0.717850  0.344617
recall      0.211538  0.832955
accuracy    0.418051  0.418051
f1-measure  0.326780  0.487529
| Validation Accuracy : 41.81 % - Macro F1 Score : 0.4072 
| CM on the Validation : 
[[ 374. 1394.]
 [ 147. 

KeyboardInterrupt: 

In [None]:
# 5th Step : Test the Model and save data 
taskAclassifier.test(folder='RNN')

### 5) LSTM : 
Best Parameters :

In [118]:
# 2nd Step : Define Model : 
model = LSTM(embedding= taskAclassifier.embedding, 
            embedding_dim = taskAclassifier.dim_vect,
            hidden_size=taskAclassifier.dim_vect, 
            output_size=taskAclassifier.nb_class,
            num_layers=1,
            output_layer = output_layer(taskAclassifier.dim_vect, taskAclassifier.nb_class,
                middle_layers = [150])) #Change this to add/remove middle layers from output

In [119]:
# 3rd Step : Define Optinizer : 
optimizer = optim.Adam(model.parameters(), lr=0.00001,weight_decay=0.005)

In [120]:
# 4th Step : Train the Model
taskAclassifier.train( nb_epochs= 50,
                       optimizer= optimizer , 
                       model= model ,
                       batch_size= 100)

| Epoch: 00 | Train Loss: 0.695 | Train Acc: 47.73%
| Stats on the Validation : 
                 NOT       OFF
precision   0.668315  0.421053
recall      0.993778  0.009091
accuracy    0.666541  0.666541
f1-measure  0.799181  0.017798
| Validation Accuracy : 66.65 % - Macro F1 Score : 0.4085 
| CM on the Validation : 
[[1757.   11.]
 [ 872.    8.]]




KeyboardInterrupt: 

In [None]:
# 5th Step : Test the Model and save data 
taskAclassifier.test(folder='LSTM')

### 6) GRU : 
Best Parameters :

In [115]:
# 2nd Step : Define Model : 
model = GRU(embedding= taskAclassifier.embedding,  
            embedding_dim = taskAclassifier.dim_vect,
            hidden_size=100, 
            output_size=taskAclassifier.nb_class,
            num_layers=2,
            output_layer = output_layer(taskAclassifier.dim_vect, taskAclassifier.nb_class,
                middle_layers = [50])) #Change this to add/remove middle layers from output

In [116]:
# 3rd Step : Define Optinizer : 
optimizer = optim.Adam(model.parameters(), lr=0.00001,weight_decay=0.005)

In [117]:
# 4th Step : Train the Model
taskAclassifier.train( nb_epochs= 50,
                       optimizer= optimizer , 
                       model= model ,
                       batch_size= 100)

RuntimeError: size mismatch, m1: [100 x 100], m2: [200 x 50] at /opt/conda/conda-bld/pytorch_1549628766161/work/aten/src/THC/generic/THCTensorMathBlas.cu:266

In [None]:
# 5th Step : Test the Model and save data 
taskAclassifier.test(folder='GRU')