<a href="https://colab.research.google.com/github/KairaNithin/XML-Experiments/blob/main/XLM_with_max_and_avg_Pooling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install sentence_transformers
!pip install transformers
from sentence_transformers import SentenceTransformer

# !git clone https://github.com/sayarghoshroy/Hate-Speech-Detection.git

Collecting sentence_transformers
[?25l  Downloading https://files.pythonhosted.org/packages/c4/87/49dc49e13ac107ce912c2f3f3fd92252c6d4221e88d1e6c16747044a11d8/sentence-transformers-1.1.0.tar.gz (78kB)
[K     |████████████████████████████████| 81kB 8.4MB/s 
[?25hCollecting transformers<5.0.0,>=3.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 17.0MB/s 
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 35.8MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)

In [None]:
import random
import pickle
import re
import time
import datetime

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm, neighbors
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
# Check where we need this
# from nltk.corpus import stopwordsm
from nltk.tokenize import sent_tokenize

import torch
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import Dataset
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelWithLMHead


import torch.nn as nn
from transformers import XLMRobertaTokenizer, XLMRobertaModel
import gensim.models as gsm

from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


from tqdm import tqdm 
import gc
import os

In [None]:
# Choose the compute device once; later cells read the module-level `device`.
gpu_found = torch.cuda.is_available()
device = torch.device("cuda" if gpu_found else "cpu")
if not gpu_found:
    print('No GPU available, using the CPU instead.')
else:
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

No GPU available, using the CPU instead.


In [None]:
data_loc = 'Hate-Speech-Detection/data'

In [None]:
e2v = gsm.KeyedVectors.load_word2vec_format('emoji2vec.bin', binary=True)

def getEmojiEmbeddings(emojiList,dim=300,verbose = False):
  """Average the emoji2vec vectors of the given emojis into one fixed-size vector.

  Args:
    emojiList: sequence of emoji strings looked up in the module-level `e2v` model.
    dim: length of the returned vector; must be at least 300. The averaged
      embedding fills the first 300 slots and any remaining slots stay zero.
    verbose: when True, print every emoji that is missing from `e2v.vocab`.

  Returns:
    A numpy array of length `dim`. It is all zeros when `emojiList` is empty,
    when none of its emojis is in the vocabulary, or when the average
    contains NaN.

  Raises:
    IndexError: if `dim` is smaller than 300.
  """
  if dim < 300:
    # 300 itself is valid, so the message says "at least" rather than "greater than".
    raise IndexError("Dim has to be at least 300")
  result = np.zeros(dim)
  if len(emojiList) == 0:
    return result

  known_vectors = []
  for emoji in emojiList:
    if emoji in e2v.vocab:
      known_vectors.append(e2v[emoji])
    elif verbose:
      print(emoji)

  # Return early instead of averaging an empty list, which would yield NaN
  # together with a numpy RuntimeWarning.
  if not known_vectors:
    return result

  embs = np.mean(known_vectors, axis=0)
  if np.any(np.isnan(embs)):
    return result
  result[:300] = embs
  return result

FileNotFoundError: ignored

In [None]:
def loadData(lang):
  """Load one language's preprocessed pickle and split it 75/25 into train and test.

  Args:
    lang: language code, one of 'hi', 'en' or 'ge'.

  Returns:
    (train_df, test_df, df): the two splits and the full, unsplit DataFrame.

  Raises:
    NameError: if `lang` is not one of the supported codes.
  """
  supported_codes = ('hi', 'en', 'ge')
  if lang not in supported_codes:
      raise NameError("Language not found")

  # DATASET_ROOT is plain string-concatenated, so it has to end with a path separator.
  pickle_path = DATASET_ROOT + (lang + '.pickle')
  # NOTE(review): pickle.load executes arbitrary code; only point this at trusted files.
  with open(pickle_path, 'rb') as handle:
    records = pickle.load(handle)

  full_df = pd.DataFrame.from_dict(records)
  train_part, test_part = model_selection.train_test_split(full_df, random_state = 42, test_size = 0.25)
  return train_part, test_part, full_df

def loadDataAllLangs():
  """Load the Hindi, English and German pickles and merge them into shuffled frames.

  Returns:
    (train_df, test_df, df): the concatenated per-language train splits, test
    splits and full frames, each shuffled with a fixed seed of 42.
  """
  # Load order (hi, en, ge) fixes the row order before shuffling.
  per_language = [loadData(code) for code in ('hi', 'en', 'ge')]
  train_parts, test_parts, full_parts = zip(*per_language)
  print("total size:", sum(len(frame) for frame in full_parts))

  def merge_and_shuffle(frames):
    merged = pd.concat(list(frames), ignore_index=True)
    return merged.sample(frac = 1, random_state=42)

  return merge_and_shuffle(train_parts), merge_and_shuffle(test_parts), merge_and_shuffle(full_parts)

class HASOCDataset(Dataset):
  """Torch `Dataset` wrapper around a pandas DataFrame of examples.

  Items are returned as the positional rows of the DataFrame (`df.iloc[index]`).
  """
  def __init__(self, dataPath, isDF = False):
    """Build the backing DataFrame.

    Args:
      dataPath: when `isDF` is True, the data itself (anything
        `pd.DataFrame.from_dict` accepts); otherwise the path of a pickle
        file holding such data.
      isDF: selects how `dataPath` is interpreted, see above.
    """
    if isDF:
      data = dataPath
    else:
      # Context manager so the file handle is closed even if unpickling fails.
      # NOTE(review): pickle.load executes arbitrary code; only load trusted files.
      with open(dataPath, 'rb') as handle:
        data = pickle.load(handle)
    self.df = pd.DataFrame.from_dict(data)

  def __len__(self):
    """Number of rows in the backing DataFrame."""
    return len(self.df)

  def __getitem__(self,index):
    """Return the row at positional `index`."""
    return self.df.iloc[index]

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) RNGs with `seed`.

    Also switches cuDNN to deterministic mode and turns off its benchmark
    auto-tuner.
    """
    for seeder in (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed, random.seed):
        seeder(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
set_seed(42)

In [None]:
class FullExample(object):
  """Plain record holding every field of one dataset row.

  Kept only in case the full field set is needed again; `Example` is the
  trimmed-down variant.
  """
  def __init__(self, id, task_1, task_2, hasoc_id, full_tweet, tweet_raw_text, hashtags, smiley, emoji, url, mentions, numerals, reserved_word, segmented_hash):
    field_names = (
      'id', 'task_1', 'task_2', 'hasoc_id', 'full_tweet', 'tweet_raw_text',
      'hashtags', 'smiley', 'emoji', 'url', 'mentions', 'numerals',
      'reserved_word', 'segmented_hash',
    )
    field_values = (
      id, task_1, task_2, hasoc_id, full_tweet, tweet_raw_text,
      hashtags, smiley, emoji, url, mentions, numerals,
      reserved_word, segmented_hash,
    )
    # Each constructor argument is stored unchanged under the attribute of the same name.
    for attribute, value in zip(field_names, field_values):
      setattr(self, attribute, value)
  
class Example(object):
  """Plain record with the fields of one dataset row used for feature building."""
  def __init__(self, id, task_1, task_2, hasoc_id, full_tweet, tweet_raw_text,  emoji,  segmented_hash):
    field_names = ('id', 'task_1', 'task_2', 'hasoc_id', 'full_tweet', 'tweet_raw_text', 'emoji', 'segmented_hash')
    field_values = (id, task_1, task_2, hasoc_id, full_tweet, tweet_raw_text, emoji, segmented_hash)
    # Each constructor argument is stored unchanged under the attribute of the same name.
    for attribute, value in zip(field_names, field_values):
      setattr(self, attribute, value)

class ExampleFeautres(object):
  """Batch-friendly features of one example.

  `emoji` and `hash` are converted to tensors with `torch.tensor`; every other
  argument is stored unchanged.
  """
  def __init__(self, id, task_1, task_2, input_ids, input_mask,input_length,  emoji,  hash):
    emoji_tensor = torch.tensor(emoji)
    hash_tensor = torch.tensor(hash)
    self.id, self.task_1, self.task_2 = id, task_1, task_2
    self.emoji = emoji_tensor
    self.input_ids, self.input_mask, self.input_length = input_ids, input_mask, input_length
    self.hash = hash_tensor

In [None]:
# Tokenizer of the multilingual XLM-R sentence-transformers checkpoint; convertExamplesToFeature
# uses it to turn the raw tweet text into token ids.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")
# Fixed number of token positions per tweet, including the CLS and SEP tokens added in
# convertExamplesToFeature; shorter inputs are padded and longer ones truncated to this length.
max_seq_length = 74
# e2v = gsm.KeyedVectors.load_word2vec_format('/content/drive/My Drive/emoji2vec.bin', binary = True)
# Sentence encoder that convertExamplesToFeature uses to embed the space-joined segmented hashtags.
sent_encoder = SentenceTransformer('xlm-r-100langs-bert-base-nli-mean-tokens')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=541.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=5069051.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=150.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=147.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, max=1014108634.0), HTML(value='')))




In [None]:
labels_task1 = {'NOT':0, 'HOF':1}
labels_task2 = {'NONE':0,'PRFN':1,'OFFN':2,'HATE':3}

def convertExamplesToFeature(example):
  """Turn one `Example` into fixed-length, batch-friendly `ExampleFeautres`.

  The raw tweet text is tokenized, truncated so that CLS + tokens + SEP fit
  into `max_seq_length`, converted to ids and right-padded with zeros. The
  segmented hashtags are joined with spaces and embedded with `sent_encoder`;
  the emojis are embedded with `getEmojiEmbeddings`; the task labels are
  mapped to integers through `labels_task1` / `labels_task2`.

  Hashtags are deliberately embedded as one sentence rather than tokenized
  into their own padded id sequence.
  """
  # Leave two positions free for the CLS and SEP specials.
  content_tokens = tokenizer.tokenize(example.tweet_raw_text)[: (max_seq_length-2)]
  wrapped_tokens = [tokenizer.cls_token] + content_tokens + [tokenizer.sep_token]

  token_ids = tokenizer.convert_tokens_to_ids(wrapped_tokens)
  input_length = len(token_ids)
  pad_count = max_seq_length - input_length
  # NOTE(review): padding uses id 0 — confirm this matches tokenizer.pad_token_id.
  input_ids = token_ids + [0] * pad_count
  input_mask = [1] * input_length + [0] * pad_count

  hash_embedding = sent_encoder.encode(' '.join(example.segmented_hash))
  emoji_embedding = getEmojiEmbeddings(example.emoji)

  task1_label = labels_task1[example.task_1]
  task2_label = labels_task2[example.task_2]
  return ExampleFeautres(example.id, task1_label, task2_label, input_ids, input_mask, input_length, emoji_embedding, hash_embedding)

In [None]:
def getDataset(input_features):
    """Pack a list of feature objects into a single `TensorDataset`.

    Position of each field inside one dataset item:
      0 -> id
      1 -> input ids
      2 -> input masks
      3 -> input lengths
      4 -> hashtag embeddings
      5 -> emoji embeddings
      6 -> task 1 label
      7 -> task 2 label
    """
    def long_column(attribute):
        # Integer-valued fields become one int64 tensor with a leading example axis.
        return torch.tensor([getattr(feature, attribute) for feature in input_features], dtype=torch.long)

    def stacked_column(attribute):
        # Fields that are already tensors are stacked along a new leading axis.
        return torch.stack([getattr(feature, attribute) for feature in input_features])

    return TensorDataset(
        long_column('id'),
        long_column('input_ids'),
        long_column('input_mask'),
        long_column('input_length'),
        stacked_column('hash'),
        stacked_column('emoji'),
        long_column('task_1'),
        long_column('task_2'),
    )

In [None]:
d = None
def train_val_dataset(dataset, val_split = 0.2, random_state = None):
    """Split `dataset` into random train / validation `Subset`s.

    Args:
      dataset: any object supporting `len()`; it is indexed by the returned Subsets.
      val_split: passed to `train_test_split` as `test_size`.
      random_state: passed to `train_test_split`. The default `None` keeps the
        previous behaviour, where the split is only reproducible if the caller
        seeded NumPy's global RNG beforehand; pass an int for a split that is
        reproducible on its own.

    Returns:
      dict with keys 'train' and 'valid', each a `Subset` of `dataset`.
    """
    indices = list(range(len(dataset)))
    train_idx, val_idx = train_test_split(indices, test_size=val_split, random_state=random_state)
    return {
        'train': Subset(dataset, train_idx),
        'valid': Subset(dataset, val_idx),
    }

def getDataloader(path_to_pickle, val_split = 0.2, batch_size = 16, multiLing = True):
  """Build feature tensors for the dataset and wrap them in DataLoaders.

  Args:
    path_to_pickle: pickle file to load; only read when `multiLing` is False.
    val_split: fraction passed to `train_val_dataset` for the validation split.
    batch_size: batch size of the first three returned loaders.
    multiLing: when True, load all languages through `loadDataAllLangs`
      and ignore `path_to_pickle`.

  Returns:
    (train_dataloader, valid_dataloader, dataloader, dataloaders):
      - train_dataloader: randomly sampled batches over the train split.
      - valid_dataloader: sequential batches over the validation split.
      - dataloader: sequential batches over the whole dataset.
      - dataloaders: dict with 'train' and 'valid' loaders that use a fixed
        batch size of 32, shuffling and 4 workers (independent of `batch_size`).
  """
  if multiLing:
    _, _, df = loadDataAllLangs()
    tempDataset = HASOCDataset(df, isDF=True)
  else:
    tempDataset = HASOCDataset(path_to_pickle)

  input_features = []
  for i in tqdm(range(len(tempDataset))):
    # Fetch the row once; every `tempDataset[i]` is a separate DataFrame lookup.
    row = tempDataset[i]
    example = Example(i, row['task_1'], row['task_2'], row['hasoc_id'], row['full_tweet'], row['tweet_raw_text'], row['emoji'], row['segmented_hash'])
    input_features.append(convertExamplesToFeature(example))
  dataset = getDataset(input_features)

  # Re-seed right before splitting so the train/validation split is reproducible.
  set_seed(42)
  dd = train_val_dataset(dataset, val_split)

  # NOTE(review): drop_last=True also discards the final partial batch of the
  # validation and full loaders, so those samples are never evaluated — confirm intended.
  train_dataloader = DataLoader(dd['train'], sampler = RandomSampler(dd['train']), batch_size=batch_size, drop_last=True)
  valid_dataloader = DataLoader(dd['valid'] , batch_size=batch_size, drop_last=True)
  dataloader = DataLoader(dataset , batch_size=batch_size, drop_last=True)
  dataloaders = {x:DataLoader(dd[x], 32, shuffle = True, num_workers = 4) for x in ['train','valid']}

  return train_dataloader, valid_dataloader, dataloader, dataloaders

In [None]:
# data_loc = "/content/Hate-Speech-Detection/data/2020_processed_train/"
# DATASET_ROOT = data_loc
# train_dataloader, valid_dataloader, dataloader, dataloaders = getDataloader(data_loc , multiLing = True)

train_dataloader, valid_dataloader, dataloader, dataloaders = torch.load('drive/MyDrive/train_dataloader.pth'), torch.load('drive/MyDrive/valid_dataloader.pth'),torch.load('drive/MyDrive/dataloader.pth'),torch.load('drive/MyDrive/dataloaders.pth')

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# for i,batch in enumerate(train_dataloader):
#   break

In [None]:
# input_ids = batch[1]
# input_mask = batch[2]
# labels = batch[6]
# emoji = batch[5]
# hashtag = batch[4]

In [None]:
# emoji.shape

In [None]:
# hashtag.shape

In [None]:
# bert = XLMRobertaModel.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")

In [None]:
# outputs = bert(input_ids,attention_mask = input_mask)

In [None]:
# ans = attn(outputs[0],768)

In [None]:
# def attn(x,model_dim):
#       mlp = nn.Sequential(
#           nn.Linear(model_dim, model_dim // 2),
#           nn.ReLU(),
#           nn.Linear(model_dim // 2, model_dim),
#           nn.ReLU()
#           )
#       normal_inputs = x.permute(0,2,1)
#       max = F.max_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)

#       avg = F.avg_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)

#       max_n = mlp(max)

#       avg_n = mlp(avg)

#       res_channel = torch.cat((max_n.unsqueeze(1),avg_n.unsqueeze(1)),axis=1)

#       # spacial attention


#       res_spacial = nn.Conv1d(2,1,kernel_size=1)(torch.cat((max.unsqueeze(1),avg.unsqueeze(1)),axis=1))

#       res_attention = torch.cat((res_channel,res_spacial,hashtag.unsqueeze(1)),axis=1)

#       print(res_attention.shape)

#       # gated convolutional networks

#       A = nn.Conv1d(4, 8, kernel_size=(469))(res_attention)
#       B = nn.Conv1d(4, 8, kernel_size=(469))(res_attention)


#       h = A*F.sigmoid(B)

#       print(h.shape)
#       ans = torch.cat((h,emoji.unsqueeze(1)),axis=1)


#       ans = ans.view(ans.shape[0],-1)

#       # print("oo")
#       # ans = nn.Linear(ans.shape[1],ans.shape[1]/3)(ans.type(torch.float))
#       # print("kk")
#       # ans = nn.Linear(ans.shape[1],2)(ans)
#       # print("pp")
#       # ans = nn.Linear(ans.shape[1]*ans.shape[2],(ans.shape[1]*ans.shape[2]/2))(ans)

#       return ans

In [None]:
# def forward(x,model_dim):
#   mlp = nn.Sequential(
#         nn.Linear(model_dim, model_dim // 2),
#         nn.ReLU(),
#         nn.Linear(model_dim // 2, model_dim),
#         nn.ReLU()
#         )
#   normal_inputs = x.permute(0,2,1)
#   max = F.max_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)

#   avg = F.avg_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)
  
#   max_n = mlp(max)
#   avg_n = mlp(avg)

#   res_channel = torch.cat((max_n.unsqueeze(1),avg_n.unsqueeze(1)),axis=1)

#   # spacial attention


#   res_spacial = nn.Conv1d(2,1,kernel_size=1)(torch.cat((max.unsqueeze(1),avg.unsqueeze(1)),axis=1))

#   res_attention = torch.cat((res_channel,res_spacial),axis=1)

#   # gated convolutional networks
#   A = nn.Conv1d(4, 8, kernel_size=(469))(ans)
#   B = nn.Conv1d(4, 8, kernel_size=(469))(ans)

#   h = A*F.sigmoid(B)
  
#   ans = torch.cat((h,emoji.unsqueeze(1)),axis=1)(h)

#   ans = nn.Linear(ans.shape[1]*ans.shape[2],(ans.shape[1]*ans.shape[2]/2))(ans)

#   return ans

In [None]:
# from transformers import XLMRobertaTokenizer, XLMRobertaModel
# bert = XLMRobertaModel.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")
# import torch
# inputs = torch.tensor([[0]*76]*16)
# # inputs = torch.nn.Embedding(90000,768)(inputs)
# outputs = bert(inputs.to(torch.int64))
# import torch.nn.functional as F
# a = F.max_pool1d(outputs[0],kernel_size=768).squeeze(2)
# b = F.avg_pool1d(outputs[0],kernel_size=768).squeeze(2)
# a = a.unsqueeze(1)
# b = b.unsqueeze(1)
# import torch.nn as nn
# mlp = nn.Sequential(
#             nn.Linear(150, 150 // 2),
#             nn.ReLU(),
#             nn.Linear(150 // 2, 150),
#             nn.ReLU()
#             )
# c = mlp(a)
# d = mlp(b)
# e = torch.cat((a,b),axis=1)

In [None]:
# outputs[0].permute(0,2,1).shape

In [None]:
# class ClassificationHead(nn.Module):
#   """ Classification head for the Roberta Model """ 
#   def __init__(self, numberOfClasses, hidden_size_bert, hidden_size_post_feats, dropout_val = 0.2):
#     super().__init__()
   
#     self.denseInit = nn.Linear(hidden_size_post_feats, hidden_size_bert)
#     self.dense = nn.Linear(hidden_size_bert, hidden_size_bert)
#     self.dropout = nn.Dropout(dropout_val)
#     self.output = nn.Linear(hidden_size_bert, numberOfClasses)
#   def forward(self, x):

   
#     x = self.dropout(x)
#     x = self.denseInit(x)
#     x = torch.tanh(x)
#     x = self.dropout(x)
#     x = self.dense(x)
#     x  = torch.tanh(x)
#     x = self.dropout(x)
#     x  = self.output(x)
#     return x

# def attn(x,model_dim,emoji,hashtag):
#       mlp = nn.Sequential(
#           nn.Linear(model_dim, model_dim // 2),
#           nn.ReLU(),
#           nn.Linear(model_dim // 2, model_dim),
#           nn.ReLU()
#           )
#       normal_inputs = x.permute(0,2,1)
#       max = F.max_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)

#       avg = F.avg_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)

#       max_n = mlp(max)

#       avg_n = mlp(avg)

#       res_channel = torch.cat((max_n.unsqueeze(1),avg_n.unsqueeze(1)),axis=1)

#       # spacial attention


#       res_spacial = nn.Conv1d(2,1,kernel_size=1)(torch.cat((max.unsqueeze(1),avg.unsqueeze(1)),axis=1))

#       res_attention = torch.cat((res_channel,res_spacial,hashtag.unsqueeze(1)),axis=1)

#       # print(res_attention.shape)

#       # gated convolutional networks

#       A = nn.Conv1d(4, 8, kernel_size=(469))(res_attention)
#       B = nn.Conv1d(4, 8, kernel_size=(469))(res_attention)


#       h = A*F.sigmoid(B)

#       # print(h.shape)
#       ans = torch.cat((h,emoji.unsqueeze(1)),axis=1)


#       ans = nn.Conv1d(9,1,kernel_size=1)(ans)

#       # ans = ans.view(ans.shape[0],-1)




#       # ans = nn.Linear(ans.shape[1],900)(ans.type(torch.float))

#       ans = nn.Linear(300,2)(ans)

#       # ans = nn.Linear(ans.shape[1]*ans.shape[2],(ans.shape[1]*ans.shape[2]/2))(ans)

#       return ans

# class TextClassification(nn.Module):
#   """ Classifier with feature injection """
#   def __init__(self, numberOfClasses,dropout_val = 0.1, batch_size = 16):
#      super(TextClassification, self).__init__()
#      self.bert = XLMRobertaModel.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")
#      self.classifier = ClassificationHead(numberOfClasses, self.bert.config.hidden_size, (self.bert.config.hidden_size * 2 + 300) , dropout_val)
#   def forward(self, input_seq, attention_mask, emoji, hashTag):
#     # print("input seq :",input_seq.shape)
#     bert_pooled_output = self.bert(input_seq, attention_mask=attention_mask)[0]
#     # bert_pooled_output = bert_pooled_output[:, 0, :]
#     # print("bert output shape :",bert_pooled_output.shape)
#     # bert_pooled_out_feat = torch.cat([bert_pooled_output, emoji, hashTag], axis = 1)
#     # print("bert_final_pool shape :",bert_pooled_out_feat.shape)
#     # print("Shape",bert_pooled_out_feat.shape)

#     # bert_pooled_out_feat = attn(bert_pooled_output,768)
#     # output = self.classifier(bert_pooled_out_feat)

#     output = attn(bert_pooled_output,768,emoji,hashTag)
#     return output


class ClassificationHead(nn.Module):
  """Feed-forward classification head applied to the concatenated features.

  Two tanh-activated linear layers followed by a linear output layer, with
  the same dropout module applied before each of the three layers.
  """
  def __init__(self, numberOfClasses, hidden_size_bert, hidden_size_post_feats, dropout_val = 0.2):
    """
    Args:
      numberOfClasses: size of the output layer.
      hidden_size_bert: width of both hidden layers.
      hidden_size_post_feats: width of the input feature vector.
      dropout_val: dropout probability.
    """
    super().__init__()
    self.denseInit = nn.Linear(hidden_size_post_feats, hidden_size_bert)
    self.dense = nn.Linear(hidden_size_bert, hidden_size_bert)
    self.dropout = nn.Dropout(dropout_val)
    self.output = nn.Linear(hidden_size_bert, numberOfClasses)

  def forward(self, x):
    """Return unnormalised class scores for the feature batch `x`."""
    for hidden_layer in (self.denseInit, self.dense):
      x = torch.tanh(hidden_layer(self.dropout(x)))
    return self.output(self.dropout(x))

def attn(x,model_dim,emoji,hashtag):
        """Experimental pooling + gated-convolution head that returns 2-way scores.

        Pools `x` with max and average pooling, passes both through a shared
        MLP, combines them with the hashtag features, applies a gated
        convolution, appends the emoji features and projects to 2 outputs.

        NOTE(review): every layer (`nn.Sequential`, `nn.Conv1d`, `nn.Linear`) is
        constructed inside this function, so each call uses freshly initialised
        weights that are not attributes of any module and are not covered by
        `model.parameters()`. As written it cannot be trained.

        Args:
          x: assumed to be (batch, seq_len, model_dim) encoder output — TODO confirm.
          model_dim: feature width of `x`, used to size the MLP.
          emoji: per-example emoji features; concatenated with the gated output,
            so its width must equal that output's length.
          hashtag: per-example hashtag features; concatenated with the pooled
            features, so its width must equal `model_dim`.

        Returns:
          The output of the final `nn.Linear(300, 2)`.
        """

        mlp = nn.Sequential(

            nn.Linear(model_dim, model_dim // 2),
            nn.ReLU(),
            nn.Linear(model_dim // 2, model_dim),
            nn.ReLU()
            )
        # Move the feature axis before the sequence axis so the 1-d pools run over the sequence.
        normal_inputs = x.permute(0,2,1)
        # NOTE(review): `max` shadows the builtin for the rest of this function.
        max = F.max_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)

        avg = F.avg_pool1d(normal_inputs,kernel_size=normal_inputs.shape[2]).squeeze(2)

        max_n = mlp(max)

        avg_n = mlp(avg)

        # Two channels: MLP-transformed max pool and MLP-transformed average pool.
        res_channel = torch.cat((max_n.unsqueeze(1),avg_n.unsqueeze(1)),axis=1)

        # spatial attention: a 1x1 convolution mixes the raw max and average pools into one channel


        res_spacial = nn.Conv1d(2,1,kernel_size=1)(torch.cat((max.unsqueeze(1),avg.unsqueeze(1)),axis=1))

        # Four channels in total: 2 (channel) + 1 (spatial) + 1 (hashtag).
        res_attention = torch.cat((res_channel,res_spacial,hashtag.unsqueeze(1)),axis=1)

        # print(res_attention.shape)

        # gated convolutional networks
        # NOTE(review): kernel size 469 presumably chosen so a 768-wide input yields
        # 768 - 469 + 1 = 300 outputs, matching the Linear(300, 2) below — confirm.

        A = nn.Conv1d(4, 8, kernel_size=(469))(res_attention)
        # NOTE(review): `b` is never used.
        b = nn.Parameter()
        B = nn.Conv1d(4, 8, kernel_size=(469))(res_attention)


        # Gate: A is scaled element-wise by sigmoid(B).
        h = A*torch.sigmoid(B)

        # print(h.shape)
        # Emoji features are appended as a ninth channel.
        ans = torch.cat((h,emoji.unsqueeze(1)),axis=1)


        # 1x1 convolution collapses the nine channels into one.
        ans = nn.Conv1d(9,1,kernel_size=1)(ans)

        # ans = ans.view(ans.shape[0],-1)




        # ans = nn.Linear(ans.shape[1],900)(ans.type(torch.float))

        # NOTE(review): the single channel axis is not squeezed before this projection,
        # so the result keeps that axis — confirm callers expect it.
        ans = nn.Linear(300,2)(ans)

        # ans = nn.Linear(ans.shape[1]*ans.shape[2],(ans.shape[1]*ans.shape[2]/2))(ans)

        return ans

class TextClassification(nn.Module):
  """XLM-R classifier with avg/max pooling and injected emoji / hashtag features.

  The encoder's token states are average- and max-pooled over the sequence,
  both pooled vectors go through a shared MLP, a 1x1 convolution fuses them
  into one vector, and that vector is concatenated with the emoji and hashtag
  features before the classification head.
  """
  def __init__(self, numberOfClasses,dropout_val = 0.1, batch_size = 16):
    """
    Args:
      numberOfClasses: number of output classes of the classification head.
      dropout_val: dropout probability passed to the classification head.
      batch_size: unused; kept so existing callers keep working.
    """
    super(TextClassification, self).__init__()
    self.bert = XLMRobertaModel.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")
    hidden_size = self.bert.config.hidden_size
    # Head input = fused pooled vector + emoji + hashtag features, sized as
    # 2 * hidden_size + 300 (presumably hidden_size-wide hashtag and 300-wide emoji vectors).
    self.classifier = ClassificationHead(numberOfClasses, hidden_size, (hidden_size * 2 + 300) , dropout_val)
    self.mlp = nn.Sequential(
        nn.Linear(hidden_size, hidden_size // 2),
        nn.ReLU(),
        nn.Linear(hidden_size // 2, hidden_size),
        nn.ReLU()
        )
    self.conv0 = nn.Conv1d(2,1,kernel_size=1)

  @staticmethod
  def _masked_pool(hidden, attention_mask):
    """Average- and max-pool `hidden` over the sequence axis, skipping masked positions.

    `hidden` is assumed to be (batch, seq_len, hidden) and `attention_mask`
    (batch, seq_len) with non-zero entries marking real tokens. With no mask,
    every position is pooled. Rows without any real token pool to zeros.
    """
    if attention_mask is None:
      return hidden.mean(dim=1), hidden.max(dim=1).values
    keep = attention_mask.unsqueeze(-1).to(torch.bool)
    token_counts = keep.sum(dim=1).clamp(min=1).to(hidden.dtype)
    avg = hidden.masked_fill(~keep, 0.0).sum(dim=1) / token_counts
    # Padded positions get the most negative value so they can never win the max.
    peak = hidden.masked_fill(~keep, torch.finfo(hidden.dtype).min).max(dim=1).values
    peak = torch.where(keep.any(dim=1), peak, torch.zeros_like(peak))
    return avg, peak

  def forward(self, input_seq, attention_mask, emoji, hashTag):
    """Return class scores for a batch.

    Args:
      input_seq: token ids passed to the encoder.
      attention_mask: mask passed to the encoder and also used to exclude
        padded positions from pooling.
      emoji: per-example emoji feature vectors (float).
      hashTag: per-example hashtag feature vectors (float).
    """
    token_states = self.bert(input_seq, attention_mask=attention_mask)[0]

    # Pool over real tokens only: padded positions still carry hidden states,
    # and including them made the pooled features depend on the amount of padding.
    avg, peak = self._masked_pool(token_states, attention_mask)

    avg = self.mlp(avg)
    peak = self.mlp(peak)

    # Channel 0 = average pool, channel 1 = max pool; the 1x1 conv fuses them.
    fused = self.conv0(torch.stack((avg, peak), dim=1))

    features = torch.cat([fused.squeeze(1), emoji, hashTag], dim=1)
    return self.classifier(features)

In [None]:
model_name = 'adaptive'
model_loc = 'hasoc_saved/'

In [None]:
def modelEvaluate(model, valid_dataloader = None, task = 1):
  """Run `model` over every batch of `valid_dataloader` and collect predictions.

  Args:
    model: callable as `model(input_ids, input_mask, emoji, hashtag)` returning
      per-class scores; switched to eval mode and left in eval mode.
    valid_dataloader: iterable of batches laid out as produced by `getDataset`
      (1 = input ids, 2 = masks, 4 = hashtag embeddings, 5 = emoji embeddings,
      6 = task 1 labels, 7 = task 2 labels). When omitted, the module-level
      `valid_dataloader` is looked up at call time.
    task: 1 or 2, selecting which label column is returned.

  Returns:
    (flat_predictions, flat_true_labels): a list with the argmax class of every
    example and a numpy array with the matching labels, over ALL batches.

  Raises:
    NameError: if `task` is neither 1 nor 2.
  """
  gc.collect()
  if task == 1:
    taskIndex = 6
  elif task == 2:
    taskIndex = 7
  else:
    raise NameError("Task not defined")

  if valid_dataloader is None:
    # Resolved at call time so the function can be defined before the loader exists.
    valid_dataloader = globals()['valid_dataloader']

  model.eval()

  # Feed the batches on whatever device the model's weights live on.
  parameters = getattr(model, 'parameters', None)
  first_param = next(parameters(), None) if callable(parameters) else None
  target = first_param.device if first_param is not None else None

  logits, true_labels = [], []
  with torch.no_grad():
    for batch in valid_dataloader:
      b_input_ids = batch[1]
      b_input_mask = batch[2]
      b_hashtag = batch[4].float()
      b_emoji = batch[5].float()
      if target is not None:
        b_input_ids = b_input_ids.to(target)
        b_input_mask = b_input_mask.to(target)
        b_hashtag = b_hashtag.to(target)
        b_emoji = b_emoji.to(target)
      pred = model(b_input_ids, b_input_mask, b_emoji, b_hashtag)
      logits.append(pred.detach().cpu().numpy())
      true_labels.append(batch[taskIndex].to('cpu').numpy())

  if not logits:
    return [], np.array([], dtype=np.int64)

  # Flatten only after the loop has seen every batch.
  flat_true_labels = np.concatenate(true_labels, axis = 0)
  flat_predictions = [np.argmax(row) for batch_logits in logits for row in batch_logits]
  assert(len(flat_predictions) == len(flat_true_labels))
  return flat_predictions, flat_true_labels

In [None]:
# Checkpoint file that train_model writes the best weights to and reloads them from.
# NOTE(review): there is no "/" between the prefix and model_name, so with model_name = 'adaptive'
# this is "/content/Hate-Speech-Detectionadaptive.pt", and `model_loc` is not used here —
# confirm the intended location.
path = "/content/Hate-Speech-Detection" + model_name + ".pt"
# NOTE(review): train_model assigns its own local `scale`, so this module-level value is not read there.
scale = 1

In [None]:
def make_optim(model, rate = 2e-5):
  """Create a new AdamW optimizer over all parameters of `model`.

  Args:
    model: module whose `parameters()` are optimised.
    rate: learning rate (AdamW's own default would be 5e-5).
  """
  optimizer_settings = {
    'lr': rate,
    'eps': 1e-8,  # same as the AdamW default
  }
  return AdamW(model.parameters(), **optimizer_settings)

def train_model(train_dataloader, valid_dataloader, numberOfEpochs = 10, task = 1):
  """Train a `TextClassification` model with validation-driven learning-rate backtracking.

  Each round first scores the current weights on the validation data (macro F1).
  If the score improved, the weights are saved to the module-level `path`;
  otherwise the best weights are reloaded and the learning rate is reduced
  (by a factor that grows 4x with every backtrack). Training stops once the
  learning rate falls below 1e-8.

  Args:
    train_dataloader: batches laid out as produced by `getDataset`.
    valid_dataloader: validation batches in the same layout.
    numberOfEpochs: NOTE(review): currently not used — the loop is bounded only
      by the learning-rate threshold. Kept for interface compatibility.
    task: 1 (2 classes, label column 6) or 2 (4 classes, label column 7).

  Returns:
    The model, holding the best checkpoint's weights when the loop ends.

  Raises:
    NameError: if `task` is neither 1 nor 2.
  """
  scale = 1
  if task == 2:
    classNum, taskIndex = 4, 7
  elif task == 1:
    classNum, taskIndex = 2, 6
  else:
    raise NameError("Task not defined")
  print("Start")

  model = TextClassification(classNum)
  # `device` is a torch.device, so comparing it with the string "gpu" was never
  # true and the model always stayed on the CPU.
  model.to(device)

  def batches_on_device(loader):
    """Yield every batch of `loader` with all of its tensors moved to `device`."""
    for batch in loader:
      yield [tensor.to(device) for tensor in batch]

  loss_function = nn.CrossEntropyLoss().to(device)

  present_rate = 2e-5
  old_best = -1
  epoch = 0

  while True:
    # March on until the learning rate falls below the lower threshold.
    epoch += 1
    print("\nEpoch:", epoch)
    print("Present Rate: " + str(present_rate))
    gc.collect()

    # Score the current weights (on the first round: the initial weights).
    predictions, true_labels = modelEvaluate(model, batches_on_device(valid_dataloader), task)
    score_now = f1_score(true_labels, predictions, average = 'macro')
    print("Validation Macro: " + str(score_now))

    if (score_now > old_best):
      print("Continuing on track")
      old_best = score_now

      # delete previous best
      delete_filename = path
      open(delete_filename, 'w').close() # overwrite and make the file blank instead
      os.remove(delete_filename) # delete the blank file from google drive will move the file to bin instead
      torch.save(model.state_dict(), path)

    else:
      print("Backtrack")
      model.load_state_dict(torch.load(path, map_location = device))
      present_rate /= (4 * scale)
      scale *= 4
      if present_rate < 1e-8:
        break
      print("Reduced Rate: " + str(present_rate))

    # Build the optimizer after a possible backtrack so the reduced rate applies
    # to this round rather than one round late.
    optimizer = make_optim(model, present_rate)

    # modelEvaluate leaves the model in eval mode; switch back so dropout is
    # active while training.
    model.train()
    epoch_loss = 0.0
    examples_seen = 0
    for batch in tqdm(batches_on_device(train_dataloader), total = len(train_dataloader), desc = "Epoch %d" % epoch):
        b_input_ids = batch[1]
        b_input_mask = batch[2]
        b_labels = batch[taskIndex]
        b_emoji = batch[5]
        b_hashtag = batch[4]
        pred = model(b_input_ids,b_input_mask ,b_emoji.float(), b_hashtag.float())
        loss = loss_function(pred.view(-1, classNum), b_labels.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep only Python floats: storing `pred` would hold on to every
        # batch's autograd graph for the whole run.
        epoch_loss += (loss.item() * len(b_labels))
        examples_seen += len(b_labels)

    if examples_seen:
      print("Mean training loss: " + str(epoch_loss / examples_seen))

  return model

In [None]:
gc.collect()
model = train_model(train_dataloader, valid_dataloader, 2, task = 1)

Start

Epoch: 1
Present Rate: 2e-05
Validation Macro: 0.15789473684210525
Continuing on track
Iter: 1
Iter: 2
Iter: 3
Iter: 4
Iter: 5
Iter: 6
Iter: 7
Iter: 8
Iter: 9
Iter: 10
Iter: 11
Iter: 12
Iter: 13
Iter: 14
Iter: 15
Iter: 16
Iter: 17
Iter: 18
Iter: 19
Iter: 20
Iter: 21
Iter: 22
Iter: 23
Iter: 24
Iter: 25
Iter: 26
Iter: 27
Iter: 28
Iter: 29
Iter: 30
Iter: 31
Iter: 32
Iter: 33
Iter: 34
Iter: 35
Iter: 36
Iter: 37
Iter: 38
Iter: 39
Iter: 40
Iter: 41
Iter: 42
Iter: 43
Iter: 44
Iter: 45
Iter: 46
Iter: 47
Iter: 48
Iter: 49
Iter: 50
Iter: 51
Iter: 52
Iter: 53
Iter: 54
Iter: 55
Iter: 56
Iter: 57
Iter: 58
Iter: 59
Iter: 60
Iter: 61
Iter: 62
Iter: 63
Iter: 64
Iter: 65
Iter: 66
Iter: 67
Iter: 68
Iter: 69
Iter: 70
Iter: 71
Iter: 72
Iter: 73
Iter: 74
Iter: 75
Iter: 76
Iter: 77
Iter: 78
Iter: 79
Iter: 80
Iter: 81
Iter: 82
Iter: 83
Iter: 84
Iter: 85
Iter: 86
Iter: 87
Iter: 88
Iter: 89
Iter: 90
Iter: 91
Iter: 92
Iter: 93
Iter: 94
Iter: 95
Iter: 96
Iter: 97
Iter: 98
Iter: 99
Iter: 100
Iter: 101
Iter

In [None]:
import csv
import re

In [None]:
!pip install ekphrasis
from ekphrasis.classes.segmenter import Segmenter
seg_tw = Segmenter(corpus = "twitter")

In [None]:
!pip install tweet-preprocessor
import preprocessor as tweet_proc

Collecting tweet-preprocessor
  Downloading https://files.pythonhosted.org/packages/17/9d/71bd016a9edcef8860c607e531f30bd09b13103c7951ae73dd2bf174163c/tweet_preprocessor-0.6.0-py3-none-any.whl
Installing collected packages: tweet-preprocessor
Successfully installed tweet-preprocessor-0.6.0


In [None]:
!pip install emot
from emot.emo_unicode import UNICODE_EMO, EMOTICONS
!pip install tweet-preprocessor
import preprocessor as tweet_proc

Collecting emot
  Downloading https://files.pythonhosted.org/packages/49/07/20001ade19873de611b7b66a4d5e5aabbf190d65abea337d5deeaa2bc3de/emot-2.1-py3-none-any.whl
Installing collected packages: emot
Successfully installed emot-2.1


In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [None]:
def make_list(proc_obj):
  """Return the `.match` attribute of every unit in `proc_obj` as a list.

  Args:
    proc_obj: an iterable of objects with a `.match` attribute, or None.

  Returns:
    A list of the matches, or an empty list when `proc_obj` is None.
  """
  # Identity test: `== None` would call a custom __eq__ if the object defines one.
  if proc_obj is None:
    return []
  return [unit.match for unit in proc_obj]

In [None]:
def emotext(text):
    """Replace every ``UNICODE_EMO`` key found in ``text`` with its description.

    Each description has commas and colons removed, is split on whitespace
    and re-joined with underscores before being substituted.
    """
    for symbol, description in UNICODE_EMO.items():
        words = description.replace(",", "").replace(":", "").split()
        text = text.replace(symbol, "_".join(words))
    return text

In [None]:
# Tab-separated input read by the loading cell that follows; the relative
# path is resolved against the kernel's working directory.
file_name = "hate_speech.tsv"

In [None]:
german = False

datapoints_count = 0
see_index = True

# One entry per data row: the untouched tweet and its cleaned text.
tweets, raw_tweet_texts = [], []

# One list per data row for every entity type tweet-preprocessor extracts.
hashtags, smileys, emojis = [], [], []
urls, mentions, numbers, reserveds = [], [], [], []

task_1_labels, task_2_labels, task_3_labels = [], [], []

with open(file_name) as file:
    file_reader = csv.reader(file, delimiter = "\t")
    for line in file_reader:
        # The first row is the column header: flag it as seen and move on.
        if see_index:
            see_index = False
            continue

        datapoints_count += 1

        # Column 0 holds the tweet, column 1 the task-1 label.
        task_1_labels.append(line[1])
        tweets.append(line[0])
        raw_tweet_texts.append(tweet_proc.clean(line[0]))

        parse_obj = tweet_proc.parse(line[0])

        hashtags.append(make_list(parse_obj.hashtags))
        smileys.append(make_list(parse_obj.smileys))
        emojis.append(make_list(parse_obj.emojis))
        urls.append(make_list(parse_obj.urls))
        mentions.append(make_list(parse_obj.mentions))
        numbers.append(make_list(parse_obj.numbers))
        reserveds.append(make_list(parse_obj.reserved))

print("Number of Datapoints: " + str(datapoints_count))

Number of Datapoints: 4578


In [None]:
# Turn every extracted emoji into its textual description, keeping the
# per-tweet grouping of `emojis`.
emoji_texts = []

for emo_list in emojis:
  texts = []
  for emoji in emo_list:
    print(emoji)
    texts.append(emotext(emoji))
  emoji_texts.append(texts)

print("Emoji Descriptions:")
print(emoji_texts[:5])

🖓
♫
Emoji Descriptions:
[[], [], [], [], []]


In [None]:
# For each tweet, drop the leading '#' of every hashtag and split the rest
# into words with the Twitter-corpus segmenter.
segmented_hashtags = [
  [seg_tw.segment(tag[1: ]) for tag in hashset]
  for hashset in hashtags
]

In [None]:
name = 'hi-en.pickle'

# Column name -> per-tweet list; every list was filled by the cells above.
dickie = {
  'task_1': task_1_labels,
  'full_tweet': tweets,
  'tweet_raw_text': raw_tweet_texts,
  'hashtags': hashtags,
  'smiley': smileys,
  'emoji': emojis,
  'url': urls,
  'mentions': mentions,
  'numerals': numbers,
  'reserved_word': reserveds,
  'emotext': emoji_texts,
  'segmented_hash': segmented_hashtags,
}

with open(name, 'wb') as f:
  pickle.dump(dickie, f)

In [None]:
!pip install transformers
from transformers import XLMRobertaTokenizer, XLMRobertaModel
bert = XLMRobertaModel.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 7.3MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/c1/92/bd06be977adfe6cd92038f8c263313961980617890daf3f0de636395a3ef/sacremoses-0.0.45.tar.gz (880kB)
[K     |████████████████████████████████| 880kB 33.9MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 51.6MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.45-cp37-none-any.whl size=894380 sha256=0b61d70a6ec

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=541.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1112256686.0, style=ProgressStyle(descr…




In [None]:
import pandas as pd
import xlrd
import re
import pickle
import csv

In [None]:
!pip install ekphrasis
from ekphrasis.classes.segmenter import Segmenter
seg_tw = Segmenter(corpus = "twitter")

Collecting ekphrasis
[?25l  Downloading https://files.pythonhosted.org/packages/92/e6/37c59d65e78c3a2aaf662df58faca7250eb6b36c559b912a39a7ca204cfb/ekphrasis-0.5.1.tar.gz (80kB)
[K     |████████████████████████████████| 81kB 7.1MB/s 
Collecting colorama
  Downloading https://files.pythonhosted.org/packages/44/98/5b86278fbbf250d239ae0ecb724f8572af1c91f4a11edf4d36a206189440/colorama-0.4.4-py2.py3-none-any.whl
Collecting ujson
[?25l  Downloading https://files.pythonhosted.org/packages/17/4e/50e8e4cf5f00b537095711c2c86ac4d7191aed2b4fffd5a19f06898f6929/ujson-4.0.2-cp37-cp37m-manylinux1_x86_64.whl (179kB)
[K     |████████████████████████████████| 184kB 27.0MB/s 
Collecting ftfy
[?25l  Downloading https://files.pythonhosted.org/packages/ce/b5/5da463f9c7823e0e575e9908d004e2af4b36efa8d02d3d6dad57094fcb11/ftfy-6.0.1.tar.gz (63kB)
[K     |████████████████████████████████| 71kB 10.8MB/s 
Building wheels for collected packages: ekphrasis, ftfy
  Building wheel for ekphrasis (setup.py) ... [?2

  regexes = {k.lower(): re.compile(self.expressions[k]) for k, v in


In [None]:
!pip install tweet-preprocessor
import preprocessor as tweet_proc

Collecting tweet-preprocessor
  Downloading https://files.pythonhosted.org/packages/17/9d/71bd016a9edcef8860c607e531f30bd09b13103c7951ae73dd2bf174163c/tweet_preprocessor-0.6.0-py3-none-any.whl
Installing collected packages: tweet-preprocessor
Successfully installed tweet-preprocessor-0.6.0


In [None]:
!pip install emot
from emot.emo_unicode import UNICODE_EMO, EMOTICONS

Collecting emot
  Downloading https://files.pythonhosted.org/packages/49/07/20001ade19873de611b7b66a4d5e5aabbf190d65abea337d5deeaa2bc3de/emot-2.1-py3-none-any.whl
Installing collected packages: emot
Successfully installed emot-2.1


In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [None]:
def make_list(proc_obj):
  """Collect the ``match`` attribute of every item in a parse result field.

  Args:
    proc_obj: An iterable of objects exposing a ``match`` attribute, or ``None``.

  Returns:
    A list with each item's ``match`` value in iteration order; an empty
    list when ``proc_obj`` is ``None``.
  """
  # Identity check instead of `== None`, which would defer to a custom __eq__.
  if proc_obj is None:
    return []

  return [unit.match for unit in proc_obj]

def emotext(text):
    """Replace every ``UNICODE_EMO`` key found in ``text`` with its description.

    Each description has commas and colons removed, is split on whitespace
    and re-joined with underscores before being substituted.
    """
    for symbol, description in UNICODE_EMO.items():
        words = description.replace(",", "").replace(":", "").split()
        text = text.replace(symbol, "_".join(words))
    return text

In [None]:
# Language switch read by the loading cell: 0 sends tweets through
# tweet_proc.clean, any other value sends them through hindi_clean.
is_hindi = 0

# The loading cell only acts when this equals "test".
datatype = "test"

# CSV to parse; the path points into the mounted Google Drive.
file_name = "/content/drive/My Drive/HASOC_raw_data/2020_test_data/english_test_1509.csv"


In [None]:
# Number of data rows read so far (the header row is not counted).
datapoints_count = 0
# True until the header row has been skipped by the loading cell.
see_index = True

# Parallel per-row accumulators filled by the loading cell.
tweets = []
raw_tweet_texts = []
tokenized_tweets = []
hashtags = []
smileys = []
emojis = []
urls = []
mentions = []
numbers = []
reserveds = []

# Label and identifier columns, one entry per data row.
task_1_labels = []
task_2_labels = []
tweet_ids = []
hasoc_ID = []

In [None]:
if datatype == 'test':
    # The context manager closes the CSV even when a row fails to parse;
    # the handle was previously opened and never closed.
    with open(file_name, 'r') as file:
        file_reader = csv.reader(file, delimiter = ",")
        for line in file_reader:
            # The first row is the column header: flag it as seen and skip it.
            if see_index == True:
                see_index = False
                continue

            datapoints_count += 1
            tweet_ids.append(line[0])
            task_1_labels.append(line[2])
            task_2_labels.append(line[3])
            hasoc_ID.append(line[4])

            # Flatten embedded newlines once and reuse the result everywhere.
            flat_tweet = line[1].replace("\n", " ")
            tweets.append(flat_tweet)

            parse_obj = tweet_proc.parse(flat_tweet)
            tokenized_tweets.append(tweet_proc.tokenize(flat_tweet))
            # NOTE(review): strip_list is not defined in the visible part of
            # this notebook - confirm it exists in the kernel before running.
            hashtags.append(strip_list(make_list(parse_obj.hashtags)))
            smileys.append(strip_list(make_list(parse_obj.smileys)))
            emojis.append(strip_list(make_list(parse_obj.emojis)))
            urls.append(strip_list(make_list(parse_obj.urls)))
            mentions.append(strip_list(make_list(parse_obj.mentions)))
            numbers.append(strip_list(make_list(parse_obj.numbers)))
            reserveds.append(strip_list(make_list(parse_obj.reserved)))

            if is_hindi == 0:
                raw_tweet_texts.append(tweet_proc.clean(flat_tweet))
            else:
                # NOTE(review): hindi_clean is likewise not defined in the
                # visible part of this notebook - confirm before using.
                raw_tweet_texts.append(hindi_clean(flat_tweet, parse_obj))

    print("Number of Datapoints: " + str(datapoints_count))

In [None]:
!git clone https://github.com/sayarghoshroy/Hate-Speech-Detection.git

Cloning into 'Hate-Speech-Detection'...
remote: Enumerating objects: 280, done.[K
remote: Counting objects: 100% (280/280), done.[K
remote: Compressing objects: 100% (213/213), done.[K
remote: Total 280 (delta 113), reused 189 (delta 59), pack-reused 0[K
Receiving objects: 100% (280/280), 13.28 MiB | 15.86 MiB/s, done.
Resolving deltas: 100% (113/113), done.


In [None]:
import pickle
import pandas as pd
# Read the English training pickle from the cloned repository. The context
# manager closes the file; the handle was previously left open.
# NOTE(review): unpickling executes arbitrary code, so only load pickles
# from a source you trust.
with open("/content/Hate-Speech-Detection/data/2020_processed_train/en.pickle", 'rb') as pickle_file:
    data = pickle.load(pickle_file)
df = pd.DataFrame.from_dict(data)

In [None]:
df

Unnamed: 0,tweet_id,task_1,task_2,hasoc_id,full_tweet,tweet_raw_text,hashtags,smiley,emoji,url,mentions,numerals,reserved_word,emotext,segmented_hash
0,1123757263427186690,HOF,PRFN,hasoc_2020_en_2574,"hate wen females hit ah nigga with tht bro 😂😂,...","hate wen females hit ah nigga with tht bro , I...",[],[],"[😂, 😂]",[],[],[],[],"[face with tears of joy, face with tears of joy]",[]
1,1123733301397733380,HOF,PRFN,hasoc_2020_en_3627,RT @airjunebug: When you're from the Bay but y...,: When you're from the Bay but you're really a...,[],[],[],[https://t.co/mZ8BAYlnlf],"[@airjunebug, @supportcaleon]",[],[RT],[],[]
2,1123734094108659712,NOT,NONE,hasoc_2020_en_3108,RT @DonaldJTrumpJr: Dear Democrats: The Americ...,: Dear Democrats: The American people arent st...,[],[],[],[],[@DonaldJTrumpJr],[],[RT],[],[]
3,1126951188170199049,HOF,PRFN,hasoc_2020_en_3986,RT @SheLoveTimothy: He ain’t on drugs he just ...,: He aint on drugs he just bored. I be doing t...,[],[],[😂],[https://t.co/tkdjSbddET],[@SheLoveTimothy],[],[RT],[face with tears of joy],[]
4,1126863510447710208,NOT,NONE,hasoc_2020_en_5152,RT @TavianJordan: Summer ‘19 I’m coming for yo...,: Summer Im coming for you ! No boring shit ! ...,[],[],[],[],[@TavianJordan],[],[RT],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3703,1126887103437123584,NOT,NONE,hasoc_2020_en_109,RT @FilthyArt_: TONIGHT TONIGHT TONIGHT \n\nCa...,: TONIGHT TONIGHT TONIGHT Catch me doing some ...,[],[],[✨],[https://t.co/AUMpsdJYW8],[@FilthyArt_],[],[RT],[sparkles],[]
3704,1126825614906937344,HOF,PRFN,hasoc_2020_en_2345,RT @abbn0rmal_: Eat my ass,: Eat my ass,[],[],[],[],[@abbn0rmal_],[],[RT],[],[]
3705,1126880392550731776,NOT,NONE,hasoc_2020_en_1039,RT @FlyTPA: BREAKING NEWS: TPA is about to get...,: BREAKING NEWS: TPA is about to get even bett...,[],[],"[👩, 👦, 💧]",[],[@FlyTPA],[ 2020],[RT],"[woman, boy, droplet]",[]
3706,1130290906932891648,HOF,PRFN,hasoc_2020_en_2817,RT @StarrThaRapper: It’s been a hr FUCK THAT G...,: Its been a hr FUCK THAT GAME,[],[],"[👿, 👿]",[https://t.co/AkI6BW8Qlz],[@StarrThaRapper],[],[RT],"[angry face with horns, angry face with horns]",[]


In [None]:
!pip install nltk
!pip install bert-tensorflow
!pip install transformers
!pip install seaborn
!pip install sklearn-crfsuite
!pip install -U sentence-transformers
import nltk
nltk.download('all')

In [None]:
import random
import pickle
import re
import time
import datetime

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm, neighbors
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
# Check where we need this
# from nltk.corpus import stopwordsm
from nltk.tokenize import sent_tokenize

import torch
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import Dataset
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelWithLMHead


import torch.nn as nn
from transformers import XLMRobertaTokenizer, XLMRobertaModel
import gensim.models as gsm

from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


from tqdm import tqdm 
import gc
import os

In [None]:
# Prefer the first CUDA device when one is visible, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [None]:
# Directory of the preprocessed pickles inside the cloned repository; it is
# later assigned to DATASET_ROOT, which loadData prefixes to each file name.
data_loc = '/content/Hate-Speech-Detection/data/2020_processed_train/'

In [None]:
# Emoji vectors in binary word2vec format, loaded from the working directory;
# getEmojiEmbeddings looks emojis up in this object.
e2v = gsm.KeyedVectors.load_word2vec_format('emoji2vec.bin', binary=True)

def getEmojiEmbeddings(emojiList,dim=300,verbose = False):
  """Average the ``e2v`` vectors of the given emojis into one fixed-size vector.

  Args:
    emojiList: Emojis to embed; entries missing from ``e2v.vocab`` are skipped.
    dim: Length of the returned vector (at least 300). The averaged vector
      fills the first 300 entries and the remainder stays zero.
    verbose: When true, print every emoji that is missing from ``e2v.vocab``.

  Returns:
    A NumPy array of length ``dim``. It is all zeros when ``emojiList`` is
    empty, when none of its emojis are known, or when the mean contains NaN.

  Raises:
    IndexError: If ``dim`` is smaller than 300.
  """
  if dim < 300:
    # The check allows dim == 300, so the message says "at least".
    raise IndexError("Dim has to be at least 300")
  result = np.zeros(dim)

  known_vectors = []
  for emoji in emojiList:
    if emoji in e2v.vocab:
      known_vectors.append(e2v[emoji])
    elif verbose:
      print(emoji)

  # Return early instead of averaging an empty list, which produced NaN and
  # a RuntimeWarning before falling back to zeros.
  if not known_vectors:
    return result

  embs = np.mean(known_vectors, axis=0)
  if np.any(np.isnan(embs)):
    return result
  result[:300] = embs
  return result

In [None]:
def loadData(lang):
  """Load one language's preprocessed pickle and split it into train and test.

  Args:
    lang: One of ``'hi'``, ``'en'`` or ``'hi-en'``; selects ``<lang>.pickle``
      under the module-level ``DATASET_ROOT``.

  Returns:
    ``(train_df, test_df, df)``: a 75/25 split made with ``random_state=42``
    and the full frame.

  Raises:
    NameError: If ``lang`` is not one of the supported codes.
  """
  if lang not in ['hi','en','hi-en']:
      raise NameError("Language not found")
  fileName = lang + '.pickle'
  with open(DATASET_ROOT+fileName, 'rb') as f:
    ged = pickle.load(f)
  df = pd.DataFrame.from_dict(ged)
  if lang in ['hi','en']:
    df = df.drop(['tweet_id','task_2','hasoc_id'],axis=1)
  if lang == 'hi-en':
    # Map labels by their first letter onto HOF / NOT. `.loc` writes into the
    # frame itself; the chained form `df.task_1[mask] = ...` may write to a
    # temporary copy and is rejected under pandas copy-on-write.
    for prefix, label in (("y", "HOF"), ("n", "NOT"), ("o", "HOF")):
      df.loc[df.task_1.str.startswith(prefix), "task_1"] = label
  train_df, test_df = model_selection.train_test_split(df, random_state = 42, test_size = 0.25)
  return train_df, test_df, df

def loadDataAllLangs():
  """Load the 'hi', 'en' and 'hi-en' data and merge them into shuffled frames.

  Returns:
    ``(train_df, test_df, df)``: the concatenated train splits, test splits
    and full frames, each shuffled with ``random_state=42``.
  """
  # Each loadData call yields (train, test, full); regroup them by role.
  per_language = [loadData(code) for code in ('hi', 'en', 'hi-en')]
  train_parts, test_parts, full_parts = (list(group) for group in zip(*per_language))

  print("total size:", sum(len(frame) for frame in full_parts))

  merged = [pd.concat(parts, ignore_index=True) for parts in (train_parts, test_parts, full_parts)]
  train_df, test_df, df = [frame.sample(frac = 1, random_state=42) for frame in merged]
  return train_df,test_df,df

class HASOCDataset(Dataset):
  """Torch ``Dataset`` wrapper around a pandas DataFrame of examples.

  Args:
    dataPath: Either data accepted by ``pd.DataFrame.from_dict`` (when
      ``isDF`` is true) or the path of a pickle file holding such data.
    isDF: Selects how ``dataPath`` is interpreted.
  """
  def __init__(self, dataPath, isDF = False):
    if isDF:
      self.df = pd.DataFrame.from_dict(dataPath)
    else:
      # The context manager closes the pickle file; the handle was
      # previously left open.
      with open(dataPath,'rb') as pickle_file:
        data = pickle.load(pickle_file)
      self.df = pd.DataFrame.from_dict(data)
  def __len__(self):
    """Return the number of rows."""
    return len(self.df)
  def __getitem__(self,index):
    """Return the row at positional ``index`` as a pandas Series."""
    return self.df.iloc[index]

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch generators and pin cuDNN to deterministic mode."""
    for seeder in (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed, random.seed):
        seeder(seed)
    # Deterministic kernels on, autotuner off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
set_seed(42)

In [None]:
class FullExample(object):
  """ Record with every preprocessed field of one tweet; no longer required, kept in case the full set is needed again. """
  def __init__(self, task_1, full_tweet, tweet_raw_text, hashtags, smiley, emoji, url, mentions, numerals, reserved_word, segmented_hash):
    # Label and the two text variants.
    self.task_1, self.full_tweet, self.tweet_raw_text = task_1, full_tweet, tweet_raw_text
    # Entities extracted from the tweet.
    self.hashtags, self.smiley, self.emoji = hashtags, smiley, emoji
    self.url, self.mentions, self.numerals = url, mentions, numerals
    self.reserved_word, self.segmented_hash = reserved_word, segmented_hash
  
class Example(object):
  """ One dataset row reduced to the fields the feature conversion reads """
  def __init__(self,id, task_1, full_tweet, tweet_raw_text,  emoji,  segmented_hash):
    # Identifier and label.
    self.id, self.task_1 = id, task_1
    # Text variants.
    self.full_tweet, self.tweet_raw_text = full_tweet, tweet_raw_text
    # Extra signals.
    self.emoji, self.segmented_hash = emoji, segmented_hash

class ExampleFeautres(object):
    """ Numeric, batch-ready form of one example """
    def __init__(self, id, task_1, input_ids, input_mask,input_length,  emoji,  hash):
        # Identifier and label are stored unchanged.
        self.id, self.task_1 = id, task_1
        # Token-level inputs are stored unchanged.
        self.input_ids, self.input_mask, self.input_length = input_ids, input_mask, input_length
        # The two embeddings are converted to tensors, emoji first.
        self.emoji = torch.tensor(emoji)
        self.hash = torch.tensor(hash)

In [None]:
# Tokenizer for the same checkpoint name that TextClassification loads its encoder from.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")
# Fixed length (special tokens included) that convertExamplesToFeature truncates and pads to.
max_seq_length = 74
# e2v = gsm.KeyedVectors.load_word2vec_format('/content/drive/My Drive/emoji2vec.bin', binary = True)
# Sentence encoder that convertExamplesToFeature uses to embed the joined hashtag segments.
sent_encoder = SentenceTransformer('xlm-r-100langs-bert-base-nli-mean-tokens')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=5069051.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=150.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=147.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, max=1014108634.0), HTML(value='')))




In [None]:
# Task-1 label string -> class index; read by convertExamplesToFeature.
labels_task1 = {'NOT':0, 'HOF':1}
# Task-2 label string -> class index.
# NOTE(review): not referenced by the visible code - confirm it is still needed.
labels_task2 = {'NONE':0,'PRFN':1,'OFFN':2,'HATE':3}

def convertExamplesToFeature(example):
  """Convert one ``Example`` into fixed-length, batch-friendly features.

  The cleaned tweet text is tokenized, truncated so that it fits into
  ``max_seq_length`` together with the two special tokens, and padded to
  ``max_seq_length``. The segmented hashtags are joined with spaces and
  embedded with ``sent_encoder``; the emojis are embedded with
  ``getEmojiEmbeddings``.

  Args:
    example: Object exposing ``id``, ``task_1``, ``tweet_raw_text``,
      ``emoji`` and ``segmented_hash``.

  Returns:
    An ``ExampleFeautres`` instance.

  Raises:
    KeyError: If ``example.task_1`` is not a key of ``labels_task1``.
  """
  tokens = tokenizer.tokenize(example.tweet_raw_text)
  # Leave room for the two special tokens added next.
  tokens = tokens[: (max_seq_length-2)]
  tokens = [tokenizer.cls_token] + tokens + [tokenizer.sep_token]

  input_ids = tokenizer.convert_tokens_to_ids(tokens)
  input_length = len(input_ids)
  pad_count = max_seq_length - input_length
  input_mask = [1] * input_length + [0] * pad_count

  # Pad with the tokenizer's own padding id instead of a literal 0, which in
  # the XLM-R vocabulary is the id of a real special token. Padded positions
  # carry attention mask 0. Falls back to 0 when the tokenizer defines no
  # padding token.
  pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0
  input_ids = input_ids + [pad_id] * pad_count

  hashembs = sent_encoder.encode(' '.join(example.segmented_hash))
  emojiVec = getEmojiEmbeddings(example.emoji)
  task1 = labels_task1[example.task_1]

  return ExampleFeautres(example.id, task1, input_ids, input_mask, input_length, emojiVec, hashembs)

In [None]:
def getDataset(input_features):
    """
    Pack a list of per-example features into a single TensorDataset.

    Position of each tensor inside a dataset item:
    0 -> ID
    1 -> input ids
    2 -> input masks
    3 -> input lengths
    4 -> hash embs
    5 -> emoji embs
    6 -> task1
    (no task2 tensor is built here, so items have exactly seven entries)
    """
    def long_column(attribute):
        # One int64 tensor holding `attribute` of every feature.
        return torch.tensor([getattr(f, attribute) for f in input_features], dtype=torch.long)

    def stacked_column(attribute):
        # Stack the per-feature tensors stored under `attribute`.
        return torch.stack([getattr(f, attribute) for f in input_features])

    return TensorDataset(
        long_column('id'),
        long_column('input_ids'),
        long_column('input_mask'),
        long_column('input_length'),
        stacked_column('hash'),
        stacked_column('emoji'),
        long_column('task_1'),
    )

In [None]:
def train_val_dataset(dataset, val_split = 0.2):
    """Split ``dataset`` into ``'train'`` and ``'valid'`` Subsets.

    The index range of ``dataset`` is divided by ``train_test_split`` with
    ``val_split`` as the test size.
    """
    every_index = list(range(len(dataset)))
    train_idx, val_idx = train_test_split(every_index, test_size=val_split)
    return {
        'train': Subset(dataset, train_idx),
        'valid': Subset(dataset, val_idx),
    }

def getDataloader(path_to_pickle, val_split = 0.2, batch_size = 16, multiLing = True):
  """Build the train / validation / full dataloaders.

  Args:
    path_to_pickle: Pickle to load when ``multiLing`` is false; not read
      when ``multiLing`` is true.
    val_split: Fraction of examples placed in the validation subset.
    batch_size: Batch size of the three individually returned loaders.
    multiLing: When true, use the merged frame from ``loadDataAllLangs``.

  Returns:
    ``(train_dataloader, valid_dataloader, dataloader, dataloaders)`` where
    the first three use ``batch_size`` and drop the last incomplete batch,
    and ``dataloaders`` maps ``'train'`` / ``'valid'`` to shuffled loaders
    with batch size 32 and four workers.
  """
  if multiLing:
    # Only the full frame is needed; the train/test splits are discarded.
    _, _, df = loadDataAllLangs()
    tempDataset = HASOCDataset(df, isDF=True)
  else:
    print(path_to_pickle)
    tempDataset = HASOCDataset(path_to_pickle)

  input_features = []
  for i in tqdm(range(len(tempDataset))):
    # Fetch the row once instead of once per field.
    row = tempDataset[i]
    example = Example(i, row['task_1'], row['full_tweet'], row['tweet_raw_text'], row['emoji'], row['segmented_hash'])
    input_features.append(convertExamplesToFeature(example))
  dataset = getDataset(input_features)

  # Re-seed immediately before the random train/validation split.
  set_seed(42)
  dd = train_val_dataset(dataset, val_split)
  train_dataloader = DataLoader(dd['train'], sampler = RandomSampler(dd['train']), batch_size=batch_size, drop_last=True)
  valid_dataloader = DataLoader(dd['valid'] , batch_size=batch_size, drop_last=True)
  dataloader = DataLoader(dataset , batch_size=batch_size, drop_last=True)
  dataloaders = {x:DataLoader(dd[x], 32, shuffle = True, num_workers = 4) for x in ['train','valid']}

  return train_dataloader, valid_dataloader, dataloader, dataloaders

In [None]:
# loadData reads this module-level name to locate the per-language pickles.
DATASET_ROOT = data_loc
# With multiLing = True the first argument is not read by getDataloader.
train_dataloader, valid_dataloader, dataloader, dataloaders = getDataloader(data_loc , multiLing = True)

In [None]:
class ClassificationHead(nn.Module):
  """ Two tanh-activated dense layers followed by a linear output layer, with dropout in front of each """
  def __init__(self, numberOfClasses, hidden_size_bert, hidden_size_post_feats, dropout_val = 0.2):
    super().__init__()
    # Projects the concatenated feature vector down to the encoder width.
    self.denseInit = nn.Linear(hidden_size_post_feats, hidden_size_bert)
    self.dense = nn.Linear(hidden_size_bert, hidden_size_bert)
    self.dropout = nn.Dropout(dropout_val)
    self.output = nn.Linear(hidden_size_bert, numberOfClasses)
  def forward(self, x):
    # dropout -> linear -> tanh, once per hidden layer.
    for hidden_layer in (self.denseInit, self.dense):
      x = torch.tanh(hidden_layer(self.dropout(x)))
    # Final dropout, then the class scores (no activation).
    return self.output(self.dropout(x))

class TextClassification(nn.Module):
  """ Encoder plus classification head; the encoder's first-token state is joined with the emoji and hashtag vectors """
  def __init__(self, numberOfClasses,dropout_val = 0.1, batch_size = 16):
    super().__init__()
    self.bert = XLMRobertaModel.from_pretrained("sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens")
    encoder_width = self.bert.config.hidden_size
    # Head input width: twice the encoder width plus 300.
    # NOTE(review): presumably encoder state + hashtag embedding + 300-d emoji vector - confirm.
    self.classifier = ClassificationHead(numberOfClasses, encoder_width, encoder_width * 2 + 300, dropout_val)
  def forward(self, input_seq, attention_mask, emoji, hashTag):
    # Element 0 of the encoder output, reduced to the state at sequence position 0.
    token_states = self.bert(input_seq, attention_mask=attention_mask)[0]
    first_token_state = token_states[:, 0, :]
    combined = torch.cat([first_token_state, emoji, hashTag], dim = 1)
    return self.classifier(combined)

In [None]:
# Base name of the checkpoint file; a later cell appends ".pt" to form `path`.
model_name = 'adaptive'
# NOTE(review): not referenced by the visible code - confirm whether checkpoints should live here.
model_loc = 'hasoc_saved/'

In [None]:
def modelEvaluate(model, valid_dataloader = None, task = 1):
  """Run ``model`` over every batch of a dataloader and collect predictions.

  Args:
    model: Callable taking ``(input_ids, input_mask, emoji, hashtag)`` and
      returning per-class scores; it is switched to eval mode.
    valid_dataloader: Iterable of batches indexed as 1 -> input ids,
      2 -> input mask, 4 -> hashtag embeddings, 5 -> emoji embeddings,
      6 / 7 -> task-1 / task-2 labels. When omitted, the module-level
      ``valid_dataloader`` is looked up at call time.
    task: 1 or 2; selects which label tensor is read.

  Returns:
    ``(flat_predictions, flat_true_labels)``: a list with the arg-max class
    of every example and a NumPy array with the matching labels.

  Raises:
    NameError: If ``task`` is neither 1 nor 2.
  """
  if valid_dataloader is None:
    # Resolved at call time so a rebuilt loader is picked up; the old
    # default froze whatever object existed when the function was defined.
    valid_dataloader = globals()['valid_dataloader']

  if task == 1:
    taskIndex = 6
  elif task == 2:
    taskIndex = 7
  else:
    raise NameError("Task not defined")

  gc.collect()
  model.eval()

  # Send inputs to wherever the model's weights live (no-op when they already match).
  parameters = getattr(model, "parameters", None)
  first_parameter = next(parameters(), None) if callable(parameters) else None
  target_device = first_parameter.device if first_parameter is not None else None

  batch_logits, batch_labels = [], []
  for batch in valid_dataloader:
    b_input_ids = batch[1]
    b_input_mask = batch[2]
    b_labels = batch[taskIndex]
    b_emoji = batch[5].float()
    b_hashtag = batch[4].float()
    if target_device is not None:
      b_input_ids = b_input_ids.to(target_device)
      b_input_mask = b_input_mask.to(target_device)
      b_emoji = b_emoji.to(target_device)
      b_hashtag = b_hashtag.to(target_device)
    with torch.no_grad():
      pred = model(b_input_ids, b_input_mask, b_emoji, b_hashtag)
    batch_logits.append(pred.detach().cpu().numpy())
    batch_labels.append(b_labels.to('cpu').numpy())

  # Aggregate only after the loop: the flattening and `return` used to sit
  # inside it, so just the first batch was ever scored.
  if not batch_logits:
    return [], np.array([], dtype = np.int64)

  flat_true_labels = np.concatenate(batch_labels, axis = 0)
  flat_predictions = [np.argmax(row) for row in np.concatenate(batch_logits, axis = 0)]
  assert(len(flat_predictions) == len(flat_true_labels))
  return flat_predictions, flat_true_labels

In [None]:
# Checkpoint file that train_model saves to and reloads from.
path =  model_name + ".pt"
# train_model assigns its own local `scale`, so it does not read this module-level value.
scale = 1

In [None]:
def make_optim(model, rate = 2e-5):
  """Return an AdamW optimizer over all parameters of ``model``.

  ``rate`` is the learning rate (default 2e-5) and epsilon is fixed at 1e-8.
  """
  optimizer_settings = {'lr': rate, 'eps': 1e-8}
  return AdamW(model.parameters(), **optimizer_settings)

def train_model(train_dataloader, valid_dataloader, numberOfEpochs = 10, task = 1):
  """ Train loop with validation-driven learning-rate backtracking.

  Before every pass over ``train_dataloader`` the model is scored on
  ``valid_dataloader`` (macro F1). A better score is checkpointed to the
  module-level ``path``. Otherwise the last checkpoint is restored and the
  learning rate is divided by a factor that grows fourfold per backtrack;
  training stops once the rate drops below 1e-8.

  Args:
    train_dataloader: Batches indexed like the output of ``getDataset``.
    valid_dataloader: Batches used for the per-epoch validation score.
    numberOfEpochs: Kept for call compatibility; the loop is bounded by the
      learning rate and never reads this value.
    task: 1 (two classes, labels at batch index 6) or 2 (four classes,
      labels at batch index 7).

  Returns:
    The trained ``TextClassification`` model.

  Raises:
    NameError: If ``task`` is neither 1 nor 2.
  """
  scale = 1
  if task == 2:
    classNum = 4
    # NOTE(review): getDataset in this notebook builds tensors 0-6 only, so
    # task 2 needs a dataset that also carries a label tensor at index 7.
    taskIndex = 7
  elif task == 1:
    classNum = 2
    taskIndex = 6
  else:
    raise NameError("Task not defined")
  total_steps = len(train_dataloader)
  print("Start")

  model = TextClassification(classNum) # task 1
  # `device` is a torch.device, so the old `device == "gpu"` test was never
  # true and the model stayed on the CPU. Move it unconditionally.
  model.to(device)

  loss_function = nn.CrossEntropyLoss().to(device)

  def batches_on_device(loader):
    # Yield each batch with all of its tensors moved next to the model.
    for batch in loader:
      yield [tensor.to(device) for tensor in batch]

  present_rate = 2e-5
  old_best = -1
  epoch = 0

  while True:
    # when the learn rate falls below a lower threshold, you stop your training
    # until that moment, march on
    epoch += 1
    print("\nEpoch:", epoch)
    print("Present Rate: " + str(present_rate))
    optimizer = make_optim(model, present_rate)
    # The scheduler is built but never stepped, so the rate stays at
    # `present_rate` for the whole pass.
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                              num_warmup_steps = 0, # Default value in run_glue.py
                                              num_training_steps = total_steps)
    gc.collect()

    # Score the current weights before training on them any further.
    predictions, true_labels = modelEvaluate(model, batches_on_device(valid_dataloader), task)
    score_now = f1_score(true_labels, predictions, average = 'macro')
    print("Validation Macro: " + str(score_now))

    if (score_now > old_best):
      print("Continuing on track")
      old_best = score_now

      # delete previous best
      delete_filename = path
      open(delete_filename, 'w').close() # overwrite and make the file blank instead
      os.remove(delete_filename) # delete the blank file from google drive will move the file to bin instead
      torch.save(model.state_dict(), path)

    else:
      print("Backtrack")
      model.load_state_dict(torch.load(path))
      present_rate /= (4 * scale)
      scale *= 4
      if present_rate < 1e-8:
        break

    # modelEvaluate leaves the model in eval mode; switch back so dropout is
    # active while training. Calling train() before evaluating had no effect.
    model.train()

    for cnt, batch in enumerate(batches_on_device(train_dataloader), start = 1):
      print("Iter: " + str(cnt))
      b_input_ids = batch[1]
      b_input_mask = batch[2]
      b_labels = batch[taskIndex]
      b_emoji = batch[5]
      b_hashtag = batch[4]
      pred = model(b_input_ids,b_input_mask ,b_emoji.float(), b_hashtag.float())
      loss = loss_function(pred.view(-1, classNum), b_labels.view(-1))
      # Predictions are not accumulated: the old never-read lists kept every
      # batch's autograd graph alive.

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      # scheduler.step()

  return model

In [None]:
gc.collect()
# The positional 2 is numberOfEpochs, which train_model accepts but never reads.
model = train_model(train_dataloader, valid_dataloader, 2, task = 1)

In [None]:
# Position of the task-1 label tensor inside each batch. `taskIndex` is only
# a local inside train_model / modelEvaluate, so this cell must define its
# own; without it the loop raised NameError.
taskIndex = 6

for batch in valid_dataloader:
    # Unpack the batch fields and print the labels (nothing is moved to the GPU here).
    b_input_ids = batch[1]
    b_input_mask = batch[2]
    print(batch[taskIndex])
    b_labels = batch[taskIndex]
    b_emoji = batch[5]
    b_hashtag = batch[4]

NameError: ignored

In [None]:
import nltk

In [None]:
import gensim
from gensim import corpora, models, similarities

In [None]:
!git clone https://github.com/ashishgupta1350/Hindi-English-Code-Mixed-Stemmer.git

Cloning into 'Hindi-English-Code-Mixed-Stemmer'...
remote: Enumerating objects: 22, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (21/21), done.[K
remote: Total 22 (delta 7), reused 9 (delta 0), pack-reused 0[K
Unpacking objects: 100% (22/22), done.


In [None]:
w2vModel = gensim.models.Word2Vec.load("/content/Hindi-English-Code-Mixed-Stemmer/w2vModel")

In [None]:
# Query the model once and keep only the words. The old comprehension re-ran
# the same nearest-neighbour search for each of the ten positions and raised
# IndexError if fewer than ten results came back.
similarWordsList = [word for word, _ in w2vModel.wv.most_similar("kyaa", topn = 10)]

In [None]:
similarWordsList

['fanaaa',
 'sakte',
 'maloom',
 'alaykum',
 'as-salamu',
 'aapka',
 'behtar',
 'lalala',
 'intezar',
 'bewakoofian']

In [None]:
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
   
ps = PorterStemmer()

# Sample inputs, one of them misspelled, to show what the Porter stemmer returns.
words = ["program", "programs", "prooogramer", "programing", "programers"]

for w, stemmed in zip(words, map(ps.stem, words)):
    print(w, " : ", stemmed)

program  :  program
programs  :  program
prooogramer  :  prooogram
programing  :  program
programers  :  program


In [None]:
import pandas as pd
import xlrd
import re
import pickle
import csv

In [None]:
!pip install ekphrasis
from ekphrasis.classes.segmenter import Segmenter
# to leverage word statistics from Twitter
seg_tw = Segmenter(corpus = "twitter")

Collecting ekphrasis
[?25l  Downloading https://files.pythonhosted.org/packages/92/e6/37c59d65e78c3a2aaf662df58faca7250eb6b36c559b912a39a7ca204cfb/ekphrasis-0.5.1.tar.gz (80kB)
[K     |████████████████████████████████| 81kB 6.8MB/s 
Collecting colorama
  Downloading https://files.pythonhosted.org/packages/44/98/5b86278fbbf250d239ae0ecb724f8572af1c91f4a11edf4d36a206189440/colorama-0.4.4-py2.py3-none-any.whl
Collecting ujson
[?25l  Downloading https://files.pythonhosted.org/packages/17/4e/50e8e4cf5f00b537095711c2c86ac4d7191aed2b4fffd5a19f06898f6929/ujson-4.0.2-cp37-cp37m-manylinux1_x86_64.whl (179kB)
[K     |████████████████████████████████| 184kB 12.6MB/s 
Collecting ftfy
[?25l  Downloading https://files.pythonhosted.org/packages/ce/b5/5da463f9c7823e0e575e9908d004e2af4b36efa8d02d3d6dad57094fcb11/ftfy-6.0.1.tar.gz (63kB)
[K     |████████████████████████████████| 71kB 9.5MB/s 
Building wheels for collected packages: ekphrasis, ftfy
  Building wheel for ekphrasis (setup.py) ... [?25

  regexes = {k.lower(): re.compile(self.expressions[k]) for k, v in


In [None]:
import torch.nn as nn
class GatedCNN(nn.Module):
    '''
    Stack of gated convolutions over embedded token ids, followed by a
    linear layer and a log-softmax.

        In : (N, sentence_len) integer token ids
        Out: (N, ans_size) log-probabilities

    Args:
        seq_len: Sentence length the final linear layer is sized for.
        vocab_size: Number of rows in the embedding table.
        embd_size: Embedding width.
        n_layers: Number of gated convolution layers after the first one.
        kernel: (height, width) of the first convolution; the width is
            expected to equal embd_size and the height to be odd.
        out_chs: Channels produced by every convolution.
        res_block_count: A residual connection is added on every layer
            whose index is a multiple of this value.
        ans_size: Number of output classes.
    '''
    def __init__(self,
                 seq_len,
                 vocab_size,
                 embd_size,
                 n_layers,
                 kernel,
                 out_chs,
                 res_block_count,
                 ans_size):
        super(GatedCNN, self).__init__()
        self.res_block_count = res_block_count
        # self.embd_size = embd_size

        self.embedding = nn.Embedding(vocab_size, embd_size)

        # Pad by half the kernel height so the sequence length is preserved.
        # This equals the former hard-coded 2 for height 5 and also works
        # for other odd heights.
        pad = (kernel[0] // 2, 0)

        # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, ...
        self.conv_0 = nn.Conv2d(1, out_chs, kernel, padding=pad)
        self.b_0 = nn.Parameter(torch.randn(1, out_chs, 1, 1))
        self.conv_gate_0 = nn.Conv2d(1, out_chs, kernel, padding=pad)
        self.c_0 = nn.Parameter(torch.randn(1, out_chs, 1, 1))

        self.conv = nn.ModuleList([nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=pad) for _ in range(n_layers)])
        self.conv_gate = nn.ModuleList([nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=pad) for _ in range(n_layers)])
        self.b = nn.ParameterList([nn.Parameter(torch.randn(1, out_chs, 1, 1)) for _ in range(n_layers)])
        self.c = nn.ParameterList([nn.Parameter(torch.randn(1, out_chs, 1, 1)) for _ in range(n_layers)])

        self.fc = nn.Linear(out_chs*seq_len, ans_size)

    def forward(self, x):
        # x: (N, seq_len)

        # Embedding
        bs = x.size(0) # batch size
        seq_len = x.size(1)
        x = self.embedding(x) # (bs, seq_len, embd_size)

        # CNN
        x = x.unsqueeze(1) # (bs, Cin, seq_len, embd_size), insert Channnel-In dim
        # Conv2d
        #    Input : (bs, Cin,  Hin,  Win )
        #    Output: (bs, Cout, Hout, Wout)
        A = self.conv_0(x) + self.b_0.repeat(1, 1, seq_len, 1)      # (bs, Cout, seq_len, 1)
        B = self.conv_gate_0(x) + self.c_0.repeat(1, 1, seq_len, 1) # (bs, Cout, seq_len, 1)
        # torch.sigmoid replaces F.sigmoid: `F` was never imported, so the
        # forward pass raised NameError.
        h = A * torch.sigmoid(B)    # (bs, Cout, seq_len, 1)
        res_input = h # TODO this is h1 not h0

        for i, (conv, conv_gate) in enumerate(zip(self.conv, self.conv_gate)):
            A = conv(h) + self.b[i].repeat(1, 1, seq_len, 1)
            B = conv_gate(h) + self.c[i].repeat(1, 1, seq_len, 1)
            h = A * torch.sigmoid(B) # (bs, Cout, seq_len, 1)
            if i % self.res_block_count == 0: # size of each residual block
                h = h + res_input
                res_input = h

        h = h.view(bs, -1) # (bs, Cout*seq_len)
        out = self.fc(h) # (bs, ans_size)
        # Normalise over the class dimension explicitly.
        out = torch.log_softmax(out, dim=1)

        return out

In [None]:
# Sizes used by the scratch cells that rebuild the GatedCNN layers at module level.
vocab_size      = 2000
seq_len         = 21
embd_size       = 200
n_layers        = 10
# (height, width) of the first convolution; the width spans the whole embedding.
kernel          = (5, embd_size)
out_chs         = 64
res_block_count = 5
batch_size      = 64

In [None]:
import torch

# Module-level copies of the layers GatedCNN.__init__ creates, for inspecting them one at a time.
embedding = nn.Embedding(vocab_size, embd_size)

# nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, ...
conv_0 = nn.Conv2d(1, out_chs, kernel, padding=(2, 0))
b_0 = nn.Parameter(torch.randn(1, out_chs, 1, 1))
conv_gate_0 = nn.Conv2d(1, out_chs, kernel, padding=(2, 0))
c_0 = nn.Parameter(torch.randn(1, out_chs, 1, 1))

# One convolution, one gate convolution and one bias of each kind per layer.
conv = nn.ModuleList([nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=(2, 0)) for _ in range(n_layers)])
conv_gate = nn.ModuleList([nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=(2, 0)) for _ in range(n_layers)])
b = nn.ParameterList([nn.Parameter(torch.randn(1, out_chs, 1, 1)) for _ in range(n_layers)])
c = nn.ParameterList([nn.Parameter(torch.randn(1, out_chs, 1, 1)) for _ in range(n_layers)])

# Final linear layer with two output classes.
fc = nn.Linear(out_chs*seq_len, 2)

In [None]:
# Scratch walk-through of the GatedCNN forward pass using the module-level
# layers. The cell used to reference `self`, `x` and `F`, none of which exist
# at module level, so it could not run.
x = torch.randint(0, vocab_size, (batch_size, seq_len)) # random token ids stand in for a real batch

bs = x.size(0) # batch size
seq_len = x.size(1)
x = embedding(x) # (bs, seq_len, embd_size)

# CNN
x = x.unsqueeze(1) # (bs, Cin, seq_len, embd_size), insert Channnel-In dim
# Conv2d
#    Input : (bs, Cin,  Hin,  Win )
#    Output: (bs, Cout, Hout, Wout)
A = conv_0(x) + b_0.repeat(1, 1, seq_len, 1)      # (bs, Cout, seq_len, 1)
B = conv_gate_0(x) + c_0.repeat(1, 1, seq_len, 1) # (bs, Cout, seq_len, 1)
h = A * torch.sigmoid(B)    # (bs, Cout, seq_len, 1)
res_input = h # TODO this is h1 not h0

# Distinct loop names keep the `conv` / `conv_gate` lists intact, so the cell can be re-run.
for i, (conv_layer, gate_layer) in enumerate(zip(conv, conv_gate)):
    A = conv_layer(h) + b[i].repeat(1, 1, seq_len, 1)
    B = gate_layer(h) + c[i].repeat(1, 1, seq_len, 1)
    h = A * torch.sigmoid(B) # (bs, Cout, seq_len, 1)
    if i % res_block_count == 0: # size of each residual block
        h = h + res_input
        res_input = h

h = h.view(bs, -1) # (bs, Cout*seq_len)
out = fc(h) # (bs, ans_size)
out = torch.log_softmax(out, dim = 1)

Conv2d(1, 64, kernel_size=(5, 200), stride=(1, 1), padding=(2, 0))

In [None]:
b_0.shape

torch.Size([1, 64, 1, 1])