In [1]:
import os
# Number GPUs in PCI bus order and make only the GPU with index 3 visible to
# this process.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "3",
    }
)

In [2]:
import warnings
# Ignore UserWarning messages raised from modules matching 'bs4'.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module='bs4',
)

In [3]:
import torch
import torch.nn as nn
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [4]:
import numpy as np
import pandas as pd

In [5]:
# Remove pandas' display limits (None = no cap; use a number such as 1000 to
# bound the output instead) for columns, rows and cell width.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

In [6]:
# Training split. The training loop reads its "context", "text" and "label" columns.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
train = pd.read_pickle("train_p2_flattened.pkl")

In [7]:
from transformers import pipeline
class XLMR:
    """Thin wrapper around a Hugging Face ``feature-extraction`` pipeline for ``xlm-roberta-base``.

    Args:
        device: Device index handed to ``pipeline`` (``-1`` selects the CPU, a
            non-negative value selects that CUDA device). When ``None`` (the
            default), device 0 is used if CUDA is available and the CPU
            otherwise, so construction no longer fails on machines without a
            visible GPU.
    """

    def __init__(self, device=None):
        if device is None:
            # Previously hard-coded to 0, which breaks when no CUDA device is visible.
            device = 0 if torch.cuda.is_available() else -1
        self.model_name = 'xlmr'
        self.nlp = pipeline(
            task="feature-extraction",
            model='xlm-roberta-base',
            tokenizer='xlm-roberta-base',
            framework='pt',
            device=device,
        )

    def GetFeatures(self, sentences=None):
        """Run the pipeline on ``sentences`` and return features as a DataFrame.

        Args:
            sentences: Input forwarded unchanged to the pipeline (callers in
                this notebook pass a one-element list of strings). Inputs are
                truncated by the tokenizer (``truncation=True``).

        Returns:
            ``pd.DataFrame(features[0][0])`` -- only the first entry of the
            pipeline output is used. Presumably one row per token and one
            column per hidden dimension; confirm against the installed
            transformers version. Returns ``None`` when ``model_name`` is not
            ``'xlmr'``.
        """
        if self.model_name == 'xlmr':
            features = self.nlp(sentences, truncation=True)
            return pd.DataFrame(features[0][0])
        return None

2022-02-12 13:37:45.885047: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-02-12 13:37:45.885081: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [8]:
# Build the XLM-R feature extractor once; the training and evaluation loops both call it.
xlmr = XLMR()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [10]:
class CustomBERTModel(nn.Module):
    """Classification head over 768-dimensional row embeddings.

    Consists of a single ``nn.Linear(768, 2)`` layer followed by a softmax
    over dimension 1, so every input row yields two values that sum to 1.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=768, out_features=2)
        self.outputlayer = nn.Softmax(dim=1)

    def concatenate(self, tensor1, tensor2):
        """Join ``tensor1`` and ``tensor2`` along dimension 1."""
        return torch.cat([tensor1, tensor2], dim=1)

    def forward(self, row_embeddings=None):
        """Return the softmax of the linear projection of ``row_embeddings``.

        Args:
            row_embeddings: Tensor whose last dimension is 768; dimension 1
                must exist because the softmax is taken over it.

        Returns:
            Tensor with 2 values per row (softmax output, not raw logits).
        """
        return self.outputlayer(self.linear(row_embeddings))

In [11]:
# Fresh, randomly initialised classification head (trained by the loop further down).
model = CustomBERTModel()

In [24]:
# Optimisation settings for the classification head.
learning_rate, epochs = 1e-4, 2

# Binary cross-entropy; the training loop feeds it the model's softmax output
# and a one-hot target.
criterion = nn.BCELoss()

# Hand AdamW only the parameters that require gradients.
optimizer = torch.optim.AdamW(
    (param for param in model.parameters() if param.requires_grad),
    lr=learning_rate,
)

In [13]:
# Move the classifier's parameters to the selected device ("cuda" or "cpu").
model = model.to(device)

In [14]:
from operator import add
from scipy import spatial

In [15]:
def cosine_similarity(list1, list2, distance=False):
    """Cosine similarity (or cosine distance) between two 1-D vectors.

    Args:
        list1: First vector (any 1-D array-like accepted by SciPy).
        list2: Second vector, same length as ``list1``.
        distance: When true, return the cosine distance instead of the
            similarity.

    Returns:
        ``scipy.spatial.distance.cosine(list1, list2)`` if ``distance`` is
        true, otherwise ``1`` minus that value.
    """
    cosine_dist = spatial.distance.cosine(list1, list2)
    return cosine_dist if distance else 1 - cosine_dist

In [16]:
# String label -> integer class id.
TASK1 = {'HOF': 1, 'NONE': 0}

# Encode the labels. The guard keeps this cell idempotent: mapping labels that
# are already encoded a second time would turn every value into NaN.
if not train["label"].isin(list(TASK1.values())).all():
    train["label"] = train["label"].map(TASK1)

# Labels missing from TASK1 become NaN; fail here instead of deep inside training.
assert train["label"].notna().all(), "train contains labels that are not in TASK1"

In [26]:
print("Training....")

model.train()

for epoch in range(epochs):
    # Plain float accumulator: adding the loss tensors themselves would keep
    # autograd bookkeeping alive for every sample of the epoch.
    total_train_loss = 0.0
    print("==========Epochs:{}===========".format(epoch))

    for context, text, label in zip(train["context"], train["text"], train["label"]):
        # First row of the feature frame for the text (presumably the first
        # token's embedding -- confirm against XLMR.GetFeatures).
        text_embeddings = np.asarray(xlmr.GetFeatures([text]).iloc[0], dtype=np.float64)

        # Row representation = text embedding + sum of context embeddings, each
        # scaled by its cosine distance to the text embedding.
        # NOTE(review): weighting by *distance* gives dissimilar contexts more
        # weight -- confirm this is intended rather than similarity.
        final_embeddings = text_embeddings.copy()
        for sub_context in context:
            context_embeddings = np.asarray(xlmr.GetFeatures([sub_context]).iloc[0], dtype=np.float64)
            cosine_distance = cosine_similarity(text_embeddings, context_embeddings, distance=True)
            final_embeddings += cosine_distance * context_embeddings

        # Batch of one row for the classifier.
        row_embeddings = torch.tensor(final_embeddings, dtype=torch.float32, device=device).unsqueeze(0)
        probabilities = model(row_embeddings)  # softmax output, not raw logits

        # BCELoss expects a target shaped like the model output: one-hot over 2 classes.
        one_hot = torch.zeros(1, 2, device=device)
        one_hot[0, int(label)] = 1

        loss = criterion(input=probabilities, target=one_hot)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()

    # Checkpoint once per epoch rather than after every single sample.
    torch.save(model.state_dict(), "rev_xlmr_model_adamw.pth")
    print("Training Loss: {}".format(total_train_loss / len(train)))

Training....


In [None]:
# Persist the final weights (same path the training loop writes its checkpoints to).
torch.save(model.state_dict(), "rev_xlmr_model_adamw.pth")

In [None]:
# Rebuild the classifier and restore the trained weights. map_location remaps
# the stored tensors onto the current device, so a checkpoint written during a
# GPU run can still be loaded on a CPU-only machine.
model = CustomBERTModel()
model.load_state_dict(torch.load("rev_xlmr_model_adamw.pth", map_location=device))

<All keys matched successfully>

In [None]:
# Testing Setup

In [None]:
# Evaluation split. The evaluation loop reads its "context", "text" and "label" columns.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
test = pd.read_pickle("test_p2_flattened.pkl")

In [None]:
# Encode the evaluation labels with the TASK1 mapping. The guard keeps this
# cell idempotent: mapping already-encoded labels again would yield NaN.
if not test['label'].isin(list(TASK1.values())).all():
    test['label'] = test['label'].map(TASK1)

# Labels missing from TASK1 become NaN; fail here instead of inside the metrics.
assert test['label'].notna().all(), "test contains labels that are not in TASK1"

In [None]:
from sklearn.metrics import accuracy_score, f1_score

print("Testing....")

predictions = []
labels = []

model = model.to(device)
model.eval()

with torch.no_grad():
    for context, text, label in zip(test["context"], test["text"], test["label"]):
        # First row of the feature frame for the text (presumably the first
        # token's embedding -- confirm against XLMR.GetFeatures).
        text_embeddings = np.asarray(xlmr.GetFeatures([text]).iloc[0], dtype=np.float64)

        # Same row representation as in training: text embedding plus the
        # context embeddings scaled by their cosine distance to the text.
        final_embeddings = text_embeddings.copy()
        for sub_context in context:
            context_embeddings = np.asarray(xlmr.GetFeatures([sub_context]).iloc[0], dtype=np.float64)
            cosine_distance = cosine_similarity(text_embeddings, context_embeddings, distance=True)
            final_embeddings += cosine_distance * context_embeddings

        # Batch of one row for the classifier.
        row_embeddings = torch.tensor(final_embeddings, dtype=torch.float32, device=device).unsqueeze(0)
        probabilities = model(row_embeddings)  # softmax output, not raw logits

        predictions.append(torch.argmax(probabilities, dim=-1).item())
        labels.append(label)

# scikit-learn metrics take (y_true, y_pred); keywords make the roles explicit.
print("Accuracy: {}".format(accuracy_score(y_true=labels, y_pred=predictions)))
print("F1 Score: {}".format(f1_score(y_true=labels, y_pred=predictions)))

In [None]:
import pickle

# Write the collected predictions to disk as a pickle.
with open('FT_XLMR_1e-4.pkl', mode='wb') as predictions_file:
    pickle.dump(predictions, predictions_file)