# Imports

In [152]:
import os 
import pandas as pd
import numpy as np
import torch 
from torch import nn, optim
from transformers import AutoTokenizer, AutoModelForPreTraining, AdamW, get_scheduler, get_linear_schedule_with_warmup
import torch.nn.functional as F
from torch.utils.data import DataLoader
from transformers import ElectraModel
from transformers.utils import logging
logging.set_verbosity_error() #Remove warning msg - missing fine-tunning
from tqdm import tqdm
from collections import defaultdict

# Data

In [153]:
class Dataset():
  def __init__(self, texts, targets, tokenizer, max_len,network_features):
    self.network_features = network_features
    self.text = texts
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.text)

  def __getitem__(self, item):
    network_features = self.network_features[item]
    text = str(self.text[item])
    target = self.targets[item]
    encoding = self.tokenizer.encode_plus(
      text,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding='max_length',
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt',
    )
    return {
        'network_features': torch.tensor(network_features, dtype=torch.float),
        'text': text,
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'targets': torch.tensor(target, dtype=torch.long)}

In [154]:
path='/home/pelle/Master_Thesis/data/processed/dataloaders/week10/'

eval_dataloader = torch.load(path+'eval_dataloader.pt')
train_dataloader = torch.load(path+'train_dataloader.pt')
test_dataloader = torch.load(path+'test_dataloader.pt')

In [155]:
print(type(eval_dataloader))
print(iter(eval_dataloader).next().keys())

print(type(train_dataloader))
print(iter(train_dataloader).next().keys())

print(type(test_dataloader))
print(iter(test_dataloader).next().keys())

<class 'torch.utils.data.dataloader.DataLoader'>
dict_keys(['network_features', 'text', 'input_ids', 'attention_mask', 'targets'])
<class 'torch.utils.data.dataloader.DataLoader'>
dict_keys(['network_features', 'text', 'input_ids', 'attention_mask', 'targets'])
<class 'torch.utils.data.dataloader.DataLoader'>
dict_keys(['network_features', 'text', 'input_ids', 'attention_mask', 'targets'])


# Model

In [156]:
electraModel = ElectraModel.from_pretrained('google/electra-small-discriminator')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

dataiter = iter(eval_dataloader)
n_features = next(dataiter)['network_features'].shape[1]
print(n_features)

class ElectraClassifier(nn.Module):
    def __init__(self,num_labels=2):
        super(ElectraClassifier, self).__init__()
        self.num_labels = num_labels

        # text features
        self.electra = ElectraModel.from_pretrained('google/electra-small-discriminator')
        self.dense_txt = nn.Linear(self.electra.config.hidden_size, self.electra.config.hidden_size)
        self.dropout_txt = nn.Dropout(self.electra.config.hidden_dropout_prob)

        # network features
        self.dense_net = nn.Linear(in_features=n_features,out_features=20)
        self.dense_net2 = nn.Linear(in_features=20,out_features=20)
        self.dense_net3 = nn.Linear(in_features=20,out_features=20)

        # combined features
        self.dense_cat1 = nn.Linear(in_features=276,out_features=524)
        self.out_proj = nn.Linear(524, self.num_labels)

    def classifier(self,sequence_output,network_features):
        x_txt = sequence_output[:, 0, :]
        x_txt = F.gelu(self.dense_txt(x_txt))
        x_txt = self.dropout_txt(x_txt)

        x_net = self.dense_net(network_features)
        x_net = self.dense_net2(x_net)
        x_net = self.dense_net3(x_net)
        
        x = torch.cat((x_txt,x_net),dim=1)
        x = self.dense_cat1(x)

        logits = self.out_proj(x)
        sm = nn.Softmax(dim=1)
        return sm(logits)

    def forward(self, input_ids=None,attention_mask=None,network_features=None):
        discriminator_hidden_states = self.electra(input_ids=input_ids,attention_mask=attention_mask)
        sequence_output = discriminator_hidden_states[0]

        logits = self.classifier(sequence_output,network_features)
        
        return logits

cpu
7


In [147]:
model=ElectraClassifier()
model = model.to(device)

for p in model.electra.parameters():
    p.requires_grad = False

# Train

In [148]:
EPOCHS = 1
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False,no_deprecation_warning=True)
total_steps = len(train_dataloader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=total_steps)
loss_fn = nn.CrossEntropyLoss().to(device)

In [149]:
def eval_model(model, data_loader, loss_fn, device, n_examples):
  model = model.eval()
  losses = []
  correct_predictions = 0
  with torch.no_grad():
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      targets = d["targets"].to(device)
      network_features = d["network_features"].to(device)

      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        network_features=network_features)

      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, targets)
      correct_predictions += torch.sum(preds == targets)
      losses.append(loss.item())
  return correct_predictions.double() / n_examples, np.mean(losses)

In [150]:
def train_epoch(model,data_loader,loss_fn,optimizer,device,scheduler,n_examples):
  model = model.train()
  losses = []
  correct_predictions = 0
  
  for d in data_loader:
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    targets = d["targets"].to(device)
    network_features = d["network_features"].to(device)
    
    outputs = model(
      input_ids=input_ids,
      attention_mask=attention_mask,
      network_features=network_features)

    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, targets)
    correct_predictions += torch.sum(preds == targets)
    losses.append(loss.item())
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

  return correct_predictions.double() / n_examples, np.mean(losses)

In [151]:
history = defaultdict(list)
best_accuracy = 0
for epoch in tqdm(range(EPOCHS)):

  train_acc, train_loss = train_epoch(model, train_dataloader, loss_fn, optimizer, device, scheduler,len(train_dataloader.dataset))
  val_acc, val_loss = eval_model(model, eval_dataloader, loss_fn, device, len(eval_dataloader.dataset))

  history['train_acc'].append(train_acc)
  history['train_loss'].append(train_loss)
  history['val_acc'].append(val_acc)
  history['val_loss'].append(val_loss)
  
  if val_acc > best_accuracy:
    torch.save(model.state_dict(), 'best_model_state.bin')
    best_accuracy = val_acc

  0%|          | 0/1 [01:35<?, ?it/s]


KeyboardInterrupt: 

# Predection

In [None]:
def get_predictions(model, data_loader):
  model = model.eval()
  review_texts = []
  predictions = []
  prediction_probs = []
  ground_truth = []
  with torch.no_grad():
    for d in data_loader:
      texts = d["text"]
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      network_features = d["network_features"].to(device)
      targets = d["targets"].to(device)
      outputs = model(
        input_ids=input_ids, 
        attention_mask=attention_mask,
        network_features=network_features)
      _, preds = torch.max(outputs, dim=1)
      review_texts.extend(texts)
      predictions.extend(preds)
      prediction_probs.extend(outputs)
      ground_truth.extend(targets)
      break

  predictions = torch.stack(predictions)
  prediction_probs = torch.stack(prediction_probs)
  ground_truth = torch.stack(ground_truth)
  
  return review_texts, predictions, prediction_probs, ground_truth

y_review_texts, y_pred, y_pred_probs, y_test = get_predictions(model,eval_dataloader)

print(y_pred)
print(y_pred_probs)

tensor([1, 1, 1, 1, 1, 1, 0, 1])
tensor([[0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.4859, 0.5141],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.5063, 0.4937],
        [0.0000, 1.0000]])
