# Imports

In [1]:
import os 
import pandas as pd
import numpy as np
import torch 
from torch import nn, optim
from transformers import AutoTokenizer, AutoModelForPreTraining, AdamW, get_scheduler, get_linear_schedule_with_warmup
import torch.nn.functional as F
from torch.utils.data import DataLoader
from transformers import ElectraModel
from transformers.utils import logging
logging.set_verbosity_error() #Remove warning msg - missing fine-tunning
from tqdm import tqdm
from collections import defaultdict

# Data

In [3]:
class Dataset():
  def __init__(self, texts, targets, tokenizer, max_len,network_features):
    self.network_features = network_features
    self.text = texts
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.text)

  def __getitem__(self, item):
    network_features = self.network_features[item]
    text = str(self.text[item])
    target = self.targets[item]
    encoding = self.tokenizer.encode_plus(
      text,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding='max_length',
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt',
    )
    return {
        'network_features': torch.tensor(network_features, dtype=torch.float),
        'text': text,
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'targets': torch.tensor(target, dtype=torch.long)}

In [4]:
path='/home/pelle/Master_Thesis/data/processed/dataloaders/week10/'

eval_dataloader = torch.load(path+'eval_dataloader_full.pt')
train_dataloader = torch.load(path+'train_dataloader_full.pt')
test_dataloader = torch.load(path+'test_dataloader_full.pt')

In [5]:
print(type(eval_dataloader))
print(iter(eval_dataloader).next().keys())

print(type(train_dataloader))
print(iter(train_dataloader).next().keys())

print(type(test_dataloader))
print(iter(test_dataloader).next().keys())

<class 'torch.utils.data.dataloader.DataLoader'>
dict_keys(['network_features', 'text', 'input_ids', 'attention_mask', 'targets'])
<class 'torch.utils.data.dataloader.DataLoader'>
dict_keys(['network_features', 'text', 'input_ids', 'attention_mask', 'targets'])
<class 'torch.utils.data.dataloader.DataLoader'>
dict_keys(['network_features', 'text', 'input_ids', 'attention_mask', 'targets'])


In [10]:
batch = 32
print(len(eval_dataloader)*batch)
print(len(train_dataloader)*batch)
print(len(test_dataloader)*batch)
print()
# sum
print('Sum')
print(len(eval_dataloader)*batch + len(train_dataloader)*batch + len(test_dataloader)*batch)


# get batch size
next(iter(eval_dataloader))['targets'].shape[0]

5696
45408
5696

Sum
56800


32

# Model

In [70]:
electraModel = ElectraModel.from_pretrained('google/electra-small-discriminator')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

dataiter = iter(eval_dataloader)
n_features = next(dataiter)['network_features'].shape[1]
print(n_features)

class ElectraClassifier(nn.Module):
    def __init__(self,num_labels=2):
        super(ElectraClassifier,self).__init__()
        self.num_labels = num_labels

        # network features
        self.network_input = nn.Linear(in_features=9,out_features=2048) # 9 network features
        self.dense_net2 = nn.Linear(in_features=2048,out_features=2048)
        self.dense_net3 = nn.Linear(in_features=2048,out_features=2048)
        self.dense_net4 = nn.Linear(in_features=2048,out_features=2048)

        # output layer
        self.out_proj = nn.Linear(2048, self.num_labels)


    def forward(self,network_features=None):
        x_net = self.network_input(network_features)
        x_net = F.gelu(x_net)
        x_net = self.dense_net2(x_net)
        x_net = F.gelu(x_net)
        x_net = self.dense_net3(x_net)
        x_net = F.gelu(x_net)
        x_net = self.dense_net4(x_net)
        x = F.gelu(x_net)

        logits = self.out_proj(x_net)
        
        return logits

model=ElectraClassifier()
model = model.to(device)

cpu
9


In [30]:
iter(test_dataloader).next()['targets']

# random torch tensor
network_features = torch.rand(1, 524)

dense=nn.Linear(in_features=524,out_features=2)

sm=nn.Softmax(dim=1)

# torch.max(sm(dense(network_features)),dim=1)
print(dense(network_features))
print(sm(dense(network_features)))

tensor([[0.1563, 0.1171]], grad_fn=<AddmmBackward0>)
tensor([[0.5098, 0.4902]], grad_fn=<SoftmaxBackward0>)


# Train

In [148]:
EPOCHS = 1
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False,no_deprecation_warning=True)
total_steps = len(train_dataloader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=total_steps)
loss_fn = nn.CrossEntropyLoss().to(device)

In [149]:
def eval_model(model, data_loader, loss_fn, device, n_examples):
  model = model.eval()
  losses = []
  correct_predictions = 0
  with torch.no_grad():
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      targets = d["targets"].to(device)
      network_features = d["network_features"].to(device)

      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        network_features=network_features)

      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, targets)
      correct_predictions += torch.sum(preds == targets)
      losses.append(loss.item())
  return correct_predictions.double() / n_examples, np.mean(losses)

In [150]:
def train_epoch(model,data_loader,loss_fn,optimizer,device,scheduler,n_examples):
  model = model.train()
  losses = []
  correct_predictions = 0
  
  for d in data_loader:
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    targets = d["targets"].to(device)
    network_features = d["network_features"].to(device)
    
    outputs = model(
      input_ids=input_ids,
      attention_mask=attention_mask,
      network_features=network_features)

    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, targets)
    correct_predictions += torch.sum(preds == targets)
    losses.append(loss.item())
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

  return correct_predictions.double() / n_examples, np.mean(losses)

In [72]:
# history = defaultdict(list)
# best_accuracy = 0
# for epoch in tqdm(range(EPOCHS)):

#   train_acc, train_loss = train_epoch(model, train_dataloader, loss_fn, optimizer, device, scheduler,len(train_dataloader.dataset))
#   val_acc, val_loss = eval_model(model, eval_dataloader, loss_fn, device, len(eval_dataloader.dataset))

#   history['train_acc'].append(train_acc)
#   history['train_loss'].append(train_loss)
#   history['val_acc'].append(val_acc)
#   history['val_loss'].append(val_loss)
  
#   if val_acc > best_accuracy:
#     torch.save(model.state_dict(), 'best_model_state.bin')
#     best_accuracy = val_acc

# Predection

In [73]:
electraModel = ElectraModel.from_pretrained('google/electra-small-discriminator')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

dataiter = iter(eval_dataloader)
n_features = next(dataiter)['network_features'].shape[1]
print(n_features)

class ElectraClassifier(nn.Module):
    def __init__(self,num_labels=2):
        super(ElectraClassifier,self).__init__()
        self.num_labels = num_labels

        # network features
        self.network_input = nn.Linear(in_features=9,out_features=2048) # 9 network features
        self.dense_net2 = nn.Linear(in_features=2048,out_features=2048)
        self.dense_net3 = nn.Linear(in_features=2048,out_features=2048)
        self.dense_net4 = nn.Linear(in_features=2048,out_features=2048)

        # output layer
        self.out_proj = nn.Linear(2048, 2)


    def forward(self,network_features=None):
        x_net = self.network_input(network_features)
        x_net = F.gelu(x_net)
        x_net = self.dense_net2(x_net)
        x_net = F.gelu(x_net)
        x_net = self.dense_net3(x_net)
        x_net = F.gelu(x_net)
        x_net = self.dense_net4(x_net)
        x = F.gelu(x_net)

        logits = self.out_proj(x_net)
        
        return logits

model=ElectraClassifier()
model = model.to(device)

cpu
9


In [109]:
input_size = 9
hidden_sizes = [2048, 64]
output_size = 2

model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.Softmax(dim=1))

In [113]:
with torch.no_grad():
    d = next(iter(test_dataloader))
    network_features = d["network_features"].to(device)
    logits = model(network_features)
    _, preds = torch.max(logits, dim=1)
    print(preds)
    print(logits)

tensor([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 0, 1, 1])
tensor([[0.9577, 0.0423],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.6927, 0.3073],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.8944, 0.1056],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.4973, 0.5027],
        [0.6693, 0.3307],
        [0.0000, 1.0000],
        [0.0000, 1.0000]])


In [115]:
def get_predictions(model, data_loader):
    model = model.eval()
    predictions = []
    prediction_probs = []
    ground_truth = []
    with torch.no_grad():
        for d in data_loader:
            network_features = d["network_features"].to(device)
            targets = d["targets"].to(device)

            outputs = model(network_features)
            _, preds = torch.max(outputs, dim=1)

            predictions.extend(preds)
            prediction_probs.extend(outputs)
            ground_truth.extend(targets)
            break

    predictions = torch.stack(predictions).cpu()
    prediction_probs = torch.stack(prediction_probs).cpu()
    ground_truth = torch.stack(ground_truth).cpu()

    return predictions, prediction_probs, ground_truth

y_pred, y_pred_probs, y_test = get_predictions(model,test_dataloader)

print(y_pred)
print(y_pred_probs)
print(y_test)

tensor([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 0, 1, 1])
tensor([[0.9577, 0.0423],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.6927, 0.3073],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.8944, 0.1056],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.0000, 1.0000],
        [0.4973, 0.5027],
        [0.6693, 0.3307],
        [0.0000, 1.0000],
        [0.0000, 1.0000]])
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1

In [118]:
len(eval_dataloader.dataset)

5674