In [21]:
from data_utils import straitified_train_validation_split, dataloader
from net import Net
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm

## Read in and prepare data

In [22]:
bert_datset = torch.load('data/processed/bert_data.pt')
train_labels = np.load('data/processed/targets_sent_train.npy')
valid_labels = np.load('data/processed/targets_sent_valid.npy')
labels = np.hstack((train_labels, valid_labels))

In [23]:
bert_train, labels_train, bert_val, labels_val = straitified_train_validation_split(bert_datset, labels)

train_dataloader = dataloader(bert_train, labels_train, batch_size=32)
valid_dataloader = dataloader(bert_val, labels_val, batch_size=32)
del bert_train, bert_val, labels_train, labels_val, bert_datset, train_labels, valid_labels

## CNN for the N x S x K embeddings

In [27]:
net = Net(num_classes = 2)
net = net.cuda()
print(net)

Net(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=6624, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=2, bias=True)
)


In [28]:
optimizer = optim.Adam(net.parameters(), lr=0.001)
w = torch.FloatTensor([0.1, 1.0]).cuda()
max_epocs = 5
criterion = nn.CrossEntropyLoss(weight=w)
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

In [29]:
max_epochs = 5
train_acc = []
losses = []
val_recall_scores = []
# Loop over epochs
for epoch in range(max_epochs):
    # Training
    train_preds, train_targs = [], []
    cur_loss = 0.0
    net.train()
    training_loop = tqdm(iter(train_dataloader), leave=True)
    for local_batch, local_labels in training_loop:
        # Zero gradient
        optimizer.zero_grad()
        # Transfer to GPU
        local_batch = local_batch.to(device, dtype=torch.float)
        local_labels = local_labels.to(device, dtype=torch.long)
        # zero the parameter gradients
       
        # Model computations
        output = net(local_batch)
        batch_loss = criterion(output, local_labels)
        batch_loss.backward()
        optimizer.step()
        training_loop.set_description(f'Epoch {epoch+1}')
        training_loop.set_postfix(loss=batch_loss.item())
        cur_loss += batch_loss
        # Get predictions
        predicted = torch.max(output, 1)[1]
        train_targs += list(local_labels.cpu().numpy())
        train_preds += list(predicted.data.cpu().numpy())
    
    print('-----------Training Metrics-----------')
    print('Accuracy: {}'.format(accuracy_score(train_targs, train_preds)))
    print('F1: {}'.format(f1_score(train_targs, train_preds)))
    print('Precision: {}'.format(precision_score(train_targs, train_preds)))
    print('Recall: {}'.format(recall_score(train_targs, train_preds)))

    # Validation
    net.eval()
    val_preds, val_targs = [], []
    for local_batch, local_labels in valid_dataloader:
        local_batch = local_batch.to(device, dtype=torch.float)
        local_labels =  local_labels.to(device, dtype=torch.long)
        outputs = net(local_batch)
        predicted = torch.max(outputs.data, 1)[1]
        val_targs += list(local_labels.cpu().numpy())
        val_preds += list(predicted.data.cpu().numpy())
    print('-----------Validation Metrics-----------')
    print('Accuracy: {}'.format(accuracy_score(val_targs, val_preds)))
    print('F1: {}'.format(f1_score(val_targs, val_preds)))
    print('Precision: {}'.format(precision_score(val_targs, val_preds)))
    print('Recall: {}'.format(recall_score(val_targs, val_preds)))
    losses.append(cur_loss / train_dataloader.batch_size)
    print(f'Current training Loss: {cur_loss / train_dataloader.batch_size}')
    print('-' * 66)



Epoch 1: 100%|██████████| 472/472 [00:57<00:00,  8.16it/s, loss=0.0515]

-----------Training Metrics-----------
Accuracy: 0.9640774125132555
F1: 0.14779874213836477
Precision: 0.42727272727272725
Recall: 0.08935361216730038





-----------Validation Metrics-----------
Accuracy: 0.9923117709437964
F1: 0.8835341365461848
Precision: 0.9401709401709402
Recall: 0.8333333333333334
Current training Loss: 7.290677070617676
------------------------------------------------------------------


Epoch 2: 100%|██████████| 472/472 [01:02<00:00,  7.51it/s, loss=0.0291] 

-----------Training Metrics-----------
Accuracy: 0.9931071049840933
F1: 0.8978388998035364
Precision: 0.9288617886178862
Recall: 0.8688212927756654





-----------Validation Metrics-----------
Accuracy: 0.9957582184517497
F1: 0.9365079365079364
Precision: 0.9833333333333333
Recall: 0.8939393939393939
Current training Loss: 2.2129974365234375
------------------------------------------------------------------


Epoch 3: 100%|██████████| 472/472 [01:04<00:00,  7.35it/s, loss=0.0381] 

-----------Training Metrics-----------
Accuracy: 0.9936373276776246
F1: 0.9062500000000001
Precision: 0.9317269076305221
Recall: 0.8821292775665399





-----------Validation Metrics-----------
Accuracy: 0.9954931071049841
F1: 0.9328063241106719
Precision: 0.9752066115702479
Recall: 0.8939393939393939
Current training Loss: 1.88265061378479
------------------------------------------------------------------


Epoch 4: 100%|██████████| 472/472 [01:04<00:00,  7.33it/s, loss=0.0593] 

-----------Training Metrics-----------
Accuracy: 0.9941012725344645
F1: 0.9130009775171065
Precision: 0.9396378269617707
Recall: 0.8878326996197718





-----------Validation Metrics-----------
Accuracy: 0.9933722163308589
F1: 0.9042145593869731
Precision: 0.9147286821705426
Recall: 0.8939393939393939
Current training Loss: 1.656335711479187
------------------------------------------------------------------


Epoch 5: 100%|██████████| 472/472 [01:04<00:00,  7.28it/s, loss=0.0364] 

-----------Training Metrics-----------
Accuracy: 0.9946314952279958
F1: 0.9203539823008849
Precision: 0.9531568228105907
Recall: 0.8897338403041825





-----------Validation Metrics-----------
Accuracy: 0.9941675503711559
F1: 0.9147286821705426
Precision: 0.9365079365079365
Recall: 0.8939393939393939
Current training Loss: 1.5432658195495605
------------------------------------------------------------------
