In [None]:
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import re
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import transformers

from sklearn.metrics import classification_report

In [None]:
# load data
# Each split is read with torch.load from a file of the same name in the current
# working directory. NOTE(review): later cells index x_train.shape[2] and
# y_train.shape[1], so x_* is presumably 3-D and y_* 2-D -- confirm against the
# preprocessing step that wrote these files.
x_train, x_val, x_test = (torch.load(name) for name in ("x_train", "x_val", "x_test"))
y_train, y_val, y_test = (torch.load(name) for name in ("y_train", "y_val", "y_test"))

In [None]:
class KimCNN(nn.Module):
    """Multi-width convolutional classifier with sigmoid outputs.

    One ``Conv2d`` branch is built per entry of ``kernel_sizes``; every branch
    is ReLU-activated and max-pooled over its full length, the pooled vectors
    are concatenated, passed through dropout and a linear layer, and finally
    squashed with a sigmoid.

    Args:
        embed_num: Number of rows of the ``nn.Embedding`` table.
        embed_dim: Width of every convolution kernel (and of the embedding).
        class_num: Number of output units of the final linear layer.
        kernel_num: Output channels of each convolution branch.
        kernel_sizes: Iterable of kernel heights, one branch per entry.
        dropout: Drop probability applied before the linear layer.
        static: When truthy, the input is wrapped in ``Variable`` first.
    """

    def __init__(self, embed_num, embed_dim, class_num, kernel_num, kernel_sizes, dropout, static):
        super().__init__()
        self.static = static

        # Registered as a sub-module (so it shows up in parameters()/state_dict)
        # but forward() never calls it.
        self.embed = nn.Embedding(embed_num, embed_dim)

        # Each kernel spans `height` rows and the full `embed_dim` columns.
        conv_branches = []
        for height in kernel_sizes:
            conv_branches.append(nn.Conv2d(1, kernel_num, (height, embed_dim)))
        self.convs1 = nn.ModuleList(conv_branches)

        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(kernel_sizes) * kernel_num, class_num)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-class sigmoid scores for ``x``.

        Assumes ``x`` is a float tensor shaped (batch, length, embed_dim) whose
        length is at least the largest kernel height -- TODO confirm with the
        caller; nothing here validates it.
        """
        if self.static:
            # NOTE(review): Variable is the legacy autograd wrapper; confirm
            # whether it is still required on the PyTorch version in use.
            x = Variable(x)

        # Insert the single input channel that Conv2d expects at dim 1.
        channelled = x.unsqueeze(1)

        pooled = []
        for conv in self.convs1:
            # Drop dim 3 (size 1 when the input width equals the kernel width).
            activated = F.relu(conv(channelled)).squeeze(3)
            # Max over the whole remaining length, then drop that dim.
            pooled.append(F.max_pool1d(activated, activated.size(2)).squeeze(2))

        features = self.dropout(torch.cat(pooled, 1))
        return self.sigmoid(self.fc1(features))

In [None]:
# hyperparameters
# Dimensions are read from the training tensors loaded earlier (`x_train`,
# `y_train`). The visible cells never define `x_train2` / `y_train2`, so using
# those names fails with NameError on a fresh kernel.
embed_num = x_train.shape[1]  # size of input axis 1; only sizes the nn.Embedding table
embed_dim = x_train.shape[2]  # size of input axis 2; used as the conv kernel width
class_num = y_train.shape[1]  # one output unit per label column
kernel_num = 3  # output channels per convolution branch
kernel_sizes = [2, 3, 4]  # kernel heights, one convolution branch each
dropout = 0.1  # drop probability before the final linear layer
static = True  # forwarded to KimCNN(static=...)

model = KimCNN(
    embed_num=embed_num,
    embed_dim=embed_dim,
    class_num=class_num,
    kernel_num=kernel_num,
    kernel_sizes=kernel_sizes,
    dropout=dropout,
    static=static,
)

n_epochs = 10  # number of passes made by the training loop
lr = 0.0001  # Adam learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# BCELoss expects probabilities; KimCNN.forward ends in a sigmoid.
loss_fn = nn.BCELoss()

In [None]:
# Full-batch training: each epoch is exactly one forward/backward pass over all
# of x_train followed by a single optimizer step.
train_losses = []

for epoch in range(n_epochs):
    start_time = time.time()
    model.train()  # training mode (dropout active)

    optimizer.zero_grad()  # clear gradients left over from the previous epoch
    y_pred = model(x_train)
    loss = loss_fn(y_pred, y_train)
    loss.backward()
    optimizer.step()

    # Only one batch per epoch, so the epoch loss is simply this batch's loss.
    train_loss = loss.item()
    train_losses.append(train_loss)

    elapsed = time.time() - start_time
    print("Epoch %d. Train loss: %.2f. Elapsed time: %.2fs."
          % (epoch + 1, train_loss, elapsed))

In [None]:
# Validation loss of the trained model on (x_val, y_val).
# NOTE(review): no optimizer step happens in this cell, so every iteration
# evaluates the same weights; the list is filled n_epochs times only so that it
# has the same length as train_losses for plotting. A true per-epoch validation
# curve would have to be computed inside the training loop.
val_losses = []

for epoch in range(n_epochs):
    start_time = time.time()
    val_loss = 0
    model.eval()  # evaluation mode (dropout disabled)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        y_pred = model(x_val)
        loss = loss_fn(y_pred, y_val)
    val_loss += loss.item()
    val_losses.append(val_loss)
    # Time this pass here instead of reusing `elapsed` left over from the
    # training cell.
    elapsed = time.time() - start_time
    print("Epoch %d. Validation loss: %.2f. Elapsed time: %.2fs."
    % (epoch + 1, val_losses[-1], elapsed))

In [None]:
# plot losses
# Both series are indexed by position in their list, i.e. the 0-based epoch.
plt.plot(train_losses, label="Training loss")
plt.plot(val_losses, label="Validation loss")
# Label the axes so the figure can be read on its own.
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Losses")

In [None]:
# evaluate predictions
# Score the held-out test split and print scikit-learn's classification report.
model.eval()  # evaluation mode (dropout disabled)

with torch.no_grad():
    y_pred = model(x_test)

# Sigmoid scores as nested Python lists, then as a NumPy array.
y_preds = y_pred.cpu().numpy().tolist()
# A label is predicted positive when its score is strictly above 0.5.
y_preds_np = np.array(y_preds) > 0.5
y_test_np = y_test.numpy()

print(classification_report(y_test_np, y_preds_np))