In [1]:
import pickle
import numpy as np

from tqdm import tqdm
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader 

SEED = 42

In [2]:
wv_path = ['./data/train_data/train', './data/test_data/test', './data/dev_data/dev']


with open(wv_path[0] + f'_onehot_sst2.pkl', 'rb') as f:
    X_train_tensor, y_train = pickle.load(f)
with open(wv_path[1] + f'_onehot_sst2.pkl', 'rb') as f:
    X_test_tensor, y_test = pickle.load(f)
with open(wv_path[2] + f'_onehot_sst2.pkl', 'rb') as f:
    X_dev_tensor, y_dev = pickle.load(f)

In [3]:
wv_num = X_train_tensor.shape[1]
max_length = X_train_tensor.shape[2]

In [4]:
print(f"Shape of training data {[X_train_tensor.shape[i] for i in range(3)]}")
print(f"Shape of training data {[X_dev_tensor.shape[i] for i in range(3)]}")

Shape of training data [67349, 69, 268]
Shape of training data [872, 69, 268]


In [5]:
# process labels
labels = list(set(y_train + y_dev))
label_num = len(labels)
print(f"Number of label types: {label_num}")

Number of label types: 2


In [6]:
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X =torch.tensor(X, dtype=torch.float)
        self.y =torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self,idx):
        return self.X[idx], self.y[idx]

def initialize_loader(X_train_tensor, X_dev_tensor, y_train, y_dev, batch_size=64):
    # no need to do the scale since original wv already did
    train_data = CustomDataset(X_train_tensor, y_train)
    dev_data = CustomDataset(X_dev_tensor, y_dev)

    # convert to DataLoader for batch processing and shuffling
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    for inputs, targets in train_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    dev_dataloader = DataLoader(dev_data, batch_size=batch_size, shuffle=False)
    for inputs, targets in dev_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    return train_dataloader, dev_dataloader

train_dataloader, dev_dataloader = initialize_loader(X_train_tensor, X_dev_tensor, y_train, y_dev)

  self.X =torch.tensor(X, dtype=torch.float)


In [7]:
# model
class CNN(nn.Module):
    def __init__(self, dim_input, dim_out):
        super(CNN, self).__init__()
        self.model = nn.Sequential(
            nn.Conv1d(in_channels=dim_input, out_channels=1024, kernel_size=7, stride=1),
            nn.MaxPool1d(kernel_size=3, stride=3),
            nn.Conv1d(in_channels=1024, out_channels=1024, kernel_size=7, stride=1),
            nn.MaxPool1d(kernel_size=3, stride=3),
            nn.Conv1d(in_channels=1024, out_channels=1024, kernel_size=3, stride=1),
            nn.Conv1d(in_channels=1024, out_channels=1024, kernel_size=3, stride=1),
            nn.Conv1d(in_channels=1024, out_channels=1024, kernel_size=3, stride=1),
            nn.MaxPool1d(kernel_size=3, stride=3),
            nn.Flatten(),

            nn.Linear(7168, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(2048, dim_out),
            nn.Softmax()
        )

        # para initialization

    def forward(self, x):
        x = self.model(x)
        return x

In [8]:
torch.manual_seed(SEED)
dim_in = wv_num
dim_out = label_num
lr = 0.0001
model = CNN(dim_in, dim_out)
model.to(device)
# model.init_weights()

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# a = torch.randn(1, 300, 56)
# a = a.to(device)
# model(a)

In [9]:
# training
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    optimizer.zero_grad()
    train_loss, correct_num = 0, 0
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        model.eval()
        
        train_loss += loss.item() * X.size(0)
        correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    train_loss /= size
    train_acc = correct_num / size
    return train_loss, train_acc

def val_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    val_loss, correct_num = 0, 0
    model.eval()  # inform no dropout

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            pred = model(X)
            val_loss += loss_fn(pred, y).item() * X.size(0)
            correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    val_loss /= size
    val_acc = correct_num / size
    return val_loss, val_acc
        

In [10]:
patience = 15
best_val_loss = np.Inf
best_val_acc = 0
train_loss_, train_acc_, val_loss_, val_acc_ = [], [], [], []
no_epochs = 100

# start training
for epoch in tqdm(range(no_epochs)):
    train_loss, train_acc = train_loop(train_dataloader, model, loss_fn, optimizer)
    val_loss, val_acc = val_loop(dev_dataloader, model, loss_fn)

    train_loss_.append(train_loss), train_acc_.append(train_acc)
    val_loss_.append(val_loss), val_acc_.append(val_acc)

    # early stopping
    if val_acc > best_val_acc:
        best_val_acc = val_acc
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print(f'Early stopping after {epoch+1} epochs')
            print(f'Best validation accuracy: {best_val_acc}')
            break


    print(f"Epoch {epoch+1}, train_loss {train_loss:>7f} train_acc {train_acc:>4f}, val_loss {val_loss:>7f}, val_acc {val_acc:>4f}")

  0%|          | 0/100 [00:00<?, ?it/s]

  return self._call_impl(*args, **kwargs)
  1%|          | 1/100 [01:00<1:39:08, 60.09s/it]

Epoch 1, train_loss 0.671050 train_acc 0.579697, val_loss 0.695254, val_acc 0.565367


  2%|▏         | 2/100 [01:53<1:31:47, 56.20s/it]

Epoch 2, train_loss 0.569457 train_acc 0.723366, val_loss 0.588622, val_acc 0.708716


  2%|▏         | 2/100 [02:42<2:12:59, 81.42s/it]


KeyboardInterrupt: 