In [1]:
import pickle
import numpy as np

from tqdm import tqdm
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader 

SEED = 42

In [2]:
model_type = 'glove-twitter-25'
wv_path = ['./data/train_data/train', './data/test_data/test']

with open(wv_path[0] + f'_{model_type}.pkl', 'rb') as f:
    X_train, y_train = pickle.load(f)
with open(wv_path[1] + f'_{model_type}.pkl', 'rb') as f:
    X_test, y_test = pickle.load(f)

In [14]:
X_train, y_train = X_train[:200], y_train[:200]
X_test, y_test = X_test[:50], y_test[:50]

In [15]:
# data process
max_length = max(len(i) for i in X_test + X_train)

def preprocess_dataset(X):
    X_list = []
    for i in X:
        temp = i
        for _ in range(max_length - len(i)):
            temp.append(np.zeros(25))
        np_array = np.array(temp)
        trans = np.transpose(np_array)
        X_list.append(torch.from_numpy(trans))
    X_tensor = torch.stack(X_list)
    return X_tensor

X_train_tensor = preprocess_dataset(X_train)
print(f"Shape of training data {[X_train_tensor.shape[i] for i in range(3)]}")
X_test_tensor = preprocess_dataset(X_test)
print(f"Shape of training data {[X_test_tensor.shape[i] for i in range(3)]}")

Shape of training data [200, 25, 54]
Shape of training data [50, 25, 54]


In [16]:
# process labels
labels = list(set(y_train + y_test))
label_num = len(labels)
for i in range(len(y_train)):
    y_train[i] = labels.index(y_train[i])
for i in range(len(y_test)):
    y_test[i] = labels.index(y_test[i])
print(f"Number of label types: {label_num}")

Number of label types: 12


In [17]:
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X =torch.tensor(X, dtype=torch.float)
        self.y =torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self,idx):
        return self.X[idx], self.y[idx]

def initialize_loader(X_train_tensor, X_test_tensor, y_train, y_test):
    # no need to do the scale since original wv already did
    train_data = CustomDataset(X_train_tensor, y_train)
    test_data = CustomDataset(X_test_tensor, y_test)

    # convert to DataLoader for batch processing and shuffling
    train_dataloader = DataLoader(train_data, batch_size=128, shuffle=True)
    for inputs, targets in train_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    test_dataloader = DataLoader(test_data, batch_size=50, shuffle=False)
    for inputs, targets in test_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    return train_dataloader, test_dataloader

train_dataloader, test_dataloader = initialize_loader(X_train_tensor, X_test_tensor, y_train, y_test)

  self.X =torch.tensor(X, dtype=torch.float)


In [18]:
# model
class CNN(nn.Module):
    def __init__(self, dim_in, dim_conv, dim_out, dropout_rate=0.5, l2_norm=3):
        super(CNN, self).__init__()
        self.conv1_3 = nn.Conv1d(dim_in, dim_conv, 3, padding=5)  # 33
        self.conv1_4 = nn.Conv1d(dim_in, dim_conv, 4, padding=5)  # 32
        self.conv1_5 = nn.Conv1d(dim_in, dim_conv, 5, padding=5)  # 31
        self.bn = nn.BatchNorm1d(dim_conv * 3)
        self.ReLU = nn.ReLU()
        self.maxpool_1 = nn.MaxPool1d(kernel_size=max_length+8)
        self.maxpool_2 = nn.MaxPool1d(kernel_size=max_length+7)
        self.maxpool_3 = nn.MaxPool1d(kernel_size=max_length+6)
        self.fc = nn.Linear(dim_conv * 3, dim_out)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.softmax = nn.Softmax()


    def forward(self, x):
        x1 = self.conv1_3(x)
        x1 = self.ReLU(x1)
        x1 = self.maxpool_1(x1)


        x2 = self.conv1_4(x)
        x2 = self.ReLU(x2)
        x2 = self.maxpool_2(x2)

        x3 = self.conv1_5(x)
        x3 = self.ReLU(x3)
        x3 = self.maxpool_3(x3)

        x = torch.cat((x1, x2, x3), dim=1)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        x = self.dropout(x)
        x = self.softmax(x)
        return x


In [19]:
torch.manual_seed(SEED)
dim_in = 25
dim_conv = 100
dim_out = label_num
lr = 0.001
model = CNN(dim_in, dim_conv, dim_out)
model.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [20]:
# training
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    optimizer.zero_grad()
    train_loss, correct_num = 0, 0
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()
        optimizer.step()

        train_loss += loss.item() * X.size(0)
        correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    train_loss /= size
    train_acc = correct_num / size
    return train_loss, train_acc

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    test_loss, correct_num = 0, 0
    model.eval()  # inform no dropout and fix bn during testing

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            pred = model(X)
            test_loss += loss_fn(pred, y).item() * X.size(0)
            correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    test_loss /= size
    test_acc = correct_num / size
    return test_loss, test_acc
        

In [21]:
train_loss_, train_acc_, test_loss_, test_acc_ = [], [], [], []
no_epochs = 1000

# initialize early stopper
# early_stopper = EarlyStopper(patience=3, min_delta=0)

# start training
for epoch in range(no_epochs):
    train_loss, train_acc = train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loss, test_acc = test_loop(test_dataloader, model, loss_fn)

    train_loss_.append(train_loss), train_acc_.append(train_acc)
    test_loss_.append(test_loss), test_acc_.append(test_acc)

    print(f"Epoch {epoch+1}, train_loss {train_loss:>7f} train_acc {train_acc:>4f}, test_loss {test_loss:>7f}, test_acc {test_acc:>4f}")

Epoch 1, train_loss 2.484124 train_acc 0.115000, test_loss 2.487494, test_acc 0.080000
Epoch 2, train_loss 2.473181 train_acc 0.115000, test_loss 2.489590, test_acc 0.080000
Epoch 3, train_loss 2.475293 train_acc 0.120000, test_loss 2.490917, test_acc 0.080000
Epoch 4, train_loss 2.465861 train_acc 0.125000, test_loss 2.492034, test_acc 0.060000
Epoch 5, train_loss 2.458362 train_acc 0.165000, test_loss 2.492848, test_acc 0.080000
Epoch 6, train_loss 2.462922 train_acc 0.110000, test_loss 2.492199, test_acc 0.100000
Epoch 7, train_loss 2.462324 train_acc 0.105000, test_loss 2.491209, test_acc 0.100000
Epoch 8, train_loss 2.458241 train_acc 0.140000, test_loss 2.490172, test_acc 0.040000
Epoch 9, train_loss 2.430703 train_acc 0.270000, test_loss 2.489311, test_acc 0.060000
Epoch 10, train_loss 2.424370 train_acc 0.255000, test_loss 2.488974, test_acc 0.040000
Epoch 11, train_loss 2.449313 train_acc 0.190000, test_loss 2.489041, test_acc 0.080000
Epoch 12, train_loss 2.397477 train_acc 0