In [1]:
from tqdm import tqdm
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchtext.vocab import build_vocab_from_iterator
import string
from data_preprocessing import load_sst
from utils import remove_special_content, replace_punct, preprocess
import numpy as np
import pandas as pd
import gensim.downloader as api
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from keras.preprocessing.text import text_to_word_sequence
from sklearn.model_selection import train_test_split
nltk.download('stopwords')
SEED = 42

torch.manual_seed(SEED)

[nltk_data] Downloading package stopwords to C:\Users\Chihao
[nltk_data]     Shen\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Chihao
[nltk_data]     Shen\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


<torch._C.Generator at 0x1d660fdbc50>

In [2]:
train_data, train_labels, val_data, val_labels, dev_data, dev_labels = load_sst()

INFO: SST-2 loaded


In [3]:
df_train = pd.DataFrame({'content':train_data, 'sentiment':train_labels})
df_train.reset_index(drop=True, inplace=True)
X_train, X_val, y_train, y_val = train_test_split(df_train['content'], df_train['sentiment'], test_size=0.2, random_state=42)


In [4]:
model_type = 'word2vec-google-news-300'  # 25 100 200 word2vec 300 
w2vModel = api.load(model_type)

INFO: loading projection weights from C:\Users\Chihao Shen/gensim-data\word2vec-google-news-300\word2vec-google-news-300.gz
INFO: KeyedVectors lifecycle event {'msg': 'loaded (3000000, 300) matrix of type float32 from C:\\Users\\Chihao Shen/gensim-data\\word2vec-google-news-300\\word2vec-google-news-300.gz', 'binary': True, 'encoding': 'utf8', 'datetime': '2024-04-09T12:45:23.954998', 'gensim': '4.3.2', 'python': '3.10.11 (tags/v3.10.11:7d4cc5a, Apr  5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'load_word2vec_format'}


In [5]:
texts = X_train
val_text = X_val

In [6]:
def tokenize(texts, max_length, tokenizer):
    texts = [s.lower() for s in texts]
    data = torch.zeros((len(texts), max_length), dtype=torch.int)
    for i, sentences in enumerate(texts):
        word_sequence = text_to_word_sequence(sentences)
        j = 0
        for word in word_sequence:
            try:
                if j < max_length:
                    data[i, j] = tokenizer[word]
                    j += 1
            except:
                data[i, j] = 0
                j += 1
    return data

In [7]:
words = list(w2vModel.key_to_index.keys())

def yield_tokens(data_iter):
    for text in data_iter:
        yield text_to_word_sequence(text)

vocab = build_vocab_from_iterator(yield_tokens(words))

In [8]:
max_length_1 = 56
max_length_2 = 1506

word_to_idx = {word: idx for idx, word in enumerate(vocab.get_itos())}
data = tokenize(texts, max_length_1, word_to_idx)
val_data = tokenize(val_text, max_length_1, word_to_idx)

In [9]:
def process(data, ori_df):
    labels = ori_df.values
    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)
    
    return data, labels

X_train_tensor, y_train= process(data, y_train)
X_val_tensor, y_val = process(val_data, y_val)

Shape of data tensor: torch.Size([53879, 56])
Shape of label tensor: (53879,)
Shape of data tensor: torch.Size([13470, 56])
Shape of label tensor: (13470,)


In [10]:
GLOVE_DIM = int(model_type.split('-')[-1])

def embed(model, word_to_idx):
    embed_size = GLOVE_DIM
    embedding_matrix = np.zeros((len(word_to_idx)+1, embed_size), dtype=np.float32)

    hits = 0
    misses = 0

    for word, i in word_to_idx.items():
        try:
            embedding_vector = model.get_vector(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
                hits += 1
            else:
                misses += 1
        except:
            misses += 1
            
    print("Converted %d words (%d misses)" % (hits, misses))
    return torch.tensor(embedding_matrix)


embedding_matrix = embed (w2vModel, word_to_idx)

Converted 159622 words (613542 misses)


In [11]:
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X =torch.tensor(X, dtype=torch.int)
        self.y =torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self,idx):
        return self.X[idx], self.y[idx]

def initialize_loader(X_train_tensor, X_val_tensor, y_train, y_val, batch_size=50):
    # no need to do the scale since original wv already did
    train_data = CustomDataset(X_train_tensor, y_train)
    val_data = CustomDataset(X_val_tensor, y_val)

    # convert to DataLoader for batch processing and shuffling
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    for inputs, targets in train_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    for inputs, targets in val_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    return train_dataloader, val_dataloader

In [12]:
# model
class CNN(nn.Module):
    def __init__(self, embedding_matrix, dim_in, dim_conv, dim_out, dropout_rate=0.5, max_length=max_length_1):
        super(CNN, self).__init__()
        self.embedding = nn.Embedding(embedding_matrix.shape[0], dim_in).from_pretrained(embedding_matrix, freeze=True)
        self.conv1_1 = nn.Conv1d(dim_in, dim_conv, 3, padding=5)
        self.conv1_2 = nn.Conv1d(dim_in, dim_conv, 4, padding=5)
        self.conv1_3 = nn.Conv1d(dim_in, dim_conv, 5, padding=5)
        self.conv1_4 = nn.Conv1d(dim_in, dim_conv, 6, padding=5)
        self.ReLU = nn.ReLU()
        self.maxpool_1 = nn.MaxPool1d(kernel_size=max_length+8)
        self.maxpool_2 = nn.MaxPool1d(kernel_size=max_length+7)
        self.maxpool_3 = nn.MaxPool1d(kernel_size=max_length+6)
        self.maxpool_4 = nn.MaxPool1d(kernel_size=max_length+5)
        self.fc_1 = nn.Linear(dim_conv * 4, dim_out)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.softmax = nn.Softmax()


    def forward(self, x):
        x = self.embedding(x).transpose(1, 2)
        x1 = self.conv1_1(x)
        x1 = self.ReLU(x1)
        x1 = self.maxpool_1(x1)


        x2 = self.conv1_2(x)
        x2 = self.ReLU(x2)
        x2 = self.maxpool_2(x2)

        x3 = self.conv1_3(x)
        x3 = self.ReLU(x3)
        x3 = self.maxpool_3(x3)

        x4 = self.conv1_4(x)
        x4 = self.ReLU(x4)
        x4 = self.maxpool_4(x4)

        x = torch.cat((x1, x2, x3, x4), dim=1)
        x = torch.flatten(x, 1)
        x = self.fc_1(x)
        x = self.dropout(x)
        x = self.softmax(x)
        return x

In [13]:
# training
max_norm = 3
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    optimizer.zero_grad()
    train_loss, correct_num = 0, 0
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()

        for _, module in model.named_modules():
            if isinstance(module, nn.Linear):
                for _, param in module.named_parameters():
                    param_norm = param.data.norm(2)
                    if param_norm > max_norm:
                        param.data.mul_(max_norm / (param_norm + 1e-6))

        optimizer.step()
        model.eval()
        with torch.no_grad():
            pred = model(X)
            loss = loss_fn(pred, y)
            train_loss += loss.item() * X.size(0)
            correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    train_loss /= size
    train_acc = correct_num / size
    return train_loss, train_acc

def val_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    val_loss, correct_num = 0, 0
    model.eval()  # inform no dropout and fix bn during testing

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            pred = model(X)
            val_loss += loss_fn(pred, y).item() * X.size(0)
            correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    val_loss /= size
    val_acc = correct_num / size
    return val_loss, val_acc
        

In [14]:
patience = 10
train_loss_, train_acc_, val_loss_, val_acc_ = [], [], [], []
no_epochs = 100


dim_in = GLOVE_DIM
dim_out = 2
lr = 0.0005
batch_sizes = [64, 128, 256]
dim_convs = [128, 256, 512, 1024]

for batch_size in batch_sizes:

    for dim_conv in dim_convs:
        # set the seed to avoid dataloader difference
        torch.manual_seed(SEED)
        train_dataloader, val_dataloader = initialize_loader(X_train_tensor, X_val_tensor, y_train, y_val, batch_size=batch_size)

        print(f'batch size: {batch_size}; conv layer dimension: {dim_conv}')
        epochs_without_improvement = 0
        best_val_loss = np.Inf

        model = CNN(embedding_matrix, dim_in, dim_conv, dim_out)
        model.to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # start training
        for epoch in tqdm(range(no_epochs)):
            train_loss, train_acc = train_loop(train_dataloader, model, loss_fn, optimizer)
            val_loss, val_acc = val_loop(val_dataloader, model, loss_fn)

            train_loss_.append(train_loss), train_acc_.append(train_acc)
            val_loss_.append(val_loss), val_acc_.append(val_acc)

            # early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    print(f'early stopping after {epoch+1} epochs')
                    print(f'best test loss: {best_val_loss}')
                    break


            if (epoch+1) % 5 == 0:
                print(
                    f"Epoch {epoch+1}, train_loss {train_loss:>7f} train_acc {train_acc:>4f}, val_loss {val_loss:>7f}, val_acc {val_acc:>4f}")

  self.X =torch.tensor(X, dtype=torch.int)


batch size: 64; conv layer dimension: 128


  return self._call_impl(*args, **kwargs)
  5%|▌         | 5/100 [00:35<11:22,  7.18s/it]

Epoch 5, train_loss 0.355872 train_acc 0.963047, val_loss 0.383526, val_acc 0.930067


 10%|█         | 10/100 [00:56<06:57,  4.64s/it]

Epoch 10, train_loss 0.339517 train_acc 0.977078, val_loss 0.376589, val_acc 0.937936


 15%|█▌        | 15/100 [01:18<06:03,  4.27s/it]

Epoch 15, train_loss 0.333998 train_acc 0.981254, val_loss 0.375096, val_acc 0.937268


 20%|██        | 20/100 [01:39<05:46,  4.34s/it]

Epoch 20, train_loss 0.331455 train_acc 0.983018, val_loss 0.374154, val_acc 0.937936


 25%|██▌       | 25/100 [02:01<05:28,  4.38s/it]

Epoch 25, train_loss 0.330315 train_acc 0.983797, val_loss 0.373505, val_acc 0.937639


 30%|███       | 30/100 [02:33<07:22,  6.32s/it]

Epoch 30, train_loss 0.328868 train_acc 0.984966, val_loss 0.375330, val_acc 0.935412


 35%|███▌      | 35/100 [02:58<05:58,  5.51s/it]

Epoch 35, train_loss 0.328518 train_acc 0.985300, val_loss 0.374394, val_acc 0.936823


 40%|████      | 40/100 [03:29<05:50,  5.84s/it]

Epoch 40, train_loss 0.328060 train_acc 0.985634, val_loss 0.373656, val_acc 0.938382


 40%|████      | 40/100 [03:33<05:20,  5.35s/it]

early stopping after 41 epochs
best test loss: 0.37297010057576074



  self.X =torch.tensor(X, dtype=torch.int)


batch size: 64; conv layer dimension: 256


  5%|▌         | 5/100 [00:27<08:36,  5.44s/it]

Epoch 5, train_loss 0.351174 train_acc 0.966388, val_loss 0.381358, val_acc 0.931626


 10%|█         | 10/100 [00:54<08:02,  5.36s/it]

Epoch 10, train_loss 0.337131 train_acc 0.978879, val_loss 0.375340, val_acc 0.937639


 15%|█▌        | 15/100 [01:20<07:33,  5.34s/it]

Epoch 15, train_loss 0.333419 train_acc 0.981217, val_loss 0.374174, val_acc 0.938085


 20%|██        | 20/100 [01:47<07:04,  5.31s/it]

Epoch 20, train_loss 0.331256 train_acc 0.982721, val_loss 0.375393, val_acc 0.936526


 25%|██▌       | 25/100 [02:15<07:17,  5.84s/it]

Epoch 25, train_loss 0.330210 train_acc 0.983741, val_loss 0.374918, val_acc 0.936600


 28%|██▊       | 28/100 [02:40<06:52,  5.73s/it]

early stopping after 29 epochs
best test loss: 0.37397878350995606





batch size: 64; conv layer dimension: 512


  5%|▌         | 5/100 [00:45<14:28,  9.14s/it]

Epoch 5, train_loss 0.347810 train_acc 0.969042, val_loss 0.380144, val_acc 0.932220


 10%|█         | 10/100 [01:30<13:29,  9.00s/it]

Epoch 10, train_loss 0.336096 train_acc 0.978971, val_loss 0.375301, val_acc 0.936526


 15%|█▌        | 15/100 [02:15<12:50,  9.06s/it]

Epoch 15, train_loss 0.333192 train_acc 0.981013, val_loss 0.377378, val_acc 0.933333


 20%|██        | 20/100 [03:01<12:02,  9.03s/it]

Epoch 20, train_loss 0.331331 train_acc 0.982516, val_loss 0.372697, val_acc 0.938901


 25%|██▌       | 25/100 [03:47<11:25,  9.14s/it]

Epoch 25, train_loss 0.330820 train_acc 0.982962, val_loss 0.374608, val_acc 0.937491


 30%|███       | 30/100 [04:34<11:01,  9.45s/it]

Epoch 30, train_loss 0.330185 train_acc 0.983407, val_loss 0.373816, val_acc 0.937416


 31%|███       | 31/100 [04:54<10:55,  9.50s/it]

early stopping after 32 epochs
best test loss: 0.37231875157126165





batch size: 64; conv layer dimension: 1024


  5%|▌         | 5/100 [01:30<28:18, 17.88s/it]

Epoch 5, train_loss 0.349104 train_acc 0.966480, val_loss 0.383283, val_acc 0.928211


 10%|█         | 10/100 [02:56<26:07, 17.42s/it]

Epoch 10, train_loss 0.338073 train_acc 0.976466, val_loss 0.381310, val_acc 0.930661


 15%|█▌        | 15/100 [04:21<24:18, 17.16s/it]

Epoch 15, train_loss 0.334911 train_acc 0.979046, val_loss 0.378293, val_acc 0.933333


 20%|██        | 20/100 [05:47<22:46, 17.08s/it]

Epoch 20, train_loss 0.333628 train_acc 0.980085, val_loss 0.374185, val_acc 0.936526


 25%|██▌       | 25/100 [07:13<21:36, 17.29s/it]

Epoch 25, train_loss 0.333005 train_acc 0.980568, val_loss 0.375572, val_acc 0.935189


 30%|███       | 30/100 [08:42<20:44, 17.78s/it]

Epoch 30, train_loss 0.331526 train_acc 0.982071, val_loss 0.376197, val_acc 0.935561


 35%|███▌      | 35/100 [10:12<19:26, 17.94s/it]

Epoch 35, train_loss 0.331246 train_acc 0.982275, val_loss 0.375705, val_acc 0.935486


 35%|███▌      | 35/100 [10:29<19:29, 18.00s/it]

early stopping after 36 epochs
best test loss: 0.37410328097492124





batch size: 128; conv layer dimension: 128


  5%|▌         | 5/100 [00:18<05:46,  3.64s/it]

Epoch 5, train_loss 0.360562 train_acc 0.959576, val_loss 0.385859, val_acc 0.929621


 10%|█         | 10/100 [00:35<05:13,  3.48s/it]

Epoch 10, train_loss 0.343139 train_acc 0.975278, val_loss 0.378104, val_acc 0.935857


 15%|█▌        | 15/100 [00:53<05:07,  3.61s/it]

Epoch 15, train_loss 0.336353 train_acc 0.980048, val_loss 0.375697, val_acc 0.936897


 20%|██        | 20/100 [01:11<04:46,  3.58s/it]

Epoch 20, train_loss 0.332990 train_acc 0.982535, val_loss 0.373776, val_acc 0.938382


 25%|██▌       | 25/100 [01:28<04:29,  3.59s/it]

Epoch 25, train_loss 0.330947 train_acc 0.983816, val_loss 0.373722, val_acc 0.938307


 30%|███       | 30/100 [01:46<04:08,  3.55s/it]

Epoch 30, train_loss 0.329427 train_acc 0.984911, val_loss 0.374919, val_acc 0.936080


 35%|███▌      | 35/100 [02:04<03:50,  3.55s/it]

Epoch 35, train_loss 0.328837 train_acc 0.985208, val_loss 0.373321, val_acc 0.938159


 40%|████      | 40/100 [02:22<03:32,  3.54s/it]

Epoch 40, train_loss 0.328001 train_acc 0.985876, val_loss 0.374067, val_acc 0.937194


 45%|████▌     | 45/100 [02:40<03:22,  3.67s/it]

Epoch 45, train_loss 0.327602 train_acc 0.986173, val_loss 0.373452, val_acc 0.938010


 50%|█████     | 50/100 [02:59<03:05,  3.70s/it]

Epoch 50, train_loss 0.327220 train_acc 0.986581, val_loss 0.375024, val_acc 0.936377


 55%|█████▌    | 55/100 [03:16<02:37,  3.51s/it]

Epoch 55, train_loss 0.326954 train_acc 0.986674, val_loss 0.373392, val_acc 0.937788


 55%|█████▌    | 55/100 [03:20<02:43,  3.64s/it]

early stopping after 56 epochs
best test loss: 0.37316124169757475





batch size: 128; conv layer dimension: 256


  5%|▌         | 5/100 [00:23<07:36,  4.80s/it]

Epoch 5, train_loss 0.354309 train_acc 0.964365, val_loss 0.383700, val_acc 0.930512


 10%|█         | 10/100 [00:47<07:07,  4.75s/it]

Epoch 10, train_loss 0.338567 train_acc 0.978248, val_loss 0.378311, val_acc 0.936006


 15%|█▌        | 15/100 [01:11<06:44,  4.76s/it]

Epoch 15, train_loss 0.333564 train_acc 0.981830, val_loss 0.374824, val_acc 0.936674


 20%|██        | 20/100 [01:35<06:26,  4.83s/it]

Epoch 20, train_loss 0.331051 train_acc 0.983649, val_loss 0.375194, val_acc 0.936971


 25%|██▌       | 25/100 [02:00<06:05,  4.87s/it]

Epoch 25, train_loss 0.329594 train_acc 0.984502, val_loss 0.375820, val_acc 0.934744


 30%|███       | 30/100 [02:24<05:37,  4.82s/it]

Epoch 30, train_loss 0.328456 train_acc 0.985449, val_loss 0.374731, val_acc 0.935486


 35%|███▌      | 35/100 [02:49<05:22,  4.96s/it]

Epoch 35, train_loss 0.328029 train_acc 0.985802, val_loss 0.373205, val_acc 0.937862


 40%|████      | 40/100 [03:14<04:59,  5.00s/it]

Epoch 40, train_loss 0.327865 train_acc 0.985709, val_loss 0.373517, val_acc 0.937936


 45%|████▌     | 45/100 [03:38<04:29,  4.89s/it]

Epoch 45, train_loss 0.327408 train_acc 0.986117, val_loss 0.374766, val_acc 0.936229


 47%|████▋     | 47/100 [03:52<04:22,  4.95s/it]


early stopping after 48 epochs
best test loss: 0.3720446774095806
batch size: 128; conv layer dimension: 512


  5%|▌         | 5/100 [00:41<13:11,  8.33s/it]

Epoch 5, train_loss 0.349012 train_acc 0.968503, val_loss 0.379279, val_acc 0.933556


 10%|█         | 10/100 [01:23<12:42,  8.47s/it]

Epoch 10, train_loss 0.336318 train_acc 0.979287, val_loss 0.375711, val_acc 0.937120


 15%|█▌        | 15/100 [02:09<12:36,  8.90s/it]

Epoch 15, train_loss 0.332161 train_acc 0.982498, val_loss 0.375725, val_acc 0.936080


 20%|██        | 20/100 [02:50<11:21,  8.52s/it]

Epoch 20, train_loss 0.330532 train_acc 0.983686, val_loss 0.374963, val_acc 0.935932


 21%|██        | 21/100 [03:08<11:47,  8.96s/it]

early stopping after 22 epochs
best test loss: 0.37342219456266984





batch size: 128; conv layer dimension: 1024


  5%|▌         | 5/100 [01:24<26:59, 17.04s/it]

Epoch 5, train_loss 0.347627 train_acc 0.968745, val_loss 0.384616, val_acc 0.927617


 10%|█         | 10/100 [02:51<25:57, 17.31s/it]

Epoch 10, train_loss 0.336446 train_acc 0.978693, val_loss 0.374692, val_acc 0.936897


 15%|█▌        | 15/100 [04:14<23:44, 16.76s/it]

Epoch 15, train_loss 0.333399 train_acc 0.980809, val_loss 0.378456, val_acc 0.932814


 20%|██        | 20/100 [05:36<21:46, 16.33s/it]

Epoch 20, train_loss 0.331793 train_acc 0.982071, val_loss 0.374533, val_acc 0.936600


 25%|██▌       | 25/100 [06:59<20:47, 16.64s/it]

Epoch 25, train_loss 0.330669 train_acc 0.983185, val_loss 0.375129, val_acc 0.936897


 30%|███       | 30/100 [08:25<19:44, 16.92s/it]

Epoch 30, train_loss 0.330306 train_acc 0.983370, val_loss 0.374977, val_acc 0.935635


 35%|███▌      | 35/100 [09:49<18:08, 16.74s/it]

Epoch 35, train_loss 0.329690 train_acc 0.983927, val_loss 0.375371, val_acc 0.935115


 40%|████      | 40/100 [11:11<16:35, 16.60s/it]

Epoch 40, train_loss 0.329123 train_acc 0.984410, val_loss 0.374028, val_acc 0.936971


 45%|████▌     | 45/100 [12:34<15:12, 16.59s/it]

Epoch 45, train_loss 0.328409 train_acc 0.985022, val_loss 0.378813, val_acc 0.933482


 50%|█████     | 50/100 [13:56<13:38, 16.37s/it]

Epoch 50, train_loss 0.328974 train_acc 0.984484, val_loss 0.376212, val_acc 0.935709


 51%|█████     | 51/100 [14:29<13:55, 17.05s/it]

early stopping after 52 epochs
best test loss: 0.3731446010365518





batch size: 256; conv layer dimension: 128


  5%|▌         | 5/100 [00:19<06:18,  3.98s/it]

Epoch 5, train_loss 0.368641 train_acc 0.951799, val_loss 0.390849, val_acc 0.923682


 10%|█         | 10/100 [00:40<06:14,  4.16s/it]

Epoch 10, train_loss 0.348608 train_acc 0.971343, val_loss 0.381061, val_acc 0.933705


 15%|█▌        | 15/100 [01:02<06:03,  4.28s/it]

Epoch 15, train_loss 0.340409 train_acc 0.977988, val_loss 0.376544, val_acc 0.937120


 20%|██        | 20/100 [01:22<05:32,  4.16s/it]

Epoch 20, train_loss 0.336148 train_acc 0.980939, val_loss 0.375253, val_acc 0.937120


 25%|██▌       | 25/100 [01:44<05:32,  4.43s/it]

Epoch 25, train_loss 0.333248 train_acc 0.982869, val_loss 0.374318, val_acc 0.938456


 30%|███       | 30/100 [02:06<05:07,  4.40s/it]

Epoch 30, train_loss 0.331580 train_acc 0.983964, val_loss 0.374284, val_acc 0.938085


 35%|███▌      | 35/100 [02:29<04:53,  4.51s/it]

Epoch 35, train_loss 0.330154 train_acc 0.984874, val_loss 0.374454, val_acc 0.937045


 40%|████      | 40/100 [02:53<04:47,  4.79s/it]

Epoch 40, train_loss 0.329025 train_acc 0.985505, val_loss 0.373920, val_acc 0.937491


 45%|████▌     | 45/100 [03:18<04:28,  4.88s/it]

Epoch 45, train_loss 0.328179 train_acc 0.986284, val_loss 0.372835, val_acc 0.938604


 50%|█████     | 50/100 [03:41<03:54,  4.68s/it]

Epoch 50, train_loss 0.327606 train_acc 0.986674, val_loss 0.373136, val_acc 0.938307


 55%|█████▌    | 55/100 [04:05<03:32,  4.72s/it]

Epoch 55, train_loss 0.327278 train_acc 0.986859, val_loss 0.372611, val_acc 0.939569


 57%|█████▋    | 57/100 [04:18<03:15,  4.54s/it]

early stopping after 58 epochs
best test loss: 0.37252527705899147





batch size: 256; conv layer dimension: 256


  5%|▌         | 5/100 [00:28<08:50,  5.59s/it]

Epoch 5, train_loss 0.361285 train_acc 0.958128, val_loss 0.387694, val_acc 0.927914


 10%|█         | 10/100 [00:54<07:55,  5.28s/it]

Epoch 10, train_loss 0.342871 train_acc 0.975538, val_loss 0.377387, val_acc 0.936229


 15%|█▌        | 15/100 [01:21<07:41,  5.43s/it]

Epoch 15, train_loss 0.336108 train_acc 0.980623, val_loss 0.374909, val_acc 0.938307


 20%|██        | 20/100 [01:49<07:13,  5.42s/it]

Epoch 20, train_loss 0.333190 train_acc 0.982553, val_loss 0.374363, val_acc 0.938159


 25%|██▌       | 25/100 [02:15<06:21,  5.09s/it]

Epoch 25, train_loss 0.330999 train_acc 0.983927, val_loss 0.374658, val_acc 0.936600


 30%|███       | 30/100 [02:38<05:28,  4.69s/it]

Epoch 30, train_loss 0.329841 train_acc 0.984874, val_loss 0.375841, val_acc 0.936154


 35%|███▌      | 35/100 [03:01<05:01,  4.64s/it]

Epoch 35, train_loss 0.328830 train_acc 0.985467, val_loss 0.373895, val_acc 0.937565


 40%|████      | 40/100 [03:23<04:34,  4.58s/it]

Epoch 40, train_loss 0.328236 train_acc 0.985727, val_loss 0.373477, val_acc 0.937268


 40%|████      | 40/100 [03:28<05:13,  5.22s/it]

early stopping after 41 epochs
best test loss: 0.3729956217137813





batch size: 256; conv layer dimension: 512


  5%|▌         | 5/100 [00:46<14:35,  9.22s/it]

Epoch 5, train_loss 0.354268 train_acc 0.964736, val_loss 0.382057, val_acc 0.931255


 10%|█         | 10/100 [01:31<13:31,  9.01s/it]

Epoch 10, train_loss 0.338403 train_acc 0.978563, val_loss 0.376169, val_acc 0.936823


 15%|█▌        | 15/100 [02:16<12:43,  8.98s/it]

Epoch 15, train_loss 0.333047 train_acc 0.982424, val_loss 0.374968, val_acc 0.937045


 20%|██        | 20/100 [03:03<12:42,  9.54s/it]

Epoch 20, train_loss 0.330580 train_acc 0.984187, val_loss 0.373600, val_acc 0.938382


 25%|██▌       | 25/100 [03:53<12:33, 10.05s/it]

Epoch 25, train_loss 0.329567 train_acc 0.984781, val_loss 0.374498, val_acc 0.937045


 30%|███       | 30/100 [04:42<11:17,  9.69s/it]

Epoch 30, train_loss 0.328560 train_acc 0.985449, val_loss 0.374688, val_acc 0.936526


 32%|███▏      | 32/100 [05:10<11:00,  9.72s/it]

early stopping after 33 epochs
best test loss: 0.3731804768908174





batch size: 256; conv layer dimension: 1024


  5%|▌         | 5/100 [01:26<27:44, 17.52s/it]

Epoch 5, train_loss 0.349178 train_acc 0.968485, val_loss 0.383213, val_acc 0.929473


 10%|█         | 10/100 [02:52<25:51, 17.24s/it]

Epoch 10, train_loss 0.336058 train_acc 0.979640, val_loss 0.376745, val_acc 0.935932


 15%|█▌        | 15/100 [04:16<24:01, 16.96s/it]

Epoch 15, train_loss 0.332186 train_acc 0.982461, val_loss 0.374595, val_acc 0.937120


 20%|██        | 20/100 [05:40<22:24, 16.81s/it]

Epoch 20, train_loss 0.330484 train_acc 0.983704, val_loss 0.373650, val_acc 0.938085


 25%|██▌       | 25/100 [07:06<21:42, 17.37s/it]

Epoch 25, train_loss 0.329006 train_acc 0.984799, val_loss 0.374406, val_acc 0.936674


 27%|██▋       | 27/100 [07:59<21:35, 17.75s/it]

early stopping after 28 epochs
best test loss: 0.3731068546141884





Parameter of the best model: 

batch size: 128; conv layer dimension: 256