In [1]:
from tqdm import tqdm
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchtext.vocab import build_vocab_from_iterator
import string
from data_preprocessing import load_sst
from utils import remove_special_content, replace_punct, preprocess
import numpy as np
import pandas as pd
import gensim.downloader as api
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from keras.preprocessing.text import text_to_word_sequence
from sklearn.model_selection import train_test_split
nltk.download('stopwords')
SEED = 42

torch.manual_seed(SEED)

[nltk_data] Downloading package stopwords to C:\Users\Chihao
[nltk_data]     Shen\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Chihao
[nltk_data]     Shen\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


<torch._C.Generator at 0x23edda27cd0>

In [2]:
train_data, train_labels, val_data, val_labels, dev_data, dev_labels = load_sst()

INFO: SST-2 loaded


In [3]:
df_train = pd.DataFrame({'content':train_data, 'sentiment':train_labels})
df_train.reset_index(drop=True, inplace=True)
X_train, X_val, y_train, y_val = train_test_split(df_train['content'], df_train['sentiment'], test_size=0.2, random_state=42)


In [4]:
model_type = 'word2vec-google-news-300'  # 25 100 200 word2vec 300 
w2vModel = api.load(model_type)

INFO: loading projection weights from C:\Users\Chihao Shen/gensim-data\word2vec-google-news-300\word2vec-google-news-300.gz
INFO: KeyedVectors lifecycle event {'msg': 'loaded (3000000, 300) matrix of type float32 from C:\\Users\\Chihao Shen/gensim-data\\word2vec-google-news-300\\word2vec-google-news-300.gz', 'binary': True, 'encoding': 'utf8', 'datetime': '2024-04-08T04:54:24.407056', 'gensim': '4.3.2', 'python': '3.10.11 (tags/v3.10.11:7d4cc5a, Apr  5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'load_word2vec_format'}


In [5]:
texts = X_train
val_text = X_val

In [6]:
def tokenize(texts, max_length, tokenizer):
    texts = [s.lower() for s in texts]
    data = torch.zeros((len(texts), max_length), dtype=torch.int)
    for i, sentences in enumerate(texts):
        word_sequence = text_to_word_sequence(sentences)
        j = 0
        for word in word_sequence:
            try:
                if j < max_length:
                    data[i, j] = tokenizer[word]
                    j += 1
            except:
                data[i, j] = 0
                j += 1
    return data

In [7]:
words = list(w2vModel.key_to_index.keys())

def yield_tokens(data_iter):
    for text in data_iter:
        yield text_to_word_sequence(text)

vocab = build_vocab_from_iterator(yield_tokens(words))

In [8]:
max_length_1 = 56
max_length_2 = 1506

word_to_idx = {word: idx for idx, word in enumerate(vocab.get_itos())}
data = tokenize(texts, max_length_1, word_to_idx)
val_data = tokenize(val_text, max_length_1, word_to_idx)

In [9]:
def process(data, ori_df):
    labels = ori_df.values
    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)
    
    return data, labels

X_train_tensor, y_train= process(data, y_train)
X_val_tensor, y_val = process(val_data, y_val)

Shape of data tensor: torch.Size([53879, 56])
Shape of label tensor: (53879,)
Shape of data tensor: torch.Size([13470, 56])
Shape of label tensor: (13470,)


In [10]:
GLOVE_DIM = int(model_type.split('-')[-1])

def embed(model, word_to_idx):
    embed_size = GLOVE_DIM
    embedding_matrix = np.zeros((len(word_to_idx)+1, embed_size), dtype=np.float32)

    hits = 0
    misses = 0

    for word, i in word_to_idx.items():
        try:
            embedding_vector = model.get_vector(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
                hits += 1
            else:
                misses += 1
        except:
            misses += 1
            
    print("Converted %d words (%d misses)" % (hits, misses))
    return torch.tensor(embedding_matrix)


embedding_matrix = embed (w2vModel, word_to_idx)

Converted 159622 words (613542 misses)


In [11]:
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X =torch.tensor(X, dtype=torch.int)
        self.y =torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self,idx):
        return self.X[idx], self.y[idx]

def initialize_loader(X_train_tensor, X_val_tensor, y_train, y_val, batch_size=50):
    # no need to do the scale since original wv already did
    train_data = CustomDataset(X_train_tensor, y_train)
    val_data = CustomDataset(X_val_tensor, y_val)

    # convert to DataLoader for batch processing and shuffling
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    for inputs, targets in train_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    for inputs, targets in val_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
    return train_dataloader, val_dataloader

In [12]:
# model
class CNN(nn.Module):
    def __init__(self, embedding_matrix, dim_in, dim_conv, dim_out, dropout_rate=0.5, max_length=max_length_1):
        super(CNN, self).__init__()
        self.embedding = nn.Embedding(embedding_matrix.shape[0], dim_in).from_pretrained(embedding_matrix, freeze=True)
        self.conv1_3 = nn.Conv1d(dim_in, dim_conv, 3, padding=5)
        self.conv1_4 = nn.Conv1d(dim_in, dim_conv, 4, padding=5)
        self.conv1_5 = nn.Conv1d(dim_in, dim_conv, 5, padding=5)
        self.ReLU = nn.ReLU()
        self.maxpool_1 = nn.MaxPool1d(kernel_size=max_length+8)
        self.maxpool_2 = nn.MaxPool1d(kernel_size=max_length+7)
        self.maxpool_3 = nn.MaxPool1d(kernel_size=max_length+6)
        self.fc = nn.Linear(dim_conv * 3, dim_out)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.softmax = nn.Softmax()


    def forward(self, x):
        x = self.embedding(x).transpose(1, 2)
        x1 = self.conv1_3(x)
        x1 = self.ReLU(x1)
        x1 = self.maxpool_1(x1)


        x2 = self.conv1_4(x)
        x2 = self.ReLU(x2)
        x2 = self.maxpool_2(x2)

        x3 = self.conv1_5(x)
        x3 = self.ReLU(x3)
        x3 = self.maxpool_3(x3)

        x = torch.cat((x1, x2, x3), dim=1)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        x = self.dropout(x)
        x = self.softmax(x)
        return x

In [13]:
# training
max_norm = 3
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    optimizer.zero_grad()
    train_loss, correct_num = 0, 0
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()

        for _, module in model.named_modules():
            if isinstance(module, nn.Linear):
                for _, param in module.named_parameters():
                    param_norm = param.data.norm(2)
                    if param_norm > max_norm:
                        param.data.mul_(max_norm / (param_norm + 1e-6))

        optimizer.step()
        model.eval()
        with torch.no_grad():
            pred = model(X)
            loss = loss_fn(pred, y)
            train_loss += loss.item() * X.size(0)
            correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    train_loss /= size
    train_acc = correct_num / size
    return train_loss, train_acc

def val_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    val_loss, correct_num = 0, 0
    model.eval()  # inform no dropout and fix bn during testing

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            pred = model(X)
            val_loss += loss_fn(pred, y).item() * X.size(0)
            correct_num += (torch.eq(torch.argmax(pred, dim=1), y)).type(torch.float).sum().item()

    val_loss /= size
    val_acc = correct_num / size
    return val_loss, val_acc
        

In [14]:
torch.manual_seed(SEED)

patience = 10
train_loss_, train_acc_, val_loss_, val_acc_ = [], [], [], []
no_epochs = 100


dim_in = GLOVE_DIM
dim_out = 2
lr = 0.001
batch_sizes = [32, 64, 128]
dim_convs = [128, 256, 512, 1024]

for batch_size in batch_sizes:
    train_dataloader, val_dataloader = initialize_loader(X_train_tensor, X_val_tensor, y_train, y_val, batch_size=batch_size)

    for dim_conv in dim_convs:
        print(f'batch size: {batch_size}; conv layer dimension: {dim_conv}')
        epochs_without_improvement = 0
        best_val_loss = np.Inf

        model = CNN(embedding_matrix, dim_in, dim_conv, dim_out)
        model.to(device)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # start training
        for epoch in tqdm(range(no_epochs)):
            train_loss, train_acc = train_loop(train_dataloader, model, loss_fn, optimizer)
            val_loss, val_acc = val_loop(val_dataloader, model, loss_fn)

            train_loss_.append(train_loss), train_acc_.append(train_acc)
            val_loss_.append(val_loss), val_acc_.append(val_acc)

            # early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    print(f'early stopping after {epoch+1} epochs')
                    print(f'best test loss: {best_val_loss}')
                    break


            if (epoch+1) % 5 == 0:
                print(
                    f"Epoch {epoch+1}, train_loss {train_loss:>7f} train_acc {train_acc:>4f}, val_loss {val_loss:>7f}, val_acc {val_acc:>4f}")

  self.X =torch.tensor(X, dtype=torch.int)


batch size: 32; conv layer dimension: 128


  return self._call_impl(*args, **kwargs)
  5%|▌         | 5/100 [00:49<15:44,  9.95s/it]

Epoch 5, train_loss 0.352537 train_acc 0.963771, val_loss 0.388655, val_acc 0.924425


 10%|█         | 10/100 [01:39<14:48,  9.87s/it]

Epoch 10, train_loss 0.339892 train_acc 0.975037, val_loss 0.378350, val_acc 0.933630


 15%|█▌        | 15/100 [02:28<14:02,  9.92s/it]

Epoch 15, train_loss 0.335922 train_acc 0.978229, val_loss 0.378499, val_acc 0.932591


 19%|█▉        | 19/100 [03:17<14:02, 10.41s/it]

early stopping after 20 epochs
best test loss: 0.37835026886964074
batch size: 32; conv layer dimension: 256



  5%|▌         | 5/100 [00:48<15:35,  9.84s/it]

Epoch 5, train_loss 0.353177 train_acc 0.962490, val_loss 0.384536, val_acc 0.925761


 10%|█         | 10/100 [01:38<14:52,  9.92s/it]

Epoch 10, train_loss 0.342662 train_acc 0.971696, val_loss 0.379606, val_acc 0.932146


 15%|█▌        | 15/100 [02:28<14:04,  9.94s/it]

Epoch 15, train_loss 0.337868 train_acc 0.976039, val_loss 0.378985, val_acc 0.933259


 20%|██        | 20/100 [03:17<13:12,  9.90s/it]

Epoch 20, train_loss 0.336072 train_acc 0.977617, val_loss 0.378946, val_acc 0.932814


 25%|██▌       | 25/100 [04:07<12:27,  9.96s/it]

Epoch 25, train_loss 0.335324 train_acc 0.978155, val_loss 0.379874, val_acc 0.932220


 26%|██▌       | 26/100 [04:27<12:39, 10.27s/it]

early stopping after 27 epochs
best test loss: 0.37588964621491494
batch size: 32; conv layer dimension: 512



  5%|▌         | 5/100 [00:51<16:27, 10.40s/it]

Epoch 5, train_loss 0.354358 train_acc 0.960430, val_loss 0.387631, val_acc 0.922791


 10%|█         | 10/100 [01:43<15:33, 10.37s/it]

Epoch 10, train_loss 0.342752 train_acc 0.971399, val_loss 0.382639, val_acc 0.929102


 15%|█▌        | 15/100 [02:35<14:40, 10.36s/it]

Epoch 15, train_loss 0.340251 train_acc 0.973552, val_loss 0.383666, val_acc 0.927394


 20%|██        | 20/100 [03:27<13:50, 10.39s/it]

Epoch 20, train_loss 0.337467 train_acc 0.976076, val_loss 0.379130, val_acc 0.932294


 25%|██▌       | 25/100 [04:19<12:58, 10.38s/it]

Epoch 25, train_loss 0.336400 train_acc 0.977097, val_loss 0.382005, val_acc 0.929547


 28%|██▊       | 28/100 [05:00<12:53, 10.74s/it]

early stopping after 29 epochs
best test loss: 0.3771600264704661
batch size: 32; conv layer dimension: 1024



  5%|▌         | 5/100 [01:16<24:12, 15.28s/it]

Epoch 5, train_loss 0.358445 train_acc 0.955938, val_loss 0.387683, val_acc 0.924202


 10%|█         | 10/100 [02:32<22:52, 15.25s/it]

Epoch 10, train_loss 0.345638 train_acc 0.968151, val_loss 0.383446, val_acc 0.928211


 15%|█▌        | 15/100 [03:49<21:36, 15.26s/it]

Epoch 15, train_loss 0.341772 train_acc 0.971974, val_loss 0.380416, val_acc 0.931255


 20%|██        | 20/100 [05:05<20:24, 15.30s/it]

Epoch 20, train_loss 0.339300 train_acc 0.974257, val_loss 0.382056, val_acc 0.928879


 25%|██▌       | 25/100 [06:22<19:04, 15.26s/it]

Epoch 25, train_loss 0.338320 train_acc 0.975111, val_loss 0.381982, val_acc 0.929324


 30%|███       | 30/100 [07:38<17:44, 15.21s/it]

Epoch 30, train_loss 0.337021 train_acc 0.976429, val_loss 0.387024, val_acc 0.925390


 35%|███▌      | 35/100 [08:54<16:28, 15.21s/it]

Epoch 35, train_loss 0.337475 train_acc 0.976002, val_loss 0.379281, val_acc 0.931552


 40%|████      | 40/100 [10:10<15:14, 15.25s/it]

Epoch 40, train_loss 0.336064 train_acc 0.977375, val_loss 0.379307, val_acc 0.932368


 45%|████▌     | 45/100 [11:26<13:58, 15.25s/it]

Epoch 45, train_loss 0.335198 train_acc 0.978155, val_loss 0.377843, val_acc 0.933779


 46%|████▌     | 46/100 [11:57<14:01, 15.59s/it]

early stopping after 47 epochs
best test loss: 0.3771123662425043



  self.X =torch.tensor(X, dtype=torch.int)


batch size: 64; conv layer dimension: 128


  5%|▌         | 5/100 [00:26<08:21,  5.28s/it]

Epoch 5, train_loss 0.352847 train_acc 0.964884, val_loss 0.384078, val_acc 0.928434


 10%|█         | 10/100 [00:52<07:54,  5.28s/it]

Epoch 10, train_loss 0.338798 train_acc 0.976856, val_loss 0.376526, val_acc 0.934298


 15%|█▌        | 15/100 [01:19<07:25,  5.24s/it]

Epoch 15, train_loss 0.334932 train_acc 0.979584, val_loss 0.375612, val_acc 0.935412


 20%|██        | 20/100 [01:45<06:54,  5.18s/it]

Epoch 20, train_loss 0.332725 train_acc 0.981421, val_loss 0.377682, val_acc 0.933779


 25%|██▌       | 25/100 [02:11<06:32,  5.24s/it]

Epoch 25, train_loss 0.331583 train_acc 0.982331, val_loss 0.378483, val_acc 0.932814


 30%|███       | 30/100 [02:37<06:08,  5.27s/it]

Epoch 30, train_loss 0.330907 train_acc 0.982758, val_loss 0.375338, val_acc 0.936674


 35%|███▌      | 35/100 [03:03<05:40,  5.24s/it]

Epoch 35, train_loss 0.330536 train_acc 0.983147, val_loss 0.374575, val_acc 0.936451


 38%|███▊      | 38/100 [03:24<05:34,  5.39s/it]

early stopping after 39 epochs
best test loss: 0.37422966547780506
batch size: 64; conv layer dimension: 256



  5%|▌         | 5/100 [00:27<08:42,  5.50s/it]

Epoch 5, train_loss 0.350448 train_acc 0.966109, val_loss 0.381650, val_acc 0.930512


 10%|█         | 10/100 [00:55<08:14,  5.50s/it]

Epoch 10, train_loss 0.338871 train_acc 0.975909, val_loss 0.388631, val_acc 0.923014


 15%|█▌        | 15/100 [01:22<07:49,  5.52s/it]

Epoch 15, train_loss 0.335622 train_acc 0.978600, val_loss 0.376628, val_acc 0.935041


 20%|██        | 20/100 [01:50<07:22,  5.53s/it]

Epoch 20, train_loss 0.333435 train_acc 0.980586, val_loss 0.375582, val_acc 0.936154


 25%|██▌       | 25/100 [02:17<06:53,  5.51s/it]

Epoch 25, train_loss 0.332588 train_acc 0.981106, val_loss 0.376783, val_acc 0.934521


 30%|███       | 30/100 [02:45<06:24,  5.50s/it]

Epoch 30, train_loss 0.331775 train_acc 0.981793, val_loss 0.375998, val_acc 0.936006


 35%|███▌      | 35/100 [03:12<05:56,  5.49s/it]

Epoch 35, train_loss 0.331371 train_acc 0.982164, val_loss 0.375150, val_acc 0.935932


 40%|████      | 40/100 [03:40<05:29,  5.49s/it]

Epoch 40, train_loss 0.331145 train_acc 0.982331, val_loss 0.376218, val_acc 0.935041


 43%|████▎     | 43/100 [04:02<05:20,  5.63s/it]

early stopping after 44 epochs
best test loss: 0.3744508146133437
batch size: 64; conv layer dimension: 512



  5%|▌         | 5/100 [00:37<11:56,  7.54s/it]

Epoch 5, train_loss 0.351533 train_acc 0.964253, val_loss 0.385235, val_acc 0.926355


 10%|█         | 10/100 [01:15<11:21,  7.57s/it]

Epoch 10, train_loss 0.340150 train_acc 0.974201, val_loss 0.379264, val_acc 0.932591


 15%|█▌        | 15/100 [01:53<10:44,  7.58s/it]

Epoch 15, train_loss 0.336975 train_acc 0.977171, val_loss 0.377181, val_acc 0.934224


 20%|██        | 20/100 [02:31<10:05,  7.57s/it]

Epoch 20, train_loss 0.334575 train_acc 0.979176, val_loss 0.377647, val_acc 0.933556


 25%|██▌       | 25/100 [03:09<09:30,  7.60s/it]

Epoch 25, train_loss 0.333963 train_acc 0.979584, val_loss 0.375606, val_acc 0.935486


 26%|██▌       | 26/100 [03:24<09:42,  7.87s/it]

early stopping after 27 epochs
best test loss: 0.37549032950808403
batch size: 64; conv layer dimension: 1024



  5%|▌         | 5/100 [01:04<20:29, 12.95s/it]

Epoch 5, train_loss 0.353427 train_acc 0.961414, val_loss 0.391246, val_acc 0.920564


 10%|█         | 10/100 [02:09<19:27, 12.97s/it]

Epoch 10, train_loss 0.342592 train_acc 0.971714, val_loss 0.383489, val_acc 0.928211


 15%|█▌        | 15/100 [03:14<18:23, 12.98s/it]

Epoch 15, train_loss 0.338803 train_acc 0.974925, val_loss 0.379634, val_acc 0.932146


 20%|██        | 20/100 [04:19<17:18, 12.98s/it]

Epoch 20, train_loss 0.336643 train_acc 0.977078, val_loss 0.376740, val_acc 0.934298


 25%|██▌       | 25/100 [05:24<16:12, 12.96s/it]

Epoch 25, train_loss 0.335764 train_acc 0.977765, val_loss 0.378937, val_acc 0.933259


 30%|███       | 30/100 [06:29<15:08, 12.98s/it]

Epoch 30, train_loss 0.334946 train_acc 0.978545, val_loss 0.376265, val_acc 0.936303


 35%|███▌      | 35/100 [07:33<14:03, 12.98s/it]

Epoch 35, train_loss 0.334359 train_acc 0.979046, val_loss 0.375077, val_acc 0.936674


 40%|████      | 40/100 [08:38<12:57, 12.96s/it]

Epoch 40, train_loss 0.333819 train_acc 0.979640, val_loss 0.378811, val_acc 0.932071


 44%|████▍     | 44/100 [09:43<12:23, 13.27s/it]

early stopping after 45 epochs
best test loss: 0.37507651878801557





batch size: 128; conv layer dimension: 128


  5%|▌         | 5/100 [00:16<05:13,  3.30s/it]

Epoch 5, train_loss 0.357238 train_acc 0.961376, val_loss 0.384017, val_acc 0.930215


 10%|█         | 10/100 [00:33<04:57,  3.31s/it]

Epoch 10, train_loss 0.341199 train_acc 0.975148, val_loss 0.377090, val_acc 0.934967


 15%|█▌        | 15/100 [00:49<04:39,  3.29s/it]

Epoch 15, train_loss 0.335378 train_acc 0.980104, val_loss 0.375483, val_acc 0.936526


 20%|██        | 20/100 [01:05<04:23,  3.30s/it]

Epoch 20, train_loss 0.332865 train_acc 0.981663, val_loss 0.375260, val_acc 0.937045


 25%|██▌       | 25/100 [01:22<04:05,  3.27s/it]

Epoch 25, train_loss 0.331405 train_acc 0.982776, val_loss 0.374596, val_acc 0.936823


 30%|███       | 30/100 [01:38<03:50,  3.29s/it]

Epoch 30, train_loss 0.330345 train_acc 0.983519, val_loss 0.379151, val_acc 0.932962


 31%|███       | 31/100 [01:45<03:54,  3.40s/it]

early stopping after 32 epochs
best test loss: 0.37428933659923813
batch size: 128; conv layer dimension: 256



  5%|▌         | 5/100 [00:20<06:32,  4.13s/it]

Epoch 5, train_loss 0.350786 train_acc 0.966647, val_loss 0.383199, val_acc 0.929324


 10%|█         | 10/100 [00:41<06:12,  4.14s/it]

Epoch 10, train_loss 0.337504 train_acc 0.978322, val_loss 0.375968, val_acc 0.936154


 15%|█▌        | 15/100 [01:02<05:54,  4.17s/it]

Epoch 15, train_loss 0.333572 train_acc 0.981087, val_loss 0.374051, val_acc 0.938679


 20%|██        | 20/100 [01:23<05:34,  4.18s/it]

Epoch 20, train_loss 0.331859 train_acc 0.982257, val_loss 0.373752, val_acc 0.937936


 25%|██▌       | 25/100 [01:44<05:12,  4.17s/it]

Epoch 25, train_loss 0.330840 train_acc 0.983036, val_loss 0.374562, val_acc 0.937045


 25%|██▌       | 25/100 [01:48<05:24,  4.33s/it]

early stopping after 26 epochs
best test loss: 0.372628566408122
batch size: 128; conv layer dimension: 512



  5%|▌         | 5/100 [00:33<10:35,  6.69s/it]

Epoch 5, train_loss 0.350257 train_acc 0.966016, val_loss 0.381895, val_acc 0.930883


 10%|█         | 10/100 [01:06<10:01,  6.68s/it]

Epoch 10, train_loss 0.338718 train_acc 0.976076, val_loss 0.377947, val_acc 0.934001


 15%|█▌        | 15/100 [01:40<09:28,  6.69s/it]

Epoch 15, train_loss 0.334759 train_acc 0.979751, val_loss 0.378699, val_acc 0.932220


 20%|██        | 20/100 [02:13<08:54,  6.68s/it]

Epoch 20, train_loss 0.333277 train_acc 0.980530, val_loss 0.374598, val_acc 0.936377


 25%|██▌       | 25/100 [02:47<08:21,  6.68s/it]

Epoch 25, train_loss 0.331983 train_acc 0.981811, val_loss 0.376704, val_acc 0.934818


 30%|███       | 30/100 [03:20<07:49,  6.70s/it]

Epoch 30, train_loss 0.331434 train_acc 0.982164, val_loss 0.375699, val_acc 0.935635


 30%|███       | 30/100 [03:27<08:03,  6.91s/it]


early stopping after 31 epochs
best test loss: 0.3735746946348999
batch size: 128; conv layer dimension: 1024


  5%|▌         | 5/100 [01:02<19:47, 12.50s/it]

Epoch 5, train_loss 0.349362 train_acc 0.966518, val_loss 0.382066, val_acc 0.929027


 10%|█         | 10/100 [02:05<18:47, 12.53s/it]

Epoch 10, train_loss 0.338780 train_acc 0.975668, val_loss 0.377702, val_acc 0.934892


 15%|█▌        | 15/100 [03:07<17:45, 12.54s/it]

Epoch 15, train_loss 0.336152 train_acc 0.978062, val_loss 0.380703, val_acc 0.931106


 20%|██        | 20/100 [04:10<16:41, 12.52s/it]

Epoch 20, train_loss 0.333776 train_acc 0.979992, val_loss 0.375406, val_acc 0.935857


 25%|██▌       | 25/100 [05:13<15:38, 12.52s/it]

Epoch 25, train_loss 0.332697 train_acc 0.980809, val_loss 0.375197, val_acc 0.935783


 30%|███       | 30/100 [06:16<14:47, 12.68s/it]

Epoch 30, train_loss 0.331848 train_acc 0.981663, val_loss 0.376133, val_acc 0.935189


 35%|███▌      | 35/100 [07:18<13:19, 12.31s/it]

Epoch 35, train_loss 0.332132 train_acc 0.981403, val_loss 0.380179, val_acc 0.930809


 40%|████      | 40/100 [08:19<12:20, 12.34s/it]

Epoch 40, train_loss 0.331159 train_acc 0.982182, val_loss 0.375394, val_acc 0.936377


 40%|████      | 40/100 [08:32<12:48, 12.81s/it]

early stopping after 41 epochs
best test loss: 0.37458059731110344



