In [8]:
import os
import re
from tqdm import tqdm
import numpy as np
import pandas as pd
import nltk
# nltk.download("all")
import matplotlib.pyplot as plt
import torch

%matplotlib inline

# Data loading
Fetch spam detection dataset from Huggingface and load parquet files.

In [1]:
!pip install fastparquet

Collecting fastparquet
  Downloading fastparquet-2024.11.0-cp312-cp312-win_amd64.whl.metadata (4.3 kB)
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.10.0-cp312-cp312-win_amd64.whl.metadata (5.1 kB)
Downloading fastparquet-2024.11.0-cp312-cp312-win_amd64.whl (673 kB)
   ---------------------------------------- 0.0/673.3 kB ? eta -:--:--
   --------------- ------------------------ 262.1/673.3 kB ? eta -:--:--
   ------------------------------- -------- 524.3/673.3 kB 1.5 MB/s eta 0:00:01
   ---------------------------------------- 673.3/673.3 kB 1.1 MB/s eta 0:00:00
Downloading cramjam-2.10.0-cp312-cp312-win_amd64.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.7 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.7 MB ? eta -:--:--
   ------------ --------------------------- 0.5/1.7 MB 599.9 kB/s eta 0:00:02
   ------------------ --------------------- 0.8/1.7 MB 799.2 



In [3]:
from datasets import load_dataset
import pandas as pd
dataset = load_dataset('Deysi/spam-detection-dataset')

# Downloaded parquet files can be accessed via dataset['train'] and dataset['test']
train_path = dataset['train'].cache_files[0]['filename']
test_path = dataset['test'].cache_files[0]['filename']


parquet_data = dataset['train'].to_pandas()
parquet_data.head()

Unnamed: 0,text,label
0,hey I am looking for Xray baggage datasets can...,not_spam
1,"""Get rich quick! Make millions in just days wi...",spam
2,URGENT MESSAGE: YOU WON'T BELIEVE WHAT WE HAVE...,spam
3,[Google AI Blog: Contributing Data to Deepfake...,not_spam
4,Trying to see if anyone already has timestamps...,not_spam


# Create data sets
Create numpy arrays: texts and labels.

In [5]:
texts = parquet_data['text'].values
labels = parquet_data['label'].apply(lambda x: 1 if x == 'spam' else 0).values
assert(len(texts) == len(labels))

# Download FastText vectors
Download and unzip crawl-300d-2M.vec.zip from https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip and place in the notebook directory.

# Sentence pre-processing
Tokenize, build vocabulary, and encode texts.

In [6]:
from nltk.tokenize import word_tokenize
from collections import defaultdict

def tokenize(texts):
    max_len = 0
    tokenized_texts = []
    word2idx = {'<pad>': 0, '<unk>': 1}
    idx = 2
    for sent in texts:
        tokenized_sent = word_tokenize(str(sent).lower())
        tokenized_texts.append(tokenized_sent)
        for token in tokenized_sent:
            if token not in word2idx:
                word2idx[token] = idx
                idx += 1
        max_len = max(max_len, len(tokenized_sent))
    return tokenized_texts, word2idx, max_len

def encode(tokenized_texts, word2idx, max_len):
    input_ids = []
    for tokenized_sent in tokenized_texts:
        tokenized_sent += ['<pad>'] * (max_len - len(tokenized_sent))
        input_id = [word2idx.get(token, word2idx['<unk>']) for token in tokenized_sent]
        input_ids.append(input_id)
    return np.array(input_ids)

In [9]:
print('Tokenizing...\n')
tokenized_texts, word2idx, max_len = tokenize(texts)
input_ids = encode(tokenized_texts, word2idx, max_len)
print(f'Vocab size: {len(word2idx)}, Max length: {max_len}')

Tokenizing...

Vocab size: 25959, Max length: 8322
Vocab size: 25959, Max length: 8322


# Load pretrained FastText vectors

In [10]:
from tqdm.notebook import tqdm

def load_pretrained_vectors(word2idx, fname):
    print('Loading pretrained vectors...')
    fin = open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
    n, d = map(int, fin.readline().split())
    embeddings = np.random.uniform(-0.25, 0.25, (len(word2idx), d))
    embeddings[word2idx['<pad>']] = np.zeros((d,))
    count = 0
    for line in tqdm(fin):
        tokens = line.rstrip().split(' ')
        word = tokens[0]
        if word in word2idx:
            count += 1
            embeddings[word2idx[word]] = np.array(tokens[1:], dtype=np.float32)
    print(f'There are {count} / {len(word2idx)} pretrained vectors found.')
    return embeddings

In [11]:
# Place crawl-300d-2M.vec in the working directory
embeddings = load_pretrained_vectors(word2idx, 'crawl-300d-2M.vec')
embeddings = torch.tensor(embeddings)

Loading pretrained vectors...


0it [00:00, ?it/s]

There are 16646 / 25959 pretrained vectors found.


# PyTorch data loaders

In [12]:
from torch.utils.data import (TensorDataset, DataLoader, RandomSampler, SequentialSampler)

def data_loader(train_inputs, val_inputs, train_labels, val_labels, batch_size=10):
    train_inputs = torch.tensor(train_inputs, dtype=torch.long)
    val_inputs = torch.tensor(val_inputs, dtype=torch.long)
    train_labels = torch.tensor(train_labels, dtype=torch.long)
    val_labels = torch.tensor(val_labels, dtype=torch.long)

    train_data = TensorDataset(train_inputs, train_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

    val_data = TensorDataset(val_inputs, val_labels)
    val_sampler = SequentialSampler(val_data)
    val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

    return train_dataloader, val_dataloader

In [13]:
from sklearn.model_selection import train_test_split

# Train Test Split with test set size = 5%
train_inputs, val_inputs, train_labels, val_labels = train_test_split(input_ids, labels, test_size=0.05, random_state=42)

# Load data to PyTorch DataLoader, batch_size = 50
train_dataloader, val_dataloader = data_loader(train_inputs, val_inputs, train_labels, val_labels, batch_size=50)

# CNN Model
Define the CNN_NLP model class.

In [14]:
import torch.nn as nn
import torch.nn.functional as F

class CNN_NLP(nn.Module):
    def __init__(self,
                 pretrained_embedding=None,
                 freeze_embedding=False,
                 vocab_size=None,
                 embed_dim=300,
                 filter_sizes=[3, 4, 5],
                 num_filters=[100, 100, 100],
                 num_classes=2,
                 dropout=0.5):
        super(CNN_NLP, self).__init__()
        if pretrained_embedding is not None:
            self.vocab_size, self.embed_dim = pretrained_embedding.shape
            self.embedding = nn.Embedding.from_pretrained(pretrained_embedding, freeze=freeze_embedding)
        else:
            self.embed_dim = embed_dim
            self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                          embedding_dim=self.embed_dim,
                                          padding_idx=0,
                                          max_norm=5.0)
        # Conv Network
        self.conv1d_list = nn.ModuleList([
            nn.Conv1d(in_channels=self.embed_dim, out_channels=num_filters[i], kernel_size=filter_sizes[i])
            for i in range(len(filter_sizes))
        ])
        # Fully-connected layer and Dropout
        self.fc = nn.Linear(sum(num_filters), num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids):
        x_embed = self.embedding(input_ids).float()
        x_reshaped = x_embed.permute(0, 2, 1)
        x_conv_list = [F.relu(conv1d(x_reshaped)) for conv1d in self.conv1d_list]
        x_pool_list = [F.max_pool1d(x_conv, kernel_size=x_conv.shape[2]) for x_conv in x_conv_list]
        x_fc = torch.cat([x_pool.squeeze(dim=2) for x_pool in x_pool_list], dim=1)
        logits = self.fc(self.dropout(x_fc))
        return logits

# Set device

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Optimizer and Model Initialization

In [16]:
import torch.optim as optim

def initialize_model(pretrained_embedding=None,
                    freeze_embedding=False,
                    vocab_size=None,
                    embed_dim=300,
                    filter_sizes=[3, 4, 5],
                    num_filters=[100, 100, 100],
                    num_classes=2,
                    dropout=0.5,
                    learning_rate=0.01):
    assert (len(filter_sizes) == len(num_filters)), "filter_sizes and num_filters need to be of the same length."
    cnn_model = CNN_NLP(pretrained_embedding=pretrained_embedding,
                        freeze_embedding=freeze_embedding,
                        vocab_size=vocab_size,
                        embed_dim=embed_dim,
                        filter_sizes=filter_sizes,
                        num_filters=num_filters,
                        num_classes=num_classes,
                        dropout=dropout)
    cnn_model.to(device)
    optimizer = optim.RMSprop(cnn_model.parameters(), lr=learning_rate)
    return cnn_model, optimizer

# Training loop and evaluation

In [22]:
import random
import time
from tqdm import tqdm

loss_fn = nn.CrossEntropyLoss()

def set_seed(seed_value=42):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, optimizer, train_dataloader, val_dataloader=None, epochs=10):
    best_accuracy = 0
    print("Start training...\n")
    print(f"{'Epoch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
    print("-"*60)
    for epoch_i in range(epochs):
        t0_epoch = time.time()
        total_loss = 0
        model.train()
        for step, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch_i+1}")):
            b_input_ids, b_labels = tuple(t.to(device) for t in batch)
            optimizer.zero_grad()
            logits = model(b_input_ids)
            loss = loss_fn(logits, b_labels)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            if (step + 1) % 5 == 0:
                preds = torch.argmax(logits, dim=1).flatten()
                acc = (preds == b_labels).float().mean().item() * 100
                print(f"Batch {step+1}/{len(train_dataloader)}: Loss = {loss.item():.4f}, Acc = {acc:.2f}%", end='\r')
        avg_train_loss = total_loss / len(train_dataloader)
        if val_dataloader is not None:
            val_loss, val_accuracy = evaluate(model, val_dataloader)
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy
            time_elapsed = time.time() - t0_epoch
            print(f"{epoch_i + 1:^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {time_elapsed:^9.2f}")
    print("\n")
    print(f"Training complete! Best accuracy: {best_accuracy:.2f}%.")

def evaluate(model, val_dataloader):
    model.eval()
    val_accuracy = []
    val_loss = []
    for batch in val_dataloader:
        b_input_ids, b_labels = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            logits = model(b_input_ids)
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        preds = torch.argmax(logits, dim=1).flatten()
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)
    return val_loss, val_accuracy

# Model evaluation

In [23]:
# CNN-rand: Word vectors are randomly initialized.
set_seed(42)
cnn_rand, optimizer = initialize_model(vocab_size=len(word2idx),
                                      embed_dim=300,
                                      learning_rate=0.25,
                                      dropout=0.5)
train(cnn_rand, optimizer, train_dataloader, val_dataloader, epochs=3)

# CNN-static: fastText pretrained word vectors are used and freezed during training.
set_seed(42)
cnn_static, optimizer = initialize_model(pretrained_embedding=embeddings,
                                        freeze_embedding=True,
                                        learning_rate=0.25,
                                        dropout=0.5)
train(cnn_static, optimizer, train_dataloader, val_dataloader, epochs=3)

# CNN-non-static: fastText pretrained word vectors are fine-tuned during training.
set_seed(42)
cnn_non_static, optimizer = initialize_model(pretrained_embedding=embeddings,
                                            freeze_embedding=False,
                                            learning_rate=0.25,
                                            dropout=0.5)
train(cnn_non_static, optimizer, train_dataloader, val_dataloader, epochs=3)

Start training...

 Epoch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
------------------------------------------------------------


Epoch 1:   3%|▎         | 5/156 [00:30<15:13,  6.05s/it]

Batch 5/156: Loss = 27377.9766, Acc = 50.00%

Epoch 1:   6%|▋         | 10/156 [01:00<14:28,  5.95s/it]

Batch 10/156: Loss = 1783.5225, Acc = 70.00%

Epoch 1:  10%|▉         | 15/156 [01:29<13:57,  5.94s/it]

Batch 15/156: Loss = 36.3870, Acc = 92.00%

Epoch 1:  13%|█▎        | 20/156 [01:58<13:14,  5.84s/it]

Batch 20/156: Loss = 284.6755, Acc = 98.00%

Epoch 1:  16%|█▌        | 25/156 [02:28<12:43,  5.83s/it]

Batch 25/156: Loss = 129.0953, Acc = 94.00%

Epoch 1:  19%|█▉        | 30/156 [02:57<12:20,  5.87s/it]

Batch 30/156: Loss = 5.8158, Acc = 98.00%

Epoch 1:  22%|██▏       | 35/156 [03:27<11:43,  5.82s/it]

Batch 35/156: Loss = 457.0629, Acc = 90.00%

Epoch 1:  26%|██▌       | 40/156 [03:56<11:13,  5.80s/it]

Batch 40/156: Loss = 251.7479, Acc = 86.00%

Epoch 1:  29%|██▉       | 45/156 [04:25<10:47,  5.83s/it]

Batch 45/156: Loss = 20.9557, Acc = 98.00%

Epoch 1:  32%|███▏      | 50/156 [04:54<10:23,  5.88s/it]

Batch 50/156: Loss = 306.5027, Acc = 92.00%

Epoch 1:  35%|███▌      | 55/156 [05:23<09:41,  5.75s/it]

Batch 55/156: Loss = 22.5504, Acc = 98.00%

Epoch 1:  38%|███▊      | 60/156 [05:52<09:10,  5.73s/it]

Batch 60/156: Loss = 100.8613, Acc = 96.00%

Epoch 1:  42%|████▏     | 65/156 [06:21<08:48,  5.80s/it]

Batch 65/156: Loss = 296.9424, Acc = 88.00%

Epoch 1:  45%|████▍     | 70/156 [06:51<08:23,  5.85s/it]

Batch 70/156: Loss = 37.7500, Acc = 96.00%

Epoch 1:  48%|████▊     | 75/156 [07:21<08:07,  6.01s/it]

Batch 75/156: Loss = 4.2555, Acc = 98.00%

Epoch 1:  51%|█████▏    | 80/156 [07:52<07:43,  6.10s/it]

Batch 80/156: Loss = 0.9526, Acc = 98.00%

Epoch 1:  54%|█████▍    | 85/156 [08:22<07:06,  6.01s/it]

Batch 85/156: Loss = 20.4439, Acc = 98.00%

Epoch 1:  58%|█████▊    | 90/156 [08:52<06:35,  5.99s/it]

Batch 90/156: Loss = 75.4480, Acc = 98.00%

Epoch 1:  61%|██████    | 95/156 [09:22<06:06,  6.01s/it]

Batch 95/156: Loss = 2.7498, Acc = 98.00%

Epoch 1:  64%|██████▍   | 100/156 [09:52<05:34,  5.97s/it]

Batch 100/156: Loss = 23.4383, Acc = 98.00%

Epoch 1:  67%|██████▋   | 105/156 [10:23<05:08,  6.05s/it]

Batch 105/156: Loss = 4.1569, Acc = 98.00%

Epoch 1:  71%|███████   | 110/156 [10:55<04:50,  6.32s/it]

Batch 110/156: Loss = 265.4052, Acc = 90.00%

Epoch 1:  74%|███████▎  | 115/156 [11:24<04:02,  5.91s/it]

Batch 115/156: Loss = 352.0807, Acc = 90.00%

Epoch 1:  77%|███████▋  | 120/156 [11:54<03:37,  6.04s/it]

Batch 120/156: Loss = 433.6518, Acc = 94.00%

Epoch 1:  80%|████████  | 125/156 [12:25<03:10,  6.14s/it]

Batch 125/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  83%|████████▎ | 130/156 [12:57<02:39,  6.14s/it]

Batch 130/156: Loss = 211.9575, Acc = 92.00%

Epoch 1:  87%|████████▋ | 135/156 [13:27<02:06,  6.03s/it]

Batch 135/156: Loss = 230.9163, Acc = 92.00%

Epoch 1:  90%|████████▉ | 140/156 [13:57<01:35,  5.98s/it]

Batch 140/156: Loss = 51.9174, Acc = 96.00%

Epoch 1:  93%|█████████▎| 145/156 [14:25<01:03,  5.74s/it]

Batch 145/156: Loss = 308.4862, Acc = 92.00%

Epoch 1:  96%|█████████▌| 150/156 [14:54<00:34,  5.73s/it]

Batch 150/156: Loss = 397.9604, Acc = 94.00%

Epoch 1:  99%|█████████▉| 155/156 [15:24<00:05,  5.93s/it]

Batch 155/156: Loss = 107.8315, Acc = 94.00%

Epoch 1: 100%|██████████| 156/156 [15:26<00:00,  5.94s/it]


   1    | 1786.903363  | 35.079835  |   98.22   |  944.89  


Epoch 2:   3%|▎         | 5/156 [00:29<14:36,  5.80s/it]

Batch 5/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:   6%|▋         | 10/156 [00:59<15:07,  6.21s/it]

Batch 10/156: Loss = 234.8246, Acc = 96.00%

Epoch 2:  10%|▉         | 15/156 [01:29<14:00,  5.96s/it]

Batch 15/156: Loss = 45.7396, Acc = 98.00%

Epoch 2:  13%|█▎        | 20/156 [02:00<13:54,  6.14s/it]

Batch 20/156: Loss = 150.5545, Acc = 90.00%

Epoch 2:  16%|█▌        | 25/156 [02:29<12:48,  5.87s/it]

Batch 25/156: Loss = 94.5969, Acc = 96.00%

Epoch 2:  19%|█▉        | 30/156 [03:00<12:55,  6.16s/it]

Batch 30/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  22%|██▏       | 35/156 [03:31<12:03,  5.98s/it]

Batch 35/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  26%|██▌       | 40/156 [04:01<11:29,  5.95s/it]

Batch 40/156: Loss = 239.2509, Acc = 94.00%

Epoch 2:  29%|██▉       | 45/156 [04:31<10:57,  5.93s/it]

Batch 45/156: Loss = 50.5062, Acc = 96.00%

Epoch 2:  32%|███▏      | 50/156 [05:00<10:00,  5.66s/it]

Batch 50/156: Loss = 164.3174, Acc = 94.00%

Epoch 2:  35%|███▌      | 55/156 [05:40<12:58,  7.70s/it]

Batch 55/156: Loss = 137.4626, Acc = 96.00%

Epoch 2:  38%|███▊      | 60/156 [06:18<11:41,  7.31s/it]

Batch 60/156: Loss = 281.3778, Acc = 98.00%

Epoch 2:  42%|████▏     | 65/156 [06:51<10:00,  6.59s/it]

Batch 65/156: Loss = 1390.9001, Acc = 86.00%

Epoch 2:  45%|████▍     | 70/156 [07:23<09:11,  6.42s/it]

Batch 70/156: Loss = 279.6105, Acc = 96.00%

Epoch 2:  48%|████▊     | 75/156 [07:53<08:06,  6.01s/it]

Batch 75/156: Loss = 93.1397, Acc = 96.00%

Epoch 2:  51%|█████▏    | 80/156 [08:22<07:21,  5.82s/it]

Batch 80/156: Loss = 2027.9541, Acc = 82.00%

Epoch 2:  54%|█████▍    | 85/156 [08:51<06:43,  5.68s/it]

Batch 85/156: Loss = 36.5405, Acc = 98.00%

Epoch 2:  58%|█████▊    | 90/156 [09:20<06:16,  5.70s/it]

Batch 90/156: Loss = 519.8629, Acc = 92.00%

Epoch 2:  61%|██████    | 95/156 [09:50<06:11,  6.08s/it]

Batch 95/156: Loss = 199.2042, Acc = 92.00%

Epoch 2:  64%|██████▍   | 100/156 [10:19<05:23,  5.77s/it]

Batch 100/156: Loss = 553.6790, Acc = 92.00%

Epoch 2:  67%|██████▋   | 105/156 [10:47<04:49,  5.67s/it]

Batch 105/156: Loss = 464.5710, Acc = 90.00%

Epoch 2:  71%|███████   | 110/156 [11:16<04:18,  5.62s/it]

Batch 110/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  74%|███████▎  | 115/156 [11:46<04:09,  6.09s/it]

Batch 115/156: Loss = 350.0073, Acc = 94.00%

Epoch 2:  77%|███████▋  | 120/156 [12:17<03:38,  6.08s/it]

Batch 120/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  80%|████████  | 125/156 [12:48<03:05,  5.98s/it]

Batch 125/156: Loss = 1050.8623, Acc = 92.00%

Epoch 2:  83%|████████▎ | 130/156 [13:17<02:31,  5.82s/it]

Batch 130/156: Loss = 1176.0646, Acc = 94.00%

Epoch 2:  87%|████████▋ | 135/156 [13:45<01:59,  5.70s/it]

Batch 135/156: Loss = 327.1490, Acc = 98.00%

Epoch 2:  90%|████████▉ | 140/156 [14:14<01:30,  5.66s/it]

Batch 140/156: Loss = 118.0258, Acc = 96.00%

Epoch 2:  93%|█████████▎| 145/156 [14:43<01:01,  5.63s/it]

Batch 145/156: Loss = 639.8015, Acc = 98.00%

Epoch 2:  96%|█████████▌| 150/156 [15:13<00:36,  6.14s/it]

Batch 150/156: Loss = 44.1819, Acc = 98.00%

Epoch 2:  99%|█████████▉| 155/156 [15:47<00:06,  6.29s/it]

Batch 155/156: Loss = 65.6240, Acc = 98.00%

Epoch 2: 100%|██████████| 156/156 [15:49<00:00,  6.09s/it]


   2    |  465.079322  | 433.807337 |   95.43   |  966.53  


Epoch 3:   3%|▎         | 5/156 [00:29<15:08,  6.02s/it]

Batch 5/156: Loss = 38.7762, Acc = 98.00%

Epoch 3:   6%|▋         | 10/156 [01:01<14:53,  6.12s/it]

Batch 10/156: Loss = 35.3080, Acc = 98.00%

Epoch 3:  10%|▉         | 15/156 [01:31<14:12,  6.05s/it]

Batch 15/156: Loss = 1231.4637, Acc = 86.00%

Epoch 3:  13%|█▎        | 20/156 [02:01<13:23,  5.90s/it]

Batch 20/156: Loss = 40.6689, Acc = 98.00%

Epoch 3:  16%|█▌        | 25/156 [02:30<12:51,  5.89s/it]

Batch 25/156: Loss = 501.9663, Acc = 90.00%

Epoch 3:  19%|█▉        | 30/156 [03:00<12:26,  5.92s/it]

Batch 30/156: Loss = 3290.2988, Acc = 76.00%

Epoch 3:  22%|██▏       | 35/156 [03:32<12:32,  6.22s/it]

Batch 35/156: Loss = 5821.6948, Acc = 76.00%

Epoch 3:  26%|██▌       | 40/156 [04:02<11:37,  6.01s/it]

Batch 40/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  29%|██▉       | 45/156 [04:32<11:05,  5.99s/it]

Batch 45/156: Loss = 721.8890, Acc = 98.00%

Epoch 3:  32%|███▏      | 50/156 [05:03<10:48,  6.12s/it]

Batch 50/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  35%|███▌      | 55/156 [05:33<10:05,  5.99s/it]

Batch 55/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  38%|███▊      | 60/156 [06:03<09:11,  5.75s/it]

Batch 60/156: Loss = 546.8475, Acc = 98.00%

Epoch 3:  42%|████▏     | 65/156 [06:31<08:35,  5.66s/it]

Batch 65/156: Loss = 1166.4834, Acc = 98.00%

Epoch 3:  45%|████▍     | 70/156 [06:59<08:02,  5.61s/it]

Batch 70/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  48%|████▊     | 75/156 [07:28<07:38,  5.66s/it]

Batch 75/156: Loss = 707.5438, Acc = 98.00%

Epoch 3:  51%|█████▏    | 80/156 [07:57<07:10,  5.66s/it]

Batch 80/156: Loss = 621.3919, Acc = 98.00%

Epoch 3:  54%|█████▍    | 85/156 [08:25<06:41,  5.66s/it]

Batch 85/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  58%|█████▊    | 90/156 [08:54<06:15,  5.68s/it]

Batch 90/156: Loss = 89.5617, Acc = 96.00%

Epoch 3:  61%|██████    | 95/156 [09:22<05:43,  5.63s/it]

Batch 95/156: Loss = 166.6324, Acc = 98.00%

Epoch 3:  64%|██████▍   | 100/156 [09:52<05:21,  5.75s/it]

Batch 100/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  67%|██████▋   | 105/156 [10:22<05:00,  5.89s/it]

Batch 105/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  71%|███████   | 110/156 [10:52<04:31,  5.91s/it]

Batch 110/156: Loss = 69.7896, Acc = 98.00%

Epoch 3:  74%|███████▎  | 115/156 [11:21<04:04,  5.96s/it]

Batch 115/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  77%|███████▋  | 120/156 [11:49<03:19,  5.54s/it]

Batch 120/156: Loss = 10.5115, Acc = 98.00%

Epoch 3:  80%|████████  | 125/156 [12:16<02:45,  5.33s/it]

Batch 125/156: Loss = 132.4612, Acc = 94.00%

Epoch 3:  83%|████████▎ | 130/156 [12:42<02:18,  5.32s/it]

Batch 130/156: Loss = 161.7717, Acc = 96.00%

Epoch 3:  87%|████████▋ | 135/156 [13:09<01:50,  5.26s/it]

Batch 135/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  90%|████████▉ | 140/156 [13:35<01:23,  5.23s/it]

Batch 140/156: Loss = 221.3047, Acc = 96.00%

Epoch 3:  93%|█████████▎| 145/156 [14:01<00:57,  5.25s/it]

Batch 145/156: Loss = 1455.7998, Acc = 90.00%

Epoch 3:  96%|█████████▌| 150/156 [14:28<00:33,  5.58s/it]

Batch 150/156: Loss = 3625.3794, Acc = 90.00%

Epoch 3:  99%|█████████▉| 155/156 [14:55<00:05,  5.39s/it]

Batch 155/156: Loss = 3324.4688, Acc = 90.00%

Epoch 3: 100%|██████████| 156/156 [14:57<00:00,  5.75s/it]


   3    |  977.862984  | 4394.007731 |   85.78   |  913.06  


Training complete! Best accuracy: 98.22%.
Start training...

 Epoch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
------------------------------------------------------------


Epoch 1:   3%|▎         | 5/156 [00:19<09:41,  3.85s/it]

Batch 5/156: Loss = 307.6980, Acc = 98.00%

Epoch 1:   6%|▋         | 10/156 [00:38<09:17,  3.82s/it]

Batch 10/156: Loss = 34.4763, Acc = 98.00%

Epoch 1:  10%|▉         | 15/156 [00:56<08:42,  3.70s/it]

Batch 15/156: Loss = 198.7181, Acc = 98.00%

Epoch 1:  13%|█▎        | 20/156 [01:15<08:34,  3.78s/it]

Batch 20/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  16%|█▌        | 25/156 [01:34<08:10,  3.74s/it]

Batch 25/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  19%|█▉        | 30/156 [01:53<08:02,  3.83s/it]

Batch 30/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  22%|██▏       | 35/156 [02:12<07:36,  3.77s/it]

Batch 35/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  26%|██▌       | 40/156 [02:31<07:11,  3.72s/it]

Batch 40/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  29%|██▉       | 45/156 [02:49<06:54,  3.73s/it]

Batch 45/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  32%|███▏      | 50/156 [03:09<06:52,  3.89s/it]

Batch 50/156: Loss = 36.3959, Acc = 96.00%

Epoch 1:  35%|███▌      | 55/156 [03:28<06:34,  3.90s/it]

Batch 55/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  38%|███▊      | 60/156 [03:48<06:10,  3.86s/it]

Batch 60/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  42%|████▏     | 65/156 [04:06<05:46,  3.80s/it]

Batch 65/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  45%|████▍     | 70/156 [04:26<05:35,  3.90s/it]

Batch 70/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  48%|████▊     | 75/156 [04:46<05:22,  3.98s/it]

Batch 75/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  51%|█████▏    | 80/156 [05:05<04:56,  3.91s/it]

Batch 80/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  54%|█████▍    | 85/156 [05:25<04:32,  3.84s/it]

Batch 85/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  58%|█████▊    | 90/156 [05:44<04:13,  3.85s/it]

Batch 90/156: Loss = 150.6114, Acc = 98.00%

Epoch 1:  61%|██████    | 95/156 [06:03<03:50,  3.77s/it]

Batch 95/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  64%|██████▍   | 100/156 [06:22<03:30,  3.77s/it]

Batch 100/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  67%|██████▋   | 105/156 [06:41<03:12,  3.78s/it]

Batch 105/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  71%|███████   | 110/156 [06:59<02:53,  3.78s/it]

Batch 110/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  74%|███████▎  | 115/156 [07:19<02:36,  3.81s/it]

Batch 115/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  77%|███████▋  | 120/156 [07:38<02:16,  3.78s/it]

Batch 120/156: Loss = 161.6618, Acc = 96.00%

Epoch 1:  80%|████████  | 125/156 [07:57<01:57,  3.79s/it]

Batch 125/156: Loss = 650.4989, Acc = 96.00%

Epoch 1:  83%|████████▎ | 130/156 [08:15<01:37,  3.76s/it]

Batch 130/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  87%|████████▋ | 135/156 [08:36<01:25,  4.07s/it]

Batch 135/156: Loss = 87.7163, Acc = 98.00%

Epoch 1:  90%|████████▉ | 140/156 [08:55<01:02,  3.89s/it]

Batch 140/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  93%|█████████▎| 145/156 [09:14<00:41,  3.78s/it]

Batch 145/156: Loss = 104.2529, Acc = 98.00%

Epoch 1:  96%|█████████▌| 150/156 [09:33<00:22,  3.80s/it]

Batch 150/156: Loss = 65.2195, Acc = 98.00%

Epoch 1:  99%|█████████▉| 155/156 [09:52<00:03,  3.77s/it]

Batch 155/156: Loss = 0.0000, Acc = 100.00%

Epoch 1: 100%|██████████| 156/156 [09:54<00:00,  3.81s/it]


   1    |  524.277505  | 11.904245  |   99.78   |  611.28  


Epoch 2:   3%|▎         | 5/156 [00:18<09:23,  3.73s/it]

Batch 5/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:   6%|▋         | 10/156 [00:37<09:19,  3.83s/it]

Batch 10/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  10%|▉         | 15/156 [00:56<09:02,  3.85s/it]

Batch 15/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  13%|█▎        | 20/156 [01:16<08:41,  3.83s/it]

Batch 20/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  16%|█▌        | 25/156 [01:35<08:19,  3.81s/it]

Batch 25/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  19%|█▉        | 30/156 [01:54<07:59,  3.81s/it]

Batch 30/156: Loss = 798.3925, Acc = 98.00%

Epoch 2:  22%|██▏       | 35/156 [02:13<07:47,  3.86s/it]

Batch 35/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  26%|██▌       | 40/156 [02:32<07:23,  3.82s/it]

Batch 40/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  29%|██▉       | 45/156 [02:51<07:03,  3.81s/it]

Batch 45/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  32%|███▏      | 50/156 [03:11<06:46,  3.84s/it]

Batch 50/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  35%|███▌      | 55/156 [03:30<06:28,  3.85s/it]

Batch 55/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  38%|███▊      | 60/156 [03:49<06:09,  3.85s/it]

Batch 60/156: Loss = 213.0556, Acc = 98.00%

Epoch 2:  42%|████▏     | 65/156 [04:08<05:50,  3.85s/it]

Batch 65/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  45%|████▍     | 70/156 [04:28<05:32,  3.87s/it]

Batch 70/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  48%|████▊     | 75/156 [04:48<05:24,  4.01s/it]

Batch 75/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  51%|█████▏    | 80/156 [05:09<05:25,  4.28s/it]

Batch 80/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  54%|█████▍    | 85/156 [05:31<05:01,  4.25s/it]

Batch 85/156: Loss = 270.5273, Acc = 98.00%

Epoch 2:  58%|█████▊    | 90/156 [05:53<04:39,  4.24s/it]

Batch 90/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  61%|██████    | 95/156 [06:13<04:12,  4.14s/it]

Batch 95/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  64%|██████▍   | 100/156 [06:35<04:08,  4.43s/it]

Batch 100/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  67%|██████▋   | 105/156 [06:58<03:45,  4.42s/it]

Batch 105/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  71%|███████   | 110/156 [07:19<03:17,  4.30s/it]

Batch 110/156: Loss = 224.3159, Acc = 98.00%

Epoch 2:  74%|███████▎  | 115/156 [07:41<02:56,  4.31s/it]

Batch 115/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  77%|███████▋  | 120/156 [08:02<02:32,  4.24s/it]

Batch 120/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  80%|████████  | 125/156 [08:24<02:17,  4.43s/it]

Batch 125/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  83%|████████▎ | 130/156 [08:47<01:57,  4.51s/it]

Batch 130/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  87%|████████▋ | 135/156 [09:09<01:33,  4.47s/it]

Batch 135/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  90%|████████▉ | 140/156 [09:31<01:08,  4.29s/it]

Batch 140/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  93%|█████████▎| 145/156 [09:53<00:48,  4.42s/it]

Batch 145/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  96%|█████████▌| 150/156 [10:14<00:25,  4.29s/it]

Batch 150/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  99%|█████████▉| 155/156 [10:37<00:04,  4.36s/it]

Batch 155/156: Loss = 0.0000, Acc = 100.00%

Epoch 2: 100%|██████████| 156/156 [10:38<00:00,  4.09s/it]


   2    |  23.421395   | 47.662401  |   99.78   |  659.07  


Epoch 3:   3%|▎         | 5/156 [00:21<10:49,  4.30s/it]

Batch 5/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:   6%|▋         | 10/156 [00:43<10:16,  4.22s/it]

Batch 10/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  10%|▉         | 15/156 [01:03<09:43,  4.14s/it]

Batch 15/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  13%|█▎        | 20/156 [01:24<09:11,  4.05s/it]

Batch 20/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  16%|█▌        | 25/156 [01:44<08:46,  4.02s/it]

Batch 25/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  19%|█▉        | 30/156 [02:05<08:39,  4.12s/it]

Batch 30/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  22%|██▏       | 35/156 [02:26<08:28,  4.20s/it]

Batch 35/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  26%|██▌       | 40/156 [02:49<08:30,  4.40s/it]

Batch 40/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  29%|██▉       | 45/156 [03:10<07:56,  4.30s/it]

Batch 45/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  32%|███▏      | 50/156 [03:31<07:15,  4.10s/it]

Batch 50/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  35%|███▌      | 55/156 [03:52<07:14,  4.31s/it]

Batch 55/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  38%|███▊      | 60/156 [04:15<07:14,  4.53s/it]

Batch 60/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  42%|████▏     | 65/156 [04:38<06:53,  4.54s/it]

Batch 65/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  45%|████▍     | 70/156 [05:00<06:05,  4.25s/it]

Batch 70/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  48%|████▊     | 75/156 [05:21<05:43,  4.24s/it]

Batch 75/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  51%|█████▏    | 80/156 [05:42<05:17,  4.17s/it]

Batch 80/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  54%|█████▍    | 85/156 [06:03<04:53,  4.13s/it]

Batch 85/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  58%|█████▊    | 90/156 [06:24<04:32,  4.13s/it]

Batch 90/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  61%|██████    | 95/156 [06:44<04:11,  4.12s/it]

Batch 95/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  64%|██████▍   | 100/156 [07:05<03:51,  4.13s/it]

Batch 100/156: Loss = 20.6550, Acc = 98.00%

Epoch 3:  67%|██████▋   | 105/156 [07:26<03:28,  4.09s/it]

Batch 105/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  71%|███████   | 110/156 [07:46<03:06,  4.06s/it]

Batch 110/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  74%|███████▎  | 115/156 [08:07<02:51,  4.18s/it]

Batch 115/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  77%|███████▋  | 120/156 [08:27<02:27,  4.10s/it]

Batch 120/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  80%|████████  | 125/156 [08:49<02:10,  4.20s/it]

Batch 125/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  83%|████████▎ | 130/156 [09:10<01:50,  4.26s/it]

Batch 130/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  87%|████████▋ | 135/156 [09:31<01:26,  4.14s/it]

Batch 135/156: Loss = 6.2698, Acc = 98.00%

Epoch 3:  90%|████████▉ | 140/156 [09:53<01:09,  4.32s/it]

Batch 140/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  93%|█████████▎| 145/156 [10:13<00:45,  4.13s/it]

Batch 145/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  96%|█████████▌| 150/156 [10:34<00:24,  4.07s/it]

Batch 150/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  99%|█████████▉| 155/156 [10:55<00:04,  4.20s/it]

Batch 155/156: Loss = 0.0000, Acc = 100.00%

Epoch 3: 100%|██████████| 156/156 [10:56<00:00,  4.21s/it]


   3    |  24.076233   | 20.639106  |   99.78   |  675.59  


Training complete! Best accuracy: 99.78%.
Start training...

 Epoch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
------------------------------------------------------------


Epoch 1:   3%|▎         | 5/156 [00:32<16:14,  6.46s/it]

Batch 5/156: Loss = 19607.1953, Acc = 94.00%

Epoch 1:   6%|▋         | 10/156 [01:05<16:01,  6.58s/it]

Batch 10/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  10%|▉         | 15/156 [01:38<15:15,  6.49s/it]

Batch 15/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  13%|█▎        | 20/156 [02:10<14:28,  6.39s/it]

Batch 20/156: Loss = 3018.6873, Acc = 98.00%

Epoch 1:  16%|█▌        | 25/156 [02:42<14:13,  6.51s/it]

Batch 25/156: Loss = 10398.1211, Acc = 96.00%

Epoch 1:  19%|█▉        | 30/156 [03:16<14:01,  6.67s/it]

Batch 30/156: Loss = 6481.8477, Acc = 98.00%

Epoch 1:  22%|██▏       | 35/156 [03:47<12:48,  6.35s/it]

Batch 35/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  26%|██▌       | 40/156 [04:22<13:07,  6.79s/it]

Batch 40/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  29%|██▉       | 45/156 [04:54<11:53,  6.42s/it]

Batch 45/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  32%|███▏      | 50/156 [05:25<10:56,  6.20s/it]

Batch 50/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  35%|███▌      | 55/156 [05:58<11:08,  6.62s/it]

Batch 55/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  38%|███▊      | 60/156 [06:31<10:24,  6.51s/it]

Batch 60/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  42%|████▏     | 65/156 [07:06<10:23,  6.85s/it]

Batch 65/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  45%|████▍     | 70/156 [07:40<10:04,  7.02s/it]

Batch 70/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  48%|████▊     | 75/156 [08:15<09:22,  6.94s/it]

Batch 75/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  51%|█████▏    | 80/156 [08:51<09:00,  7.11s/it]

Batch 80/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  54%|█████▍    | 85/156 [09:25<08:17,  7.00s/it]

Batch 85/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  58%|█████▊    | 90/156 [09:59<07:17,  6.63s/it]

Batch 90/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  61%|██████    | 95/156 [10:31<06:33,  6.46s/it]

Batch 95/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  64%|██████▍   | 100/156 [11:05<06:16,  6.73s/it]

Batch 100/156: Loss = 2473.5759, Acc = 98.00%

Epoch 1:  67%|██████▋   | 105/156 [11:38<05:30,  6.49s/it]

Batch 105/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  71%|███████   | 110/156 [12:14<05:27,  7.11s/it]

Batch 110/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  74%|███████▎  | 115/156 [12:48<04:35,  6.71s/it]

Batch 115/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  77%|███████▋  | 120/156 [13:23<04:05,  6.81s/it]

Batch 120/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  80%|████████  | 125/156 [13:54<03:18,  6.39s/it]

Batch 125/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  83%|████████▎ | 130/156 [14:26<02:42,  6.26s/it]

Batch 130/156: Loss = 2368.3652, Acc = 96.00%

Epoch 1:  87%|████████▋ | 135/156 [14:58<02:15,  6.46s/it]

Batch 135/156: Loss = 5617.8110, Acc = 98.00%

Epoch 1:  90%|████████▉ | 140/156 [15:30<01:41,  6.35s/it]

Batch 140/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  93%|█████████▎| 145/156 [16:02<01:09,  6.27s/it]

Batch 145/156: Loss = 0.0000, Acc = 100.00%

Epoch 1:  96%|█████████▌| 150/156 [16:33<00:37,  6.31s/it]

Batch 150/156: Loss = 3853.1035, Acc = 98.00%

Epoch 1:  99%|█████████▉| 155/156 [17:05<00:06,  6.28s/it]

Batch 155/156: Loss = 0.0000, Acc = 100.00%

Epoch 1: 100%|██████████| 156/156 [17:07<00:00,  6.59s/it]


   1    | 24455.164478 |  0.000000  |  100.00   |  1046.11 


Epoch 2:   3%|▎         | 5/156 [00:31<15:46,  6.27s/it]

Batch 5/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:   6%|▋         | 10/156 [01:03<15:23,  6.32s/it]

Batch 10/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  10%|▉         | 15/156 [01:34<14:45,  6.28s/it]

Batch 15/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  13%|█▎        | 20/156 [02:06<14:16,  6.30s/it]

Batch 20/156: Loss = 233.4203, Acc = 98.00%

Epoch 2:  16%|█▌        | 25/156 [02:40<14:32,  6.66s/it]

Batch 25/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  19%|█▉        | 30/156 [03:11<12:58,  6.18s/it]

Batch 30/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  22%|██▏       | 35/156 [03:41<12:13,  6.06s/it]

Batch 35/156: Loss = 2551.1877, Acc = 98.00%

Epoch 2:  26%|██▌       | 40/156 [04:12<12:10,  6.30s/it]

Batch 40/156: Loss = 10373.6504, Acc = 98.00%

Epoch 2:  29%|██▉       | 45/156 [04:46<12:18,  6.65s/it]

Batch 45/156: Loss = 6382.1011, Acc = 94.00%

Epoch 2:  32%|███▏      | 50/156 [05:22<12:39,  7.16s/it]

Batch 50/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  35%|███▌      | 55/156 [05:54<10:54,  6.48s/it]

Batch 55/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  38%|███▊      | 60/156 [06:25<10:16,  6.42s/it]

Batch 60/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  42%|████▏     | 65/156 [06:58<09:48,  6.46s/it]

Batch 65/156: Loss = 582.0009, Acc = 96.00%

Epoch 2:  45%|████▍     | 70/156 [07:30<09:06,  6.35s/it]

Batch 70/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  48%|████▊     | 75/156 [08:02<08:37,  6.39s/it]

Batch 75/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  51%|█████▏    | 80/156 [08:33<07:55,  6.25s/it]

Batch 80/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  54%|█████▍    | 85/156 [09:05<07:31,  6.36s/it]

Batch 85/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  58%|█████▊    | 90/156 [09:38<07:01,  6.39s/it]

Batch 90/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  61%|██████    | 95/156 [10:11<06:28,  6.37s/it]

Batch 95/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  64%|██████▍   | 100/156 [10:43<05:55,  6.34s/it]

Batch 100/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  67%|██████▋   | 105/156 [11:14<05:15,  6.19s/it]

Batch 105/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  71%|███████   | 110/156 [11:44<04:37,  6.03s/it]

Batch 110/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  74%|███████▎  | 115/156 [12:14<04:08,  6.07s/it]

Batch 115/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  77%|███████▋  | 120/156 [12:45<03:37,  6.03s/it]

Batch 120/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  80%|████████  | 125/156 [13:15<03:08,  6.07s/it]

Batch 125/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  83%|████████▎ | 130/156 [13:47<02:40,  6.18s/it]

Batch 130/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  87%|████████▋ | 135/156 [14:17<02:09,  6.15s/it]

Batch 135/156: Loss = 11364.5303, Acc = 98.00%

Epoch 2:  90%|████████▉ | 140/156 [14:47<01:37,  6.08s/it]

Batch 140/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  93%|█████████▎| 145/156 [15:18<01:07,  6.12s/it]

Batch 145/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  96%|█████████▌| 150/156 [15:49<00:36,  6.12s/it]

Batch 150/156: Loss = 0.0000, Acc = 100.00%

Epoch 2:  99%|█████████▉| 155/156 [16:19<00:06,  6.07s/it]

Batch 155/156: Loss = 0.0000, Acc = 100.00%

Epoch 2: 100%|██████████| 156/156 [16:21<00:00,  6.29s/it]


   2    |  966.453664  | 669.754123 |   99.78   |  999.59  


Epoch 3:   3%|▎         | 5/156 [00:30<15:12,  6.04s/it]

Batch 5/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:   6%|▋         | 10/156 [01:00<14:35,  6.00s/it]

Batch 10/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  10%|▉         | 15/156 [01:31<14:35,  6.21s/it]

Batch 15/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  13%|█▎        | 20/156 [02:02<13:51,  6.11s/it]

Batch 20/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  16%|█▌        | 25/156 [02:32<13:12,  6.05s/it]

Batch 25/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  19%|█▉        | 30/156 [03:02<12:34,  5.99s/it]

Batch 30/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  22%|██▏       | 35/156 [03:33<12:20,  6.12s/it]

Batch 35/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  26%|██▌       | 40/156 [04:04<11:53,  6.15s/it]

Batch 40/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  29%|██▉       | 45/156 [04:34<11:22,  6.14s/it]

Batch 45/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  32%|███▏      | 50/156 [05:04<10:40,  6.04s/it]

Batch 50/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  35%|███▌      | 55/156 [05:35<10:16,  6.11s/it]

Batch 55/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  38%|███▊      | 60/156 [06:05<09:46,  6.11s/it]

Batch 60/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  42%|████▏     | 65/156 [06:36<09:12,  6.07s/it]

Batch 65/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  45%|████▍     | 70/156 [07:06<08:46,  6.12s/it]

Batch 70/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  48%|████▊     | 75/156 [07:38<08:26,  6.25s/it]

Batch 75/156: Loss = 18662.2148, Acc = 98.00%

Epoch 3:  51%|█████▏    | 80/156 [08:09<07:59,  6.31s/it]

Batch 80/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  54%|█████▍    | 85/156 [08:41<07:30,  6.34s/it]

Batch 85/156: Loss = 10772.7529, Acc = 98.00%

Epoch 3:  58%|█████▊    | 90/156 [09:13<07:03,  6.41s/it]

Batch 90/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  61%|██████    | 95/156 [09:44<06:16,  6.17s/it]

Batch 95/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  64%|██████▍   | 100/156 [10:14<05:43,  6.13s/it]

Batch 100/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  67%|██████▋   | 105/156 [10:44<05:06,  6.02s/it]

Batch 105/156: Loss = 4054.3660, Acc = 98.00%

Epoch 3:  71%|███████   | 110/156 [11:15<04:40,  6.09s/it]

Batch 110/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  74%|███████▎  | 115/156 [11:46<04:08,  6.06s/it]

Batch 115/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  77%|███████▋  | 120/156 [12:16<03:34,  5.95s/it]

Batch 120/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  80%|████████  | 125/156 [12:46<03:05,  6.00s/it]

Batch 125/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  83%|████████▎ | 130/156 [13:16<02:38,  6.10s/it]

Batch 130/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  87%|████████▋ | 135/156 [13:47<02:08,  6.12s/it]

Batch 135/156: Loss = 9047.4873, Acc = 98.00%

Epoch 3:  90%|████████▉ | 140/156 [14:18<01:39,  6.20s/it]

Batch 140/156: Loss = 29544.4902, Acc = 98.00%

Epoch 3:  93%|█████████▎| 145/156 [14:48<01:07,  6.14s/it]

Batch 145/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  96%|█████████▌| 150/156 [15:19<00:36,  6.04s/it]

Batch 150/156: Loss = 0.0000, Acc = 100.00%

Epoch 3:  99%|█████████▉| 155/156 [15:49<00:06,  6.08s/it]

Batch 155/156: Loss = 0.0000, Acc = 100.00%

Epoch 3: 100%|██████████| 156/156 [15:51<00:00,  6.10s/it]


   3    | 2995.716349  | 875.869846 |   99.78   |  969.03  


Training complete! Best accuracy: 100.00%.


In [24]:
import torch

# Зберегти модель CNN-rand
torch.save(cnn_rand.state_dict(), "cnn_rand.pth")

# Зберегти модель CNN-static
torch.save(cnn_static.state_dict(), "cnn_static.pth")

# Зберегти модель CNN-non-static
torch.save(cnn_non_static.state_dict(), "cnn_non_static.pth")

# Test predictions

In [None]:
cnn_rand.load_state_dict(torch.load("cnn_rand.pth", map_location=device))
cnn_static.load_state_dict(torch.load("cnn_static.pth", map_location=device))
cnn_non_static.load_state_dict(torch.load("cnn_non_static.pth", map_location=device))

cnn_rand.eval()
cnn_static.eval()
cnn_non_static.eval()

def test_model(text, model, max_len=max_len):
    tokens = word_tokenize(text.lower())
    padded_tokens = tokens + ['<pad>'] * (max_len - len(tokens))
    input_id = [word2idx.get(token, word2idx['<unk>']) for token in padded_tokens]
    input_id = torch.tensor(input_id).unsqueeze(dim=0).to(device)
    with torch.no_grad():
        logits = model(input_id)
        probs = torch.softmax(logits, dim=1).squeeze(dim=0)
    print(f"Spam: {probs[1]*100:.2f}%, Not spam: {probs[0]*100:.2f}%")

test_texts = [
    "Congratulations! You have won a free ticket.",
    "Hi, can we meet tomorrow at 10am?",
    "URGENT! Your account has been compromised. Click here to reset your password.",
    "This is not spam, just checking in.",
    "You are selected for a $1000 gift card. Reply YES to claim.",
    "Let's have lunch next week.",
    "Limited time offer! Buy now and save 50%.",
    "Please review the attached document and let me know your thoughts.",
    "Call this number to get your prize.",
    "See you at the meeting later.",
    "Сьогодні пара з Обробки природної мови.",
    "Вітаю! Ви виграли безкоштовний квиток.",
    "Привіт, можемо зустрітися завтра о 10 ранку?",
    "Терміново! Ваш обліковий запис був зламаний. Натисніть тут, щоб скинути пароль."
]

for text in test_texts:
    print(f"\nТекст: {text}")
    print("CNN-rand:")
    test_model(text, cnn_rand)
    print("CNN-static:")
    test_model(text, cnn_static)
    print("CNN-non-static:")
    test_model(text, cnn_non_static)

  cnn_rand.load_state_dict(torch.load("cnn_rand.pth", map_location=device))
  cnn_static.load_state_dict(torch.load("cnn_static.pth", map_location=device))
  cnn_non_static.load_state_dict(torch.load("cnn_non_static.pth", map_location=device))



Текст: Congratulations! You have won a free ticket.
CNN-rand:
Spam: 100.00%, Not spam: 0.00%
CNN-static:
Spam: 100.00%, Not spam: 0.00%
CNN-non-static:
Spam: 100.00%, Not spam: 0.00%

Текст: Hi, can we meet tomorrow at 10am?
CNN-rand:
Spam: 0.00%, Not spam: 100.00%
CNN-static:
Spam: 100.00%, Not spam: 0.00%
CNN-non-static:
Spam: 100.00%, Not spam: 0.00%

Текст: URGENT! Your account has been compromised. Click here to reset your password.
CNN-rand:
Spam: 0.00%, Not spam: 100.00%
CNN-static:
Spam: 100.00%, Not spam: 0.00%
CNN-non-static:
Spam: 100.00%, Not spam: 0.00%

Текст: This is not spam, just checking in.
CNN-rand:
Spam: 0.00%, Not spam: 100.00%
CNN-static:
Spam: 100.00%, Not spam: 0.00%
CNN-non-static:
Spam: 100.00%, Not spam: 0.00%

Текст: You are selected for a $1000 gift card. Reply YES to claim.
CNN-rand:
Spam: 0.00%, Not spam: 100.00%
CNN-static:
Spam: 100.00%, Not spam: 0.00%
CNN-non-static:
Spam: 0.00%, Not spam: 100.00%

Текст: Let's have lunch next week.
CNN-rand:
Spam: 