In [27]:
!pip install torch
!pip install torchtext
import os
import re
import sys
import time
import random
import argparse
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.legacy.data import Field, LabelField, TabularDataset, Pipeline, BucketIterator
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

"""
# RNN Classification with PyTorch
In this task, you will implement the RNN model and text classification in PyTorch.
Complete four methods(or class):
- `train(model, iterator, optimizer, criterion) -> (epoch_loss, epoch_acc)`
- `evaluate(model, iterator, criterion) -> (epoch_loss, epoch_acc)`
- `RNN(num_embeddings, padding_idx, embedding_dim, 
        hidden_dim,  num_layers, dropout, bidirectional) -> predictions`
- `set_hyperparameter_dict() -> param_dict`

## Instruction
* See skeleton codes below for more details.
* Do not remove assert lines.
* Do not modify return variables.
* Do not modify methods that start with an underscore.
* Do not import additional libraries. You can complete implementation using the given libraries.

## Submission
* Before submit your code in KLMS, please change the name of the file to your student id (e.g., 2019xxxx.py).

## Grading
* Functionality and prediction accuracy for unknown test samples (i.e., we do not give them to you) will be your grade.
* For functionality, we will run unit tests of `train` and `evaluate`.
* For prediction accuracy, we will run your `run` to get your model's score.
* If it is on par with the score of TA, you will get a perfect score.
* TA's validation accuracy is 0.86.

## Evironment Setting 
* our task is designed with PyTorch==1.8.1 TorchText==0.9.1
* We stronly recommend using Google Colab with GPU for students who have not a GPU in your local or remote computer.
    - Runtime > Change runtime type > Hardware accelerator: GPU
    - !pip install torch
    - !pip install torchtext

* For one epoch of training, Colab+GPU takes 10s, Colab+CPU takes very longtime (more than 20m).
* TA's code got 86 validation accuracy in a total of 150s (15 epochs) at Colab+GPU.
* Even if we set random seed, results can vary depending on an allocated environment at Colab.
"""


def seed_reset(SEED=0):
    random.seed(SEED)
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True


def _download_dataset(size=10000):
    assert sys.version_info >= (3, 6), "Use Python3.6+"

    import ssl
    import urllib.request
    url = "https://raw.githubusercontent.com/dongkwan-kim/small_dataset/master/review_{}k.csv".format(size // 1000)

    dir_path = "../data"
    file_path = os.path.join(dir_path, "review_{}k.csv".format(size // 1000))
    if not os.path.isfile(file_path):
        print("Download: {}".format(file_path))
        os.makedirs(dir_path, exist_ok=True)
        ctx = ssl._create_unverified_context()
        with urllib.request.urlopen(url, context=ctx) as u, open(file_path, 'wb') as f:
            f.write(u.read())
    else:
        print("Already exist: {}".format(file_path))


def _load_dataset(test_data_path=None, size=10000, train_test_ratio=0.8, seed=0):
    _download_dataset()

    preprocess_pipeline = Pipeline(lambda x: re.sub(r'[^a-z]+', ' ', x))

    TEXT = Field(batch_first = True,
                include_lengths = True, 
                lower=True, 
                preprocessing=preprocess_pipeline)
    LABEL = LabelField(dtype = torch.float)

    train_data = TabularDataset(path="../data/review_{}k.csv".format(size // 1000), 
                                format='csv', 
                                fields=[('review', TEXT), ('sentiment', LABEL)], skip_header=True)
    test_data = None
    if test_data_path is not None:
        test_data = TabularDataset(path=test_data_path, 
                                    format='csv', 
                                    fields=[('review', TEXT), ('sentiment', LABEL)], skip_header=True)
    
    
    train_data, valid_data = train_data.split(split_ratio=train_test_ratio, 
                                            random_state = random.seed(seed))
    
    TEXT.build_vocab(train_data)
    LABEL.build_vocab(train_data)

    vocab_size = len(TEXT.vocab)
    padding_idx = TEXT.vocab.stoi[TEXT.pad_token]

    return train_data, valid_data, test_data, vocab_size, padding_idx


def epoch_time(start_time, end_time):
    """Do not modify the code in this function."""
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


def accuracy(prediction, label):
    """Do not modify the code in this function."""
    binary_prediction = torch.round(torch.sigmoid(prediction))
    correct = (binary_prediction == label).float()
    acc = correct.sum() / len(correct)
    return acc


class RNN(nn.Module):

    def __init__(self, num_embeddings, padding_idx, embedding_dim, 
                 hidden_dim, num_layers, dropout, bidirectional):
        """ Build a RNN model

        :param num_embeddings: the numebr of embeddings (vocab size)
        :param padding_idx: padding idx
        :param embedding_dim: (int) embedding dimension
        :param hidden_dim: (int) hidden dimension
        :param num_layers: (int) the number of recurrent layers
        :param dropout: (float) dropout rate
        :param bidirectional: (bool) is bidirectional

        :return output: type=torch.Tensor, shape=[batch size]
        """
        
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx = padding_idx)
        self.dropout = nn.Dropout(dropout)
        

        ## build your own RNN module and fully connected (fc) layer
        ## * self.rnn
        ##         - set batch_first=True of your RNN module since the input text shape is [batch size, max text length, embedding dim]
        ##         - Example: self.rnn = nn.GRU(...., batch_first=True, ...)
        ## * self.fc
        ##         - The structure of fully connected (fc) layer can vary depending on the output of your RNN module

        ########### IMPLEMENT HERE ###############

        self.num_directions = int(bidirectional)*2
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.rnn: nn.Module = nn.GRU(embedding_dim, hidden_dim, num_layers, batch_first = True, dropout = dropout, bidirectional = bidirectional)     # Example: nn.GRU(...., batch_first=True, ...)             
        self.fc: nn.Module = nn.Linear(self.num_directions*hidden_dim, 1)      # Hint: nn.Linear(/* BLANK */, 1)

        ##########################################
        
        
    def forward(self, text, text_lengths):
        
        ## text.shape = [batch size, max text length, embedding_dim]
        ## text_lengths.shape = [batch_size]
 
        embedded = self.dropout(self.embedding(text))
       
        ## We use pack_padded_sequence to deal with padding and boost the performance.
        ## Because you have already sorted sentences by using BucketIterator, you can use pack_padded_sequence without any modification.
        ## * reference
        ##      - document: https://pytorch.org/docs/stable/generated/torch.nn.utils.rnn.pack_padded_sequence.html
        ##      - example: https://gist.github.com/HarshTrivedi/f4e7293e941b17d19058f6fb90ab0fec
        ## * hyperparameter 
        ##      - set batch_first=True
        ##      - the text length need to be on CPU

        packed_embedded = pack_padded_sequence(embedded, text_lengths.to('cpu'), batch_first = True)
        

        ## build your own RNN structure using self.rnn, self.fc
        ## You don't need to use self.dropout. It is optional.
        ########### IMPLEMENT HERE ###############

        output: torch.Tensor = None

        _, ht = self.rnn(packed_embedded)
        ht = ht.view(self.num_layers, self.num_directions, -1, self.hidden_dim)
        if self.num_directions == 2:
            ht = torch.cat([ht[-1,0,:,:],ht[-1,1,:,:]], dim=1)
        else:
            ht = ht.squeeze(0)        
        self.dropout(ht)
        output = self.fc(ht)
        output = output.view(-1)

        ##########################################

        assert output.shape == torch.Size([text.shape[0]]) # batch_size
        return output


def train(model, iterator, optimizer, criterion):
    """ Complete train method
    :param model: RNN model
    :param iterator: train dataset iterator
    :param optimizer: optimzer
    :param criterion: loss function

    :return output: train loss, train accuracy
    """
    
    total_epoch_loss = 0
    total_epoch_acc = 0
    
    model.train()
    
    for batch in tqdm(iterator, desc="train"):

        optimizer.zero_grad()      
        (text, text_lengths), labels = batch.review, batch.sentiment
        
        ## Complete train method using model(), criterion(), accuracy()
        ########### IMPLEMENT HERE ###############

        prediction = model.forward(text, text_lengths)

        loss:torch.Tensor = criterion(prediction, labels)
        acc:torch.Tensor = accuracy(prediction, labels)
        loss.backward()
        optimizer.step()

        ##########################################

        assert loss.shape == torch.Size([])
        assert acc.shape == torch.Size([])
        
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
    
    
    epoch_loss = total_epoch_loss / len(iterator)
    epoch_acc = total_epoch_acc / len(iterator)
    return epoch_loss, epoch_acc


def evaluate(model, iterator, criterion):
    """ Complete evaluate method
    :param model: RNN model
    :param iterator: dataset iterator
    :param criterion: loss function

    :return output: loss, accuracy
    """
    
    total_epoch_loss = 0
    total_epoch_acc = 0
    
    model.eval()
    with torch.no_grad():
        for batch in tqdm(iterator, desc="evaluate"):
            (text, text_lengths), labels = batch.review, batch.sentiment

            ## Complete evaluate method using model(), criterion(), accuracy()
            ########### IMPLEMENT HERE ###############

            prediction = model.forward(text, text_lengths)

            loss:torch.Tensor = criterion(prediction, labels)
            acc:torch.Tensor = accuracy(prediction, labels)  

            ##########################################

            assert loss.shape == torch.Size([])
            assert acc.shape == torch.Size([])

            total_epoch_loss += loss.item()
            total_epoch_acc += acc.item()

    epoch_loss = total_epoch_loss / len(iterator)
    epoch_acc = total_epoch_acc / len(iterator)
    return epoch_loss, epoch_acc



def set_hyperparameter_dict():
    """ Set your best hyperparameters for your model
    """
    param_dict = {
        'embedding_dim': 128,
        'hidden_dim': 256,
        'num_layers': 3,
        'dropout': 0.55,
        'bidirectional': True, 
        'batch_size': 32,
        'num_epochs': 15,
        'learning_rate': 1e-3,
        'device':'cuda'
    }
    return param_dict
    


def run(num_samples=10000, param_dict=set_hyperparameter_dict(), train=train, evaluate=evaluate, seed=0, test_data_path=None, verbose=True):
    """
    You do not have to consider test_data_path, since it will be used for grading only.
    You can modify this run function for training your own model in the marked area below.  
    """
    train_data, valid_data, test_data, vocab_size, padding_idx = _load_dataset(test_data_path, num_samples, seed=seed)
    
    NUM_EMBEDDINGS = vocab_size
    PADDING_IDX = padding_idx

    param_dict = set_hyperparameter_dict()

    model = RNN(NUM_EMBEDDINGS, 
                PADDING_IDX,
                param_dict['embedding_dim'], 
                param_dict['hidden_dim'], 
                param_dict['num_layers'], 
                param_dict['dropout'], 
                param_dict['bidirectional']
                )

    device = torch.device(param_dict['device'] if torch.cuda.is_available() else 'cpu')

    train_iter, val_iter = BucketIterator.splits(
                                    (train_data, valid_data), 
                                    batch_size = param_dict['batch_size'],
                                    sort_within_batch = True,
                                    sort_key=lambda x: len(x.review),
                                    device = device)
    
    if test_data is not None:
        test_iter = BucketIterator(test_data, 
                                    batch_size = param_dict['batch_size'],
                                    sort_within_batch = True,
                                    sort_key=lambda x: len(x.review),
                                    device = device)

    train_loss, train_acc = None, None
    valid_loss, valid_acc = None, None
    test_loss, test_acc = None, None

    ########### You can modify here ###############

    optimizer = optim.Adam(model.parameters(), lr=param_dict['learning_rate'])
    criterion = nn.BCEWithLogitsLoss()
    model = model.to(device)
    criterion = criterion.to(device)
    high_valid_acc = 0

    for epoch in range(param_dict['num_epochs']):
        print(f'Epoch: {epoch+1:02}')
        start_time = time.time()

        train_loss, train_acc = train(model, train_iter, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, val_iter, criterion)   
                   
        if verbose:
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            print(f'\nEpoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

            if valid_loss < 0.5 and valid_acc > 0.86:
                break

    #########################################
    
    if test_data is not None:
        test_loss, test_acc = evaluate(model, test_iter, criterion)
        print(f'\t Test Loss: {test_loss:.3f} |  Test Acc: {test_acc*100:.2f}%')

    return train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc


if __name__ == '__main__':
    seed_reset()
    run()

Already exist: ../data/review_10k.csv





train:   0%|          | 0/250 [00:00<?, ?it/s][A[A[A

Epoch: 01





train:   0%|          | 1/250 [00:00<02:07,  1.95it/s][A[A[A


train:   1%|          | 3/250 [00:00<01:37,  2.54it/s][A[A[A


train:   2%|▏         | 5/250 [00:00<01:14,  3.31it/s][A[A[A


train:   2%|▏         | 6/250 [00:01<01:00,  4.05it/s][A[A[A


train:   3%|▎         | 8/250 [00:01<00:45,  5.27it/s][A[A[A


train:   4%|▎         | 9/250 [00:01<00:39,  6.11it/s][A[A[A


train:   4%|▍         | 10/250 [00:01<00:45,  5.23it/s][A[A[A


train:   4%|▍         | 11/250 [00:01<00:40,  5.89it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:36,  6.56it/s][A[A[A


train:   6%|▌         | 15/250 [00:01<00:28,  8.14it/s][A[A[A


train:   7%|▋         | 17/250 [00:02<00:29,  7.79it/s][A[A[A


train:   8%|▊         | 19/250 [00:02<00:27,  8.51it/s][A[A[A


train:   8%|▊         | 21/250 [00:02<00:24,  9.44it/s][A[A[A


train:   9%|▉         | 23/250 [00:02<00:26,  8.65it/s][A[A[A


train:  10%|█         | 25/250 [00:03<00:25,  8.91it/s][A[A[A



Epoch Time: 0m 31s
	Train Loss: 0.692 | Train Acc: 54.41%
	 Val. Loss: 0.694 |  Val. Acc: 56.55%
Epoch: 02





train:   1%|          | 2/250 [00:00<00:56,  4.43it/s][A[A[A


train:   1%|          | 3/250 [00:01<01:24,  2.93it/s][A[A[A


train:   2%|▏         | 5/250 [00:01<01:12,  3.40it/s][A[A[A


train:   3%|▎         | 7/250 [00:01<01:04,  3.76it/s][A[A[A


train:   4%|▎         | 9/250 [00:02<00:50,  4.75it/s][A[A[A


train:   4%|▍         | 10/250 [00:02<00:43,  5.55it/s][A[A[A


train:   4%|▍         | 11/250 [00:02<00:48,  4.96it/s][A[A[A


train:   5%|▍         | 12/250 [00:02<00:43,  5.51it/s][A[A[A


train:   5%|▌         | 13/250 [00:02<00:37,  6.27it/s][A[A[A


train:   6%|▌         | 14/250 [00:02<00:42,  5.51it/s][A[A[A


train:   6%|▌         | 15/250 [00:03<00:44,  5.28it/s][A[A[A


train:   7%|▋         | 17/250 [00:03<00:43,  5.31it/s][A[A[A


train:   8%|▊         | 19/250 [00:03<00:35,  6.43it/s][A[A[A


train:   8%|▊         | 20/250 [00:03<00:37,  6.13it/s][A[A[A


train:   8%|▊         | 21/250 [00:03<00:34,  6.61it/s][A[A[A


Epoch Time: 0m 31s
	Train Loss: 0.656 | Train Acc: 61.52%
	 Val. Loss: 0.684 |  Val. Acc: 61.01%
Epoch: 03





train:   1%|          | 3/250 [00:00<00:25,  9.54it/s][A[A[A


train:   2%|▏         | 5/250 [00:00<00:26,  9.10it/s][A[A[A


train:   3%|▎         | 7/250 [00:00<00:25,  9.36it/s][A[A[A


train:   3%|▎         | 8/250 [00:00<00:29,  8.27it/s][A[A[A


train:   4%|▍         | 10/250 [00:00<00:24,  9.60it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:28,  8.34it/s][A[A[A


train:   6%|▌         | 14/250 [00:01<00:24,  9.53it/s][A[A[A


train:   6%|▋         | 16/250 [00:01<00:23, 10.01it/s][A[A[A


train:   7%|▋         | 18/250 [00:01<00:28,  8.21it/s][A[A[A


train:   8%|▊         | 19/250 [00:02<00:31,  7.37it/s][A[A[A


train:   8%|▊         | 21/250 [00:02<00:26,  8.50it/s][A[A[A


train:   9%|▉         | 22/250 [00:02<00:34,  6.62it/s][A[A[A


train:  10%|▉         | 24/250 [00:02<00:28,  7.90it/s][A[A[A


train:  10%|█         | 26/250 [00:02<00:27,  8.05it/s][A[A[A


train:  11%|█         | 27/250 [00:03<00:28,  7.94it/s][A[A[


Epoch Time: 0m 31s
	Train Loss: 0.621 | Train Acc: 65.26%
	 Val. Loss: 0.580 |  Val. Acc: 70.83%
Epoch: 04





train:   1%|          | 2/250 [00:00<00:54,  4.58it/s][A[A[A


train:   1%|          | 3/250 [00:00<00:45,  5.39it/s][A[A[A


train:   2%|▏         | 4/250 [00:00<00:47,  5.20it/s][A[A[A


train:   2%|▏         | 5/250 [00:00<00:42,  5.80it/s][A[A[A


train:   3%|▎         | 7/250 [00:01<00:36,  6.71it/s][A[A[A


train:   4%|▎         | 9/250 [00:01<00:31,  7.67it/s][A[A[A


train:   4%|▍         | 11/250 [00:01<00:31,  7.59it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:40,  5.85it/s][A[A[A


train:   6%|▌         | 14/250 [00:01<00:34,  6.86it/s][A[A[A


train:   6%|▋         | 16/250 [00:02<00:30,  7.60it/s][A[A[A


train:   7%|▋         | 17/250 [00:02<00:29,  7.82it/s][A[A[A


train:   7%|▋         | 18/250 [00:02<00:35,  6.48it/s][A[A[A


train:   8%|▊         | 20/250 [00:02<00:29,  7.67it/s][A[A[A


train:   9%|▉         | 22/250 [00:02<00:26,  8.64it/s][A[A[A


train:  10%|▉         | 24/250 [00:02<00:23,  9.48it/s][A[A[A



Epoch Time: 0m 31s
	Train Loss: 0.502 | Train Acc: 76.14%
	 Val. Loss: 0.518 |  Val. Acc: 77.83%
Epoch: 05





train:   1%|          | 3/250 [00:00<00:20, 11.79it/s][A[A[A


train:   2%|▏         | 4/250 [00:00<00:32,  7.47it/s][A[A[A


train:   2%|▏         | 6/250 [00:00<00:29,  8.30it/s][A[A[A


train:   3%|▎         | 8/250 [00:00<00:26,  9.07it/s][A[A[A


train:   4%|▎         | 9/250 [00:00<00:26,  9.14it/s][A[A[A


train:   4%|▍         | 11/250 [00:01<00:24,  9.85it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:29,  8.10it/s][A[A[A


train:   5%|▌         | 13/250 [00:01<00:27,  8.56it/s][A[A[A


train:   6%|▌         | 15/250 [00:01<00:27,  8.63it/s][A[A[A


train:   6%|▋         | 16/250 [00:02<00:57,  4.07it/s][A[A[A


train:   7%|▋         | 17/250 [00:02<00:57,  4.02it/s][A[A[A


train:   8%|▊         | 19/250 [00:02<00:44,  5.15it/s][A[A[A


train:   8%|▊         | 20/250 [00:02<00:38,  5.91it/s][A[A[A


train:   8%|▊         | 21/250 [00:02<00:36,  6.21it/s][A[A[A


train:   9%|▉         | 23/250 [00:03<00:32,  6.99it/s][A[A[A


Epoch Time: 0m 31s
	Train Loss: 0.395 | Train Acc: 82.25%
	 Val. Loss: 0.404 |  Val. Acc: 83.58%
Epoch: 06





train:   1%|          | 2/250 [00:00<00:35,  7.02it/s][A[A[A


train:   2%|▏         | 4/250 [00:00<00:33,  7.32it/s][A[A[A


train:   2%|▏         | 6/250 [00:00<00:28,  8.65it/s][A[A[A


train:   3%|▎         | 8/250 [00:00<00:24,  9.78it/s][A[A[A


train:   4%|▍         | 10/250 [00:00<00:21, 10.96it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:22, 10.59it/s][A[A[A


train:   6%|▌         | 14/250 [00:01<00:31,  7.59it/s][A[A[A


train:   6%|▌         | 15/250 [00:01<00:32,  7.26it/s][A[A[A


train:   6%|▋         | 16/250 [00:01<00:29,  7.88it/s][A[A[A


train:   7%|▋         | 18/250 [00:02<00:31,  7.37it/s][A[A[A


train:   8%|▊         | 19/250 [00:02<00:28,  7.99it/s][A[A[A


train:   8%|▊         | 21/250 [00:02<00:31,  7.19it/s][A[A[A


train:   9%|▉         | 23/250 [00:02<00:27,  8.21it/s][A[A[A


train:  10%|▉         | 24/250 [00:02<00:28,  7.81it/s][A[A[A


train:  10%|█         | 25/250 [00:03<00:35,  6.35it/s][A[A[


Epoch Time: 0m 32s
	Train Loss: 0.306 | Train Acc: 87.00%
	 Val. Loss: 0.426 |  Val. Acc: 85.32%
Epoch: 07





train:   1%|          | 3/250 [00:00<00:24, 10.27it/s][A[A[A


train:   2%|▏         | 5/250 [00:00<00:22, 10.97it/s][A[A[A


train:   2%|▏         | 6/250 [00:00<00:33,  7.20it/s][A[A[A


train:   3%|▎         | 8/250 [00:00<00:29,  8.19it/s][A[A[A


train:   4%|▍         | 10/250 [00:00<00:26,  9.18it/s][A[A[A


train:   4%|▍         | 11/250 [00:01<01:00,  3.93it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:50,  4.73it/s][A[A[A


train:   5%|▌         | 13/250 [00:01<00:42,  5.58it/s][A[A[A


train:   6%|▌         | 15/250 [00:01<00:34,  6.77it/s][A[A[A


train:   6%|▋         | 16/250 [00:02<00:39,  5.90it/s][A[A[A


train:   7%|▋         | 18/250 [00:02<00:32,  7.20it/s][A[A[A


train:   8%|▊         | 20/250 [00:02<00:30,  7.46it/s][A[A[A


train:   8%|▊         | 21/250 [00:02<00:33,  6.80it/s][A[A[A


train:   9%|▉         | 22/250 [00:02<00:31,  7.27it/s][A[A[A


train:   9%|▉         | 23/250 [00:03<00:47,  4.76it/s][A[A[


Epoch Time: 0m 32s
	Train Loss: 0.272 | Train Acc: 89.09%
	 Val. Loss: 0.426 |  Val. Acc: 85.62%
Epoch: 08





train:   1%|          | 2/250 [00:00<00:29,  8.50it/s][A[A[A


train:   2%|▏         | 4/250 [00:00<00:29,  8.42it/s][A[A[A


train:   2%|▏         | 6/250 [00:00<00:26,  9.21it/s][A[A[A


train:   3%|▎         | 8/250 [00:00<00:23, 10.24it/s][A[A[A


train:   4%|▎         | 9/250 [00:00<00:24,  9.89it/s][A[A[A


train:   4%|▍         | 10/250 [00:01<00:25,  9.52it/s][A[A[A


train:   4%|▍         | 11/250 [00:01<00:29,  8.16it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:37,  6.37it/s][A[A[A


train:   6%|▌         | 14/250 [00:01<00:30,  7.81it/s][A[A[A


train:   6%|▋         | 16/250 [00:01<00:24,  9.40it/s][A[A[A


train:   7%|▋         | 18/250 [00:01<00:21, 10.67it/s][A[A[A


train:   8%|▊         | 20/250 [00:02<00:39,  5.84it/s][A[A[A


train:   9%|▉         | 22/250 [00:02<00:34,  6.54it/s][A[A[A


train:   9%|▉         | 23/250 [00:02<00:32,  7.09it/s][A[A[A


train:  10%|█         | 25/250 [00:02<00:25,  8.74it/s][A[A[A


Epoch Time: 0m 32s
	Train Loss: 0.212 | Train Acc: 91.40%
	 Val. Loss: 0.370 |  Val. Acc: 85.96%
Epoch: 09





train:   1%|          | 3/250 [00:00<00:26,  9.21it/s][A[A[A


train:   2%|▏         | 4/250 [00:00<00:34,  7.22it/s][A[A[A


train:   2%|▏         | 5/250 [00:00<00:38,  6.39it/s][A[A[A


train:   3%|▎         | 7/250 [00:00<00:33,  7.32it/s][A[A[A


train:   4%|▎         | 9/250 [00:01<00:29,  8.27it/s][A[A[A


train:   4%|▍         | 10/250 [00:01<00:31,  7.65it/s][A[A[A


train:   5%|▍         | 12/250 [00:01<00:27,  8.81it/s][A[A[A


train:   6%|▌         | 14/250 [00:01<00:23,  9.88it/s][A[A[A


train:   6%|▋         | 16/250 [00:01<00:22, 10.19it/s][A[A[A


train:   7%|▋         | 18/250 [00:02<00:29,  7.99it/s][A[A[A


train:   8%|▊         | 19/250 [00:02<00:31,  7.38it/s][A[A[A


train:   8%|▊         | 20/250 [00:02<00:29,  7.75it/s][A[A[A


train:   8%|▊         | 21/250 [00:02<00:28,  7.93it/s][A[A[A


train:   9%|▉         | 23/250 [00:02<00:24,  9.14it/s][A[A[A


train:  10%|█         | 25/250 [00:02<00:24,  9.20it/s][A[A[A


Epoch Time: 0m 32s
	Train Loss: 0.189 | Train Acc: 92.35%
	 Val. Loss: 0.372 |  Val. Acc: 86.26%



