In [1]:
from __future__ import print_function

import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd
import re
import operator

import os

# GPU selection. Idx is kept as a string because it is written into the
# environment here and later appended to checkpoint file names.
Idx = "1"
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # enumerate GPUs by PCI bus id
    "CUDA_VISIBLE_DEVICES": Idx,         # expose only this GPU to the process
})

from tqdm import tqdm_notebook
import utils
# Device configuration: the first visible CUDA device when CUDA is usable,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [2]:
import utils

# Dataset identifiers noted by the author (presumably the names accepted by
# utils.LoadDatasets -- confirm against utils):
#   DBpedia, YahooAnswersUpper, YahooAnswersLower, YelpReviews, IMDB, IMDBv2, AGNews, YahooAnswerv2
(x_train, y_train,
 x_valid, y_valid,
 x_test, y_test,
 TopicList, Idx2Topic) = utils.LoadDatasets("IMDB")

# Idx2Topic looked up for every index 0..K-1, where K is the number of
# distinct entries in TopicList, collected into a NumPy array.
Idx2Topic_list = np.array(
    [Idx2Topic[topic_idx] for topic_idx in range(len(set(TopicList)))]
)

# When the loader returns an empty validation split, hold out 15% of the
# training data (fixed random_state so the split is repeatable).
if len(x_valid) == 0:
    from sklearn.model_selection import train_test_split
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=0.15, random_state=42
    )

In [3]:
# Vocabulary (token -> index) and token counts, both seeded with the two
# reserved tokens before the training text is processed.
WordDict = {"<NONE>": 0, "<OOV>": 1}
WordCnt = dict.fromkeys(WordDict, 0)

# Only the training pass (TrainFlag=True) has its returned WordDict / WordCnt /
# MaxSeqLen kept; the validation and test passes discard everything but the data.
data_train, WordDict, WordCnt, MaxSeqLen = utils.DataProcessing(
    x_train, WordDict, WordCnt, TrainFlag=True
)
data_valid, _, _, _ = utils.DataProcessing(x_valid, WordDict, WordCnt, TrainFlag=False)
data_test, _, _, _ = utils.DataProcessing(x_test, WordDict, WordCnt, TrainFlag=False)

# From here on WordCnt is a list of (token, count) pairs in ascending count
# order, and WordIdx is the inverse mapping index -> token.
WordCnt = sorted(WordCnt.items(), key=lambda pair: pair[1])
WordIdx = {index: token for token, index in WordDict.items()}

# Convert each split with utils.EmbeddingNumpy, in train / valid / test order.
x_train_emb, x_valid_emb, x_test_emb = (
    utils.EmbeddingNumpy(split, WordDict, MaxSeqLen)
    for split in (data_train, data_valid, data_test)
)
y_train, y_valid, y_test = (
    np.asarray(labels) for labels in (y_train, y_valid, y_test)
)

# Number of classes: large enough for both the count of distinct training
# labels and the largest training label value.
NumClass = max(max(y_train) + 1, len(set(y_train)))

HBox(children=(IntProgress(value=0, max=21250), HTML(value='')))




HBox(children=(IntProgress(value=0, max=3750), HTML(value='')))




HBox(children=(IntProgress(value=0, max=25000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=21250), HTML(value='')))




HBox(children=(IntProgress(value=0, max=3750), HTML(value='')))




HBox(children=(IntProgress(value=0, max=25000), HTML(value='')))




In [6]:
import dataloader
import copy
import random

### Hyperparameters
# Configuration dict handed to TextCNN.ConvNet and read by the training cell.
# Key meanings beyond what this notebook itself uses are defined in TextCNN.
Hyperparams = {
    "NumClfEpoch" : 100,          # upper bound on epochs per classifier training run
    "EmbeddingSize" : 300, # 300, 768
    "KernelSize" : [2,3,4,5],
    "BatchSize" : 2**8,           # batch size for all three DataLoaders below
    "LearningRate" : 1e-3,        # Adam learning rate in the training cell
    "ChannelSize" : [32, 16],
    "UsePreWordVector" : False,
#     "UsePreWordVector" : "../../Data/PretrainedWV/glove.42B.300d.txt",
#     "UsePreWordVector" : "../../Data/PretrainedWV/GoogleNews-vectors-negative300.txt",
#     "UsePreWordVector" : "../../Data/PretrainedWV/wiki-news-300d-1M-subword.vec",
#     "UsePreWordVector" : "../../Data/PretrainedWV/GloVeSelfExtro5_Dim50.txt",
    "WordVectorNorm" : False,
    "MaxSeqLen" : MaxSeqLen,
    "NumClass" : NumClass,
    "MaskOutRate" : .2,           # fraction of the vocabulary perturbed per meta-epoch
    "DropoutRate" : .0,
    "Normaliz" : "None", # Batch / Layer / None
}

###
# Number of vocabulary entries covered by one MaskOutRate-sized slice.
MaskerNum = int(len(WordDict)*Hyperparams["MaskOutRate"])
UNKWords = []

print(len(UNKWords), '/', len(WordDict))
# Inverse vocabulary: index -> token.
WordIdx = dict(zip(WordDict.values(), WordDict.keys()))

train_dataset = dataloader.ClassifyDataset(x=x_train_emb, y=y_train)
valid_dataset = dataloader.ClassifyDataset(x=x_valid_emb, y=y_valid)
test_dataset = dataloader.ClassifyDataset(x=x_test_emb, y=y_test)
# Only the training loader is shuffled. Accuracy over the validation / test
# sets does not depend on batch order, and shuffling them would only draw
# extra values from the global RNG on every evaluation pass.
train_loader = DataLoader(dataset=train_dataset, batch_size=Hyperparams["BatchSize"], shuffle=True, num_workers=0)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=Hyperparams["BatchSize"], shuffle=False, num_workers=0)
test_loader = DataLoader(dataset=test_dataset, batch_size=Hyperparams["BatchSize"], shuffle=False, num_workers=0)

0 / 77319


In [None]:
import TextCNN
# `imp` is deprecated and was removed in Python 3.12; use importlib.reload and
# fall back to `imp` only on interpreters that do not provide it.
try:
    from importlib import reload
except ImportError:
    from imp import reload
# Re-import TextCNN so edits to the module take effect without a kernel restart.
reload(TextCNN)

from sklearn import decomposition
from sklearn.decomposition import PCA

def _evaluate_accuracy(net, loader):
    """Count correct predictions of `net` over every batch in `loader`.

    Switches `net` to eval mode and disables gradient tracking. The caller
    must call `net.train()` again before resuming training.

    Returns:
        (correct, total): number of correctly classified samples (float) and
        number of samples seen (int).
    """
    net.eval()
    n_correct = 0.
    n_total = 0
    with torch.no_grad():
        for texts, labels in loader:
            texts = texts.to(device)
            labels = labels.to(device)
            outputs = net(texts)
            _, predicted = torch.max(outputs, 1)
            n_total += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    return n_correct, n_total


# All checkpoints below are written under ./save; make sure it exists so the
# first torch.save does not fail on a fresh checkout.
os.makedirs("./save", exist_ok=True)

# Three independent repetitions of the whole procedure. Each repetition runs
# MetaEpochs+1 meta-epochs; a meta-epoch trains a classifier with early
# stopping, scores the best checkpoint on the test set, then builds a fresh
# model whose embedding is derived from the saved embeddings plus uniform
# noise on a slice of the vocabulary (WordCnt, ordered by ascending count).
for Iter in range(3):
    model = TextCNN.ConvNet(WordDict, Hyperparams)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()

    Range = 1.      # noise is drawn uniformly from [-Range, Range)
    UNKWords = []   # words whose embedding rows receive noise

    # Train the model
    maxacc_all = 0  # best validation accuracy over all meta-epochs so far
    Step = 0
    MetaEpochs = int(1./Hyperparams["MaskOutRate"])
    StopFlag = False  # set once the word slice runs past the end of WordCnt

    for meep in range(MetaEpochs+1):
        # Snapshot of the embedding this meta-epoch starts from; used below to
        # measure how far training moved it.
        emb_init = copy.deepcopy(model.embedding.weight)
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                         lr=Hyperparams["LearningRate"])
        maxacc_val = 0     # best validation accuracy within this meta-epoch
        EarlyStopCnt = 5   # total budget of non-improving epochs (never reset)
        total_step = len(train_loader)
        pbar1 = tqdm_notebook(total = Hyperparams["NumClfEpoch"], leave=False, desc="Epoch")

        for epoch in range(Hyperparams["NumClfEpoch"]):

            pbar1.update(1)
            # Validation at the end of every epoch leaves the model in eval
            # mode, so train mode has to be restored at the start of each epoch.
            model.train()
            for i, (texts, labels) in enumerate(train_loader):
                texts = texts.to(device)
                labels = labels.to(device)
                # Forward pass
                outputs = model(texts)
                loss = criterion(outputs, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Loss shown is that of the last training batch of the epoch.
            print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, Hyperparams["NumClfEpoch"], loss.item()), end=' ')

            # Valid the model
            correct, total = _evaluate_accuracy(model, valid_loader)

            # Checkpoint on every new best (ties included) so the best epoch
            # can be restored after early stopping.
            if (correct/total) >= maxacc_val:
                maxacc_val = correct/total
                torch.save(model.state_dict(), "./save/model"+ Idx)
                torch.save(model.embedding.weight, "./save/embedding"+ Idx)

            print('ValidAcc: {:.4f} % , MaxAcc: {:.4f}'.format(100 * correct/total, maxacc_val))
            if (correct/total) < maxacc_val:
                EarlyStopCnt = EarlyStopCnt-1
                if EarlyStopCnt == 0:
                    break

        ### Test Acc.
        # Restore the best-validation checkpoint of this meta-epoch before testing.
        model.load_state_dict(torch.load("./save/model"+ Idx))
        correct, total = _evaluate_accuracy(model, test_loader)
        print('TestAcc: {:.4f} % , ValidAcc: {:.4f}'.format(100 * correct / total, maxacc_val))

        pbar1.close()

        if StopFlag:
            if maxacc_val >= maxacc_all:
                maxacc_all = maxacc_val
                BestAcc = np.round(100*correct/total, 2)
            break

        # Keep the overall-best model/embedding separately from the per-meta-epoch best.
        if maxacc_val >= maxacc_all:
            torch.save(model.state_dict(), "./save/model_opt"+ Idx)
            torch.save(model.embedding.weight, "./save/embedding_opt"+ Idx)

        # Start the next meta-epoch from a freshly initialised network; only
        # the embedding is carried over (set below).
        model = TextCNN.ConvNet(WordDict, Hyperparams).to(device)
        # NOTE: this optimizer is replaced at the top of the next meta-epoch.
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=Hyperparams["LearningRate"])

        emb_opt = torch.load("./save/embedding_opt"+Idx)  # embedding of the best meta-epoch so far
        emb_cur = torch.load("./save/embedding"+Idx)      # embedding of this meta-epoch's best epoch

        ### Integrating Pretrained WV with Trained Masked WV
        if meep == 0: FirstAcc = np.round(100*correct/total, 2)
        slice_start = int((meep)*MaskerNum)
        if maxacc_val >= maxacc_all:
            print("++++++++++Step Forward", np.round(correct/total*100, 2))
            # Improved: extrapolate along the direction training moved the embedding.
            model.embedding.weight.data = emb_opt + (1/Hyperparams["MaskOutRate"])*(emb_cur - emb_init)

            maxacc_all = maxacc_val
            BestAcc = np.round(100*correct/total, 2)
            ###
            # Replace the noised word set with the next slice of WordCnt.
            try:
                UNKWords = [ WordCnt[i][0] for i in range(slice_start, int((meep+1)*MaskerNum)+1) ]
                UNKWords = list(set(UNKWords))

            except IndexError:
                # Slice runs past the end of the vocabulary: take the remaining
                # tail once (no duplicates) and stop after the next meta-epoch.
                UNKWords = [ WordCnt[i][0] for i in range(slice_start, len(WordCnt)) ]
                StopFlag = True

        else:
            print("----------Step Backward")
            # Not improved: step from the best embedding against the direction of this run.
            model.embedding.weight.data = emb_opt - (Hyperparams["MaskOutRate"])*(emb_cur - emb_init)
            ###
            # Extend (rather than replace) the noised word set with the next slice.
            try:
                UNKWords += [ WordCnt[i][0] for i in range(slice_start, int((meep+1)*MaskerNum)) ]
                UNKWords = list(set(UNKWords))

            except IndexError:
                UNKWords += [ WordCnt[i][0] for i in range(slice_start, len(WordCnt)) ]
                StopFlag = True

        # Add an independent uniform noise row to the embedding of every
        # selected word (same for both branches above).
        NoiseTensor = Range * (2*torch.rand(emb_opt.size()).to(device) - 1)
        for nt_idx, unkmask in enumerate(UNKWords):
            model.embedding.weight.data[WordDict[unkmask]] += NoiseTensor[nt_idx]
    # Test accuracy after the first meta-epoch vs. at the best-validation meta-epoch.
    print(Iter+1, "::::::::::", FirstAcc, '->', BestAcc)

HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.3051 ValidAcc: 77.3867 % , MaxAcc: 0.7739
Epoch [2/100], Loss: 0.6426 ValidAcc: 83.4933 % , MaxAcc: 0.8349
Epoch [3/100], Loss: 0.3220 ValidAcc: 79.1467 % , MaxAcc: 0.8349
Epoch [4/100], Loss: 0.5044 ValidAcc: 84.4000 % , MaxAcc: 0.8440
Epoch [5/100], Loss: 0.0757 ValidAcc: 86.1867 % , MaxAcc: 0.8619
Epoch [6/100], Loss: 0.0328 ValidAcc: 86.7467 % , MaxAcc: 0.8675
Epoch [7/100], Loss: 0.0003 ValidAcc: 87.2000 % , MaxAcc: 0.8720
Epoch [8/100], Loss: 0.0093 ValidAcc: 87.3600 % , MaxAcc: 0.8736
Epoch [9/100], Loss: 0.0019 ValidAcc: 87.2000 % , MaxAcc: 0.8736
Epoch [10/100], Loss: 0.0157 ValidAcc: 87.3067 % , MaxAcc: 0.8736
Epoch [11/100], Loss: 0.0118 ValidAcc: 87.1733 % , MaxAcc: 0.8736
Epoch [12/100], Loss: 0.0007 ValidAcc: 87.4133 % , MaxAcc: 0.8741
Epoch [13/100], Loss: 0.0000 ValidAcc: 87.3600 % , MaxAcc: 0.8741
TestAcc: 86.7560 % , ValidAcc: 0.8741
++++++++++Step Forward 86.76


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.3246 ValidAcc: 86.3467 % , MaxAcc: 0.8635
Epoch [2/100], Loss: 0.0051 ValidAcc: 88.8000 % , MaxAcc: 0.8880
Epoch [3/100], Loss: 0.2439 ValidAcc: 86.9333 % , MaxAcc: 0.8880
Epoch [4/100], Loss: 0.0023 ValidAcc: 89.3600 % , MaxAcc: 0.8936
Epoch [5/100], Loss: 0.0879 ValidAcc: 89.7067 % , MaxAcc: 0.8971
Epoch [6/100], Loss: 0.0002 ValidAcc: 89.7867 % , MaxAcc: 0.8979
Epoch [7/100], Loss: 0.0205 ValidAcc: 89.5733 % , MaxAcc: 0.8979
Epoch [8/100], Loss: 0.0009 ValidAcc: 89.7067 % , MaxAcc: 0.8979
Epoch [9/100], Loss: 0.0207 ValidAcc: 89.6000 % , MaxAcc: 0.8979
Epoch [10/100], Loss: 0.0001 ValidAcc: 89.7067 % , MaxAcc: 0.8979
TestAcc: 88.7920 % , ValidAcc: 0.8979
++++++++++Step Forward 88.79


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.0044 ValidAcc: 87.2533 % , MaxAcc: 0.8725
Epoch [2/100], Loss: 0.0139 ValidAcc: 88.9600 % , MaxAcc: 0.8896
Epoch [3/100], Loss: 0.0119 ValidAcc: 89.3067 % , MaxAcc: 0.8931
Epoch [4/100], Loss: 0.0006 ValidAcc: 88.6133 % , MaxAcc: 0.8931
Epoch [5/100], Loss: 0.0003 ValidAcc: 88.8533 % , MaxAcc: 0.8931
Epoch [6/100], Loss: 0.0002 ValidAcc: 88.9067 % , MaxAcc: 0.8931
Epoch [7/100], Loss: 0.0000 ValidAcc: 88.9600 % , MaxAcc: 0.8931
Epoch [8/100], Loss: 0.0003 ValidAcc: 89.0933 % , MaxAcc: 0.8931
TestAcc: 88.6160 % , ValidAcc: 0.8931
----------Step Backward


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.0145 ValidAcc: 87.4667 % , MaxAcc: 0.8747
Epoch [2/100], Loss: 0.0097 ValidAcc: 87.5200 % , MaxAcc: 0.8752
Epoch [3/100], Loss: 0.2647 ValidAcc: 87.9200 % , MaxAcc: 0.8792
Epoch [4/100], Loss: 0.0047 ValidAcc: 88.5333 % , MaxAcc: 0.8853
Epoch [5/100], Loss: 0.0589 ValidAcc: 89.2267 % , MaxAcc: 0.8923
Epoch [6/100], Loss: 0.0294 ValidAcc: 89.3333 % , MaxAcc: 0.8933
Epoch [7/100], Loss: 0.0132 ValidAcc: 89.3333 % , MaxAcc: 0.8933
Epoch [8/100], Loss: 0.0000 ValidAcc: 89.3333 % , MaxAcc: 0.8933
Epoch [9/100], Loss: 0.0000 ValidAcc: 89.2800 % , MaxAcc: 0.8933
Epoch [10/100], Loss: 0.0025 ValidAcc: 89.2533 % , MaxAcc: 0.8933
Epoch [11/100], Loss: 0.0001 ValidAcc: 89.2267 % , MaxAcc: 0.8933
Epoch [12/100], Loss: 0.0002 ValidAcc: 89.3333 % , MaxAcc: 0.8933
Epoch [13/100], Loss: 0.0011 ValidAcc: 89.2533 % , MaxAcc: 0.8933
Epoch [14/100], Loss: 0.0007 ValidAcc: 89.0667 % , MaxAcc: 0.8933
TestAcc: 88.7280 % , ValidAcc: 0.8933
----------Step Backward


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.1311 ValidAcc: 87.8133 % , MaxAcc: 0.8781
Epoch [2/100], Loss: 0.0029 ValidAcc: 88.0800 % , MaxAcc: 0.8808
Epoch [3/100], Loss: 0.0450 ValidAcc: 88.5600 % , MaxAcc: 0.8856
Epoch [4/100], Loss: 0.0001 ValidAcc: 89.7067 % , MaxAcc: 0.8971
Epoch [5/100], Loss: 0.0119 ValidAcc: 89.5733 % , MaxAcc: 0.8971
Epoch [6/100], Loss: 0.0044 ValidAcc: 89.6000 % , MaxAcc: 0.8971
Epoch [7/100], Loss: 0.0068 ValidAcc: 89.8667 % , MaxAcc: 0.8987
Epoch [8/100], Loss: 0.0000 ValidAcc: 89.7600 % , MaxAcc: 0.8987
Epoch [9/100], Loss: 0.0027 ValidAcc: 89.5733 % , MaxAcc: 0.8987
Epoch [10/100], Loss: 0.0001 ValidAcc: 89.7067 % , MaxAcc: 0.8987
TestAcc: 88.9120 % , ValidAcc: 0.8987
++++++++++Step Forward 88.91


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.4226 ValidAcc: 86.9867 % , MaxAcc: 0.8699
Epoch [2/100], Loss: 0.0021 ValidAcc: 88.9600 % , MaxAcc: 0.8896
Epoch [3/100], Loss: 0.0598 ValidAcc: 89.1733 % , MaxAcc: 0.8917
Epoch [4/100], Loss: 0.0000 ValidAcc: 89.5467 % , MaxAcc: 0.8955
Epoch [5/100], Loss: 0.0001 ValidAcc: 89.6000 % , MaxAcc: 0.8960
Epoch [6/100], Loss: 0.0010 ValidAcc: 89.5200 % , MaxAcc: 0.8960
Epoch [7/100], Loss: 0.0005 ValidAcc: 89.4667 % , MaxAcc: 0.8960
Epoch [8/100], Loss: 0.0007 ValidAcc: 89.3600 % , MaxAcc: 0.8960
Epoch [9/100], Loss: 0.0016 ValidAcc: 89.4133 % , MaxAcc: 0.8960
Epoch [10/100], Loss: 0.0001 ValidAcc: 89.3600 % , MaxAcc: 0.8960
TestAcc: 87.6280 % , ValidAcc: 0.8960
----------Step Backward
1 :::::::::: 86.76 -> 88.91


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.4645 ValidAcc: 75.7867 % , MaxAcc: 0.7579
Epoch [2/100], Loss: 0.3271 ValidAcc: 81.7067 % , MaxAcc: 0.8171
Epoch [3/100], Loss: 0.0268 ValidAcc: 81.0667 % , MaxAcc: 0.8171
Epoch [4/100], Loss: 0.0349 ValidAcc: 84.3200 % , MaxAcc: 0.8432
Epoch [5/100], Loss: 0.0419 ValidAcc: 85.0400 % , MaxAcc: 0.8504
Epoch [6/100], Loss: 0.0055 ValidAcc: 85.1733 % , MaxAcc: 0.8517
Epoch [7/100], Loss: 0.1956 ValidAcc: 84.1600 % , MaxAcc: 0.8517
Epoch [8/100], Loss: 0.0128 ValidAcc: 85.6800 % , MaxAcc: 0.8568
Epoch [9/100], Loss: 0.0017 ValidAcc: 86.2133 % , MaxAcc: 0.8621
Epoch [10/100], Loss: 0.0014 ValidAcc: 86.0800 % , MaxAcc: 0.8621
Epoch [11/100], Loss: 0.0006 ValidAcc: 86.2400 % , MaxAcc: 0.8624
Epoch [12/100], Loss: 0.0100 ValidAcc: 86.1600 % , MaxAcc: 0.8624
Epoch [13/100], Loss: 0.0050 ValidAcc: 86.2667 % , MaxAcc: 0.8627
Epoch [14/100], Loss: 0.0015 ValidAcc: 86.2133 % , MaxAcc: 0.8627
TestAcc: 85.2920 % , ValidAcc: 0.8627
++++++++++Step Forward 85.29


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.0862 ValidAcc: 87.2267 % , MaxAcc: 0.8723
Epoch [2/100], Loss: 0.2367 ValidAcc: 84.8800 % , MaxAcc: 0.8723
Epoch [3/100], Loss: 0.0381 ValidAcc: 88.6667 % , MaxAcc: 0.8867
Epoch [4/100], Loss: 0.0113 ValidAcc: 88.8267 % , MaxAcc: 0.8883
Epoch [5/100], Loss: 0.0001 ValidAcc: 89.6000 % , MaxAcc: 0.8960
Epoch [6/100], Loss: 0.0274 ValidAcc: 89.6000 % , MaxAcc: 0.8960
Epoch [7/100], Loss: 0.0005 ValidAcc: 89.5733 % , MaxAcc: 0.8960
Epoch [8/100], Loss: 0.0061 ValidAcc: 89.5733 % , MaxAcc: 0.8960
Epoch [9/100], Loss: 0.0002 ValidAcc: 89.6533 % , MaxAcc: 0.8965
Epoch [10/100], Loss: 0.0001 ValidAcc: 89.6800 % , MaxAcc: 0.8968
Epoch [11/100], Loss: 0.0000 ValidAcc: 89.5733 % , MaxAcc: 0.8968
Epoch [12/100], Loss: 0.0000 ValidAcc: 89.5467 % , MaxAcc: 0.8968
TestAcc: 88.7840 % , ValidAcc: 0.8968
++++++++++Step Forward 88.78


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.2077 ValidAcc: 86.3467 % , MaxAcc: 0.8635
Epoch [2/100], Loss: 0.0349 ValidAcc: 88.2400 % , MaxAcc: 0.8824
Epoch [3/100], Loss: 0.0020 ValidAcc: 88.6400 % , MaxAcc: 0.8864
Epoch [4/100], Loss: 0.0035 ValidAcc: 88.9333 % , MaxAcc: 0.8893
Epoch [5/100], Loss: 0.0129 ValidAcc: 89.0400 % , MaxAcc: 0.8904
Epoch [6/100], Loss: 0.0015 ValidAcc: 88.8000 % , MaxAcc: 0.8904
Epoch [7/100], Loss: 0.0008 ValidAcc: 88.9067 % , MaxAcc: 0.8904
Epoch [8/100], Loss: 0.0001 ValidAcc: 88.8000 % , MaxAcc: 0.8904
Epoch [9/100], Loss: 0.0000 ValidAcc: 89.0400 % , MaxAcc: 0.8904
Epoch [10/100], Loss: 0.0000 ValidAcc: 88.8533 % , MaxAcc: 0.8904
Epoch [11/100], Loss: 0.0002 ValidAcc: 89.0667 % , MaxAcc: 0.8907
Epoch [12/100], Loss: 0.0000 ValidAcc: 88.9600 % , MaxAcc: 0.8907
TestAcc: 88.3440 % , ValidAcc: 0.8907
----------Step Backward


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 1.6582 ValidAcc: 87.7067 % , MaxAcc: 0.8771
Epoch [2/100], Loss: 0.0185 ValidAcc: 89.2533 % , MaxAcc: 0.8925
Epoch [3/100], Loss: 0.0030 ValidAcc: 89.8400 % , MaxAcc: 0.8984
Epoch [4/100], Loss: 0.0783 ValidAcc: 88.8267 % , MaxAcc: 0.8984
Epoch [5/100], Loss: 0.0003 ValidAcc: 88.9067 % , MaxAcc: 0.8984
Epoch [6/100], Loss: 0.0001 ValidAcc: 89.7067 % , MaxAcc: 0.8984
Epoch [7/100], Loss: 0.0018 ValidAcc: 89.7867 % , MaxAcc: 0.8984
Epoch [8/100], Loss: 0.0007 ValidAcc: 89.7867 % , MaxAcc: 0.8984
TestAcc: 88.4680 % , ValidAcc: 0.8984
++++++++++Step Forward 88.47


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.7552 ValidAcc: 87.3067 % , MaxAcc: 0.8731
Epoch [2/100], Loss: 0.2185 ValidAcc: 89.1467 % , MaxAcc: 0.8915
Epoch [3/100], Loss: 0.0006 ValidAcc: 89.2533 % , MaxAcc: 0.8925
Epoch [4/100], Loss: 0.0083 ValidAcc: 89.4133 % , MaxAcc: 0.8941
Epoch [5/100], Loss: 0.0010 ValidAcc: 89.4667 % , MaxAcc: 0.8947
Epoch [6/100], Loss: 0.0069 ValidAcc: 89.6000 % , MaxAcc: 0.8960
Epoch [7/100], Loss: 0.0071 ValidAcc: 89.6267 % , MaxAcc: 0.8963
Epoch [8/100], Loss: 0.0141 ValidAcc: 89.8667 % , MaxAcc: 0.8987
Epoch [9/100], Loss: 0.0006 ValidAcc: 89.5733 % , MaxAcc: 0.8987
Epoch [10/100], Loss: 0.0000 ValidAcc: 89.5467 % , MaxAcc: 0.8987
Epoch [11/100], Loss: 0.0027 ValidAcc: 89.5467 % , MaxAcc: 0.8987
Epoch [12/100], Loss: 0.0012 ValidAcc: 89.6000 % , MaxAcc: 0.8987
Epoch [13/100], Loss: 0.0017 ValidAcc: 89.7067 % , MaxAcc: 0.8987
TestAcc: 88.6040 % , ValidAcc: 0.8987
++++++++++Step Forward 88.6


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…

Epoch [1/100], Loss: 0.3220 ValidAcc: 85.7333 % , MaxAcc: 0.8573
Epoch [2/100], Loss: 0.0044 ValidAcc: 87.9733 % , MaxAcc: 0.8797
Epoch [3/100], Loss: 0.0450 ValidAcc: 88.2933 % , MaxAcc: 0.8829
Epoch [4/100], Loss: 0.0000 ValidAcc: 87.8933 % , MaxAcc: 0.8829
Epoch [5/100], Loss: 0.0000 ValidAcc: 87.8933 % , MaxAcc: 0.8829
Epoch [6/100], Loss: 0.0005 ValidAcc: 87.8133 % , MaxAcc: 0.8829
Epoch [7/100], Loss: 0.0019 ValidAcc: 87.7867 % , MaxAcc: 0.8829
Epoch [8/100], Loss: 0.0000 ValidAcc: 87.9200 % , MaxAcc: 0.8829
TestAcc: 87.1080 % , ValidAcc: 0.8829
----------Step Backward
2 :::::::::: 85.29 -> 88.6


HBox(children=(IntProgress(value=0, description='Epoch', style=ProgressStyle(description_width='initial')), HT…