In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd
# from omni import *
from mixup import * 

In [2]:
"""
Creates a MobileNetV3 Model as defined in:
Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. (2019).
Searching for MobileNetV3
arXiv preprint arXiv:1905.02244.
"""

import torch.nn as nn
import math


__all__ = ['mobilenetv3_large', 'mobilenetv3_small']


def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class h_sigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return self.relu(x + 3) / 6


class h_swish(nn.Module):
    def __init__(self, inplace=True):
        super(h_swish, self).__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        return x * self.sigmoid(x)


class SELayer(nn.Module):
    def __init__(self, channel, reduction=4):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
                nn.Linear(channel, _make_divisible(channel // reduction, 8)),
                nn.ReLU(inplace=True),
                nn.Linear(_make_divisible(channel // reduction, 8), channel),
                h_sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y


def conv_3x3_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        h_swish()
    )


def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        h_swish()
    )


class InvertedResidual(nn.Module):
    def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        self.identity = stride == 1 and inp == oup

        if inp == hidden_dim:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # Squeeze-and-Excite
                SELayer(hidden_dim) if use_se else nn.Identity(),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                # Squeeze-and-Excite
                SELayer(hidden_dim) if use_se else nn.Identity(),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.identity:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV3(nn.Module):
    def __init__(self, cfgs, mode, num_classes=1000, width_mult=1.):
        super(MobileNetV3, self).__init__()
        # setting of inverted residual blocks
        self.cfgs = cfgs
        assert mode in ['large', 'small']

        # building first layer
        input_channel = _make_divisible(16 * width_mult, 8)
        layers = [conv_3x3_bn(3, input_channel, 2)]
        # building inverted residual blocks
        block = InvertedResidual
        for k, t, c, use_se, use_hs, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, 8)
            exp_size = _make_divisible(input_channel * t, 8)
            layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs))
            input_channel = output_channel
        self.features = nn.Sequential(*layers)
        # building last several layers
        self.conv = conv_1x1_bn(input_channel, exp_size)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        output_channel = {'large': 1280, 'small': 1024}
        output_channel = _make_divisible(output_channel[mode] * width_mult, 8) if width_mult > 1.0 else output_channel[mode]
        self.classifier = nn.Sequential(
            nn.Linear(exp_size, output_channel),
            h_swish(),
            nn.Dropout(0.2),
            nn.Linear(output_channel, num_classes),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.conv(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def mobilenetv3_large(**kwargs):
    """
    Constructs a MobileNetV3-Large model
    """
    cfgs = [
        # k, t, c, SE, HS, s
        [3,   1,  16, 0, 0, 1],
        [3,   4,  24, 0, 0, 2],
        [3,   3,  24, 0, 0, 1],
        [5,   3,  40, 1, 0, 2],
        [5,   3,  40, 1, 0, 1],
        [5,   3,  40, 1, 0, 1],
        [3,   6,  80, 0, 1, 2],
        [3, 2.5,  80, 0, 1, 1],
        [3, 2.3,  80, 0, 1, 1],
        [3, 2.3,  80, 0, 1, 1],
        [3,   6, 112, 1, 1, 1],
        [3,   6, 112, 1, 1, 1],
        [5,   6, 160, 1, 1, 2],
        [5,   6, 160, 1, 1, 1],
        [5,   6, 160, 1, 1, 1]
    ]
    return MobileNetV3(cfgs, mode='large', **kwargs)


def mobilenetv3_small(**kwargs):
    """
    Constructs a MobileNetV3-Small model
    """
    cfgs = [
        # k, t, c, SE, HS, s
        [3,    1,  16, 1, 0, 2],
        [3,  4.5,  24, 0, 0, 2],
        [3, 3.67,  24, 0, 0, 1],
        [5,    4,  40, 1, 1, 2],
        [5,    6,  40, 1, 1, 1],
        [5,    6,  40, 1, 1, 1],
        [5,    3,  48, 1, 1, 1],
        [5,    3,  48, 1, 1, 1],
        [5,    6,  96, 1, 1, 2],
        [5,    6,  96, 1, 1, 1],
        [5,    6,  96, 1, 1, 1],
    ]

    return MobileNetV3(cfgs, mode='small', **kwargs)

In [3]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Training

In [4]:
import torch
import torch.nn as nn
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import numpy as np

In [5]:
def load_data(data_dir, download = True):

  transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

  train_data = datasets.CIFAR100(
      root = data_dir, train = True,
      download = download, transform = transform
  )

  test_data = datasets.CIFAR100(
      root = data_dir, train = False,
      download = download, transform = transform
  )

  return (train_data, test_data)

train_data, test_data = load_data('./data/cifar100')

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar100/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:21<00:00, 7926044.11it/s] 


Extracting ./data/cifar100/cifar-100-python.tar.gz to ./data/cifar100
Files already downloaded and verified


In [6]:
batch_size = 128
num_workers = 4

train_loader = DataLoader(train_data, batch_size = batch_size,
                          shuffle = True, num_workers = num_workers)
test_loader = DataLoader(test_data, batch_size = batch_size,
                         shuffle = True, num_workers = num_workers)

In [7]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [8]:
import logging
import os
from tqdm.notebook import tqdm

def check_logging_directory(path):
  parent_directory = os.path.dirname(path)
  if not os.path.exists(parent_directory):
    os.makedirs(parent_directory)
    print("Create new directory")

logging_path = './logging/mixup_mobilenetv3_normallarge_cifar100.log'
check_logging_directory(logging_path)

logging.basicConfig(filename=logging_path, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


In [9]:
from gradient_descent_the_ultimate_optimizer import gdtuo

criterion = nn.CrossEntropyLoss().to(device)

num_epochs = 100

mobile_v3 = mobilenetv3_large(num_classes = 100).to(device)
optim = gdtuo.Adam(optimizer=gdtuo.SGD(1e-5))
mw = gdtuo.ModuleWrapper(mobile_v3, optimizer=optim)
mw.initialize()

print(f"The number of parameters: {count_parameters(mobile_v3)}")


The number of parameters: 4330132


In [10]:
from thop import profile

input_size = (1, 3, 224, 224)

flops, params = profile(mobile_v3, inputs=(torch.randn(*input_size).to(device),))
print(f"FLOPs: {flops / 1e9} billion")
print(f"Parameters: {params / 1e6} million")

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU6'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
FLOPs: 0.233686456 billion
Parameters: 4.330132 million


In [None]:
# Huấn luyện mô hình
train_loss, val_loss = [], []
train_acc, val_acc = [], []

epoch_bar = tqdm(desc = 'Epoch',
                 total = num_epochs, position = 1)
train_bar = tqdm(desc = 'Training', total = len(train_loader),
                 position = 1, leave = True)
val_bar = tqdm(desc = 'Validation', total = len(test_loader),
               position = 1, leave = True)

print("🚀 Training MobileNetV3 - Omni Dimensional Dynamic Convolution 🚀")
logging.info("🚀 Training MobileNetV3 - Omni Dimensional Dynamic Convolution 🚀")

for epoch in range(num_epochs):

    epoch_bar.set_description(f'Epoch {epoch + 1}/{num_epochs}')

    running_loss = 0.0
    running_acc = 0.0
    total_loss = 0.0
    total_acc = 0.0

    total = 0
    for i, (X, y) in enumerate(train_loader):

        mw.begin()
        mw.zero_grad()
        X, y = X.to(device), y.to(device)
        X, y_origin, y_sampled, lam = mixup_data(X, y, device, alpha = 0.4)
        
        # Forward pass
        output = mw.forward(X)
        loss = mixup_criterion(criterion, output, y_origin, y_sampled, lam)
        
        # Backward pass
        loss.backward(create_graph = True)
        mw.step()
    
        loss_t = loss.item()
        running_loss += (loss_t - running_loss) / (i + 1)
        total_loss += loss_t
        
        # Calculating the accuracy
        _, predicted = torch.max(output.data, 1)
        n_correct = (lam * predicted.eq(y_origin.data).cpu().sum().float()
                    + (1 - lam) * predicted.eq(y_sampled.data).cpu().sum().float())

        acc_t = n_correct / len(predicted) * 100
        running_acc += (acc_t - running_acc) / (i + 1)

        total_acc += n_correct
        total += y.shape[0]


        train_bar.set_postfix(loss = running_loss,
                              acc = f"{running_acc:.2f}%",
                              epoch = epoch + 1)
        train_bar.update()

    # mw.begin()
    current_loss = total_loss / len(train_loader)
    current_acc = total_acc / total * 100
    train_loss.append(current_loss)
    train_acc.append(current_acc)

    print("========================================")
    print("\033[1;34m" + f"Epoch {epoch + 1}/{num_epochs}" + "\033[0m")
    print(f"Train Loss: {current_loss:.2f}\t|\tTrain Acc: {current_acc:.2f}%")

    logging.info("========================================")
    logging.info("\033[1;34m" + f"Epoch {epoch + 1}/{num_epochs}" + "\033[0m")
    logging.info(f"Train Loss: {current_loss:.2f}  -   Train Acc: {current_acc:.2f}%")


    # Eval trên valid set
    running_loss = 0.0
    running_acc = 0.0
    total_loss = 0.0
    total_acc = 0.0

    total = 0
    # mw.end()
    # mw.eval()
    with torch.no_grad():
        for i, (X, y) in enumerate(test_loader):

            X, y = X.to(device), y.to(device)
            # Forward pass
            output = mw.forward(X)

            # Calculate Loss
            loss = criterion(output, y)
            loss_t = loss.item()
            running_loss += (loss_t - running_loss) / (i + 1)
            total_loss += loss_t

            # Calculate Accuracies
            _, predicted = torch.max(output.data, 1)
            n_correct = (predicted == y).sum().item()
            acc_t = n_correct / len(predicted) * 100
            running_acc += (acc_t - running_acc) / (i + 1)
            total_acc += n_correct

            total += y.shape[0]

            val_bar.set_postfix(loss = running_loss,
                                acc = f"{running_acc:.2f}%",
                                epoch = epoch + 1)
            val_bar.update()

    current_loss = total_loss / len(test_loader)
    current_acc = total_acc / total * 100

    val_loss.append(current_loss)
    val_acc.append(current_acc)

    print(f"Val Loss: {current_loss:.2f}\t|\tVal Acc: {current_acc:.2f}%")
    logging.info(f"Val Loss: {current_loss:.2f}  -  Val Acc: {current_acc:.2f}%")

    train_bar.n = 0
    val_bar.n = 0
    epoch_bar.update()


print("========================================")
print("Training Completed! 😀")
logging.info("========================================")
logging.info("Training Completed! 😀")

Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/391 [00:00<?, ?it/s]

Validation:   0%|          | 0/79 [00:00<?, ?it/s]

🚀 Training MobileNetV3 - Omni Dimensional Dynamic Convolution 🚀


  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[1;34mEpoch 1/100[0m
Train Loss: 4.04	|	Train Acc: 8.65%
Val Loss: 3.44	|	Val Acc: 16.88%
[1;34mEpoch 2/100[0m
Train Loss: 3.63	|	Train Acc: 16.24%
Val Loss: 2.97	|	Val Acc: 25.72%
[1;34mEpoch 3/100[0m
Train Loss: 3.33	|	Train Acc: 22.08%
Val Loss: 2.62	|	Val Acc: 33.48%
[1;34mEpoch 4/100[0m
Train Loss: 3.15	|	Train Acc: 26.26%
Val Loss: 2.43	|	Val Acc: 38.43%
[1;34mEpoch 5/100[0m
Train Loss: 2.81	|	Train Acc: 33.96%
Val Loss: 2.20	|	Val Acc: 43.18%
[1;34mEpoch 6/100[0m
Train Loss: 2.64	|	Train Acc: 38.44%
Val Loss: 2.06	|	Val Acc: 46.14%
[1;34mEpoch 7/100[0m
Train Loss: 2.61	|	Train Acc: 39.66%
Val Loss: 1.99	|	Val Acc: 47.90%
[1;34mEpoch 8/100[0m
Train Loss: 2.43	|	Train Acc: 44.12%
Val Loss: 1.86	|	Val Acc: 49.94%
[1;34mEpoch 9/100[0m
Train Loss: 2.39	|	Train Acc: 46.01%
Val Loss: 1.75	|	Val Acc: 52.75%
[1;34mEpoch 10/100[0m
Train Loss: 2.24	|	Train Acc: 49.67%
Val Loss: 1.71	|	Val Acc: 54.29%
[1;34mEpoch 11/100[0m
Train Loss: 2.05	|	Train Acc: 54.29%
Val Loss:

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[1;34mEpoch 26/100[0m
Train Loss: 1.42	|	Train Acc: 74.31%
Val Loss: 1.59	|	Val Acc: 60.03%
[1;34mEpoch 27/100[0m
Train Loss: 1.45	|	Train Acc: 74.15%
Val Loss: 1.60	|	Val Acc: 60.00%
[1;34mEpoch 28/100[0m
Train Loss: 1.36	|	Train Acc: 75.95%
Val Loss: 1.58	|	Val Acc: 60.33%
[1;34mEpoch 29/100[0m
Train Loss: 1.46	|	Train Acc: 73.44%
Val Loss: 1.63	|	Val Acc: 59.31%
[1;34mEpoch 30/100[0m
Train Loss: 1.41	|	Train Acc: 74.65%
Val Loss: 1.73	|	Val Acc: 57.30%
[1;34mEpoch 31/100[0m
Train Loss: 1.41	|	Train Acc: 74.14%
Val Loss: 1.65	|	Val Acc: 59.23%
[1;34mEpoch 32/100[0m
Train Loss: 1.45	|	Train Acc: 74.19%
Val Loss: 1.61	|	Val Acc: 60.14%
[1;34mEpoch 33/100[0m
Train Loss: 1.28	|	Train Acc: 77.50%
Val Loss: 1.60	|	Val Acc: 60.34%
[1;34mEpoch 34/100[0m
Train Loss: 1.42	|	Train Acc: 74.77%
Val Loss: 1.62	|	Val Acc: 60.08%
[1;34mEpoch 37/100[0m
Train Loss: 1.28	|	Train Acc: 77.48%
Val Loss: 1.60	|	Val Acc: 60.10%
[1;34mEpoch 38/100[0m
Train Loss: 1.40	|	Train Acc: 75.33%

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[1;34mEpoch 46/100[0m
Train Loss: 1.31	|	Train Acc: 76.98%
Val Loss: 1.57	|	Val Acc: 61.33%
[1;34mEpoch 47/100[0m
Train Loss: 1.35	|	Train Acc: 76.26%
Val Loss: 1.67	|	Val Acc: 59.86%
[1;34mEpoch 48/100[0m
Train Loss: 1.27	|	Train Acc: 77.88%
Val Loss: 1.61	|	Val Acc: 60.54%
[1;34mEpoch 49/100[0m
Train Loss: 1.23	|	Train Acc: 78.95%
Val Loss: 1.54	|	Val Acc: 62.08%
[1;34mEpoch 50/100[0m
Train Loss: 1.21	|	Train Acc: 79.34%
Val Loss: 1.65	|	Val Acc: 60.26%
[1;34mEpoch 51/100[0m
Train Loss: 1.35	|	Train Acc: 76.32%
Val Loss: 1.65	|	Val Acc: 60.18%
[1;34mEpoch 52/100[0m
Train Loss: 1.24	|	Train Acc: 78.33%
Val Loss: 1.53	|	Val Acc: 62.30%
[1;34mEpoch 53/100[0m
Train Loss: 1.21	|	Train Acc: 79.56%
Val Loss: 1.58	|	Val Acc: 61.49%
[1;34mEpoch 54/100[0m
Train Loss: 1.34	|	Train Acc: 76.63%
Val Loss: 1.55	|	Val Acc: 61.83%
[1;34mEpoch 56/100[0m
Train Loss: 1.24	|	Train Acc: 78.10%
Val Loss: 1.54	|	Val Acc: 62.68%
[1;34mEpoch 57/100[0m
Train Loss: 1.25	|	Train Acc: 78.35%

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[1;34mEpoch 71/100[0m
Train Loss: 1.08	|	Train Acc: 81.58%
Val Loss: 1.53	|	Val Acc: 64.06%
[1;34mEpoch 72/100[0m
Train Loss: 1.21	|	Train Acc: 78.75%
Val Loss: 1.55	|	Val Acc: 62.46%
[1;34mEpoch 73/100[0m
Train Loss: 1.25	|	Train Acc: 78.18%
Val Loss: 1.57	|	Val Acc: 62.03%
[1;34mEpoch 74/100[0m
Train Loss: 1.17	|	Train Acc: 79.29%
Val Loss: 1.51	|	Val Acc: 62.91%
[1;34mEpoch 75/100[0m
Train Loss: 1.25	|	Train Acc: 78.46%
Val Loss: 1.56	|	Val Acc: 62.17%
[1;34mEpoch 76/100[0m
Train Loss: 1.24	|	Train Acc: 78.64%
Val Loss: 1.53	|	Val Acc: 63.68%
[1;34mEpoch 77/100[0m
Train Loss: 1.26	|	Train Acc: 77.67%
Val Loss: 1.53	|	Val Acc: 62.89%
[1;34mEpoch 78/100[0m
Train Loss: 1.20	|	Train Acc: 79.15%
[1;34mEpoch 80/100[0m
Train Loss: 1.14	|	Train Acc: 80.16%
Val Loss: 1.49	|	Val Acc: 63.83%
[1;34mEpoch 81/100[0m
Train Loss: 1.16	|	Train Acc: 80.03%
[1;34mEpoch 83/100[0m
Train Loss: 1.26	|	Train Acc: 77.24%
Val Loss: 1.48	|	Val Acc: 64.58%
[1;34mEpoch 84/100[0m
Train Lo

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[1;34mEpoch 94/100[0m
Train Loss: 1.13	|	Train Acc: 80.41%
Val Loss: 1.49	|	Val Acc: 64.53%
[1;34mEpoch 95/100[0m
Train Loss: 1.08	|	Train Acc: 81.40%
Val Loss: 1.47	|	Val Acc: 64.68%
[1;34mEpoch 96/100[0m
Train Loss: 1.11	|	Train Acc: 80.93%
Val Loss: 1.48	|	Val Acc: 65.00%
[1;34mEpoch 97/100[0m
Train Loss: 1.20	|	Train Acc: 78.54%
Val Loss: 1.49	|	Val Acc: 64.09%


In [None]:
!pip install matplotlib
import matplotlib.pyplot as plt

if not os.path.exists("/plot"):
    os.makedirs("/plot")
    print("Creating a new directory")

def plot_loss(train_loss, val_loss, loss_fig):
    plt.figure(figsize = (10, 6))
    plt.plot(range(num_epochs), train_loss)
    plt.plot(range(num_epochs), val_loss)

    plt.xlabel("Num Epochs")
    plt.ylabel("Loss Value")
    
    plt.savefig(loss_fig)
    plt.show()

def plot_accuracy(train_acc, val_acc, acc_fig):
    plt.figure(figsize = (10, 6))
    plt.plot(range(num_epochs), train_acc)
    plt.plot(range(num_epochs), val_acc)

    plt.xlabel("Num Epochs")
    plt.ylabel("Accuracy (%)")
    
    plt.savefig(acc_fig)
    plt.show()

loss_path = "./plot/loss_fig_normal_cifar100(large).png"
acc_path = "./plot/acc_fig_normal_cifar100(large).png"

plot_loss(train_loss, val_loss, loss_path)
plot_accuracy(train_acc, val_acc, acc_path)
