<a href="https://colab.research.google.com/github/Marchinski/AI_Models/blob/main/VGG_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Installs

In [11]:
!pip install tensorflow==2.0.0-alpha0



### Tensorboard initialization and loading

In [15]:
%reload_ext tensorboard.notebook
%tensorboard --logdir=logs

Reusing TensorBoard on port 6007 (pid 972), started 0:00:10 ago. (Use '!kill 972' to kill it.)

### Imports

In [None]:
import os
import pickle
import time, datetime

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision
from tqdm import tqdm
import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Methods

In [None]:
def show_train_hist(hist, show=False, save=False, path='Train_hist.png'):
    """Loss tracker
    
    Plot the losses of generator and discriminator independently to see the trend
    
    Arguments:
        hist {[dict]} -- Tracking variables
    
    Keyword Arguments:
        show {bool} -- If to display the figure (default: {False})
        save {bool} -- If to store the figure (default: {False})
        path {str} -- path to store the figure (default: {'Train_hist.png'})
    """
    x = range(len(hist['losses']))

    y1 = hist['losses']

    plt.plot(x, y1, label='loss')

    plt.xlabel('Epoch')
    plt.ylabel('Loss')

    plt.legend(loc=4)
    plt.grid(True)
    plt.tight_layout()

    if save:
        plt.savefig(path)

    if show:
        plt.show()
    else:
        plt.close()

### Data Loading and processing

In [None]:
# initialise the device for training, if gpu is available, device = 'cuda', else: device = 'cpu'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

data_dir = './MNIST_data/'
save_dir = './Results/'
# create folder if not exist
if not os.path.exists(save_dir):
    os.mkdir(save_dir)

# training parameters
batch_size = 256
learning_rate = 0.01
epochs = 100

# construct the datasets and data loaders
transform = transforms.Compose([transforms.Resize(64),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=(0.5,), std=(0.5,))])
tv_data = datasets.MNIST(root=data_dir, train=True, transform=transform, download=True)
train_data, validation_data = torch.utils.data.random_split(tv_data, [50000, 10000])

train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(dataset=validation_data, batch_size=batch_size, shuffle=True)

test_data = datasets.MNIST(root=data_dir, train=False, transform=transform, download=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

cuda


### Model loading and training

In [None]:
VGG_net = torch.hub.load('pytorch/vision:v0.9.0', 'vgg19', pretrained=False)
VGG_net = VGG_net.to(device)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(VGG_net.parameters(), lr=learning_rate, momentum=0.9)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2)

# tracking variables
train_hist = {}
train_hist['losses'] = []
train_hist['per_epoch_ptimes'] = []
train_hist['total_ptime'] = []

start_time = time.time()
now = datetime.datetime.now()
print('Training starting at: %02d:%02d:%02d'%(now.hour, now.minute, now.second))
for epoch in range(epochs):
    VGG_net.train()
    Train_Loss = []
    Val_Loss = []
    epoch_start_time = time.time()
    now = datetime.datetime.now()
    print('Epoch %d starting at: %02d:%02d:%02d'%(epoch+1, now.hour, now.minute, now.second))
    # Training
    for (train_images, train_labels) in tqdm(train_loader):
        train_images = train_images.repeat(1, 3, 1, 1)
        train_images = train_images.to(device)
        train_labels = train_labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        train_outputs = VGG_net(train_images)
        train_loss = criterion(train_outputs, train_labels)
        train_loss.backward()
        optimizer.step()

        Train_Loss.append(train_loss.item())
    
    # Validation
    for (val_images, val_labels) in tqdm(validation_loader):
        val_images = val_images.repeat(1, 3, 1, 1)
        val_images = val_images.to(device)
        val_labels = val_labels.to(device)

        # calculate outputs by running images through the network
        val_outputs = VGG_net(val_images)
        val_loss = criterion(val_outputs, val_labels)
        
        Val_Loss.append(val_loss.item())

    # Learning Rate Adaptation step
    # changes LR on flat plateau
    scheduler.step(np.mean(Val_Loss))

    epoch_loss = np.mean(Train_Loss)  # mean generator loss for the epoch
    epoch_end_time = time.time()
    per_epoch_ptime = epoch_end_time - epoch_start_time

    print("\nEpoch %d of %d with %.2f s" % (epoch + 1, epochs, per_epoch_ptime))
    print("Loss: %.8f" % (epoch_loss))

    # record the loss for every epoch
    train_hist['losses'].append(epoch_loss)
    train_hist['per_epoch_ptimes'].append(per_epoch_ptime)

print('Finished Training')

end_time = time.time()
total_ptime = end_time - start_time
train_hist['total_ptime'].append(total_ptime)

print('Avg per epoch ptime: %.2f, total %d epochs ptime: %.2f' % (np.mean(train_hist['per_epoch_ptimes']), epochs, total_ptime))

show_train_hist(train_hist, save=True, path=save_dir + '/MNIST_GAN_train_hist.png')

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.9.0
  0%|          | 0/196 [00:00<?, ?it/s]

Training starting at: 21:21:31
Epoch 1 starting at: 21:21:31


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.17it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 1 of 100 with 69.20 s
Loss: 2.67217962
Epoch 2 starting at: 21:22:41


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.18it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 2 of 100 with 69.23 s
Loss: 0.19122635
Epoch 3 starting at: 21:23:50


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.17it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 3 of 100 with 69.18 s
Loss: 0.07304710
Epoch 4 starting at: 21:24:59


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.17it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 4 of 100 with 69.27 s
Loss: 0.04492310
Epoch 5 starting at: 21:26:08


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.14it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 5 of 100 with 69.26 s
Loss: 0.03199426
Epoch 6 starting at: 21:27:18


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.09it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 6 of 100 with 69.32 s
Loss: 0.02578453
Epoch 7 starting at: 21:28:27


100%|██████████| 196/196 [01:03<00:00,  3.07it/s]
100%|██████████| 40/40 [00:05<00:00,  7.04it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 7 of 100 with 69.59 s
Loss: 0.02021953
Epoch 8 starting at: 21:29:36


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.13it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 8 of 100 with 69.33 s
Loss: 0.01635975
Epoch 9 starting at: 21:30:46


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.18it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 9 of 100 with 69.25 s
Loss: 0.01241273
Epoch 10 starting at: 21:31:55


100%|██████████| 196/196 [01:03<00:00,  3.07it/s]
100%|██████████| 40/40 [00:05<00:00,  7.15it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 10 of 100 with 69.37 s
Loss: 0.01036521
Epoch 11 starting at: 21:33:04


100%|██████████| 196/196 [01:03<00:00,  3.07it/s]
100%|██████████| 40/40 [00:05<00:00,  7.02it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 11 of 100 with 69.48 s
Loss: 0.01202463
Epoch 12 starting at: 21:34:14


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.16it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 12 of 100 with 69.26 s
Loss: 0.00547380
Epoch 13 starting at: 21:35:23


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.12it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 13 of 100 with 69.25 s
Loss: 0.00297396
Epoch 14 starting at: 21:36:32


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.18it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 14 of 100 with 69.14 s
Loss: 0.00120017
Epoch 15 starting at: 21:37:42


100%|██████████| 196/196 [01:03<00:00,  3.08it/s]
100%|██████████| 40/40 [00:05<00:00,  7.16it/s]
  0%|          | 0/196 [00:00<?, ?it/s]


Epoch 15 of 100 with 69.19 s
Loss: 0.00091285
Epoch 16 starting at: 21:38:51


 29%|██▊       | 56/196 [00:18<00:45,  3.08it/s]

KeyboardInterrupt: ignored

### Model testing