In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output

import torch
import torch.nn as nn
import torch.nn.functional as F


In [2]:
from torchvision.datasets import MNIST
from torchvision import transforms

In [3]:
from torchvision.datasets.cifar import CIFAR10
dataset_train = MNIST('./', train = True, download = True,
                transform = transforms.Compose([transforms.ToTensor(), 
                transforms.Normalize([0.5],[0.5])]))

dataset_test = MNIST('./', train = False, download = True,
                transform = transforms.Compose([transforms.ToTensor(), 
                transforms.Normalize([0.5],[0.5])]))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [5]:
dataset_train[0][0].shape

torch.Size([1, 28, 28])

In [None]:
from torch.utils.data import DataLoader

In [None]:
train_dataloader = DataLoader(dataset_train, batch_size = 28, shuffle = True)
test_dataloader = DataLoader(dataset_test, batch_size = 28, shuffle = True)

In [None]:
from torch.nn.modules.conv import Conv2d
from torch.nn.modules import flatten
class ConvBlock(nn.Module):
  def __init__(self, **kwargs):
    super(ConvBlock, self).__init__()
    self.conv = nn.Conv2d(**kwargs)
    self.bn = nn.BatchNorm2d(kwargs.get('out_channels'))
    self.relu = nn.ReLU()

  def forward(self, x):
    return self.relu(self.bn(self.conv(x)))

class MyModel(nn.Module):
  def __init__(self, num_classes = 2):
    super(MyModel, self).__init__()
    self.num_classes = num_classes
    self.conv1 = ConvBlock(in_channels = 1, out_channels = 16, kernel_size =(3, 3), padding=1)
    self.conv2 = ConvBlock(in_channels = 16, out_channels = 16, kernel_size =(3, 3), padding=0)
    self.pool1 = nn.MaxPool2d((2, 2))
    self.conv3 = ConvBlock(in_channels = 16, out_channels = 32, kernel_size =(3, 3), padding=1)
    self.conv4 = ConvBlock(in_channels = 32, out_channels = 64, kernel_size =(3, 3), padding=0)
    self.pool2 = nn.MaxPool2d((2, 2))
    self.conv5 = ConvBlock(in_channels = 64, out_channels = 128, kernel_size =(3, 3), padding=1)
    self.conv6 = ConvBlock(in_channels = 128, out_channels = 256, kernel_size =(3, 3), padding=0)
    self.flatten = nn.Flatten()
    self.body = nn.Sequential(
        # self.num_clusses = num_classes
        # self.conv1 = ConvBlock(in_channels = 1, out_channels = 16, kernal_size =(3, 3))
        # self.conv2 = ConvBlock(in_channels = 16, out_channels = 16, kernal_size =(3, 3))
        # self.pool1 = nn.MaxPool2d((2, 2))
        # self.conv3 = ConvBlock(in_channels = 16, out_channels = 32, kernal_size =(3, 3))
        # self.conv4 = ConvBlock(in_channels = 32, out_channels = 64, kernal_size =(3, 3))
        # self.pool2 = nn.MaxPool2d((2, 2))
        # self.conv5 = ConvBlock(in_channels = 64, out_channels = 128, kernal_size =(3, 3))
        # self.conv6 = ConvBlock(in_channels = 128, out_channels = 256, kernal_size =(3, 3))
        # self.flatten = nn.Flatten()
        
        # ConvBlock(in_channels = 1, out_channels = 16, kernal_size =(3, 3)),
        # ConvBlock(in_channels = 16, out_channels = 16, kernal_size =(3, 3)),
        # nn.MaxPool2d((2, 2)),
        # ConvBlock(in_channels = 16, out_channels = 32, kernal_size =(3, 3)),
        # ConvBlock(in_channels = 32, out_channels = 64, kernal_size =(3, 3)),
        # nn.MaxPool2d((2, 2)),
        # ConvBlock(in_channels = 64, out_channels = 128, kernal_size =(3, 3)),
        # ConvBlock(in_channels = 128, out_channels = 256, kernal_size =(3, 3)),
        # nn.Flatten()
        self.conv1,
        self.conv2,
        self.pool1,
        self.conv3,
        self.conv4,
        self.pool2,
        self.conv5,
        self.conv6,
        self.flatten
    )

    self.fc = nn.Linear(2304, self.num_classes)

  
  def forward(self, x: torch.Tensor):
    return self.fc(self.body(x))


model = MyModel(10)

In [None]:
model(torch.rand(48, 1, 28, 28)).shape

torch.Size([48, 10])

In [None]:
from IPython.core.display import ProgressBar
from tqdm.notebook import tqdm
import numpy as np
def train_one_epoch(model, train_dataloader, criterion, optimizer, device = "cuda:0"):
  model.train().to(device)
  progress_bar = tqdm(train_dataloader)
  for images, labels in progress_bar:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(images) #==model.forward(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    progress_bar.set_description(f"Loss = {loss.item():.4f}")

def evaluate(model, val_dataloader, criterion, device = "cuda:0"):
  model.eval().to(device)
  progress_bar = tqdm(val_dataloader)
  acc = []
  losses = []
  # model.eval().to(device)
  with torch.no_grad():
    for images, labels in progress_bar:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      predicted_classes = outputs.argmax(1)
      loss = criterion(outputs, labels)
      losses.append(loss.item())
      acc.append((labels == predicted_classes).float().mean().item())
      progress_bar.set_description(f"Loss = {loss.item():.4f}")
  print(f'val_acc = {np.mean(acc)}, val_loss = {np.mean(losses)}')

def plot_history(train_history, val_history, title='loss'):
    plt.figure(figsize=(15, 4))
    plt.title('{}'.format(title))
    
    points = np.array(train_history)
    plt.plot(points[:,0], points[:,1], label='train', zorder=1)
    
    points = np.array(val_history)
    
    plt.scatter(points[:, 0], points[:, 1], marker='+', s=180, c='orange', label='val', zorder=2)
    plt.xlabel('train steps')
    
    plt.legend(loc='best')
    plt.grid()

    plt.show()
    

def train(model, train_dataloader, val_dataloader, criterion, optimizer, n_epochs = 10, device = "cuda:0"):
  for epoch in range(n_epochs):
    evaluate(model, val_dataloader, criterion)
    train_one_epoch(model, train_dataloader, criterion, optimizer)
    

In [None]:
from torch.optim.optimizer import Optimizer
train_dataloader = DataLoader(dataset_train, batch_size = 28, shuffle = False)
test_dataloader = DataLoader(dataset_test, batch_size = 28, shuffle = False)
model = MyModel(10)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 1e-3)
# device = torch.device("cuda:0")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



In [None]:
train(model, train_dataloader, test_dataloader, criterion, optimizer, 10, device)

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.1132282580993029, val_loss = 2.3020207422405647


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9723663338402796, val_loss = 0.10765727934708517


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9803471760043885, val_loss = 0.07298804931121478


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9834397523429806, val_loss = 0.05896057270651985


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.986033526046316, val_loss = 0.05099427160938978


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9874301734250351, val_loss = 0.045825015406537925


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9885275392226001, val_loss = 0.04211069995474965


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9891261023849083, val_loss = 0.03921357684511192


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9899241866013191, val_loss = 0.036971832854699606


  0%|          | 0/2143 [00:00<?, ?it/s]

  0%|          | 0/358 [00:00<?, ?it/s]

val_acc = 0.9905227497636273, val_loss = 0.03517357510159783


  0%|          | 0/2143 [00:00<?, ?it/s]

In [None]:
# weights = model[0].weight.cpu().detach()
# plt.figure(figsize=(10, 10))
# for i in range(10):
#     plt.subplot(5, 5, i+1)
#     plt.title("Label: %i" % i)
#     plt.imshow(weights[i].reshape([28, 28]), cmap='gray')

TypeError: ignored