In [0]:
import torch
import random
import numpy as np
import matplotlib.pyplot as plt


# Seed every random number generator used in this notebook so that
# "Restart Kernel -> Run All" reproduces the same training runs.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
# Seeding alone is not enough on GPU: cuDNN may pick non-deterministic
# kernels unless deterministic mode is requested explicitly. This can cost
# some speed; set it back to False only if reproducibility is not needed.
torch.backends.cudnn.deterministic = True

In [0]:
import torchvision.datasets

In [0]:
# Fetch MNIST into the current directory (downloaded only if missing):
# one dataset object for the training split, one for the test split.
MNIST_train = torchvision.datasets.MNIST(root='./', train=True, download=True)
MNIST_test = torchvision.datasets.MNIST(root='./', train=False, download=True)

In [0]:
# Pull the raw image / label tensors out of the dataset objects.
# `.data` / `.targets` replace the deprecated `train_data`, `train_labels`,
# `test_data` and `test_labels` aliases, which emit warnings in torchvision.
X_train = MNIST_train.data
y_train = MNIST_train.targets
X_test = MNIST_test.data
y_test = MNIST_test.targets

In [0]:
# Number of labels in the training and test splits.
len(y_train), len(y_test)

In [0]:
import matplotlib.pyplot as plt
# Sanity check: render the first training image and print its label.
plt.imshow(X_train[0])
plt.show()
print(y_train[0])

In [0]:
# Convert the images to float and insert a channel axis at position 1,
# which is the layout the Conv2d layers below consume.
# NOTE: this rebinds X_train / X_test, so the cell must be run exactly once
# per kernel session (a second run would add another axis).
X_train = X_train.float().unsqueeze(1)
X_test = X_test.float().unsqueeze(1)

In [0]:
# Shape of the training tensor after the channel axis was inserted.
X_train.shape

In [0]:
class LeNet5(torch.nn.Module):
    """Configurable LeNet-5 style convolutional classifier with 10 outputs.

    The network is two convolutional stages (conv -> activation ->
    optional batch norm -> 2x2 pooling) followed by three fully connected
    layers (400 -> 120 -> 84 -> 10). The first fully connected layer expects
    16 feature maps of size 5x5, which is what both convolution variants
    produce for single-channel 28x28 inputs.

    Args:
        activation: 'tanh', 'sigmoid' or 'relu'; used after both
            convolutional stages and after the first two linear layers.
        pooling: 'avg' or 'max'; 2x2 pooling with stride 2.
        conv_size: 5 for one 5x5 convolution per stage, or 3 for two
            stacked 3x3 convolutions per stage (same output size).
        use_batch_norm: when True, batch normalisation is applied after the
            activation of each convolutional stage. The BatchNorm layers are
            always created (so the parameter set does not depend on this
            flag) but are only used in ``forward`` when the flag is set.

    Raises:
        NotImplementedError: for an unknown ``activation``, ``pooling`` or
            ``conv_size`` value.
    """

    def __init__(self,
                 activation='tanh',
                 pooling='avg',
                 conv_size=5,
                 use_batch_norm=False):
        super(LeNet5, self).__init__()

        self.conv_size = conv_size
        self.use_batch_norm = use_batch_norm

        if activation == 'tanh':
            activation_function = torch.nn.Tanh()
        elif activation == 'sigmoid':
            activation_function = torch.nn.Sigmoid()
        elif activation == 'relu':
            activation_function = torch.nn.ReLU()
        else:
            raise NotImplementedError(
                "activation must be 'tanh', 'sigmoid' or 'relu', got {!r}".format(activation))

        if pooling == 'avg':
            pooling_layer = torch.nn.AvgPool2d(kernel_size=2, stride=2)
        elif pooling == 'max':
            pooling_layer = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        else:
            raise NotImplementedError(
                "pooling must be 'avg' or 'max', got {!r}".format(pooling))

        # Validate once up front; the stage builders below can then assume
        # conv_size is either 5 or 3.
        if conv_size not in (5, 3):
            raise NotImplementedError(
                "conv_size must be 5 or 3, got {!r}".format(conv_size))

        # Layers are created in a fixed order (stage 1, stage 2, fc1..fc3)
        # so that weight initialisation consumes the RNG identically for a
        # given seed.

        # Stage 1: padding keeps the spatial size unchanged.
        if conv_size == 5:
            self.conv1 = torch.nn.Conv2d(
                in_channels=1, out_channels=6, kernel_size=5, padding=2)
        else:
            self.conv1_1 = torch.nn.Conv2d(
                in_channels=1, out_channels=6, kernel_size=3, padding=1)
            self.conv1_2 = torch.nn.Conv2d(
                in_channels=6, out_channels=6, kernel_size=3, padding=1)

        # The activation and pooling modules hold no parameters, so a single
        # instance of each is shared by every place that uses it.
        self.act1 = activation_function
        self.bn1 = torch.nn.BatchNorm2d(num_features=6)
        self.pool1 = pooling_layer

        # Stage 2: no padding, so the spatial size shrinks by 4 in total
        # (one 5x5 convolution or two 3x3 convolutions).
        if conv_size == 5:
            self.conv2 = torch.nn.Conv2d(
                in_channels=6, out_channels=16, kernel_size=5, padding=0)
        else:
            self.conv2_1 = torch.nn.Conv2d(
                in_channels=6, out_channels=16, kernel_size=3, padding=0)
            self.conv2_2 = torch.nn.Conv2d(
                in_channels=16, out_channels=16, kernel_size=3, padding=0)

        self.act2 = activation_function
        self.bn2 = torch.nn.BatchNorm2d(num_features=16)
        self.pool2 = pooling_layer

        # Classifier head.
        self.fc1 = torch.nn.Linear(5 * 5 * 16, 120)
        self.act3 = activation_function

        self.fc2 = torch.nn.Linear(120, 84)
        self.act4 = activation_function

        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw class scores (logits) for a batch of images.

        Args:
            x: 4-D tensor (batch, channel, height, width) with one channel.

        Returns:
            Tensor of shape (batch, 10); no softmax is applied.
        """
        # Stage 1
        if self.conv_size == 5:
            x = self.conv1(x)
        else:
            # Two stacked 3x3 convolutions with no activation in between.
            x = self.conv1_2(self.conv1_1(x))
        x = self.act1(x)
        if self.use_batch_norm:
            x = self.bn1(x)
        x = self.pool1(x)

        # Stage 2
        if self.conv_size == 5:
            x = self.conv2(x)
        else:
            x = self.conv2_2(self.conv2_1(x))
        x = self.act2(x)
        if self.use_batch_norm:
            x = self.bn2(x)
        x = self.pool2(x)

        # Flatten (batch, C, H, W) -> (batch, C*H*W) for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.act3(self.fc1(x))
        x = self.act4(self.fc2(x))
        x = self.fc3(x)

        return x

In [0]:
def train(net, X_train, y_train, X_test, y_test, batch_size, optimizer, lr, num_epochs=20):
    """Train ``net`` with mini-batches and evaluate on the test set each epoch.

    The model is moved to the first CUDA device when one is available,
    otherwise it stays on the CPU. Training batches are moved to that device
    one at a time; the whole test set is moved once up front.

    Args:
        net: ``torch.nn.Module`` producing class scores.
        X_train, y_train: training inputs and integer class labels.
        X_test, y_test: test inputs and integer class labels.
        batch_size: number of samples per optimisation step.
        optimizer: optimizer *class* (e.g. ``torch.optim.SGD``); it is
            instantiated here as ``optimizer(net.parameters(), lr=lr)``.
        lr: learning rate passed to the optimizer.
        num_epochs: number of passes over the training data (default 20).

    Returns:
        ``(test_accuracy_history, test_loss_history)``: two lists with one
        0-d CPU tensor per epoch.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optimizer(net.parameters(), lr=lr)

    test_accuracy_history = []
    test_loss_history = []

    X_test = X_test.to(device)
    y_test = y_test.to(device)

    for epoch in range(num_epochs):
        # Evaluation below switches to eval mode, so re-enable training mode
        # once at the start of every epoch.
        net.train()

        # Fresh random sample order for each epoch.
        order = np.random.permutation(len(X_train))
        for start_index in range(0, len(X_train), batch_size):
            optimizer.zero_grad()

            batch_indexes = order[start_index:start_index + batch_size]

            X_batch = X_train[batch_indexes].to(device)
            y_batch = y_train[batch_indexes].to(device)

            preds = net(X_batch)

            loss_value = loss(preds, y_batch)
            loss_value.backward()

            optimizer.step()

        net.eval()
        # No gradients are needed for evaluation; skipping the autograd graph
        # avoids holding activations for the entire test set in memory.
        with torch.no_grad():
            test_preds = net(X_test)
            test_loss_history.append(loss(test_preds, y_test).cpu())
            accuracy = (test_preds.argmax(dim=1) == y_test).float().mean().cpu()
        test_accuracy_history.append(accuracy)

        print('{} {}'.format(epoch, accuracy))

    return test_accuracy_history, test_loss_history







In [0]:
import matplotlib.pyplot as plt
from datetime import datetime
import time

start_time = datetime.now()

# Every configuration explored in this notebook:
#   label -> (LeNet5 keyword arguments, optimizer class, learning rate)
EXPERIMENTS = {
    'tanh': (dict(activation='tanh', conv_size=5), torch.optim.SGD, 0.01),
    'sigmoid': (dict(activation='sigmoid', conv_size=5), torch.optim.SGD, 0.01),
    'relu': (dict(activation='relu', conv_size=5), torch.optim.SGD, 0.01),
    'relu_lr_0.1': (dict(activation='relu', conv_size=5), torch.optim.SGD, 0.1),
    'relu_adam': (dict(activation='relu', conv_size=5), torch.optim.Adam, 1.0e-3),
    'relu_adam_3': (dict(activation='relu', conv_size=3), torch.optim.Adam, 1.0e-3),
    'relu_adam_3_mx_pl': (
        dict(activation='relu', conv_size=3, pooling='max'), torch.optim.Adam, 1.0e-3),
    'relu_3_max_pool_bn': (
        dict(activation='relu', conv_size=3, pooling='max', use_batch_norm=True),
        torch.optim.Adam, 1.0e-3),
}

# Subset that is actually trained. The other entries are kept above so they
# can be re-enabled by adding their label here.
ENABLED_EXPERIMENTS = ['tanh', 'relu_3_max_pool_bn']

# One subplot is drawn per batch size.
BATCH_SIZES = list(range(120, 121, 20))
N_COLS = 2

accuracies = {}
losses = {}

# Size the grid from the number of batch sizes instead of a fixed 3x2 layout;
# squeeze=False keeps `axes` two-dimensional even for a single row.
n_rows = max(1, (len(BATCH_SIZES) + N_COLS - 1) // N_COLS)
fig, axes = plt.subplots(nrows=n_rows, ncols=N_COLS,
                         figsize=(20, 10 * n_rows), squeeze=False)

for panel, size in enumerate(BATCH_SIZES):
    print('batch_size is {}'.format(size))
    print('---------------------------')

    for experiment_id in ENABLED_EXPERIMENTS:
        net_kwargs, optimizer_cls, learning_rate = EXPERIMENTS[experiment_id]
        print(experiment_id)
        accuracies[experiment_id], losses[experiment_id] = train(
            LeNet5(**net_kwargs),
            X_train, y_train, X_test, y_test,
            batch_size=size, optimizer=optimizer_cls, lr=learning_rate)
        print('...........................')

    ax = axes[panel // N_COLS, panel % N_COLS]
    for experiment_id, accuracy_history in accuracies.items():
        ax.plot(accuracy_history, label=experiment_id)
    ax.legend()
    ax.set_title('Batch size is {}'.format(size))
    ax.set_xlabel('epoch number')
    ax.set_ylabel('test accuracy')
    ax.set_ylim(0.96, 0.998)
    # Ticks follow the number of epochs actually trained; ticks beyond the
    # data would stretch the x-axis and squash the curves to the left.
    n_epochs = max(len(history) for history in accuracies.values())
    ax.set_xticks(np.arange(0, n_epochs + 1, 5))
    ax.set_yticks(np.arange(0.96, 0.998, 0.002))
    ax.grid()

# Hide grid cells that have no batch size assigned to them.
for unused_ax in axes.ravel()[len(BATCH_SIZES):]:
    unused_ax.set_visible(False)

fig.tight_layout()
plt.show()
print(datetime.now() - start_time)

In [0]:
# Test loss per epoch for every experiment that was run.
for experiment_id, loss_history in losses.items():
    plt.plot(loss_history, label=experiment_id)
plt.grid()
plt.ylim(0,0.2)
plt.xlim(0,50)
# The curves are labelled, so show the legend that identifies them.
plt.legend()
plt.title('Losses')
plt.ylabel('Loss-value')
plt.xlabel('epoch number')
plt.show()

In [0]:
# Misclassified test digits for 'relu_3_max_pool_bn' after 5 epochs.
# Filled in by train1(); each entry is [test index, predicted label, true label].
errors=[]

def train1(net, X_train, y_train, X_test, y_test, batch_size, optimizer, lr, num_epochs=5):
    """Train ``net`` and record which test samples it misclassifies at the end.

    The training / evaluation loop is the same mini-batch scheme as ``train``.
    After the final epoch, every test sample whose predicted class differs
    from its label is appended to the module-level ``errors`` list as
    ``[test index, predicted label, true label]`` (plain Python ints).
    ``errors`` is extended, not reset, so repeated calls accumulate entries.

    Args:
        net: ``torch.nn.Module`` producing class scores.
        X_train, y_train: training inputs and integer class labels.
        X_test, y_test: test inputs and integer class labels.
        batch_size: number of samples per optimisation step.
        optimizer: optimizer *class*, instantiated as
            ``optimizer(net.parameters(), lr=lr)``.
        lr: learning rate passed to the optimizer.
        num_epochs: number of passes over the training data (default 5).

    Returns:
        ``(test_accuracy_history, test_loss_history)``: two lists with one
        0-d CPU tensor per epoch.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optimizer(net.parameters(), lr=lr)

    test_accuracy_history = []
    test_loss_history = []

    X_test = X_test.to(device)
    y_test = y_test.to(device)

    final_predictions = None

    for epoch in range(num_epochs):
        net.train()
        order = np.random.permutation(len(X_train))
        for start_index in range(0, len(X_train), batch_size):
            optimizer.zero_grad()

            batch_indexes = order[start_index:start_index + batch_size]

            X_batch = X_train[batch_indexes].to(device)
            y_batch = y_train[batch_indexes].to(device)

            preds = net(X_batch)

            loss_value = loss(preds, y_batch)
            loss_value.backward()

            optimizer.step()

        net.eval()
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            test_preds = net(X_test)
            test_loss_history.append(loss(test_preds, y_test).cpu())
            final_predictions = test_preds.argmax(dim=1)
            accuracy = (final_predictions == y_test).float().mean().cpu()
        test_accuracy_history.append(accuracy)

        print(epoch+1,accuracy)

    if final_predictions is not None:
        # One bulk transfer to Python lists instead of comparing device
        # tensors element by element; works for any test-set size.
        predicted_labels = final_predictions.cpu().tolist()
        true_labels = y_test.cpu().tolist()
        errors.extend(
            [position, predicted, real]
            for position, (predicted, real) in enumerate(zip(predicted_labels, true_labels))
            if predicted != real
        )

    print('---------------')

    print(len(errors))
    print(errors)

    return test_accuracy_history, test_loss_history
    

# Train the batch-norm / max-pool / 3x3 variant with Adam and keep what
# train1 returns (accuracy history, loss history) under the experiment label.
accuracies1 = {
    'relu_3_max_pool_bn': train1(
        LeNet5(activation='relu', conv_size=3, pooling='max', use_batch_norm=True),
        X_train, y_train, X_test, y_test,
        batch_size=100, optimizer=torch.optim.Adam, lr=1.0e-3),
}

In [0]:
# Display the collected misclassifications: [test index, predicted, real].
errors

In [0]:
# Drop the channel axis (dim 1) so single test images can be passed to imshow.
# NOTE: this rebinds X_test; the training cells above expect the channel axis,
# so they must not be re-run after this cell without restoring it.
X_test = X_test.squeeze(dim=1)

In [0]:
# Shape of the test tensor after the channel axis was removed.
X_test.shape

In [0]:
# Gallery of misclassified test digits: one panel per entry of `errors`,
# titled with the predicted label and captioned with the true label.
fig = plt.figure(figsize=(25, 15))

columns = 7
rows = 3
# Slice instead of indexing 1..rows*columns: this starts at the first error
# (index 0) and cannot run past the end when there are fewer errors than panels.
shown_errors = errors[:columns * rows]
for panel, (position, predicted, real) in enumerate(shown_errors, start=1):
    fig.add_subplot(rows, columns, panel)
    # squeeze() drops a leading channel axis if X_test still has one, so the
    # image is 2-D whether or not X_test was squeezed beforehand.
    plt.imshow(X_test[position].squeeze())
    plt.title('predicted: {}'.format(predicted), fontsize=30)
    plt.xlabel('real: {}'.format(real), fontsize=30)
fig.tight_layout()
plt.show()

In [0]:
import pandas as pd

In [0]:
# Tabulate the misclassifications. Values are coerced to plain ints so the
# columns get an integer dtype even when the entries are 0-d arrays, and the
# column names are passed to the constructor so an empty `errors` list yields
# an empty frame instead of a column-count mismatch.
df = pd.DataFrame(
    [[int(position), int(predicted), int(real)] for position, predicted, real in errors],
    columns=['Position', 'Predicted', 'Real'],
)
df.head()