In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import pandas as pd


In [19]:
# --- Configuration: everything a reader might want to tune -------------------
SEED = 42            # change to obtain a different (but still repeatable) run
DATA_ROOT = './data'
BATCH_SIZE = 64

# Seed torch's global RNG. It drives weight initialisation, dropout masks and
# the shuffling order of the training DataLoader, so Restart & Run All repeats
# the same sequence of experiments instead of producing new numbers each time.
torch.manual_seed(SEED)

# All tensors are kept on the CPU.
device = torch.device("cpu")

# ToTensor scales pixels to [0, 1]; Normalize((0.5,), (0.5,)) then applies
# (x - 0.5) / 0.5, i.e. maps them to [-1, 1].
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# MNIST is downloaded into DATA_ROOT on first use.
train_dataset = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)

# Only the training set is shuffled; the evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [22]:

class CNN(nn.Module):
    """Two-convolution image classifier with configurable conv/pool/dropout.

    Pipeline: conv1 -> ReLU -> pool -> conv2 -> ReLU -> pool -> flatten
    -> fc1 -> ReLU -> dropout -> fc2. The output is raw (un-normalised) logits.

    Args:
        ker1, ker2: kernel sizes of the first and second convolution.
        pad1, pad2: paddings of the first and second convolution.
        str1, str2: strides of the first and second convolution.
        pool: ``"max"`` selects 2x2 max pooling; any other value selects
            2x2 average pooling.
        dropout: drop probability applied after the hidden linear layer.
        fc_units: width of the hidden linear layer.
        in_channels: number of input channels (default 1, the value that
            used to be hard-coded).
        input_size: spatial size of the input; an int for square images or
            a ``(height, width)`` pair (default 28).
        num_classes: number of output logits (default 10).

    Attributes:
        _to_linear: number of features per sample after the second pooling,
            i.e. the input width of ``fc1``.
    """

    def __init__(self, ker1, ker2, pad1, pad2, str1, str2, pool, dropout, fc_units=128,
                 in_channels=1, input_size=28, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=ker1, stride=str1, padding=pad1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=ker2, stride=str2, padding=pad2)
        self.pool = nn.MaxPool2d(2, 2) if pool == "max" else nn.AvgPool2d(2, 2)
        self.dropout = nn.Dropout(dropout)

        # Infer the flattened feature size by pushing one dummy image through
        # the convolutional part. Zeros + no_grad: only the shape matters, so
        # the probe must neither consume global RNG state nor build an
        # autograd graph.
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        with torch.no_grad():
            probe = self._features(torch.zeros(1, in_channels, height, width))
        self._to_linear = probe.numel()

        self.fc1 = nn.Linear(self._to_linear, fc_units)
        self.fc2 = nn.Linear(fc_units, num_classes)

    def _features(self, x):
        """Apply both conv -> ReLU -> pool stages and return the feature maps."""
        x = self.pool(F.relu(self.conv1(x)))
        return self.pool(F.relu(self.conv2(x)))

    def forward(self, x):
        """Map a batch of images (N, C, H, W) to logits of shape (N, num_classes)."""
        x = self._features(x)
        x = x.flatten(1)  # keep the batch dimension, merge the rest
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)


In [None]:

NUM_EPOCHS = 2         # training passes over the train set per configuration
LEARNING_RATE = 0.001  # Adam step size

# Each entry: (ker1, ker2, pad1, pad2, str1, str2, pool, dropout)
experiments = [
    (3,3,0,0,1,1,"max",0.0),
    (7,7,3,3,3,3,"avg",0.7),
    (3,7,0,3,1,3,"max",0.7),
    (7,3,3,0,3,1,"avg",0.0),
    (5,5,0,3,2,2,"max",0.5),
    (3,5,3,0,1,2,"avg",0.7),
    (7,5,0,0,1,1,"max",0.0),
    (3,3,3,3,3,3,"avg",0.7),
]


def _train_model(model, loader, optimizer, criterion, epochs, device):
    """Run `epochs` full passes of gradient updates over `loader`."""
    model.train()
    for _ in range(epochs):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()


def _evaluate_accuracy(model, loader, device):
    """Return the top-1 accuracy of `model` over `loader`, in percent."""
    model.eval()  # disables dropout for evaluation
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100*correct/total


results = []

for k1,k2,p1,p2,s1,s2,pool,drop in experiments:
    # Move the model to the same device as the batches; without this the loop
    # only works when `device` is the CPU.
    model = CNN(k1,k2,p1,p2,s1,s2,pool,drop).to(device)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()

    _train_model(model, train_loader, optimizer, criterion, NUM_EPOCHS, device)
    acc = _evaluate_accuracy(model, test_loader, device)

    results.append({"ker1":k1,"ker2":k2,"pad1":p1,"pad2":p2,"str1":s1,"str2":s2,"pool":pool,"drop":drop,"accuracy":acc})
    print(f"ker1={k1},ker2={k2},pad1={p1},pad2={p2},str1={s1},str2={s2},pool={pool},drop={drop} --- acc={acc}%")



ker1=3,ker2=3,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=99.03%
ker1=7,ker2=7,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=97.4%
ker1=3,ker2=7,pad1=0,pad2=3,str1=1,str2=3,pool=max,drop=0.7 --- acc=98.09%
ker1=7,ker2=3,pad1=3,pad2=0,str1=3,str2=1,pool=avg,drop=0.0 --- acc=96.08%
ker1=5,ker2=5,pad1=0,pad2=3,str1=2,str2=2,pool=max,drop=0.5 --- acc=98.31%
ker1=3,ker2=5,pad1=3,pad2=0,str1=1,str2=2,pool=avg,drop=0.7 --- acc=97.79%
ker1=7,ker2=5,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.58%


**1 запуск**

ker1=3,ker2=3,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=99.00%
ker1=7,ker2=7,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=97.39%
ker1=3,ker2=7,pad1=0,pad2=3,str1=1,str2=3,pool=max,drop=0.7 --- acc=98.03%
ker1=7,ker2=3,pad1=3,pad2=0,str1=3,str2=1,pool=avg,drop=0.0 --- acc=96.78%
ker1=5,ker2=5,pad1=0,pad2=3,str1=2,str2=2,pool=max,drop=0.5 --- acc=98.60%
ker1=3,ker2=5,pad1=3,pad2=0,str1=1,str2=2,pool=avg,drop=0.7 --- acc=97.59%
ker1=7,ker2=5,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.72%
ker1=3,ker2=3,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=65.97%
ker1=5,ker2=3,pad1=0,pad2=3,str1=2,str2=1,pool=max,drop=0.5 --- acc=98.67%
ker1=7,ker2=7,pad1=0,pad2=0,str1=1,str2=3,pool=avg,drop=0.0 --- acc=97.35%
ker1=3,ker2=7,pad1=3,pad2=3,str1=1,str2=2,pool=max,drop=0.7 --- acc=98.14%
ker1=5,ker2=5,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.5 --- acc=96.56%

**2 запуск**

ker1=3,ker2=3,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.63%
ker1=7,ker2=7,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=97.33%
ker1=3,ker2=7,pad1=0,pad2=3,str1=1,str2=3,pool=max,drop=0.7 --- acc=98.13%
ker1=7,ker2=3,pad1=3,pad2=0,str1=3,str2=1,pool=avg,drop=0.0 --- acc=95.67%
ker1=5,ker2=5,pad1=0,pad2=3,str1=2,str2=2,pool=max,drop=0.5 --- acc=98.44%
ker1=3,ker2=5,pad1=3,pad2=0,str1=1,str2=2,pool=avg,drop=0.7 --- acc=97.71%
ker1=7,ker2=5,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.75%
ker1=3,ker2=3,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=68.02%
ker1=5,ker2=3,pad1=0,pad2=3,str1=2,str2=1,pool=max,drop=0.5 --- acc=98.81%
ker1=7,ker2=7,pad1=0,pad2=0,str1=1,str2=3,pool=avg,drop=0.0 --- acc=97.78%
ker1=3,ker2=7,pad1=3,pad2=3,str1=1,str2=2,pool=max,drop=0.7 --- acc=98.4%
ker1=5,ker2=5,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.5 --- acc=96.87%

**3 запуск**

ker1=3,ker2=3,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.77%
ker1=7,ker2=7,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=97.3%
ker1=3,ker2=7,pad1=0,pad2=3,str1=1,str2=3,pool=max,drop=0.7 --- acc=98.17%
ker1=7,ker2=3,pad1=3,pad2=0,str1=3,str2=1,pool=avg,drop=0.0 --- acc=96.59%
ker1=5,ker2=5,pad1=0,pad2=3,str1=2,str2=2,pool=max,drop=0.5 --- acc=98.42%
ker1=3,ker2=5,pad1=3,pad2=0,str1=1,str2=2,pool=avg,drop=0.7 --- acc=97.92%
ker1=7,ker2=5,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.8%
ker1=3,ker2=3,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=67.89%
ker1=5,ker2=3,pad1=0,pad2=3,str1=2,str2=1,pool=max,drop=0.5 --- acc=98.72%
ker1=7,ker2=7,pad1=0,pad2=0,str1=1,str2=3,pool=avg,drop=0.0 --- acc=97.81%
ker1=3,ker2=7,pad1=3,pad2=3,str1=1,str2=2,pool=max,drop=0.7 --- acc=98.3%
ker1=5,ker2=5,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.5 --- acc=96.26%

**4 запуск**

ker1=3,ker2=3,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.22%
ker1=7,ker2=7,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=96.82%
ker1=3,ker2=7,pad1=0,pad2=3,str1=1,str2=3,pool=max,drop=0.7 --- acc=98.05%
ker1=7,ker2=3,pad1=3,pad2=0,str1=3,str2=1,pool=avg,drop=0.0 --- acc=96.05%
ker1=5,ker2=5,pad1=0,pad2=3,str1=2,str2=2,pool=max,drop=0.5 --- acc=98.46%
ker1=3,ker2=5,pad1=3,pad2=0,str1=1,str2=2,pool=avg,drop=0.7 --- acc=97.66%
ker1=7,ker2=5,pad1=0,pad2=0,str1=1,str2=1,pool=max,drop=0.0 --- acc=98.75%
ker1=3,ker2=3,pad1=3,pad2=3,str1=3,str2=3,pool=avg,drop=0.7 --- acc=66.91%

# Выводы
- **маленькое ядро** показало лучший результат 99%
- **большие ядра** показали хороший, но чуть более низкий результат: ~97%
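- **большой stride** сильно понизил точность — до ~66% (stride 3 в сочетании с ядром 3×3 и padding 3; при stride 3 и ядрах 5–7 точность остаётся на уровне ~96–97%)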
- **max pooling** лучше, чем **average pooling**: ~98% против ~97%
- **dropout** — высокий (0.7) понижает точность, 0.5 работает корректно, 0.0 — максимальная точность
  
>***Оптимальные результаты достигаются при малых kernel, stride, dropout и при использовании max pooling***