In [1]:
import torch
import torch.nn as nn
from PIL import Image
from pathlib import Path
import numpy as np
from matplotlib import pyplot as plt
import os
import math
import torch.nn.functional as F
%matplotlib inline
import torch.nn.init as init
import torch.nn.functional as F

In [2]:
# Root folder of the dataset; the loading cells read the "train" and
# "validation" sub-folders beneath it.
PATH= Path("data/mnist")

In [3]:
def log_softmax(x):
    """Return the log-softmax of ``x`` taken over its last dimension.

    Computed as ``x - logsumexp(x)`` rather than ``log(exp(x) / sum(exp(x)))``.
    The naive form overflows for large logits (``exp`` becomes ``inf`` and
    ``inf / inf`` is NaN) and needs an epsilon to avoid ``log(0)``; the
    log-sum-exp form stays finite and exact for any finite input.

    Args:
        x: tensor of raw scores (logits); classes live on the last dimension.

    Returns:
        Tensor of the same shape as ``x`` holding log-probabilities.
    """
    return x - x.logsumexp(-1, keepdim=True)

In [4]:
def nll(preds, actuals):
    """Negative log-likelihood: mean of the negated entries picked by ``actuals``.

    Args:
        preds: 2-D tensor; row ``i`` holds the per-class values for sample ``i``.
        actuals: 1-D integer tensor; ``actuals[i]`` selects the column of row ``i``.

    Returns:
        0-d tensor equal to ``-mean(preds[i, actuals[i]])``.
    """
    n_rows = actuals.shape[0]
    # Pair every row index with its target column to pick one value per sample.
    picked = preds[range(n_rows), actuals]
    return -picked.mean()

In [5]:
def validation_acc(model):
    """Return the accuracy of ``model`` over every sample yielded by ``valid_dl``.

    Evaluation runs under ``torch.no_grad()`` so no autograd graph is built
    for the validation forward passes. Per-batch accuracies are weighted by
    batch size, so a smaller final batch does not count as much as a full one
    and the result is the true fraction of correctly classified samples.

    Args:
        model: callable mapping an input batch to per-class scores.

    Returns:
        Accuracy as a Python float in ``[0, 1]``.

    Raises:
        ValueError: if ``valid_dl`` yields no samples.
    """
    weighted_sum, n_seen = 0.0, 0
    with torch.no_grad():
        for xb, yb in valid_dl:
            n = len(yb)
            weighted_sum += accuracy(model(xb), yb).item() * n
            n_seen += n
    if n_seen == 0:
        raise ValueError("valid_dl yielded no samples")
    return weighted_sum / n_seen

In [6]:
def accuracy(preds, yb):
    """Fraction of rows in ``preds`` whose arg-max class equals the label.

    Labels may be given either as a column ``(N, 1)`` or as a flat vector
    ``(N,)``. Both are compared as flat vectors: comparing an ``(N, 1)``
    prediction column against ``(N,)`` labels would silently broadcast to an
    ``(N, N)`` matrix and produce a meaningless score.

    Args:
        preds: 2-D tensor of per-class scores, one row per sample.
        yb: integer labels with ``N`` elements in total.

    Returns:
        0-d float tensor with the mean accuracy.
    """
    predicted = preds.argmax(dim=1)
    return (predicted == yb.reshape(-1)).float().mean()

In [7]:
def loss_func(preds, targets):
    """Cross-entropy style loss: ``nll`` applied to ``log_softmax(preds)``.

    Args:
        preds: raw model outputs, one row of class scores per sample.
        targets: class indices, forwarded unchanged to ``nll``.

    Returns:
        Whatever ``nll`` returns for the log-softmaxed predictions.
    """
    return nll(log_softmax(preds), targets)

In [8]:
def create_ds_from_file(src):
    """Load an image-folder dataset laid out as ``src/<digit>/<image file>``.

    For every label 0..9 the folder ``src/<label>`` is listed, its ``.jpg`` /
    ``.png`` files are opened, and each image is flattened to 784 values.
    The folder path is printed as a progress indicator.

    Args:
        src: ``pathlib.Path`` of the split folder (it is joined with ``/``).

    Returns:
        Tuple ``(images, labels)``: ``images`` is a float32 tensor of shape
        ``(N, 784)`` and ``labels`` an int64 tensor of shape ``(N, 1)``.

    Raises:
        ValueError: from ``reshape`` if an image does not hold exactly
            28*28 values.
    """
    imgs, labels = [], []

    for label in range(10):
        path = src/str(label)
        print(path)
        # Close the directory handle promptly; sort the names because
        # os.scandir returns entries in arbitrary order.
        with os.scandir(path) as entries:
            names = sorted(entry.name for entry in entries)
        files = _get_files(path, names, extensions = [".jpg", ".png"])
        for file in files:
            # The context manager releases the file handle once the pixels
            # have been copied into the array.
            with Image.open(file) as img:
                imgs.append(np.array(img).reshape(28*28))
        labels += [label] * len(files)

    # Build one contiguous array first: torch.tensor() on a list of ndarrays
    # converts element by element and is very slow. The reshape also keeps
    # the (N, 784) layout when no image was found.
    pixels = np.asarray(imgs, dtype=np.float32).reshape(-1, 28*28)
    return torch.from_numpy(pixels), torch.tensor(labels, dtype=torch.long).view(-1,1)

In [9]:
def _get_files(p, fs, extensions = None):
    p = Path(p) # to support / notation
    res = [p/f for f in fs if not f.startswith(".") 
           and ((not extensions) or f'.{f.split(".")[-1].lower()}' in extensions)]
    return res

In [10]:
# Load the training split: flattened images and their labels, read from the
# per-digit folders under PATH/"train".
trn_x, trn_y = create_ds_from_file(PATH/"train")

data\mnist\train\0
data\mnist\train\1
data\mnist\train\2
data\mnist\train\3
data\mnist\train\4
data\mnist\train\5
data\mnist\train\6
data\mnist\train\7
data\mnist\train\8
data\mnist\train\9


In [11]:
# Load the validation split the same way, from PATH/"validation".
val_x,val_y = create_ds_from_file(PATH/"validation")

data\mnist\validation\0
data\mnist\validation\1
data\mnist\validation\2
data\mnist\validation\3
data\mnist\validation\4
data\mnist\validation\5
data\mnist\validation\6
data\mnist\validation\7
data\mnist\validation\8
data\mnist\validation\9


In [12]:
# Sanity check: (number of training images, 784 flattened pixels).
trn_x.shape

torch.Size([31216, 784])

In [13]:
# Sanity check: (number of validation images, 784 flattened pixels).
val_x.shape

torch.Size([10784, 784])

In [14]:
class Dataset():
    """Pairs an input tensor with a label tensor and serves items by index."""

    def __init__(self, x, y):
        """Keep references to the inputs ``x`` and the labels ``y``."""
        self.x = x
        self.y = y

    def __len__(self):
        """Number of samples, taken from the first dimension of ``x``."""
        return len(self.x)

    def __getitem__(self, i):
        """Return ``(images, labels)`` for index ``i``.

        The selected rows of ``x`` are viewed as ``(-1, 1, 28, 28)``, i.e.
        one-channel 28x28 images; the labels are returned as indexed.
        """
        images = self.x[i].view(-1, 1, 28, 28)
        targets = self.y[i]
        return images, targets


class DataLoader():
    """Minimal batch iterator over an indexable dataset.

    The dataset must support ``len()`` and indexing with a 1-D tensor of
    indices (as ``Dataset`` does via tensor indexing).
    """

    def __init__(self, ds, bs, shuffle=True):
        """
        Args:
            ds: dataset to draw batches from.
            bs: batch size; must be a positive integer.
            shuffle: when True (the default, matching the previous behaviour)
                a fresh random permutation is drawn on every iteration; when
                False the samples are served in their stored order.

        Raises:
            ValueError: if ``bs`` is not positive.
        """
        if bs <= 0:
            # A negative step would make range() empty and silently yield
            # no batches at all.
            raise ValueError(f"bs must be positive, got {bs}")
        self.ds, self.bs, self.shuffle = ds, bs, shuffle

    def __len__(self):
        """Number of batches per pass (the last batch may be smaller)."""
        return math.ceil(len(self.ds) / self.bs)

    def __iter__(self):
        """Yield ``ds[batch_indices]`` for consecutive slices of the order."""
        n = len(self.ds)
        order = torch.randperm(n) if self.shuffle else torch.arange(n)

        for start in range(0, n, self.bs):
            yield self.ds[order[start:start + self.bs]]

In [15]:
class Func(nn.Module):
    """Wraps a plain callable as an ``nn.Module`` so it can sit in a model."""

    def __init__(self, func):
        """Store ``func``, the callable applied on every forward pass."""
        super().__init__()
        self.func = func

    def forward(self, x):
        """Return ``func(x)``."""
        result = self.func(x)
        return result

In [16]:
def flatten(x):
    """View ``x`` as 2-D: the first dimension is kept, the rest are merged."""
    n_rows = x.size(0)
    return x.view(n_rows, -1)

In [17]:
# Normalisation statistics are computed over the whole training tensor
# (a single scalar mean and std, not per pixel).
mean = trn_x.mean()
std = trn_x.std()

# NOTE: trn_x is overwritten here, so re-running this cell recomputes
# mean/std from the already-normalised data.
trn_x=(trn_x-mean)/std
# Show the statistics used and confirm the result has ~0 mean and ~1 std.
mean, std, trn_x.mean(), trn_x.std()

(tensor(34.3409), tensor(77.5014), tensor(2.2117e-05), tensor(1.0000))

In [18]:
# The validation set is normalised with the *training* mean/std, so its own
# statistics come out close to, but not exactly, 0 and 1.
val_x = (val_x-mean)/std
val_x.mean(), val_x.std()

(tensor(0.0042), tensor(1.0038))

In [19]:
# Wrap the normalised tensors so that items come back as
# (1-channel 28x28 images, labels) pairs.
train_ds = Dataset(trn_x, trn_y)
valid_ds = Dataset(val_x,val_y)

In [20]:
# Batch both splits with a batch size of 256.
train_dl = DataLoader(train_ds, 256)
valid_dl = DataLoader(valid_ds, 256)

In [21]:
# Pull a single training batch; `x` is reused below to inspect the output
# statistics of freshly initialised models.
x, y = next(iter(train_dl))

In [22]:
def train(model, epochs=5, valid_epoch=5):
    """Train ``model`` on ``train_dl`` with the notebook-level optimizer.

    Relies on the globals ``train_dl``, ``optim``, ``loss_func`` and
    ``validation_acc``. ``optim`` must have been created from the parameters
    of the same ``model`` before calling.

    Args:
        model: the network to train.
        epochs: number of full passes over ``train_dl``.
        valid_epoch: validation accuracy is printed on every epoch whose
            index is a multiple of this value (so epoch 0 is always printed).
    """
    for epoch in range(epochs):
        for xb, yb in train_dl:
            # Clear gradients before the backward pass so anything left on
            # the parameters from earlier computations cannot leak into
            # this update.
            optim.zero_grad()
            preds = model(xb)
            # reshape(-1) turns the (bs, 1) label column into (bs,) even when
            # bs == 1; a bare squeeze() would yield a 0-d tensor in that case
            # and break the row indexing done by the loss.
            loss = loss_func(preds, yb.reshape(-1))
            loss.backward()
            optim.step()

        if epoch % valid_epoch == 0:
            print(validation_acc(model))

In [23]:
def stats(x):
    """Print the mean and standard deviation of ``x`` on a single line."""
    mu = x.mean()
    sigma = x.std()
    print(f"Mean: {mu}, Std: {sigma}")

In [24]:
def gain(a):
    """Return ``sqrt(2 / (1 + a**2))`` for the given slope ``a``.

    With ``a = 0`` this evaluates to ``sqrt(2)``.
    """
    denominator = 1 + a**2
    return math.sqrt(2.0 / denominator)

In [25]:
def kaiming_uniform(x, a):
    """Fill ``x`` in place with values drawn uniformly from ``[-limit, limit]``.

    ``limit`` is ``sqrt(4.5) * gain(a) / sqrt(n)``, where ``n`` is the number
    of elements in ``x[0]``.

    Args:
        x: tensor to initialise; its ``.data`` is overwritten.
        a: slope value forwarded to ``gain``.
    """
    fan = x[0].numel()
    sigma = gain(a) / math.sqrt(fan)
    # NOTE(review): the usual Kaiming-uniform bound is sqrt(3) * std; the
    # sqrt(4.5) factor used here gives a wider range - confirm it is intended.
    limit = math.sqrt(4.5) * sigma
    x.data.uniform_(-limit, limit)

**Model 1 için Sequential yapımızı oluşturalım.**

In [31]:

# Model 1: five stride-2 3x3 convolutions, each followed by ReLU, then global
# average pooling and a linear layer producing 10 class scores.
# The size comments assume the 28x28 inputs produced by Dataset.
model1 = nn.Sequential(
        nn.Conv2d(1, 8, 3, padding=1,stride=2), nn.ReLU(),  # 28x28 -> 14x14
        nn.Conv2d(8, 16, 3, padding=1,stride=2), nn.ReLU(),  # 14x14 -> 7x7
        nn.Conv2d(16, 24, 3, padding=1,stride=2), nn.ReLU(),  # 7x7 -> 4x4
        nn.Conv2d(24, 32, 3, padding=1,stride=2), nn.ReLU(),  # 4x4 -> 2x2
        nn.Conv2d(32, 32, 3, padding=1,stride=2), nn.ReLU(),  # 2x2 -> 1x1
        nn.AdaptiveAvgPool2d(1),  # average each channel down to 1x1
        Func(flatten),  # (N, 32, 1, 1) -> (N, 32)
        nn.Linear(32,10)  # one score per digit class
)

In [32]:
# Re-initialise every convolution of model1: custom uniform weights and
# zero biases. Other layers keep their default initialisation.
for layer in model1:
    if not isinstance(layer, nn.Conv2d):
        continue
    kaiming_uniform(layer.weight, a = 0)
    layer.bias.data.zero_()

In [33]:
# Forward the sample batch through the untrained model and print the
# mean/std of its outputs as a check on the initialisation.
temp = model1(x)
stats(temp)

Mean: 0.04436280578374863, Std: 0.5081759691238403


In [34]:
# Plain SGD over model1's parameters; train() picks this up through the
# global name `optim`.
optim = torch.optim.SGD(model1.parameters(), lr=0.01, weight_decay=1e-3)

In [35]:
# Train for 80 epochs; validation accuracy is printed on epochs 0, 10, ..., 70.
train(model1,80,10)

0.6198219656944275
0.9258720874786377
0.9488554000854492
0.9571220874786377
0.9636628031730652
0.9665697813034058
0.9689316749572754
0.968204915523529


**Model 2 için Sequential yapımızı oluşturalım.**

In [54]:
# Model 2: four stride-2 3x3 convolutions, each followed by ReLU, then global
# average pooling and a linear layer producing 10 class scores.
# The size comments assume the 28x28 inputs produced by Dataset.
model2 = nn.Sequential(
        nn.Conv2d(1, 8, 3, padding=1,stride=2), nn.ReLU(),  # 28x28 -> 14x14
        nn.Conv2d(8, 16, 3, padding=1,stride=2), nn.ReLU(),  # 14x14 -> 7x7
        nn.Conv2d(16, 32, 3, padding=1,stride=2), nn.ReLU(),  # 7x7 -> 4x4
        nn.Conv2d(32, 32, 3, padding=1,stride=2), nn.ReLU(),  # 4x4 -> 2x2
        nn.AdaptiveAvgPool2d(1),  # average each 2x2 channel down to 1x1
        Func(flatten),  # (N, 32, 1, 1) -> (N, 32)
        nn.Linear(32,10)  # one score per digit class
)

In [55]:
# Re-initialise every convolution of model2: custom uniform weights and
# zero biases. Other layers keep their default initialisation.
for layer in model2:
    if not isinstance(layer, nn.Conv2d):
        continue
    kaiming_uniform(layer.weight, a = 0)
    layer.bias.data.zero_()

In [56]:
# Forward the sample batch through the untrained model and print the
# mean/std of its outputs as a check on the initialisation.
temp = model2(x)
stats(temp)

Mean: -0.13845662772655487, Std: 0.5103725790977478


In [57]:
# Rebind the global `optim` used by train() to an SGD optimizer over
# model2's parameters.
optim = torch.optim.SGD(model2.parameters(), lr=0.02, weight_decay=1e-3)

In [58]:
# Train for 80 epochs; validation accuracy is printed on epochs 0, 10, ..., 70.
train(model2,80,10)

0.6299055218696594
0.9336845874786377
0.9523982405662537
0.9623909592628479
0.9650254249572754
0.9693859219551086
0.9692041873931885
0.9712936282157898
