In [7]:
import torch
from torch.utils.data import DataLoader
from torch import nn

import numpy as np

import torchvision.datasets as datasets
from torchvision.transforms import ToTensor


## download data

In [8]:

mnist_train = datasets.MNIST(root='./data', download=True, train=True, transform=ToTensor())
mnist_test = datasets.MNIST(root='./data', download=True, train=False, transform=ToTensor())


## Preparing data loaders

In [9]:
train_dataloader = DataLoader(mnist_train, batch_size=32, shuffle=True)
test_dataloader = DataLoader(mnist_test, batch_size=32, shuffle=True)

# 0 Classidier

In [4]:

model = nn.Sequential(
    nn.Linear(784, 100),
    nn.ReLU(),
    nn.Linear(100, 1)
)


In [5]:

loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [6]:

for i in range(0, 10):
    loss_sum = 0
    for X, y in train_dataloader:
        X = X.reshape((-1, 784))
        y = (y == 0).type(torch.float32).reshape((-1, 1))

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum+=loss.item()
        
    print(loss_sum)

75.50271133135539
31.90515285660149
20.767941875608813
14.740755172010722


KeyboardInterrupt: 

In [None]:

model.eval()
with torch.no_grad():
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X = X.reshape((-1, 784))
        y = (y == 0).reshape((-1, 1))

        outputs = nn.functional.sigmoid(model(X))
        correct_pred = ((outputs > 0.5) == y)
        total+=correct_pred.size(0)

        accurate+=correct_pred.type(torch.int).sum().item()
    print(accurate / total)

# 10 numbers classifier

In [10]:
import torch.nn.functional as F


In [11]:

model = nn.Sequential(
    nn.Linear(784, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)

loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [12]:


for i in range(0, 10):
    loss_sum = 0
    for X, y in train_dataloader:
        X = X.reshape((-1, 784))
        y = F.one_hot(y, num_classes=10).type(torch.float32)

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum+=loss.item()
        
    print(loss_sum)

139.8836110793054
63.92386899422854
47.44558489928022
38.23342060612049
32.071616405039094
27.681449309340678
24.28866142022889
21.671198528027162
19.653695161570795
17.472788227838464


In [14]:

model.eval()
with torch.no_grad():
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X = X.reshape((-1, 784))
        # y = F.one_hot(y, num_classes=10).type(torch.float32)

        outputs = nn.functional.sigmoid(model(X))
        correct_pred = (y == outputs.max(dim=1).indices)
        total+=correct_pred.size(0)
        accurate+=correct_pred.type(torch.int).sum().item()
    print(accurate / total)

0.9771


## Softmax

In [15]:

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [16]:

for i in range(0, 10):
    loss_sum = 0
    for X, y in train_dataloader:
        X = X.reshape((-1, 784))
        y = F.one_hot(y, num_classes=10).type(torch.float32)

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum+=loss.item()
        
    print(loss_sum)

63.428809635461675
44.23540080193925
39.303113118838155
32.24418871043599
27.410026102458687
24.896516696486287
21.33373809166642
18.074290609485615
17.16176595670413
15.267608764571833


In [17]:

model.eval()
with torch.no_grad():
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X = X.reshape((-1, 784))

        outputs = nn.functional.softmax(model(X), dim=1) 

        correct_pred = (y == outputs.max(dim=1).indices)
        total+=correct_pred.size(0)
        accurate+=correct_pred.type(torch.int).sum().item()
    print(accurate / total)

0.9769


## expand the model

In [19]:
model = nn.Sequential(
    nn.Linear(784, 100),
    nn.ReLU(),
    nn.Linear(100, 50),
    nn.ReLU(),
    nn.Linear(50, 50),
    nn.ReLU(),
    nn.Linear(50, 10)
)

In [20]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [21]:
for i in range(0, 10):
    loss_sum = 0
    for X, y in train_dataloader:
        X = X.reshape((-1, 784))
        y = F.one_hot(y, num_classes=10).type(torch.float32)

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum+=loss.item()
        
    print(loss_sum)

605.3214039579034
247.66766305174679
179.67231406504288
139.57538314291742
116.38537886820268
94.61950177434483
79.75649269814312
66.49835635578347
58.32240558512058
52.42944729811643


In [22]:
model.eval()
with torch.no_grad():
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X = X.reshape((-1, 784))

        outputs = nn.functional.softmax(model(X), dim=1)

        correct_pred = (y == outputs.max(dim=1).indices)
        total+=correct_pred.size(0)
        accurate+=correct_pred.type(torch.int).sum().item()
    print(accurate / total)

0.9782
