In [14]:
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np

In [11]:
# Number of samples per mini-batch; passed as `batch_size` to every DataLoader.
batch_size = 20
# Fraction of the training set held out for validation (0.2 -> 20 %).
valid_set_size = 0.2

In [3]:
# Every image is converted to a torch tensor as it is read from the dataset.
transform = transforms.ToTensor()

# Build the training split first, then the test split; both are cached under
# ./data and downloaded on first use. Only the `train` flag differs.
train_data, test_data = (
    datasets.MNIST('data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


1.4%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz


102.8%

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz



6.4%

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


112.7%

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [12]:
# Carve a validation subset out of the training data by index.
num_train = len(train_data)
# Number of samples reserved for validation (rounded down).
split = int(np.floor(num_train * valid_set_size))

# Shuffle all positions in place so the hold-out set is a random subset.
indices = list(range(num_train))
np.random.shuffle(indices)

# The first `split` shuffled positions go to validation, the rest to training.
valid_idx = indices[:split]
train_idx = indices[split:]

# Each sampler draws, in random order, only from its own index list.
valid_sampler = SubsetRandomSampler(valid_idx)
train_sampler = SubsetRandomSampler(train_idx)

In [17]:
# Training and validation batches are both drawn from `train_data`; each
# sampler restricts its loader to that split's own index list.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=train_sampler)

valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=valid_sampler)

# Final evaluation must use the separate test split, not the training images
# (the loader was previously built from `train_data` by mistake).
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

In [20]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class Net(nn.Module):
    """Three-layer fully connected classifier with dropout.

    Layout: in_features -> hidden_1 -> hidden_2 -> num_classes, with ReLU and
    dropout after each of the two hidden layers. ``forward`` returns the raw
    output of the last linear layer (no softmax is applied).

    Args:
        hidden_1: width of the first hidden layer.
        hidden_2: width of the second hidden layer.
        dropout_p: probability used by the shared dropout layer.
        in_features: size of one flattened input sample (28 * 28 by default).
        num_classes: number of output scores per sample.

    Calling ``Net()`` with no arguments builds the same 784-512-512-10 network
    with p=0.2 dropout as before.
    """

    def __init__(self, hidden_1=512, hidden_2=512, dropout_p=0.2,
                 in_features=28 * 28, num_classes=10):
        super().__init__()
        # linear layer (in_features -> hidden_1)
        self.fc1 = nn.Linear(in_features, hidden_1)
        # linear layer (hidden_1 -> hidden_2)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        # linear layer (hidden_2 -> num_classes)
        self.fc3 = nn.Linear(hidden_2, num_classes)
        # one dropout module, reused after both hidden layers
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` scores for a batch ``x``.

        ``x`` is flattened to ``(batch, -1)`` first, so the trailing
        dimensions of each sample must multiply out to ``in_features``.
        """
        # flatten everything except the batch dimension
        x = x.view(x.shape[0], -1)

        # first hidden layer: linear -> ReLU -> dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        # second hidden layer: linear -> ReLU -> dropout
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        # output layer: raw scores, no activation
        x = self.fc3(x)

        return x

# Instantiate the network with its default layer sizes and print its
# layer-by-layer summary.
model = Net()
print(model)

Net(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


In [21]:
# Cross-entropy loss, applied to the scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters, learning rate 0.01.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [None]:
# (scratch cell cleared: it held only the unfinished expression `np.`, which is a SyntaxError)

In [None]:
 n_epochs = 20
    
valid_loss_min = np.Inf

for epoch in range(n_epochs):
    train_loss = 0 
    valid_loss = 0
    
    for data, target in train_loader:
        
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)
        
        
        for data, target in valid_loader:
            output = model(data)
            loss   = criterion(output, target)
            
            valid_loss += loss.item()*data.size(0)
        
        train_loss = train_loss/len(train_loader.dataset)
        train_loss = train_loss/len(valid_loader.dataset)
            
        if valid_loss <= valid_loss_min:
            torch.save(model.state_dict(), 'model.pt')
            valid_loss_min = valid_loss
            
            _, pred = torch.max(output,1)
            correct = np.squeeze(pred.eq(target.data.view_as(pred)))
            # pred == target    
            
        for i in range(batch_size):
            
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1
            

In [None]:
# (scratch cell cleared: it held only `torch.eq()` with no arguments, which raises TypeError)