In [31]:
import torch
import torch.nn as nn 
import torch.nn.functional as F 

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        # kernel size = set the filter size 
        # in_channels = Depend on the number of color channels of the input
        # out_channels = Set the number of filters 
        # increase the number of channels during convolution 
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        # shrink the channels during the linear function
        # 12*4*4 means you have flatten the matrix
            # 12 = out_channel
            # 4 * 4 = 
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
        
    def forward(self, t):
        # (1) input layer 
        t = t 
        # output: [1,1,28,28]
        
        # (2) hidden conv layer 
        t = self.conv1(t)
        # output: [1,6,24,24]
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # output: [1,6,12,12]
        
        # (3) hidden conv layer 
        t = self.conv2(t)
        # output: [1,12,8,8]
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # output: [1,12,4,4]
        
        # (4) hidden linear layer 
        t = t.reshape(-1, 12 * 4 * 4)
        # output: [1,192]
        t = self.fc1(t)
        # output: [1,120]
        t = F.relu(t)
        
        # (5) hidden linear layer 
        t = self.fc2(t)
        # output: [1,60]
        t = F.relu(t)
        
        # (6) ouptut layer 
        t = self.out(t)
        # output: [1,10]
        # t = F.softmax(t, dim=1)
        return t

In [30]:
network = Network()
print(network)
print(network.conv1)
print(network.conv2)
print(network.fc1)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)
Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
Linear(in_features=192, out_features=120, bias=True)


In [26]:
network.conv1.weight

Parameter containing:
tensor([[[[-0.1711,  0.1359,  0.1461, -0.1989,  0.1453],
          [-0.0698, -0.0657, -0.0920, -0.1884,  0.0459],
          [ 0.0708, -0.1693,  0.0482, -0.1047, -0.0068],
          [-0.1736,  0.0746, -0.1650,  0.1818,  0.0669],
          [ 0.1697, -0.1069,  0.1543,  0.0256,  0.1557]]],


        [[[ 0.0585, -0.0941,  0.1178, -0.0015, -0.0087],
          [-0.0934, -0.0563, -0.1759, -0.0917, -0.0847],
          [ 0.1211,  0.0757, -0.0124, -0.0692, -0.1519],
          [-0.0575, -0.0517,  0.0436,  0.0808,  0.0487],
          [-0.1061, -0.1797,  0.1358, -0.0204, -0.1514]]],


        [[[-0.1123, -0.1055, -0.0145, -0.1858, -0.0529],
          [ 0.0909,  0.0193,  0.0174, -0.0206, -0.0836],
          [-0.0279, -0.0998, -0.0645, -0.0419, -0.1884],
          [ 0.0992,  0.0034,  0.0927, -0.1696,  0.1205],
          [ 0.0170, -0.0437, -0.0316,  0.0492, -0.1920]]],


        [[[ 0.0059, -0.0579, -0.1822,  0.1153,  0.1392],
          [ 0.0482,  0.1152, -0.0611, -0.1372, -0.1727

In [9]:
network.conv1.weight.shape

torch.Size([6, 1, 5, 5])

In [15]:
# perform matrix multiplication to create a linear result 
in_features = torch.tensor([1,2,3,4], dtype=torch.float32)
weight_matrix = torch.tensor([ 
    [1,2,3,4],
    [2,3,4,5],
    [3,4,5,6]
], dtype=torch.float32)
weight_matrix.matmul(in_features)

tensor([30., 40., 50.])

In [14]:
for name, param in network.named_parameters():
    print(name, '\t', param.shape)

conv1.weight 	 torch.Size([6, 1, 5, 5])
conv1.bias 	 torch.Size([6])
conv2.weight 	 torch.Size([12, 6, 5, 5])
conv2.bias 	 torch.Size([12])
fc1.weight 	 torch.Size([120, 192])
fc1.bias 	 torch.Size([120])
fc2.weight 	 torch.Size([60, 120])
fc2.bias 	 torch.Size([60])
out.weight 	 torch.Size([10, 60])
out.bias 	 torch.Size([10])


In [20]:
fc = nn.Linear(in_features=4, out_features=3, bias=False)

In [21]:
fc.weight = nn.Parameter(weight_matrix)

In [22]:
fc(in_features)

tensor([30., 40., 50.], grad_fn=<SqueezeBackward3>)

In [33]:
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
torch.set_printoptions(linewidth=120)

train_set = torchvision.datasets.FashionMNIST(
    root = '/Users/sunghohong/Desktop/data/FashionMNIST',
    train=True, # data for the training set
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor() # transform the data into tensors
    ])
)


In [34]:
torch.set_grad_enabled(False)
network = Network()
sample = next(iter(train_set))

image, label = sample 
image.shape 

torch.Size([1, 28, 28])

In [35]:
image.unsqueeze(0).shape

torch.Size([1, 1, 28, 28])

In [38]:
pred = network(image.unsqueeze(0))

In [39]:
print(pred)

tensor([[-0.0105,  0.0121, -0.0526,  0.0408,  0.0733,  0.0254, -0.0562,  0.0941, -0.0473,  0.1409]])


In [41]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 10)
batch = next(iter(train_loader))
images, labels = batch
images.shape

torch.Size([10, 1, 28, 28])

In [42]:
preds = network(images)

In [43]:
preds

tensor([[-0.0105,  0.0121, -0.0526,  0.0408,  0.0733,  0.0254, -0.0562,  0.0941, -0.0473,  0.1409],
        [-0.0126,  0.0144, -0.0529,  0.0408,  0.0761,  0.0329, -0.0581,  0.0945, -0.0523,  0.1397],
        [-0.0088,  0.0039, -0.0528,  0.0395,  0.0717,  0.0299, -0.0535,  0.1054, -0.0464,  0.1411],
        [-0.0114,  0.0097, -0.0521,  0.0398,  0.0732,  0.0310, -0.0557,  0.0998, -0.0478,  0.1384],
        [-0.0161,  0.0151, -0.0478,  0.0423,  0.0730,  0.0307, -0.0496,  0.0914, -0.0403,  0.1411],
        [-0.0159,  0.0146, -0.0498,  0.0415,  0.0793,  0.0311, -0.0587,  0.0900, -0.0515,  0.1397],
        [-0.0099,  0.0111, -0.0518,  0.0402,  0.0742,  0.0274, -0.0561,  0.0964, -0.0487,  0.1390],
        [-0.0117,  0.0116, -0.0491,  0.0432,  0.0775,  0.0274, -0.0573,  0.0950, -0.0519,  0.1335],
        [-0.0107,  0.0009, -0.0543,  0.0380,  0.0744,  0.0275, -0.0557,  0.1057, -0.0456,  0.1363],
        [-0.0142,  0.0087, -0.0476,  0.0340,  0.0775,  0.0225, -0.0583,  0.0977, -0.0527,  0.1403]])

In [44]:
preds.argmax(dim=1)

tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9])

In [45]:
labels

tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5])

In [46]:
preds.argmax(dim=1).eq(labels)

tensor([ True, False, False, False, False, False, False, False, False, False])

In [58]:
import torch.optim as optim
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x13060e7f0>

In [59]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 100)
batch = next(iter(train_loader))
images, labels = batch 

preds = network(images)
loss = F.cross_entropy(preds, labels)
loss.item()

2.300213575363159

In [60]:
print(network.conv1.weight.grad)

None


In [61]:
loss.backward()

In [63]:
print(network.conv1.weight.grad.shape)

torch.Size([6, 1, 5, 5])


In [64]:
optimizer = optim.Adam(network.parameters(), lr=0.01)
loss.item()

2.300213575363159

In [65]:
optimizer.step()

In [66]:
preds = network(images)
loss = F.cross_entropy(preds, labels)
loss.item()

2.2714192867279053

In [75]:
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

total_loss = 0 

for batch in train_loader:
    images, labels = batch 
    
    preds = network(images)
    loss = F.cross_entropy(preds, labels)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    total_loss += loss.item()

print('epoch:',0, "loss", total_loss)

epoch: 0 loss 336.615659981966


In [76]:
print(len(train_set))
print(len(train_set.targets))

60000
60000


In [78]:
def get_all_preds(model, loader):
    all_preds = torch.tensor([])
    for batch in loader:
        images, labels = batch 
        preds = model(images)
        all_preds = torch.cat((all_preds, preds),dim=0)
    return all_preds


def get_num_correct()

prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=10000)
train_preds = get_all_preds(network, prediction_loader)
print(train_preds.requires_grad)

True
