# Building a Neural Network

In [2]:
import torch
from torchvision import transforms, datasets 

# Load the MNIST training and test splits. With download=True the files are
# fetched when missing; root="" is a relative path, so they are stored relative
# to the current working directory. ToTensor turns each image into a tensor.
train = datasets.MNIST(
    root="",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
test = datasets.MNIST(
    root="",
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Wrap each split in a DataLoader.
#   batch_size - number of samples handed to the model per step.
#   shuffle    - draw the samples in a new random order on every pass.
# NOTE(review): shuffling helps during training; for the test loader it is
# harmless but not needed for evaluation.
trainset = torch.utils.data.DataLoader(dataset=train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(dataset=test, batch_size=10, shuffle=True)

In [3]:
import torch.nn as nn 
import torch.nn.functional as F

In [8]:
class Net(nn.Module):
    """Fully connected classifier: three ReLU hidden layers and a log-softmax output.

    With the default arguments the network maps a flattened 28x28 image
    (784 values) through three hidden layers of 64 units each to 10 output
    scores, one per digit class.

    Args:
        in_features: Number of input values per sample after flattening.
        hidden_features: Width of each of the three hidden layers.
        num_classes: Number of output classes.
    """

    def __init__(self, in_features=28 * 28, hidden_features=64, num_classes=10):
        super().__init__()  # Required: lets nn.Module register the layers below
        self.fc1 = nn.Linear(in_features, hidden_features)  # input -> hidden
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, hidden_features)
        self.fc4 = nn.Linear(hidden_features, num_classes)  # one output per class

    def forward(self, x):
        """Run a batch through the network and return per-class log-probabilities.

        Args:
            x: Batched input whose first dimension is the batch. Any remaining
                dimensions are flattened, so both ``(N, in_features)`` and
                image-shaped batches such as ``(N, 1, 28, 28)`` are accepted.

        Returns:
            Tensor of shape ``(N, num_classes)`` holding log-softmax values
            computed across the class dimension.
        """
        # Collapse everything except the batch dimension; a no-op for input
        # that is already (N, in_features).
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))  # ReLU activation after each hidden layer
        x = F.relu(self.fc2(x))
        # Arbitrary Python logic (e.g. conditionals) may be placed between layers.
        x = F.relu(self.fc3(x))
        x = self.fc4(x)  # raw class scores; no ReLU on the output layer
        return F.log_softmax(x, dim=1)
    
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [27]:
# Stand-in for one image: a 28x28 tensor of uniform random values, flattened
# into a single row of 784 values (shape (1, 784)) so it can be fed to the net.
X = torch.rand((28, 28)).view(-1, 28 * 28)

In [28]:
# Forward pass of the flattened random sample through the network. The result
# is one row of 10 log-softmax values, one per class (no training has been run
# in the cells shown up to this point).
output = net(X)
output

tensor([[-2.4504, -2.1972, -2.2861, -2.4013, -2.1790, -2.1951, -2.3202, -2.3966,
         -2.4476, -2.2066]], grad_fn=<LogSoftmaxBackward>)