In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import matplotlib as plt
import torch 
import torchvision
from torchvision import transforms, datasets

In [2]:
train = datasets.MNIST("", train=True, download=True,
                      transform = transforms.Compose([transforms.ToTensor()]))

test = datasets.MNIST("", train=False, download=True,
                      transform = transforms.Compose([transforms.ToTensor()])) 


In [3]:
trainset = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)

In [4]:
#batch is useful because patching data through model at once will have a difficult time optimizing. Generalization. Passing through batches, the opitmization with erase overfitting. 
#8 to 64 for batching 
#shuffle to help nn generalize 




In [5]:
for data in trainset:
    print(data)
    break

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        ...,


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0

In [6]:
x, y = data[0][0], data[1][0]

print(y)

tensor(8)


In [7]:
import matplotlib.pyplot as plt

plt.imshow(data[0][0].view(28,28))
plt.show

<function matplotlib.pyplot.show(*args, **kw)>

In [8]:
total = 0
counter_dict = {0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,}

for data in trainset: 
    Xs, ys = data
    for y in ys:
        counter_dict[int(y)] += 1
        total+=1
        
print(counter_dict)

{0: 5923, 1: 6742, 2: 5958, 3: 6131, 4: 5842, 5: 5421, 6: 5918, 7: 6265, 8: 5851, 9: 5949}


In [9]:
for i in counter_dict:
    print(f"(i): {counter_dict[i]/total*100}")

(i): 9.871666666666666
(i): 11.236666666666666
(i): 9.93
(i): 10.218333333333334
(i): 9.736666666666666
(i): 9.035
(i): 9.863333333333333
(i): 10.441666666666666
(i): 9.751666666666667
(i): 9.915000000000001


3.0 Modeling

In [10]:
import torch.nn as nn
import torch.nn.functional as F #nn pass parameters (intitalizatiion) 

In [11]:
class Net(nn.Module):
    
    def __init__(self):
        super().__init__() #
        self.fc1 = nn.Linear(28*28, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10) #output layer 10 neurons
        
        #feed forward/activation_func --> range between 0,1 contains outputs
    def forward(self, x):  #much variation/logic can be implemented here
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x) #contrains single neuron to be fired/ probability distrabution
        return F.log_softmax(x, dim=1) #similar to axis, probability distributuion of batches of tensors.
    

net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [12]:
X = torch.rand((28,28))
X = X.view(-1,28*28) #-1 is tensor of any size 

In [None]:
output = net(X)

In [None]:
output #actual predictions, now find out loss, forward function  

4.0 Loss and optimization

In [None]:
import torch.optim as optim  #pass data, adjust loss etc..

optimizer = optim.Adam(net.parameters(), lr=0.001) #used in transfer learning, lr to tell optimizer to step down #decay lr

EPOCHS = 3

for epoch in range(EPOCHS):
    for data in trainset:
        # data is a batch of featuresets and labels
        X, y = data
        net.zero_grad() #training goes faster, law of diminishing return, zero the gradient, batch training
        output = net(X.view(-1, 28*28))
        loss = F.nll_loss(output, y) #2 major ways to calc loss, one hot vector/MSE,
        loss.backward() #backward prop
        optimizer.step() #optimize weights
    print(loss)
        
        

In [None]:
correct = 0
total = 0

with torch.no_grad(): #know how good is network, net.train()/net.eval()
    for data in trainset:
        X, y = data
        output = net(X.view(-1, 784))
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1
print("Accuracy: ", round(correct/total, 3))
    

In [None]:
import matplotlib.pyplot as plt
plt.imshow(X[0].view(28,28))
plt.show

In [None]:
print(torch.argmax(net(X[0].view(-1,784))[0])) #prediction/verification

In [None]:
a_featureset = X[0]
reshaped_for_network = a_featureset.view(-1,784) # 784 b/c 28*28 image resolution.
output = net(reshaped_for_network) #output will be a list of network predictions.
first_pred = output[0]
print(first_pred)

In [None]:
biggest_index = torch.argmax(first_pred)
print(biggest_index)

overall-- Tracking/graphing loss and accuracy over time, 
comparing in and out of sample accuracy, 
maybe hand-drawing our own example to see if it works...etc.