In [None]:
# Uncomment the next line to install the dependencies into the kernel's environment.
#%pip install torch torchvision

In [3]:
import torch

# A tensor is an n-dimensional array; this one is 1-D and holds three integers.
x = torch.tensor([1, 2, 3])
x

tensor([1, 2, 3])

In [5]:
# Random tensor with 2 rows and 3 columns (values drawn uniformly from [0, 1)).
random_tensor = torch.rand((2, 3))
random_tensor

tensor([[0.4196, 0.0964, 0.1988],
        [0.3200, 0.4745, 0.0249]])

In [6]:
# 2 x 3 tensor filled with zeros.
zeros_tensor = torch.zeros((2, 3))
zeros_tensor

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [12]:
# 2 x 3 tensor filled with ones.
ones_tensor = torch.ones((2, 3))
ones_tensor

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [16]:
# Broadcasting: the 3-element x is added to each of the two rows of ones_tensor.
torch.add(x, ones_tensor)

tensor([[2., 3., 4.],
        [2., 3., 4.]])

In [29]:
# Two 2 x 2 float matrices.
a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
b = torch.tensor([[5.0, 6.0], [7.0, 8.0]])

# Matrix product; the operator form `a @ b` is shorthand for the same call.
torch.matmul(a, b)


tensor([[19., 22.],
        [43., 50.]])

In [20]:
# Element-wise product (each entry of a times the matching entry of b),
# as opposed to the matrix product a @ b.
torch.mul(a, b)

tensor([[ 5, 12],
        [21, 32]])

In [None]:
# Sum over all elements of the tensor.
torch.sum(a)

tensor(10)

In [30]:
# Arithmetic mean over all elements of the tensor.
torch.mean(a)

tensor(2.5000)

In [8]:
# PyTorch can run on several device backends: cpu, cuda, mps and possibly others.
# Check whether CUDA is usable in this environment.

torch.cuda.is_available()

False

In [4]:
# Gradients

# requires_grad=True makes autograd record the operations applied to x,
# so gradients can be propagated back through all the computation steps.
x = torch.tensor(2.0, requires_grad=True)
y = x.pow(2) + 3 * x + 5
# Analytically dy/dx = 2*x + 3, i.e. 2*2 + 3 = 7 at x = 2.
y.backward()  # compute gradients with the backpropagation algorithm
x.grad

tensor(7.)

- A simple neural network: one gradient-descent step

In [7]:
import torch.nn as nn # Neural Networks 
import torch.optim as optim # optimizer framework for gradient methods

# 10 inputs, 1 output -> a single fully connected feed-forward layer.
# nn.Linear computes W*x + b; the TensorFlow counterpart is called Dense, not Linear.
# NOTE: the name `NN` is reused for the model class defined further down in this notebook.
NN = nn.Linear(10, 1)
# MLP -> Multi-Layer Perceptron
# Names used in the literature/publications: FC, FFN (feed-forward network),
# FFNN (feed-forward neural network), MLP.

loss = nn.MSELoss()  # mean squared error; also called loss, cost, criterion, crit
optimizer = optim.SGD(NN.parameters(), lr=1e-2)
# lr = learning rate (also written eta or alpha)
# NN.parameters() are the weights and biases of the layer

input_data = torch.rand(10)  # random features, X
output = NN(input_data)  # y_pred, predictions, y_hat
y = torch.ones(1)  # ground truth, target

# Initial value of the loss function: the gap between prediction and reality.
# Arguments follow the PyTorch convention loss(prediction, target), the same
# order that is used for the second evaluation below.
loss_output = loss(output, y)
print(f"Loss: {loss_output:.2} (before update)")

# The two essential lines of every training step (keep them in this order):
loss_output.backward()  # compute the gradients of the loss w.r.t. the parameters
optimizer.step()  # update the weights and biases using those gradients

output_new = NN(input_data)  # prediction made with the updated weights and biases
loss_new = loss(output_new, y)  # value of the loss function after one update
print(f"Loss: {loss_new:.2} (after update)")



Loss: 1.2 (before update)
Loss: 1.1 (after update)


In [33]:
# Ground-truth target used in the single gradient-descent step.
y

tensor([1.])

In [34]:
# Prediction made before the weight update.
output

tensor([-1.1091], grad_fn=<ViewBackward0>)

In [35]:
# Prediction made after the weight update.
output_new

tensor([-0.8951], grad_fn=<ViewBackward0>)

# FashionMNIST dataset
- The modern "Hello World" for neural networks (by Zalando)

In [8]:
from torchvision import datasets, transforms

# Preprocessing applied to every image: convert it to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Training and test splits of FashionMNIST, stored under ./data.
train_dataset = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batches of 64 samples; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

In [9]:
# Model 

class NN(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 128 -> 64 -> 10 logits.

    Each of the two hidden layers is a Linear layer combined with ReLU,
    BatchNorm1d and Dropout(0.3). The output layer has no activation, so the
    network returns raw logits (one per product category).

    Args:
        bn_before_activation: Where BatchNorm sits relative to ReLU.
            False (default) applies BatchNorm after ReLU (post-activation).
            True applies BatchNorm before ReLU (pre-activation).
    """

    def __init__(self, bn_before_activation=False):
        super().__init__()  # initialise the nn.Module superclass
        self.bn_before_activation = bn_before_activation
        self.layer1 = nn.Linear(28 * 28, 128)  # input layer: takes the flattened picture
        self.bn1 = nn.BatchNorm1d(128)
        self.layer2 = nn.Linear(128, 64)  # hidden layer (neither input nor output layer)
        self.bn2 = nn.BatchNorm1d(64)
        self.layer3 = nn.Linear(64, 10)  # output layer: 10 product categories
        # 30% of the activations are set to zero during training.
        # For dropout rates, increments of 0.1 (0.2 -> ... -> 0.5) are enough;
        # finer values such as 0.15 are not worth tuning.
        self.drop = nn.Dropout(0.3)

    def _hidden_block(self, x, linear, bn):
        """Apply one hidden layer: Linear, then ReLU/BatchNorm in the configured order, then Dropout."""
        x = linear(x)
        if self.bn_before_activation:
            x = torch.relu(bn(x))  # pre-activation: BatchNorm -> ReLU
        else:
            x = bn(torch.relu(x))  # post-activation: ReLU -> BatchNorm
        # Dropout goes after the hidden layers only; it is not applied to the
        # input, because all input neurons should be used.
        return self.drop(x)

    def forward(self, x):
        """Propagate a batch of images through the network and return logits.

        Args:
            x: Tensor whose elements can be regrouped into rows of 28*28 values.

        Returns:
            Tensor of shape (batch, 10) holding the unnormalised class scores.
        """
        x = x.reshape(-1, 28 * 28)  # flatten each 2D image into a 1D vector
        x = self._hidden_block(x, self.layer1, self.bn1)
        x = self._hidden_block(x, self.layer2, self.bn2)
        # Identity activation -> logits (no softmax here, so its gradients
        # do not have to be computed).
        return self.layer3(x)
    



In [27]:
# Training

model = NN()
lr = 1e-3  # learning rate
loss = nn.CrossEntropyLoss()  # cross-entropy, because this is a multi-class problem
optimizer = optim.SGD(model.parameters(), lr=lr)

n_epochs = 40

for epoch in range(n_epochs):
    model.train()  # switch the model to training mode
    running_loss = 0.0  # sum of the batch losses within this epoch
    for images, labels in train_loader:
        optimizer.zero_grad()  # reset the gradients left over from the previous batch
        # forward
        outputs = model(images)  # calculate outputs
        curr_loss = loss(outputs, labels)
        # Accumulate a plain Python float: adding the tensor itself would keep
        # the autograd history of every batch alive for the whole epoch.
        running_loss += curr_loss.item()
        # backward
        curr_loss.backward()  # gradients
        optimizer.step()  # update weights and biases
    print(f"Epoch[{epoch + 1}/{n_epochs}], Loss: {running_loss}")
    

Epoch[1/40], Loss: 1164.992919921875
Epoch[2/40], Loss: 781.4962158203125
Epoch[3/40], Loss: 663.2780151367188
Epoch[4/40], Loss: 596.8756713867188
Epoch[5/40], Loss: 553.795654296875
Epoch[6/40], Loss: 527.2633056640625
Epoch[7/40], Loss: 505.00341796875
Epoch[8/40], Loss: 489.5926818847656
Epoch[9/40], Loss: 475.85858154296875
Epoch[10/40], Loss: 465.38946533203125
Epoch[11/40], Loss: 454.567626953125
Epoch[12/40], Loss: 444.9560241699219
Epoch[13/40], Loss: 436.3873596191406
Epoch[14/40], Loss: 427.5809631347656
Epoch[15/40], Loss: 420.22052001953125
Epoch[16/40], Loss: 416.5887756347656
Epoch[17/40], Loss: 410.8335266113281
Epoch[18/40], Loss: 404.0298767089844
Epoch[19/40], Loss: 403.4423828125
Epoch[20/40], Loss: 394.0428161621094
Epoch[21/40], Loss: 393.3251953125
Epoch[22/40], Loss: 386.9781799316406
Epoch[23/40], Loss: 383.8389587402344
Epoch[24/40], Loss: 380.0873107910156
Epoch[25/40], Loss: 376.14105224609375
Epoch[26/40], Loss: 373.02337646484375
Epoch[27/40], Loss: 371.50

In [17]:
# Number of samples in the training split.
len(train_dataset)

60000

In [18]:
# Number of samples in the test split.
len(test_dataset)

10000

In [None]:
# Evaluate classification accuracy on the test set.
model.eval()  # switch the model to evaluation mode
correct = 0  # number of correctly classified samples
total = 0  # number of samples seen

with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 87.46%
