## Ex. 3: Deep Learning basics using PyTorch
--------------------------------------------------------------------
- 

In [3]:
import torch.nn as nn
import torch.nn.functional as functional # For activations functions
import torch.optim as optim
import torch as T

In [30]:
class LinearClassifier (nn.Module):
    
    # Step 1: Initialize NN
    def __init__(self, lr, n_classes, input_dims):
        super(LinearClassifier, self).__init__();
        
        # 1.1. Three input fully-connected (fc) layers
        self.fc_1 = nn.Linear(*input_dims, 128)
        self.fc_2 = nn.Linear(128, 256)
        self.fc_3 = nn.Linear(256, n_classes)
        
        # 1.2. Optimizer. self.parameters() tells optimizer what we are optimizing
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        
        # 1.3. Loss function: Alternative MSELoss(). huber-loss etc.
        self.loss = nn.CrossEntropyLoss()
        
        # 1.4. Detect GPU and use if available
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        
        # 1.5. Send entire nn to this device
        self.to(self.device)
    
    # PyTorch handles back-prop for us, but feed-forward also. we must provide
    # Step 2: Feed-forward algo.
    def feedforward(self, data):
        # 2.1. Activate each layer and feed-forward into next
        layer_1 = functional.sigmoid(self.fc_1(data))
        layer_2 = functional.sigmoid(self.fc_2(layer_1))
        
        # 2.2. Output of last layer is NOT activate.
        # The cross-entropy loss function activates this and converts it 
        #   into probabilities that will add up to 1.0
        layer_3 = self.fc_3(layer_2)
        
        return (layer_3)
    
    # Step 3: Learn (later for Q-learning)
    def learn(self, data, labels):
        # 3.1. Initialize gradients before begining learning loop to zero
        #      Reduces chance of cross-chatter or gradients mis-behaving or leaking from one iteration to another
        self.optimizer.zero_grad()
        
        # 3.2. Convert data and labels to tensors - so as to be compatible with PyTorch
        data = T.tensor(data).to(self.device)
        labels = T.tensor(labels).to(self.device)
        
        print(data)
        print(labels)
        
        # 3.3. Get predictions, i.e. outputs
        predictions = self.feedforward(data)
        
        # 3.4. Compute loss = error = cost
        loss = self.loss(predictions, labels)
        
        # 3.5 Backward propogate
        self.backward()
        
        # 3.6 Call optimizer to optimize
        self.optimizer.step()

In [31]:
l = LinearClassifier(0.001, 2, (4,2))

In [32]:
import numpy as np
data = np.random.rand(4,2)
data

array([[3.07726621e-02, 2.52504430e-01],
       [1.23155270e-01, 3.95220516e-01],
       [7.54188863e-01, 5.97814853e-01],
       [4.16636973e-01, 1.66059927e-04]])

In [35]:
# labels = [0,1,1,0]
labels = np.random.randint(2, size=(4))
labels

array([1, 0, 1, 0])

In [36]:
l.learn(data, labels)

tensor([[3.0773e-02, 2.5250e-01],
        [1.2316e-01, 3.9522e-01],
        [7.5419e-01, 5.9781e-01],
        [4.1664e-01, 1.6606e-04]], dtype=torch.float64)
tensor([1, 0, 1, 0], dtype=torch.int32)


RuntimeError: mat1 and mat2 must have the same dtype