In [1]:
import torch
import torch.nn as nn


print(torch.__version__)

2.3.1


# TORCH
- Think of this as a toolbox for building and using neural networks

#### torch.nn 
- Think of this as a special part of the toolbox with all the pieces you need to build brain-like models

### NN Tools breakdown

#### nn.Linear
- This layer connects the input to the next layer by performing a math operation

#### nn.Sigmoid
- This is a function. What this function does is that it squashes the numbers to be between 0 and 1.
- This is used to make decisions yes or no/true or false

#### nn.Sequential
- Think of this like stacking a lego block. Each block is a layer and you put each layer on top of the other in order.

In [8]:
# Neurons/Nodes are like tiny decision makers, in the first layer we have 3 inputs (3 decision maker), then layer 2 has 2 inputs(2 decision maker), then the final output has 1(1 decision maker)


model = nn.Sequential(
    nn.Linear(3,2), # The 3 is the input, the 2 is then the output
    nn.Linear(2,1),
    nn.Sigmoid()
)

print(model)

Sequential(
  (0): Linear(in_features=3, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=1, bias=True)
  (2): Sigmoid()
)


### Create tensors
- Tensor is like a list of numbers arrange in a specific way. Think of tensors like it's an array (but it's a multi dimensional array, so like arrays inside an array)

In [None]:
torch.tensor([
    [1. , 1. , 1.] # This creates a tensor with one row and three column all filled with 1.0 (1x3) TENSORS HAVE TO BE FLOATS!! OTHERWISE IT WONT ACCEPT

])

torch.tensor([
    [0.1, 0.2, 0.3],  # This  creates a tensor with 5 rows and 3 columns (5x3)
    [0.4, 0.5, 0.6],   # Although since I initialize at the top that it only accepts 3 inputs (3 neurons), if I make each layer to contain 4 neurons, it will be unacceptable
    [0.7, 0.8, 0.9],
    [1.0, 1.1, 1.2],
    [1.3, 1.4, 1.5]
])

### Feed a Tensor to our Model
- Create a tensor with 3 columns and make sure it's a type float

In [10]:
# Define a tensor for our model
input_tensor = torch.tensor([
    [0.1, 0.2, 0.3]
])

#Ensure tensors are float since it cant accepts an integer
input_tensor = input_tensor.float()

### Build a simple model
- nn.Sequential : THis is like stacking blocks in a row one after the other
- nn.Linear(3,2): This block takes 3 inputs and turns them into 2 outputs.
- nn.Linear(2,1): This block takes the 2 outputs from the previous layer and turns them into 1 output.
- nn.Sigmoid(): This block squashes the final number between 0 and 1.

In [9]:
# Define the model using sequential
model = nn.Sequential(
    nn.Linear(3,2), #Input layer to hidden layer
    nn.Linear(2,1), #Hidden layer to output layer
    nn.Sigmoid() #Activation layer
)

In [13]:
# Running the model

output = model(input_tensor)
output # This will print it 

tensor([[0.4561]], grad_fn=<SigmoidBackward0>)


## Weights and Biases
- This is like deciding how much you like different kinds of fruits, you have scoring system where you give point to each fruit based on how much you like them
 #### Weights
 - Think of weights as "Importance" or "Preference" you give to each type of fruit
   - If you really like Apples, you give apple a high weight or high importance
   - If you don't like bananas, you give it a lower weight or low importance
 #### Biases
 - Now imagine that no matter what fruit it is, you give extra point because you just love fruits in general, this extra point is called 'bias'
 - You add the extra point after all the points in the weight is calculated
   - Examples: 
      - Multiply each fruit by its weight:
         - Apples: 2 (number of apples) * 3 (weight) = 6 points
         - Bananas: 1 (number of bananas) * 1 (weight) = 1 point
         - Cherries: 3 (number of cherries) * 2 (weight) = 6 points   --->> This will total to 13 points. Then you add the Bias ---> 13 + 1 (bias) = 14 points

In [17]:
# The model.named_parameters() is a built-in function in pytorch from the nn.Module class. This returns an iterator that provides both the name and parameter (like weight and biases) of each later
# if param.requires_grad: This line is like asking, "Is this part of the model something we need to change and learn from?" If the answer is yes, we'll look at it more closely. This checks a condition (something that can be true or false). This tells us if the current part (weights or biases) should be changed during learning.

# View weights and biases
for name, param in model.named_parameters(): 
    if param.requires_grad:
        print(name, param.data)

0.weight tensor([[ 0.2197,  0.3834,  0.3737],
        [ 0.2718,  0.2545, -0.1983]])
0.bias tensor([-0.1764,  0.2548])
1.weight tensor([[-0.0162, -0.2990]])
1.bias tensor([-0.0936])


### Weights and Biases example
 - Weights: These are numbers that the model uses to connect one layer to the next
 - Biases: These are extra numbers that help the mmodel make better predictions

 #### How they are applied from the code below
 - First layer: hidden_layer_output = input_tensor * first_layer_weights^T + first_layer_biases
 - Second Layer: output = hidden_layer_output * second_layer_weights^T + second_layer_biases
 - Final output: final_output = sigmoid(output)

In [19]:
# First layer weights:
# These weight connect the 3 input features to the 2 neurons in the hidden layer
tensor([[-0.4012, -0.5649,  0.3153],
        [-0.4333,  0.0119, -0.4002]])

# First layer biases:
# These biases are added to the weighted sum of inputs for each neuron in the hidden layer
tensor([-0.3478, -0.4386])

# Using the input_tensor from above, this is how the weights and biases work for the first hidden layer:
# hidden_layer_output[0] = (0.1 * -0.4012) + (0.2 * -0.5649) + (0.3 * 0.3153) - 0.3478
#                        = -0.04012 - 0.11298 + 0.09459 - 0.3478
#                        = -0.40631


# Using the input_tensor from above, this is how the weights and biases work for the first hidden layer:
# hidden_layer_output[1] = (0.1 * -0.4333) + (0.2 * 0.0119) + (0.3 * -0.4002) - 0.4386
#                        = -0.04333 + 0.00238 - 0.12006 - 0.4386
#                        = -0.59961

# The -0.40631 AND -0.59961 are the output from the nn.Linear(3,2) in the input_tensor above


# Second Layer weights:
# these weights connect the 2 neurons in the hidden layer to the single output neuron
tensor([[-0.6352, -0.1661]])

# Second layer biases:
# These bias is added to the weighted sum of inputs for the output neuron
tensor([0.4511])

# Then the calculation of the second layer which is nn.Linear(2,1) is:
# output = (-0.40631 * -0.6352) + (-0.59961 * -0.1661) + 0.4511
#        ≈ 0.25806 + 0.09957 + 0.4511
#        ≈ 0.80873


# Sigmoid Activation:
# final_output = sigmoid(0.80873) ≈ 1 / (1 + e^(-0.80873)) ≈ 0.6918
# Explanation:
        # Sigmoid Function: The calculator uses a function called sigmoid.
        # This function makes sure the number you get is between 0 and 1.
        # It's like putting a number through a machine that squishes it to fit between 0 and 1.
        # More explanation:
                # The "e" represents Euler's number, which helps the sigmoid function turn a number (0.80873 in this case) into a value between 0 and 1,
                # making it useful for decisions in things like artificial intelligence and other complex calculations.


tensor([[-0.6352, -0.1661]])

#### Setting Weights and Biases Manually:

In [20]:
model[0].weight = nn.Parameter(torch.tensor([[-0.5, 0.2, 0.1], [0.4, -0.1, -0.3]]))

### Defining a Model with nn.Module

In [31]:
class BinaryModel(nn.Module):
    def __init__(self):
        super(BinaryModel, self).__init__()
        self.linear1 = nn.Linear(3,2)
        self.linear2 = nn.Linear(2,1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.sigmoid(x)

        return x
    
model = BinaryModel()

new_output = model(input_tensor)
new_output

tensor([[0.5137]], grad_fn=<SigmoidBackward0>)