In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# One sample with 11 features
input_tensor = torch.tensor([[3.0, 4.0, 6.0, 7.0, 10.0, 12.0, 2.0, 3.0, 6.0, 8.0, 9.0]])

# Multi-class classifier with four labels: a stack of linear layers whose
# widths shrink from 11 inputs down to 4 outputs, followed by a softmax over
# the last dimension so the outputs form a probability distribution
layer_sizes = [11, 20, 12, 6, 4]
linear_layers = [
    nn.Linear(n_in, n_out)
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
]
model = nn.Sequential(*linear_layers, nn.Softmax(dim=-1))

# Forward pass on the sample
output = model(input_tensor)
print(output)

tensor([[0.2322, 0.3862, 0.2930, 0.0886]], grad_fn=<SoftmaxBackward0>)


Determining the loss function of a prediction model

In [4]:
# Class label to encode and the total number of classes
y = 1
num_classes = 3

# Create the one-hot encoded vector using NumPy: start from all zeros and
# switch on the position of the label, so the result follows y / num_classes
# instead of being typed in by hand
one_hot_numpy = np.zeros(num_classes, dtype=int)
one_hot_numpy[y] = 1

# Create the one-hot encoded vector using PyTorch from the same two variables
one_hot_pytorch = F.one_hot(torch.tensor(y), num_classes=num_classes)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Ground-truth class index and the raw class scores for one sample
y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# One-hot encode the label; the vector length equals the number of score columns
n_score_columns = scores.shape[1]
one_hot_label = F.one_hot(torch.tensor(y), n_score_columns)

# Cross entropy loss function.
# Equivalent spelling: `from torch.nn import CrossEntropyLoss`,
# then `criterion = CrossEntropyLoss()`
criterion = nn.CrossEntropyLoss()

# Evaluate the loss in double precision on the scores and the one-hot target
scores_f64 = scores.double()
target_f64 = one_hot_label.double()
loss = criterion(scores_f64, target_f64)
print(loss)

tensor(8.0619, dtype=torch.float64)


gradient descent

In [11]:
# Two linear layers with a sigmoid activation in between
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Sigmoid(),
    nn.Linear(8, 2),
)

# Index 0 is the first linear layer and index 2 the second one
# (index 1 is the sigmoid)
first_linear, second_linear = model[0], model[2]

# Weight of the first linear layer
weight_0 = first_linear.weight

# Bias of the second linear layer
bias_1 = second_linear.bias


In [None]:
# Learning rate (step size of each update)
lr = 0.001

# Only layers that own a weight matrix can be updated. Indexing the model
# positionally (model[0], model[1], ...) raises AttributeError as soon as an
# activation module such as nn.Sigmoid sits between the linear layers.
linear_layers = [layer for layer in model if isinstance(layer, nn.Linear)]
weights = [layer.weight for layer in linear_layers]

# Gradient of each weight; an entry is None until loss.backward() has run
grads = [weight.grad for weight in weights]

# Gradient descent step: w <- w - lr * grad.
# The update is applied in place under no_grad so that the model's own
# parameters change (rebinding a name to `weight - lr * grad` would leave the
# model untouched) and the update itself is not recorded by autograd.
with torch.no_grad():
    for weight, grad in zip(weights, grads):
        if grad is None:
            # No gradient available for this layer yet: nothing to apply
            continue
        weight -= lr * grad

Change the step size (learning rate) to help the optimizer reach the global minimum

In [None]:
import torch.optim as optim

# NOTE(review): `sample` and `target` are not defined in this cell; they must
# be provided by an earlier cell before this one runs.

# Create the optimizer that will update the model's parameters
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Clear gradients left over from a previous run of this cell: backward() adds
# to .grad instead of overwriting it
optimizer.zero_grad()

# Forward pass and loss
pred = model(sample)
loss = criterion(pred, target)

# Backward pass: compute the gradient of the loss for every parameter
loss.backward()

# Update the model's parameters using the optimizer
optimizer.step()

Rectified linear unit (ReLU): it avoids training problems associated with the sigmoid function, such as the vanishing gradients problem.

All negative inputs are set to 0. For positive inputs, ReLU returns the input unchanged.

In [None]:
# Instantiate the ReLU activation as a module
relu_pytorch = nn.ReLU()

# Evaluate ReLU at x = -1 while tracking gradients, then backpropagate
x = torch.tensor(-1.0).requires_grad_(True)
y = relu_pytorch(x)
y.backward()

# Gradient of the ReLU output with respect to x, as stored on the input tensor
gradient = x.grad
print(gradient)

This is the leaky ReLU: negative inputs are not set to 0 but are multiplied by a small slope (`negative_slope`).

In [None]:
# Leaky ReLU module with a slope of 0.05 for negative inputs
leaky_relu_pytorch = nn.LeakyReLU(negative_slope=0.05)

# Apply it to a negative input and show the result
x = torch.tensor(-3.0)
output = leaky_relu_pytorch(x)
print(output)

`.numel()` returns the number of elements in a tensor

In [None]:
# nn.Linear(8, 4) has 4 neurons, each with 8 weights + 1 bias -> 4 * (8 + 1) = 36
# nn.Linear(4, 2) has 2 neurons, each with 4 weights + 1 bias -> 2 * (4 + 1) = 10
model = nn.Sequential(
    nn.Linear(8, 4),
    nn.Linear(4, 2),
)

# Add up the element count of every parameter tensor (weights and biases)
total = sum(parameter.numel() for parameter in model.parameters())
print(total)

In [None]:
# Create a neural network with exactly three linear layers and less than 120 parameters, 
# which takes n_features as inputs and outputs n_classes.

def calculate_capacity(model):
  """Return the total number of elements over all parameters of `model`.

  `model` must expose `parameters()`, yielding objects that have `numel()`.
  """
  return sum(p.numel() for p in model.parameters())

# Problem dimensions: number of input features and of output classes
n_features = 8
n_classes = 2

# A single sample with n_features values
input_tensor = torch.Tensor([[3, 4, 6, 2, 3, 6, 8, 9]])

# Three linear layers squeezed through width-1 hidden layers:
# (8 + 1) + (1 + 1) + 2 * (1 + 1) = 15 parameters, i.e. fewer than 120
model_less = nn.Sequential(
    nn.Linear(n_features, 1),
    nn.Linear(1, 1),
    nn.Linear(1, n_classes),
)

# Four wider linear layers:
# 4 * (8 + 1) + 8 * (4 + 1) + 9 * (8 + 1) + 2 * (9 + 1) = 177 parameters, i.e. more than 120
model_greater = nn.Sequential(
    nn.Linear(n_features, 4),
    nn.Linear(4, 8),
    nn.Linear(8, 9),
    nn.Linear(9, n_classes),
)

# Forward pass through the small model, then report its parameter count
output_less = model_less(input_tensor)
print(calculate_capacity(model_less))

# Same for the large model
output_greater = model_greater(input_tensor)
print(calculate_capacity(model_greater))

Stochastic gradient descent optimizer: `optim.SGD(model.parameters(), lr=0.01, momentum=0.95)`

- lr = controls the step size 
    - too small = not making large enough movement to minimum
    - too large = never finding minimum because covering too large of an area
    - A learning rate around 0.08 - 0.09 gets you closest to the global minimum
    
- momentum = helps to overcome getting stuck in a local minimum (the inertia of the optimizer)


- Momentum and learning rate are critical to the training of your neural network. A good rule of thumb is to start with a learning rate of 0.001 and a momentum of 0.95.

Fine-tuning = A type of transfer learning
- Smaller learning rate
- Not every layer is trained (we freeze some of them)
- Rule of thumb: freeze early layers of network and fine-tune layers closer to output layer

In [None]:
import torch.nn as nn
model = nn.Sequential(
    nn.Linear(64, 128),
    nn.Linear(128, 256),
)

# Parameters are named "<layer index>.<weight|bias>". Freeze only the weight
# matrix of the first layer; every other parameter stays trainable.
named_params = dict(model.named_parameters())
named_params['0.weight'].requires_grad = False


In [None]:
# Names of the parameters to freeze: weight and bias of the first layer
# (index 0) and of the second layer (index 1)
frozen_names = ('0.weight', '0.bias', '1.weight', '1.bias')

for name, param in model.named_parameters():
    # Freeze the parameter when it belongs to one of the two layers
    if name in frozen_names:
        param.requires_grad = False


In [None]:
# Uniform initialization
layer0 = nn.Linear(16, 32)
layer1 = nn.Linear(32, 64)

# Overwrite both weight matrices in place using nn.init.uniform_ with its
# default bounds (layer0 first, then layer1)
for layer in (layer0, layer1):
    nn.init.uniform_(layer.weight)

# Stack the two initialized layers into one model
model = nn.Sequential(layer0, layer1)

TensorDataset is great to use when your dataset can be loaded from NumPy arrays (or converted to NumPy arrays). However, sometimes you need to code a custom dataset class. 

In [None]:
import numpy as np
import torch
from torch.utils.data import TensorDataset

# Random feature matrix (12 rows x 8 columns) and target column (12 rows x 1)
np_features = np.random.rand(12, 8)
np_target = np.random.rand(12, 1)

# Convert arrays to PyTorch tensors
torch_features, torch_target = torch.tensor(np_features), torch.tensor(np_target)

# Pair each feature row with its target row in a TensorDataset
dataset = TensorDataset(torch_features, torch_target)

# Show the last (features, target) pair of this dataset
last_sample = dataset[-1]
print(last_sample)

In [None]:
# DataLoader is needed below; import it here so the cell also runs on a
# fresh kernel
from torch.utils.data import DataLoader

# NOTE(review): `dataframe` is not defined in this cell; presumably a pandas
# DataFrame with the columns used below — it must be loaded by an earlier cell.

# Load the different columns into two PyTorch tensors
features = torch.tensor(np.array(
  dataframe[['ph', 'Sulfate', 'Conductivity', 'Organic_carbon']])).float()
target = torch.tensor(np.array(
  dataframe['Potability'])).float()

# Create a dataset from the two generated tensors
dataset = TensorDataset(features, target)

# Create a dataloader using the above dataset and pull one shuffled batch
# of two samples from it
dataloader = DataLoader(dataset, shuffle=True, batch_size=2)
x, y = next(iter(dataloader))

# Create a model using the nn.Sequential API
model = nn.Sequential(
  nn.Linear(4, 2), # 4 = num cols in features
  nn.Linear(2,1) # 1 = num cols in target
)

# Run the model on the full feature tensor (not only on the batch x)
output = model(features)
print(output)

In [None]:
# Switch to evaluation mode for the validation pass
model.eval()
validation_loss = 0.0

# Gradients are not needed while validating
with torch.no_grad():
    for data in validationloader:
        # Each batch holds the inputs at position 0 and the targets at position 1
        batch_inputs, batch_targets = data[0], data[1]
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)

        # Sum the current loss to the validation_loss variable
        validation_loss += loss.item()

# Mean loss value per batch
validation_loss_epoch = validation_loss / len(validationloader)
print(validation_loss_epoch)

# Set the model back to training mode
model.train()

Overfitting can be detected when the model performs clearly better on the training data than on the validation data (the training and validation curves drift apart).

In [None]:
import torchmetrics

# Multiclass accuracy metric (3 classes) from torchmetrics
metric = torchmetrics.Accuracy(task="multiclass", num_classes=3)

for features, labels in dataloader:
    outputs = model(features)

    # Turn the outputs into class probabilities and the labels into class
    # indices (argmax over the last dimension)
    probabilities = outputs.softmax(dim=-1)
    class_indices = labels.argmax(dim=-1)

    # Calculate accuracy over the batch
    acc = metric(probabilities, class_indices)

# Calculate accuracy over the whole epoch
acc = metric.compute()

# Reset the metric for the next epoch
metric.reset()
plot_errors(model, dataloader)

In [None]:
# SGD optimizer with weight decay enabled through the weight_decay argument
optimizer = optim.SGD(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

Random search is a great way to fine-tune your hyperparameters. Upper and lower bounds should be carefully chosen to not waste computational power.


In [None]:

# Draw 10 random (learning rate, momentum) pairs
values = [
    (
        # Learning rate 10 ** -factor with factor sampled between 2 and 4,
        # i.e. a learning rate between 0.0001 and 0.01
        10 ** -np.random.uniform(2, 4),
        # Momentum sampled between 0.85 and 0.99
        np.random.uniform(0.85, 0.99),
    )
    for _ in range(10)
]

In [2]:
def birthdayCakeCandles(candles):
    """Print how many candles have the tallest height.

    Args:
        candles: iterable of candle heights (any mutually comparable values).

    Prints 0 when `candles` is empty. Returns None.
    """
    tallest = None
    tallest_count = 0

    # Single pass: track the tallest height seen so far and how often it occurs.
    # Only the tallest candles count; taking the highest frequency of any
    # height would report 3 for [1, 1, 1, 3], where just one candle is tallest.
    for candle in candles:
        if tallest is None or candle > tallest:
            # New tallest height: restart the count
            tallest, tallest_count = candle, 1
        elif candle == tallest:
            tallest_count += 1

    print(tallest_count)

array = [3, 2, 1, 3]
birthdayCakeCandles(array)

2


In [1]:
import torch
from torch.autograd import Variable

# Define the function f(x, y)
def f(x, y):
    """Objective to minimize: f(x, y) = (x + 1)^2 + 8y^2 - 3x - y + 1."""
    return (x + 1)**2 + 8*y**2 - 3*x - y + 1

# Initialize variables x and y (starting point for gradient descent).
# Plain tensors created with requires_grad=True are tracked by autograd; the
# deprecated torch.autograd.Variable wrapper is not needed.
x = torch.tensor(0.0, requires_grad=True)
y = torch.tensor(0.0, requires_grad=True)

# Set learning rate and number of iterations
lr = 0.01
num_iterations = 1000

# Gradient Descent loop
for i in range(num_iterations):
    # Compute the function value
    loss = f(x, y)

    # Compute gradients
    loss.backward()

    # Update variables using gradient descent. The in-place update runs under
    # no_grad so autograd does not record it; this replaces mutating `.data`.
    with torch.no_grad():
        x -= lr * x.grad
        y -= lr * y.grad

    # Zero the gradients to prevent accumulation in the next iteration
    x.grad.zero_()
    y.grad.zero_()

# Print the final values of x and y after gradient descent
print("Final x:", x.item())
print("Final y:", y.item())

# Print the minimum value of the function after gradient descent
print("Minimum value of f(x, y):", f(x, y).item())


Final x: 0.4999992251396179
Final y: 0.062499988824129105
Minimum value of f(x, y): 1.718750238418579
