In [1]:
import torch  # missing from lecture 2, slide 18/35
import torch.nn as nn

torch.manual_seed(711)

## Create input_tensor with three features
input_tensor = torch.tensor([[0.3471, 0.4547, -0.2356]])
print(f"input_tensor shape: {input_tensor.shape}")
input_tensor

input_tensor shape: torch.Size([1, 3])


tensor([[ 0.3471,  0.4547, -0.2356]])

In [2]:
# define a linear layer
linear_layer = nn.Linear(in_features=3, out_features=2)
weights = linear_layer.weight
biases = linear_layer.bias
print(f"shape of linear_layer weights: {weights.shape}")
print(f"shape of linear_layer biases: {biases.shape}")  # biases are 1D
output = linear_layer(input_tensor)
print(f"shape of output: {output.shape}")
print(output)

shape of linear_layer weights: torch.Size([2, 3])
shape of linear_layer biases: torch.Size([2])
shape of output: torch.Size([1, 2])
tensor([[-0.0949, -0.0668]], grad_fn=<AddmmBackward0>)


## Understanding what is happening with torch.nn.Linear



In [3]:
weights
# Parameter containing:
# tensor([[ 0.2116,  0.3001, -0.4096],
#         [-0.1005,  0.2814,  0.1440]], requires_grad=True)

Parameter containing:
tensor([[ 0.2116,  0.3001, -0.4096],
        [-0.1005,  0.2814,  0.1440]], requires_grad=True)

In [4]:
biases

Parameter containing:
tensor([-0.4013, -0.1259], requires_grad=True)

In [5]:
# make the biases compatible with outputs (1 x 2)
biases = torch.reshape(biases, (1, -1))
print(biases.shape)
biases

torch.Size([1, 2])


tensor([[-0.4013, -0.1259]], grad_fn=<ViewBackward0>)

In [6]:
y1 = torch.mm(weights, torch.t(input_tensor)) # since output is (1 x 2), this is NOT what torch is doing
print(y1.shape)
y1

torch.Size([2, 1])


tensor([[0.3064],
        [0.0591]], grad_fn=<MmBackward0>)

In [7]:
# https://stackoverflow.com/questions/44524901/how-do-i-multiply-matrices-in-pytorch
y2 = torch.mm(input_tensor, torch.t(weights))
print(y2.shape)
y2

torch.Size([1, 2])


tensor([[0.3064, 0.0591]], grad_fn=<MmBackward0>)

In [8]:
y2_plus_bias = y2 + biases
print(y2_plus_bias.shape)
y2_plus_bias

torch.Size([1, 2])


tensor([[-0.0949, -0.0668]], grad_fn=<AddBackward0>)

Since the output of the matrix multiplication **and** the `output` tensor in cell 2 are both equal to:  
<code>
shape of output: torch.Size([1, 2])
tensor([[-0.0949, -0.0668]], grad_fn=<AddmmBackward0>)
</code>

It looks like if `x` is the input tensor, `y` is the output tensor, `W` are the weights and `b` are the biases then what `nn.Linear` is doing is:
$$
y = xW^T + b
$$

and this is confirmed in the documentation:  https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear

In [9]:
# y3 = weights @ input_tensor + biases  # as shown in slide 20/35 of lecture 2: RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x3 and 1x3)
y3 = input_tensor @ torch.t(weights) + biases
y3

tensor([[-0.0949, -0.0668]], grad_fn=<AddBackward0>)

In [10]:
# ex 2
# Create a neural network of linear layers that takes a tensor of dimensions 1x8 as input and outputs a tensor of dimensions 1x1
# Use any output dimension for the first layer you want.
input_tensor = torch.Tensor([[2, 3, 6, 7, 9, 3, 2, 1]])

# Implement a small neural network with exactly two linear layers
model = nn.Sequential(nn.Linear(8, 3),  # using 3 as the output dim of the first layer, but could be anything
                      nn.Linear(3, 1)
                     )

output = model(input_tensor)
print(output)  # 1x1 output

tensor([[-1.6389]], grad_fn=<AddmmBackward0>)


In [11]:
# sigmoid activation
input_tensor = torch.tensor([[6.0]])
sigmoid = nn.Sigmoid()
output = sigmoid(input_tensor)
output

tensor([[0.9975]])

In [12]:
# softmax for multi-dim classification
# Create an input tensor
input_tensor = torch.tensor([[4.3, 6.1, 2.3]])
# Apply softmax along the last dimension
probabilities = nn.Softmax(dim=-1)
output_tensor = probabilities(input_tensor)
print(output_tensor)

tensor([[0.1392, 0.8420, 0.0188]])


In [13]:
# Binary classification: forward pass, lecture 4, slide 4/35
# Create input data of shape 5x6
input_data = torch.tensor(
[[-0.4421, 1.5207, 2.0607, -0.3647, 0.4691, 0.0946],
[-0.9155, -0.0475, -1.3645, 0.6336, -1.9520, -0.3398],
[ 0.7406, 1.6763, -0.8511, 0.2432, 0.1123, -0.0633],
[-1.6630, -0.0718, -0.1285, 0.5396, -0.0288, -0.8622],
[-0.7413, 1.7920, -0.0883, -0.6685, 0.4745, -0.4245]])

# Create binary classification model
model = nn.Sequential(
nn.Linear(6, 4), # First linear layer
nn.Linear(4, 1), # Second linear layer
nn.Sigmoid() # Sigmoid activation function
)
# Pass input data through model
output = model(input_data)
print(output)

tensor([[0.7225],
        [0.7238],
        [0.6813],
        [0.7264],
        [0.7294]], grad_fn=<SigmoidBackward0>)


In [14]:
# lecture 4 excercises
input_tensor = torch.Tensor([[3, 4, 6, 2, 3, 6, 8, 9]])

# Implement a small neural network for binary classification
model = nn.Sequential(
  nn.Linear(8, 1),
  nn.Sigmoid()
)

output = model(input_tensor)
print(output)

tensor([[0.8332]], grad_fn=<SigmoidBackward0>)


In [15]:
# Create a neural network with exactly four linear layers, which takes
# the input tensor as input, and outputs a regression value, using any
# shapes you like for the hidden layers.

input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Implement a neural network with exactly four linear layers
model = nn.Sequential(
    nn.Linear(11, 7),
    nn.Linear(7, 6),
    nn.Linear(6, 5),
    nn.Linear(5, 1)
)

output = model(input_tensor)
print(output)

tensor([[-0.9538]], grad_fn=<AddmmBackward0>)


In [16]:
# A similar neural network to the one you just built is provided, containing four linear layers;
# update this network to perform a multi-class classification with four outputs.
input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Update network below to perform a multi-class classification with four labels
model = nn.Sequential(
  nn.Linear(11, 20),
  nn.Linear(20, 12),
  nn.Linear(12, 6),
  nn.Linear(6, 4), 
  nn.Softmax(dim=-1)
)

output = model(input_tensor)
print(output)

tensor([[0.3961, 0.2461, 0.1899, 0.1679]], grad_fn=<SoftmaxBackward0>)


In [17]:
# lecture 5

In [18]:
import torch.nn.functional as F
# one-hot encoding
print(F.one_hot(torch.tensor(0), num_classes = 3))
print(F.one_hot(torch.tensor(1), num_classes = 3))
print(F.one_hot(torch.tensor(2), num_classes = 3))

tensor([1, 0, 0])
tensor([0, 1, 0])
tensor([0, 0, 1])


In [19]:
from torch.nn import CrossEntropyLoss

scores = torch.tensor([[-0.1211, 0.1059]])
one_hot_target = torch.tensor([[1, 0]])
criterion = CrossEntropyLoss()
criterion(scores.double(), one_hot_target.double())

tensor(0.8131, dtype=torch.float64)

In [20]:
# try it manually, start with softmax probabilities
import numpy as np
scores = [-0.1211, 0.1059]
targets = [1, 0]
denom = np.exp(scores[0]) + np.exp(scores[1])
p0 = np.exp(scores[0]) / denom
p1 = np.exp(scores[1]) / denom
print(p0, p1)

0.443492440036494 0.5565075599635061


In [21]:
# simplified for single point, binary classification
# for the general multi-nomial version see: https://rpubs.com/mszczepaniak/classificationgoodness
cross_entropy = -(np.log(p0))
print(cross_entropy)  # very close: 0.81307

0.8130745235187279


In [22]:
# lecture 5 excercises
y = 1
num_classes = 3

# Create the one-hot encoded vector using NumPy
one_hot_numpy = np.array([0, 1, 0])

# Create the one-hot encoded vector using PyTorch
one_hot_pytorch = F.one_hot(torch.tensor(1), num_classes)
print(one_hot_numpy)
print(one_hot_pytorch)

[0 1 0]
tensor([0, 1, 0])


Start by creating a one-hot encoded vector of the ground truth label y, which is a required step to compare y with the scores predicted by your model. Next, you'll create a cross entropy loss function. Last, you'll call the loss function, which takes scores (model predictions before the final softmax function), and the one-hot encoded ground truth label, as inputs. It outputs a single float, the loss of that sample.

In [23]:
y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# Create a one-hot encoded vector of the label y
one_hot_label = F.one_hot(torch.tensor(y), num_classes = scores.shape[1])
print(one_hot_label)
# Create the cross entropy loss function
criterion = nn.CrossEntropyLoss()
# Calculate the cross entropy loss
loss = criterion(scores.double(), one_hot_label.double())
print(loss)

tensor([[0, 0, 1, 0]])
tensor(8.0619, dtype=torch.float64)


In [24]:
# lecture 6 - Backpropagation in PyTorch
# exercise 1
weight = torch.tensor([[-1.5349,  1.1395, -0.7303, -1.4280,  1.4367,  1.5740,  0.8475,  1.3379, 1.5674],
                       [ 1.1543,  1.2855,  0.5122,  0.4215,  0.4982,  1.5640, -0.9705, -0.4987, 0.6331]])
bias = torch.tensor([-0.1435,  1.6498])
preds = torch.tensor([[2.1289, 3.7059]])
target = torch.tensor([[1., 0.]])

In [25]:
criterion = nn.CrossEntropyLoss()

# Calculate the loss
loss = criterion(preds, target)

# Compute the gradients of the loss
# torch.set_grad_enabled(True)  # Context-manager 
loss.requires_grad = True
# loss.retain_grad()
loss.backward()

# Display gradients of the weight and bias tensors in order
print(weight.grad)
print(bias.grad)

None
None


This is the output I get in the DataCamp environment:

`tensor([[-0.5063, -0.4353, -0.3859, -0.3938, -0.5257, -0.1628, -0.5167, -0.4315,
         -0.6264],
        [ 0.5063,  0.4353,  0.3859,  0.3938,  0.5257,  0.1628,  0.5167,  0.4315,
          0.6264]])
tensor([-0.8288,  0.8288])

<script.py> output:
    tensor([[-0.5215, -0.4484, -0.3975, -0.4056, -0.5416, -0.1677, -0.5322, -0.4444,
             -0.6453],
            [ 0.5215,  0.4484,  0.3975,  0.4056,  0.5416,  0.1677,  0.5322,  0.4444,
              0.6453]])
    tensor([-0.8537,  0.8537])`
`

In [26]:
model = nn.Sequential(nn.Linear(16, 8),
                      nn.Sigmoid(),
                      nn.Linear(8, 2))

# Access the weight of the first linear layer
weight_0 = model[0].weight

# Access the bias of the second linear layer
bias_1 = model[2].bias

In [27]:
model = nn.Sequential(
    nn.Linear(in_features=16, out_features=8, bias=True),
    nn.Linear(in_features=8, out_features=4, bias=True),
    nn.Linear(in_features=4, out_features=2, bias=True)
)

weight0 = model[0].weight
weight1 = model[1].weight
weight2 = model[2].weight

print(weight0)

Parameter containing:
tensor([[-0.0889, -0.2258,  0.0293,  0.1313, -0.0783, -0.0155, -0.1121,  0.0294,
          0.1611, -0.2011, -0.1436,  0.1360,  0.1156, -0.1702, -0.0887, -0.0908],
        [ 0.0051, -0.2487,  0.1283, -0.1048,  0.1592,  0.2132,  0.2190, -0.0770,
         -0.0115, -0.2344, -0.0252,  0.2449, -0.2486,  0.1549, -0.0008, -0.0557],
        [-0.1959,  0.1224, -0.2193,  0.0094, -0.2493,  0.0515,  0.0467,  0.0953,
          0.2318,  0.1633, -0.2498, -0.0413,  0.2065, -0.1216,  0.1051,  0.2073],
        [-0.1022,  0.0755,  0.1533, -0.0431, -0.1693,  0.1013,  0.2298,  0.1844,
          0.0824,  0.0120,  0.0956,  0.2078,  0.2139,  0.0491, -0.2257,  0.2486],
        [-0.1727,  0.0303, -0.1517, -0.1722, -0.1611,  0.1731,  0.0837, -0.1372,
          0.1318, -0.1916, -0.1886, -0.0165,  0.0697,  0.0994,  0.1626,  0.0643],
        [-0.0431,  0.1704,  0.1510,  0.0745, -0.0403,  0.1404,  0.0038, -0.0721,
         -0.0901,  0.1510,  0.0007, -0.1222,  0.0969,  0.1484,  0.1137,  0.1714],


In [28]:
# Access the gradients of the weight of each linear layer
grads0 = weight0.grad
grads1 = weight1.grad
grads2 = weight2.grad

print(grads0)  # FIXME: why getting None?

None


In [29]:
# Update the weights using the learning rate and the gradients
lr = 0.001
# FIXME
# weight0 = weight0 - (lr * grads0)
# weight1 = weight1 - (lr * grads1)
# weight2 = weight2 - (lr * grads2)

In [30]:
import torch.optim as optim

# Create the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001)
criterion = CrossEntropyLoss()

pred = torch.tensor([[-0.1738,  0.1308]])
target = torch.tensor([[1., 0.]])

loss = criterion(pred, target)
# loss.backward()   # FIXME: RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

# Update the model's parameters using the optimizer
# optimizer.step()

In [31]:
# lecture 7 exercises
y_hat = np.array(10)
y = np.array(1)

# Calculate the MSELoss using NumPy
mse_numpy = np.mean((y_hat - y)**2)

# Create the MSELoss function
criterion = nn.MSELoss()

# Calculate the MSELoss using the created loss function
mse_pytorch = criterion(torch.tensor(y_hat).float(), torch.tensor(y).float())
print(mse_pytorch)

tensor(81.)


In [32]:
model = nn.Sequential(
    nn.Linear(in_features=4, out_features=2, bias=True),
    nn.Sigmoid(),
    nn.Linear(in_features=2, out_features=1, bias=True)
)

criterion = nn.MSELoss()

In [33]:
def show_results(model, dataloader):
    model.eval()
    iter_loader = iter(dataloader)
    for _ in range(3):
        feature, target = next(iter_loader)
        preds = model(feature)
        for p, t in zip(preds, target):
            print(f'Ground truth salary: {t.item():.3f}. Predicted salary: {p.item():.3f}.')

In [34]:
# need to load data for this to work locally
# num_epochs = 10
# # Loop over the number of epochs and the dataloader
# for i in range(num_epochs):
#   for data in dataloader:
#     # Set the gradients to zero
#     optimizer.zero_grad()
#     # Run a forward pass
#     feature, target = data
#     prediction = model(feature)
#     # Calculate the loss
#     loss = criterion(prediction, target)
#     # Compute the gradients
#     loss.backward()
#     # Update the model's parameters
#     optimizer.step()

# show_results(model, dataloader)

### Explanation of the next code cell

First creates a ReLU (Rectified Linear Unit) function using PyTorch's built-in nn.ReLU() function.

Next, it creates a tensor x with a value of -1.0 and sets requires_grad=True. This is important because it tells PyTorch that we want to calculate gradients with respect to x during the backward pass.

Then, it applies the ReLU function to x and stores the result in y.

The y.backward() function is then called to perform a backward pass through the computation graph. This calculates the gradient of y with respect to x.

Finally, the gradient of x is accessed using x.grad and printed out. This is the gradient of the ReLU function at x.

The ReLU function is defined as f(x) = max(0, x). So, its derivative (or gradient) is 1 for x > 0 and 0 for x <= 0. Since x is -1.0 in this case, the gradient at x is 0, which is what the code prints out.

In [35]:
# lecture 8 exercises
# Create a ReLU function with PyTorch
relu_pytorch = nn.ReLU()

# Apply your ReLU function on x, and calculate gradients
x = torch.tensor(-1.0, requires_grad=True)
y = relu_pytorch(x)
y.backward()  # calc's gradient of y wrt x

# Print the gradient of the ReLU function for x
gradient = x.grad  # calculates the gradient at x
print(gradient)

tensor(0.)


In [36]:
# Create a leaky relu function in PyTorch
leaky_relu_pytorch = nn.LeakyReLU(negative_slope = 0.05)

x = torch.tensor(-2.0)
# Call the above function on the tensor x
output = leaky_relu_pytorch(x)
print(output)

tensor(-0.1000)


In [37]:
# Counting the number of parameters
model = nn.Sequential(nn.Linear(16, 4),
                      nn.Linear(4, 2),
                      nn.Linear(2, 1))

total = 0

# Calculate the number of parameters in the model
for parameter in model.parameters():
  total += parameter.numel()

print(total)

81


In [38]:
def calculate_capacity(model):
  total = 0
  for p in model.parameters():
    total += p.numel()
  return total

In [39]:
# Create a neural network with exactly three linear layers and
# less than 120 parameters, which takes n_features as inputs and outputs n_classes.
n_features = 8
n_classes = 2
h1 = 6
h2 = 6

input_tensor = torch.Tensor([[3, 4, 6, 2, 3, 6, 8, 9]])

# Create a neural network with less than 120 parameters
model = nn.Sequential(
    nn.Linear(n_features, h1),
    nn.Linear(h1, h2),
    nn.Linear(h2 ,n_classes)
)
output = model(input_tensor)

print(calculate_capacity(model))

110


In [40]:
# Create a neural network with exactly four linear layers and
# more than 120 parameters, which takes n_features as inputs and outputs n_classes.
n_features = 8
n_classes = 2
h = 6
h1 = h
h2 = h
h3 = h

input_tensor = torch.Tensor([[3, 4, 6, 2, 3, 6, 8, 9]])

# Create a neural network with more than 120 parameters
model = model = nn.Sequential(
    nn.Linear(n_features, h1),
    nn.Linear(h1, h2),
    nn.Linear(h2, h3),
    nn.Linear(h3 ,n_classes)
)

output = model(input_tensor)

print(calculate_capacity(model))

152


In [41]:
%pwd

'D:\\llmamd\\references'

In [42]:
# lecture 12
import pandas as pd
# D:\Sync\ds_ml\Data Camp\DeepLearning, NLP, LLMs\Introduction to Deep Learning with PyTorch\data
df_animals = pd.read_csv('D:\\Sync\\ds_ml\\Data Camp\\DeepLearning, NLP, LLMs\\Introduction to Deep Learning with PyTorch\\data\\animals.csv')
df_animals  # different order so that the feature array looks the same as ch4, lec 12, slide 3/34

Unnamed: 0,animal_name,hair,feathers,eggs,milk,predator,fins,legs,tail,type
0,skimmer,0,1,1,0,1,0,2,1,2
1,seahorse,0,0,1,0,0,1,0,1,4
2,gull,0,1,1,0,1,0,2,1,2
3,squirrel,1,0,0,1,0,0,2,1,1
4,tuatara,0,0,1,0,1,0,4,1,3


In [43]:
import numpy as np
# Define input features
features = df_animals.iloc[:, 1:-1]
X = features.to_numpy()
print(X)

[[0 1 1 0 1 0 2 1]
 [0 0 1 0 0 1 0 1]
 [0 1 1 0 1 0 2 1]
 [1 0 0 1 0 0 2 1]
 [0 0 1 0 1 0 4 1]]


In [44]:
# Define target features (ground truth)
target = df_animals.iloc[:, -1]
y = target.to_numpy()
y

array([2, 4, 2, 1, 3], dtype=int64)

In [45]:
import torch
from torch.utils.data import TensorDataset

# instantiate dataset class
dataset = TensorDataset(torch.tensor(X).float(), torch.tensor(y).float())
# access an individual sample
sample = dataset[0]
input_sample, label_sample = sample  # dataset samples are stored as tuples
print('input sample:', input_sample)
print('label sample:', label_sample)

input sample: tensor([0., 1., 1., 0., 1., 0., 2., 1.])
label sample: tensor(2.)


In [46]:
from torch.utils.data import DataLoader

batch_size = 2
shuffle = True

# Create a DataLoader
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
# iterate over the dataloader, shuffle will create different results each time
for batch_inputs, batch_labels in dataloader:  # samples pulled w/o replacement
    print('batch inputs:', batch_inputs)
    print('batch labels:', batch_labels)

batch inputs: tensor([[0., 0., 1., 0., 0., 1., 0., 1.],
        [0., 1., 1., 0., 1., 0., 2., 1.]])
batch labels: tensor([4., 2.])
batch inputs: tensor([[0., 0., 1., 0., 1., 0., 4., 1.],
        [0., 1., 1., 0., 1., 0., 2., 1.]])
batch labels: tensor([3., 2.])
batch inputs: tensor([[1., 0., 0., 1., 0., 0., 2., 1.]])
batch labels: tensor([1.])


In [47]:
# lecture 12 exercises
from torch.utils.data import TensorDataset

np_features = np.array(np.random.rand(12, 8))  # 12 rows with 8 random cols over [0, 1)
np_target = np.array(np.random.rand(12, 1))

np_features = np.array(np.random.rand(12, 8))
np_target = np.array(np.random.rand(12, 1))

# Convert arrays to PyTorch tensors
torch_features = torch.tensor(np_features)
torch_target = torch.tensor(np_target)
print(torch_features)
print(torch_target)

# Create a TensorDataset from two tensors
dataset = TensorDataset(torch_features, torch_target)

# Return the last element of this dataset
print(dataset[-1])

tensor([[0.3608, 0.0057, 0.4879, 0.5359, 0.9642, 0.6054, 0.1229, 0.2786],
        [0.3707, 0.9154, 0.6273, 0.4206, 0.3951, 0.2510, 0.0408, 0.5207],
        [0.7888, 0.9537, 0.6254, 0.2681, 0.2210, 0.6374, 0.0289, 0.6980],
        [0.7408, 0.7149, 0.8567, 0.2335, 0.0032, 0.6638, 0.1643, 0.6056],
        [0.5463, 0.5661, 0.8986, 0.3689, 0.1703, 0.7138, 0.8451, 0.9975],
        [0.7414, 0.8461, 0.7615, 0.1829, 0.9359, 0.6373, 0.0931, 0.8486],
        [0.6384, 0.9011, 0.3266, 0.2482, 0.6701, 0.1662, 0.9046, 0.3980],
        [0.6860, 0.4769, 0.7711, 0.5176, 0.8972, 0.0262, 0.4289, 0.7202],
        [0.8114, 0.7752, 0.6935, 0.7576, 0.5891, 0.9161, 0.3832, 0.0639],
        [0.3998, 0.7037, 0.2712, 0.3282, 0.5936, 0.7280, 0.1247, 0.9487],
        [0.8309, 0.1836, 0.7013, 0.6666, 0.7764, 0.0919, 0.4695, 0.9589],
        [0.2498, 0.7144, 0.2446, 0.9538, 0.5818, 0.1236, 0.9856, 0.1313]],
       dtype=torch.float64)
tensor([[0.9615],
        [0.8529],
        [0.2271],
        [0.7835],
        [0.

In [48]:
# lecture 12, exercise 2
file_path = "D:\\Sync\\ds_ml\\Data Camp\\DeepLearning, NLP, LLMs\\Introduction to Deep Learning with PyTorch\\data\\water_potability.csv"
df_potability = pd.read_csv(file_path)
print(df_potability.shape)
df_potability.head()

(2011, 10)


Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
0,0.587349,0.577747,0.386298,0.568199,0.647347,0.292985,0.654522,0.795029,0.630115,0
1,0.643654,0.4413,0.314381,0.439304,0.514545,0.356685,0.377248,0.202914,0.520358,0
2,0.388934,0.470876,0.506122,0.524364,0.561537,0.142913,0.249922,0.401487,0.219973,0
3,0.72582,0.715942,0.506141,0.521683,0.751819,0.148683,0.4672,0.658678,0.242428,0
4,0.610517,0.532588,0.237701,0.270288,0.495155,0.494792,0.409721,0.469762,0.585049,0


In [49]:
df_potability['Potability'].value_counts()

Potability
0    1200
1     811
Name: count, dtype: int64

In [50]:
# lecture 12, exercise 2 cont.

# Extract the features (ph, Sulfate, Conductivity, Organic_carbon) and target (Potability) values
# and load them into the appropriate tensors to represent features and targets.
features = torch.tensor(df_potability.loc[:, ['ph', 'Sulfate', 'Conductivity', 'Organic_carbon']].to_numpy()).float()
target = torch.tensor(df_potability.iloc[:, -1].to_numpy()).float()

# Use both tensors to create a PyTorch dataset using the dataset class that's
# quickest to use when tensors don't require any additional preprocessing.
dataset = TensorDataset(features, target)

In [51]:
# Create a dataloader using the above dataset
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
x, y = next(iter(dataloader))

In [52]:
import torch.nn as nn

# Create a model using the nn.Sequential API
model = nn.Sequential(
    nn.Linear(4, 5),
    nn.Linear(5, 1),
    nn.Sigmoid()
)
output = model(features)
print(type(model))
print(output)

<class 'torch.nn.modules.container.Sequential'>
tensor([[0.5394],
        [0.5263],
        [0.5221],
        ...,
        [0.5321],
        [0.5242],
        [0.5217]], grad_fn=<SigmoidBackward0>)


In [53]:
# lecture 13, ex1, model and validationloader already in env
# Set the model to evaluation mode
# model.eval()
# validation_loss = 0.0

# with torch.no_grad():
  
#   for data in validationloader:
    
#       outputs = model(data[0])
#       loss = criterion(outputs, data[1])
      
      # Sum the current loss to the validation_loss variable
      # validation_loss += loss.item()
      
# Calculate the mean loss value
# validation_loss_epoch = validation_loss / len(validationloader)
# print(validation_loss_epoch)

# Set the model back to training mode
# model.train()

In [54]:
# lecture 13, ex2 uses the masks dataset which is a pickle file

In [55]:
import matplotlib.pyplot as plt

def plot_errors(model, dataloader):
    """ plotting function used in lecture 13, ex 2"""
    # find mismatches
    mismatches = []
    for data in dataloader:
        if len(mismatches) > 8:
            break
        features, labels = data
        outputs = model(features)
        gt = labels.argmax(-1)
        pred = outputs.argmax(-1)
        for f, g, p in zip(features, gt, pred):
            if g != p:
                mismatches.append((f, g, p))
    
    
    fig = plt.figure(figsize=(8, 8))
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                     nrows_ncols=(2, 4),  # creates 2x2 grid of axes
                     axes_pad=0.5,  # pad between axes in inch.
                     )
    mapping = {0: 'No mask', 1: 'Mask', 2: 'Incorrect'}
    for idx, ax in enumerate(grid):
        ax.imshow(mismatches[idx][0].permute(1, 2, 0))
        ax.set_title(f'GT: {mapping[mismatches[idx][1].item()]} \n PRED: {mapping[mismatches[idx][2].item()]}')
        ax.axis('off')
    plt.show()

In [56]:
# Create accuracy metric using torch metrics
metric = torchmetrics.Accuracy(task="multiclass", num_classes=3)
for data in dataloader:
    features, labels = data
    outputs = model(features)
    
    # Calculate accuracy over the batch
    acc = metric(outputs.softmax(dim=-1), labels.argmax(dim=-1))
    
# Calculate accuracy over the whole epoch
acc = metric.compute()

# Reset the metric for the next epoch 
metric.reset()
plot_errors(model, dataloader)

NameError: name 'torchmetrics' is not defined