In [None]:
"""PyTorch tutorial, by Jason Liu jasonl@wustl.edu for CSE527A"""
# reference: https://pytorch.org/tutorials/

In [5]:
"""Import libraries"""
import torch # pytorch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn

In [33]:
"""Dataset"""
from torch.utils.data import dataset, dataloader
class CustomDataset(dataset.Dataset):
    """Contains data to train the model.

    An instance of this object is created separately for training, validation, and testing.
    This object inherits from the Dataset base class. To use its functionality,
    we need to overwrite its member functions as needed (below).
    We can also add additional methods.
    """

    def __init__(self, data=None): # There can be more and other arguments
        """Store the data on the instance.

        Args:
            data: the samples this dataset should hold. When omitted,
                the small example [[1, 2, 3], [4, 5, 6]] is used.
        """
        super().__init__() # this needs to be here, because we're using inheritance
        # The example list is built here instead of in the signature: a list written
        #   as a default argument is created only once and would be shared (and
        #   mutated) by every instance that relies on the default
        if data is None:
            data = [[1, 2, 3], [4, 5, 6]]
        # This is a good time to make your data part of this class
        self.data = data

    def __getitem__(self, index): # The name matters, e.g. getitem() doesn't work!
        """How this dataset extracts items by index.

        'index' is often one number, but doesn't have to be!
        This tutorial skeleton ignores 'index' and returns a placeholder pair.
        """
        # It's a good idea to turn your data into Tensors, so they can work with the model below
        # The return value can be a bunch of things (a tuple)
        # For example, in supervised learning, this would be (data, label)
        return 0, 0

    def __len__(self):
        """Other objects that work with the dataset might need this function.

        Usually, this is the number of elements in the dataset.
        This tutorial skeleton returns the placeholder 0, so a dataloader built
        on it produces no batches.
        """
        return 0

In [38]:
"""Dataloader"""
"""Feeds data into the model.
The data comes from your dataset class, but it is stacked up in batches.
Similar to Dataset, we need one of these for training, validation, and testing!
Usually, we don't need to make a subclass of DataLoader"""
# One loader per split; each one wraps its own CustomDataset instance
train_loader = dataloader.DataLoader(dataset=CustomDataset(), batch_size=10)
# By default, when we specify a batch_size, the dataloader gives a stack of 'batch_size' number of data in
#   each step of being enumerated
# No batch_size is given for the next two loaders, so DataLoader falls back to its own default
validation_loader = dataloader.DataLoader(dataset=CustomDataset())
test_loader = dataloader.DataLoader(dataset=CustomDataset())

In [34]:
"""Model"""
class CustomModel(nn.Module):
    """A neural network model made of two linear layers applied one after the other."""

    def __init__(self):
        """Declare the model: its layers and their settings."""
        super().__init__()
        # The layers are created first, in the order the data flows through them
        first_linear = nn.Linear(in_features=1, out_features=1)
        second_linear = nn.Linear(in_features=1, out_features=1)
        # The nn.Sequential() container packs up a bunch of layers;
        #   when called, data is passed through these layers sequentially
        self.network = nn.Sequential(first_linear, second_linear)
        # Of course, it's not necessary to use sequential layers
        # Note that we can define another CustomModel2(nn.Module) and use it here,
        #   so we get a model-in-model situation.
        #   This is helpful for styling or code readability.
        #   Importantly, that CustomModel2 needs to have a forward() method defined!

    def helper(self, something): # Optional, name doesn't matter
        """Return the argument unchanged; a stand-in for work off-loaded from forward()."""
        return something

    def forward(self, input): # Name matters! The model needs this to work
        """How the model calculates predictions.

        You do NOT call this method yourself, because it's called by nn.Module's __call__ function.
        """
        # It's NOT ok for other methods to have this name
        # If your code gets messy, some calculation can be off-loaded to other methods
        prepared_input = self.helper(input)
        return self.network(prepared_input)


# Create an instance of your model to use it!
m = CustomModel()

In [7]:
"""Loss function"""
loss_function = nn.CrossEntropyLoss() # This is one of many loss functions available in torch.nn

In [10]:
"""Optimizer"""
learning_rate = 1e-3 # A hyperparameter you tune
# The optimizer's job is to optimize the model's parameters,
#   so we need to hand the model's parameters to the optimizer.
# Here the parameters come from 'm', the model instance created in the Model cell
optimizer = torch.optim.Adam(params=m.parameters(), lr=learning_rate)

In [25]:
"""Training model"""
def train(dataloader, model, loss_function, optimizer):
    """Run one pass over 'dataloader', optimizing 'model' after every batch.

    Args:
        dataloader: an iterable of batches. A batch is either a (data, label)
            pair (tuple or list of length 2), or anything else, which is
            handed to the model as-is.
        model: the nn.Module to train.
        loss_function: called as loss_function(prediction, label) for
            (data, label) batches and as loss_function(prediction) otherwise.
        optimizer: the optimizer holding the model's parameters.

    Returns:
        A list with the loss of every batch (as Python floats), in order,
        so the losses can be plotted for evaluation purposes.
    """
    # It's not necessary for the train() method to have the above interface
    model.train() # put layers such as dropout / batch-norm into training behavior
    batch_losses = []
    for index, data in enumerate(dataloader):
        # enumerate() goes through what's in the dataloader one by one
        # 'data' is what we're getting each time. It's one of many items,
        #   with the shape (batch size, shape of data straight out of your dataset's __getitem__)
        # 'index' is the index of that data
        #   It's not necessary to use 'index', but it might be useful if you want to print the progress of your training
        #   For example: print(f"batch {index}: loss = {loss.item()}") once the loss is calculated

        # 1. Get prediction from model
        # You might do something to the data so it fits in the model (e.g. change its shape)
        # When the dataset's __getitem__ returns (data, label), the dataloader delivers the batch
        #   as a pair too, so it has to be unpacked before the data goes into the model
        is_data_label_pair = isinstance(data, (tuple, list)) and len(data) == 2
        if is_data_label_pair:
            model_input, label = data
        else:
            model_input, label = data, None
        prediction = model(model_input) # we're using the forward() function here
        # Also: it's a good idea to be specific (fully-spelled-out words) in naming your variables
        #   1 people who read your code, e.g. TAs helping you to debug, or your future self trying to recycle code, understand it faster
        #   2 in VScode, you rarely need to type more than 10 characters to auto-fill your desired variable name with the Tab button
        #   Fast way to get help: (we can quickly see what's what and start trouble-shooting!)
        #       stuff_i_get_from_model = m(data) <- wordy, but helpful (you're good!)
        #       predictions = m(data) <- concise and helpful (perfect!)
        #       very_long_name_separated_by_underscore = m(data) <- it's a good idea to leave the details as a comment
        #   Slow way to get help: (because we're spending time to understand what is what)
        #       pred = m(data) <- it's getting confusing, I need to take a closer look
        #       p = m(data) <- I need time to figure this out, so
        #       randomletters = m(data) <- I need a lot of time to guess-work or ask you for clarification
        #       someRandomNameSeparatedByCapitalLetters = m(data) <- I'm running into a parsing error

        # 2. Calculate the loss
        # In the supervised learning setting, the loss compares the prediction with the label
        #   that came with 'data' from the dataloader
        if is_data_label_pair:
            loss = loss_function(prediction, label)
        else:
            loss = loss_function(prediction) # the loss of the model's prediction alone

        # 3. Optimization
        # It's unlikely that you need any code sandwiched between these 3 following lines
        optimizer.zero_grad() # clear first, so gradients left over from earlier calls cannot leak into this step
        loss.backward()
        optimizer.step()

        # Record the loss as a plain number, so we can plot it for evaluation purposes
        batch_losses.append(loss.item())
        # Sometimes, we evaluate the model too, see test() below for how it's done
        # Validation is usually done along with training
        #   so it makes sense for that process to stay in this function
        #   i.e. we usually don't see a validation() function
    return batch_losses

In [13]:
"""Testing the model"""
def test(dataloader, model, loss_function):
    """The following code can be used for validation too,
    beacuse when we validate, we also don't want to optimize the model and don't need the gradients"""
    # The training step EXCEPT here's no optimization
    for index, data in enumerate(dataloader):
        with torch.no_grad(): # MUST to be here
            """.no_grad() is a context manager
            Its job is to tell the model to NOT have gradients in the calculation within its context
            This matters because when we're testing, we don't need the gradients"""
            prediction = model(dataloader)
            loss = loss_function(prediction)
            # When testing, we certainly don't want to optimize the model, 
            #   because that changes the parameters' values
    # It's a good idea to record the loss in some way, so we can plot it for evaluation purposes

In [37]:
"""Actually training the model"""
number_of_epochs = 10 # a hyperparameter to tune
for epoch in range(number_of_epochs):
    # One epoch = one call of train() on the training loader,
    #   followed by one call of test() on the validation loader
    train(train_loader, m, loss_function, optimizer)
    test(validation_loader, m, loss_function)

In [44]:
"""Debugging tensor-related errors"""
# 1 Shape error
# Make sure to read the documentation of objects you use to understand what kind of shape they want
# For example, if some input goes in 'first_layer' then 'second_layer',
#   the out_features of the 'first_layer' need to equal the in_features of 'second_layer'
first_layer = nn.Linear(in_features=10, out_features=10)
second_layer = nn.Linear(in_features=10, out_features=12) # 10 in, matching the 10 out of first_layer
# 2 Gradient error
# Parameters of PyTorch models are tensors, and they require gradients to get optimized
# This can be specified when creating tensors
tensor_with_gradient = torch.tensor([0.1], requires_grad=True) # ok to be the parameters of a model
# On the other hand, data you feed into the model do not get optimized,
#   so you don't need to worry about giving them gradients!
tensor_just_data = torch.tensor([0.1])
# 3 Device error
# Two tensors need to be on the same device to interact (e.g. be in the same calculation)
# You can check the device of a tensor with the .device attribute, and change it using .to(device)
# Pick a device that actually exists on this machine: 'cuda' (e.g. on Colab),
#   'mps' (Apple silicon), or 'cpu' as the fallback that is always there
if torch.cuda.is_available():
    device_you_want_tensor_to_be_on = 'cuda'
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device_you_want_tensor_to_be_on = 'mps'
else:
    device_you_want_tensor_to_be_on = 'cpu'
# For a tensor, .to() does NOT change the tensor in place: it returns a tensor on the new device,
#   so the result has to be kept in a variable
tensor_on_device = tensor_just_data.to(device_you_want_tensor_to_be_on)
# Being on the right device is also important for model calculation speed
# If the device is CPU, we're missing out on the speed-up from using a GPU!
# For a model, .to() moves the model's parameters in place, so no re-assignment is needed
m.to(device_you_want_tensor_to_be_on)
# 4 Data type error
# Some layers have specific requirements about the data type of the tensor
# We can check the datatype of our tensors with the .dtype attribute (no parentheses)
#   Only the last expression of a cell is displayed, so run this line in its own cell to see the value
tensor_just_data.dtype
# We can change the datatype using .to(PyTorch datatype)
#   Like with devices, this returns a new tensor and leaves 'tensor_just_data' as it was
tensor_just_data.to(torch.int32) # note that we enter torch.data_type, not just data_type

tensor([0], dtype=torch.int32)