To better understand how PyTorch works, I asked ChatGPT to generate a quickstart guide, then worked through it while cross-checking each step against the official PyTorch documentation.

Start by importing PyTorch and any other necessary libraries.

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim

You can create tensors (PyTorch’s fundamental data structure) in several ways. Here are some examples:

In [14]:
# Build four example tensors first, then display them in order.
x = torch.zeros(3, 3)                       # 3x3, every element 0
y = torch.ones(3, 3)                        # 3x3, every element 1
z = torch.tensor([[1.0, 2.0], [3.0, 4.0]])  # values copied from a nested Python list
random_tensor = torch.rand(3, 3)            # 3x3, uniform samples from [0, 1)

# Each entry pairs the heading to print with the tensor it describes.
for label, tensor in (
    ("Tensor of zeros:\n", x),
    ("Tensor of ones:\n", y),
    ("Tensor from list:\n", z),
    ("Random tensor:\n", random_tensor),
):
    print(label, tensor)


Tensor of zeros:
 tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
Tensor of ones:
 tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
Tensor from list:
 tensor([[1., 2.],
        [3., 4.]])
Random tensor:
 tensor([[0.2553, 0.3853, 0.1101],
        [0.5632, 0.9935, 0.0277],
        [0.8100, 0.8897, 0.2769]])


You can perform a variety of operations on tensors. Here are some examples:

In [15]:
# Element-wise addition of two 1-D tensors of equal length
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([4.0, 5.0, 6.0])
sum_tensor = a + b
print("Element-wise sum:\n", sum_tensor)

# Dot product of two 1-D tensors. This is NOT a matrix multiplication:
# torch.dot only accepts 1-D inputs and returns a 0-dim (scalar) tensor.
dot_product = torch.dot(a, b)
print("Dot product:\n", dot_product)

# Matrix multiplication (for 2D tensors); the integer literals give
# integer tensors, so the product is an integer tensor as well
matrix1 = torch.tensor([[1, 2], [3, 4]])
matrix2 = torch.tensor([[5, 6], [7, 8]])
matrix_multiplication = torch.matmul(matrix1, matrix2)
print("Matrix multiplication:\n", matrix_multiplication)

# Reshaping a tensor: view the 3 elements of `a` as a 3x1 column
reshaped_tensor = a.view(3, 1)
print("Reshaped tensor:\n", reshaped_tensor)


Element-wise sum:
 tensor([5., 7., 9.])
Dot product:
 tensor(32.)
Matrix multiplication:
 tensor([[19, 22],
        [43, 50]])
Reshaped tensor:
 tensor([[1.],
        [2.],
        [3.]])


Let’s see how PyTorch handles automatic differentiation (Autograd):

In [16]:
# Tensor created with requires_grad=True so operations on it are tracked
x = torch.tensor([3.0], requires_grad=True)

# y = x^2, hence dy/dx = 2x
y = x.pow(2)

# Backpropagate from y; the derivative ends up in x.grad
y.backward()

# With x = 3 the gradient dy/dx is 6
print("Gradient of x:\n", x.grad)


Gradient of x:
 tensor([6.])


Now let’s define a simple neural network. Here, we’ll create a feedforward network with one hidden layer.

In [17]:
class SimpleNet(nn.Module):
    """Feedforward network: Linear(2, 4) -> ReLU -> Linear(4, 1)."""

    def __init__(self):
        super().__init__()
        # Hidden layer: 2 input features -> 4 neurons
        self.fc1 = nn.Linear(in_features=2, out_features=4)
        # Output layer: 4 neurons -> 1 output
        self.fc2 = nn.Linear(in_features=4, out_features=1)

    def forward(self, x):
        """Apply fc1, a ReLU activation, then fc2, and return the result."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)


# Build one instance and show its layer structure
model = SimpleNet()
print(model)


SimpleNet(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=1, bias=True)
)


For training, you need a loss function and an optimizer. Here's how you can define them:

In [18]:
# Loss: mean squared error between predictions and targets
criterion = nn.MSELoss()

# Optimizer: stochastic gradient descent over all of the model's parameters
learning_rate = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

Now let's simulate training the network on some random data.

In [19]:
# Random stand-in data: a batch of 5 samples with 2 features each,
# and one random target value per sample
inputs = torch.randn(5, 2)
targets = torch.randn(5, 1)

num_epochs = 100
log_interval = 10  # report the loss once every 10 epochs

for epoch in range(num_epochs):
    # Clear gradients left over from the previous iteration
    optimizer.zero_grad()

    # Forward pass: predictions for the batch, then their loss
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Backward pass computes gradients; step() applies the update
    loss.backward()
    optimizer.step()

    if epoch % log_interval == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

Epoch 0, Loss: 0.07616738975048065
Epoch 10, Loss: 0.07422588765621185
Epoch 20, Loss: 0.07329786568880081
Epoch 30, Loss: 0.07276806980371475
Epoch 40, Loss: 0.07240810990333557
Epoch 50, Loss: 0.07213012129068375
Epoch 60, Loss: 0.07189889252185822
Epoch 70, Loss: 0.07169922441244125
Epoch 80, Loss: 0.07152363657951355
Epoch 90, Loss: 0.07136775553226471


If you have a GPU available, you can move your model and tensors to the GPU for faster computation.

In [20]:
# Pick the GPU when CUDA is available, otherwise stay on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

# The model and the data it consumes must live on the same device
model = model.to(device)
inputs, targets = inputs.to(device), targets.to(device)

# Same training procedure as before, now running on `device`
for epoch in range(100):
    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, targets)

    loss.backward()
    optimizer.step()

    # Report the loss every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")


Epoch 0, Loss: 0.07122863084077835
Epoch 10, Loss: 0.07110394537448883
Epoch 20, Loss: 0.07099183648824692
Epoch 30, Loss: 0.07089066505432129
Epoch 40, Loss: 0.07079906761646271
Epoch 50, Loss: 0.07071582227945328
Epoch 60, Loss: 0.07063988596200943
Epoch 70, Loss: 0.07057031244039536
Epoch 80, Loss: 0.07050631940364838
Epoch 90, Loss: 0.07044719159603119


After training your model, you can save it and later reload it.

In [21]:
# Save the model's state_dict (the learned parameters, not the module object)
torch.save(model.state_dict(), 'simple_net.pth')

# Create a new instance of the model; it is created on the CPU
model_new = SimpleNet()

# Load the saved state_dict into the new model.
# map_location='cpu' remaps any tensors that were saved from a GPU, so the
# checkpoint also loads on a machine without CUDA.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
state_dict = torch.load('simple_net.pth', map_location='cpu')
model_new.load_state_dict(state_dict)

# Set the model to evaluation mode (if needed)
model_new.eval()


SimpleNet(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=1, bias=True)
)

PyTorch provides tools to load and iterate over data efficiently using the DataLoader class.

In [22]:
from torch.utils.data import DataLoader, TensorDataset

# Dummy data: 100 samples with 2 features, and one target per sample
inputs = torch.randn(100, 2)
targets = torch.randn(100, 1)

# Pair each input row with its target row
dataset = TensorDataset(inputs, targets)

# Serve the dataset in shuffled batches of 10
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

# Pull only the first batch from the loader and display it
batch_idx, (input_batch, target_batch) = next(enumerate(dataloader))
print(f"Batch {batch_idx + 1}:")
print("Input batch:\n", input_batch)
print("Target batch:\n", target_batch)


Batch 1:
Input batch:
 tensor([[-0.0492, -0.2575],
        [ 0.3752,  2.4848],
        [-0.6219, -0.0186],
        [-0.2602, -1.5549],
        [ 0.1875,  0.3585],
        [ 0.4695, -0.1040],
        [ 0.1428, -0.8554],
        [ 0.4228,  0.4990],
        [ 0.2511,  0.0760],
        [ 2.9968, -0.6627]])
Target batch:
 tensor([[ 1.4559],
        [-2.6418],
        [ 0.5649],
        [ 0.4986],
        [ 1.4350],
        [ 0.5621],
        [ 1.4103],
        [-0.6509],
        [-0.6911],
        [ 0.8793]])


When using the model for inference (testing), you should set it to evaluation mode using .eval() to ensure that layers like dropout or batch normalization behave correctly.

In [23]:
# Set the model to evaluation mode
model.eval()

# The model may live on the GPU (it is moved with model.to(device) earlier),
# so look up where its parameters are instead of assuming the CPU.
model_device = next(model.parameters()).device

# Perform inference (without gradients)
with torch.no_grad():
    # Simulate new input data and place it on the model's device; feeding
    # CPU inputs to a GPU model raises a device-mismatch error.
    test_inputs = torch.randn(5, 2).to(model_device)
    test_outputs = model(test_inputs)

print("Test outputs:\n", test_outputs)


Test outputs:
 tensor([[-0.3710],
        [-0.1559],
        [-0.3670],
        [-0.1599],
        [-0.4335]])


Here’s a simple pipeline that combines creating the model, training it, and evaluating it.

In [24]:
# Create the model, loss function, and optimizer
model = SimpleNet()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Simulate training data: 100 samples with 2 features, one target per sample
# (the test inputs are generated separately in the evaluation step below)
inputs = torch.randn(100, 2)
targets = torch.randn(100, 1)

# Training loop. Training mode is set once up front: nothing inside the loop
# switches the model to eval mode, so re-setting it every epoch is redundant.
model.train()
for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    # Print the loss every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

# Evaluation on freshly generated random inputs, without gradient tracking
model.eval()
with torch.no_grad():
    test_inputs = torch.randn(5, 2)
    test_outputs = model(test_inputs)
    print("Test outputs:\n", test_outputs)


Epoch 0, Loss: 1.074761986732483
Epoch 10, Loss: 1.0521851778030396
Epoch 20, Loss: 1.0380479097366333
Epoch 30, Loss: 1.029058814048767
Epoch 40, Loss: 1.0232292413711548
Epoch 50, Loss: 1.0193463563919067
Epoch 60, Loss: 1.0166529417037964
Epoch 70, Loss: 1.0147151947021484
Epoch 80, Loss: 1.0132513046264648
Epoch 90, Loss: 1.0121207237243652
Test outputs:
 tensor([[-0.0002],
        [ 0.0490],
        [-0.0002],
        [ 0.0416],
        [ 0.0566]])


These code snippets should give you a good foundation to start using PyTorch. You can experiment with creating models, training them, using GPUs, and managing data with DataLoader. Feel free to modify the code and add your own layers, loss functions, and data pipelines!