In [1]:
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from tqdm import tqdm
torch.__version__

'2.5.1+cu124'

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
device

'cuda'

## Create Some Dummy Data

In [4]:
num_features = 10
num_samples = 1000

In [5]:
# Generate random data for X
X = torch.rand(size=(num_samples, num_features), dtype=torch.float)

In [6]:
# Generate random weights and bias which will act as original weights and bias used to create y
original_weights = torch.randn(size=(num_features, 1), dtype=torch.float)
original_bias = torch.randn(1)

In [7]:
y = X @ original_weights + original_bias

In [8]:
# Print shapes
print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: torch.Size([1000, 10])
y shape: torch.Size([1000, 1])


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [10]:
# Print train and test shapes
print("X Train shape:", X_train.shape)
print("y Train shape:", y_train.shape)
print("X Test shape:", X_test.shape)
print("y Test shape:", y_test.shape)

X Train shape: torch.Size([800, 10])
y Train shape: torch.Size([800, 1])
X Test shape: torch.Size([200, 10])
y Test shape: torch.Size([200, 1])


## Define Linear Regression Model

In [11]:
# Model is subclass of nn.Module
class LinearRegressionModel(nn.Module):
  def __init__(self):
    super().__init__()

    # Linear layer
    self.linear_layer = nn.Linear(in_features=num_features, out_features=1)

  # override forward method of nn.Module
  def forward(self, X):
    return self.linear_layer(X)

In [12]:
model = LinearRegressionModel()

In [13]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.1445, -0.1143,  0.2820, -0.2890, -0.2298,  0.1156, -0.1046, -0.1054,
          -0.2429,  0.0691]], requires_grad=True),
 Parameter containing:
 tensor([-0.0339], requires_grad=True)]

## Set model and data to same device

In [14]:
model = model.to(device)
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

## Training Loop

In [15]:
# define cost function
loss_fn = nn.MSELoss()

# define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [16]:
num_epochs = 1000

for epoch in range(num_epochs):
  #set model to training
  model.train()

  #1. Forward pass
  y_pred = model(X_train)

  #2. Calculate loss
  loss = loss_fn(y_train, y_pred)

  #3. Set optimizer zero grad
  optimizer.zero_grad()

  #4. Loss backward
  loss.backward()

  #5. Optimizer step
  optimizer.step()

  #6. Inference
  if(epoch % 100 == 0):
    model.eval()
    with torch.inference_mode():
      y_test_pred = model(X_test)
      loss_test = loss_fn(y_test_pred, y_test)
      print(f"Epoch {epoch} | Training Set Loss {loss} | Test Set Loss {loss_test}")

Epoch 0 | Training Set Loss 0.09649810940027237 | Test Set Loss 0.10279981046915054
Epoch 100 | Training Set Loss 0.06734631955623627 | Test Set Loss 0.07079833000898361
Epoch 200 | Training Set Loss 0.048248402774333954 | Test Set Loss 0.050895560532808304
Epoch 300 | Training Set Loss 0.034643061459064484 | Test Set Loss 0.03668508306145668
Epoch 400 | Training Set Loss 0.024941787123680115 | Test Set Loss 0.026523713022470474
Epoch 500 | Training Set Loss 0.018016912043094635 | Test Set Loss 0.0192481130361557
Epoch 600 | Training Set Loss 0.01306756492704153 | Test Set Loss 0.014030643738806248
Epoch 700 | Training Set Loss 0.009524732828140259 | Test Set Loss 0.010282177478075027
Epoch 800 | Training Set Loss 0.0069839912466704845 | Test Set Loss 0.007583157625049353
Epoch 900 | Training Set Loss 0.005157767329365015 | Test Set Loss 0.00563463568687439


## Make predictions

In [17]:
model.eval()

with torch.inference_mode():
  y_test_pred = model(X_test)

In [18]:
y_test[:10]

tensor([[-0.4530],
        [-0.1436],
        [-0.9461],
        [-0.2386],
        [-0.7597],
        [ 0.2255],
        [-0.0547],
        [-0.1716],
        [-0.8107],
        [-0.5328]], device='cuda:0')

In [19]:
y_test_pred[:10]

tensor([[-0.4432],
        [-0.1285],
        [-0.8695],
        [-0.2445],
        [-0.7767],
        [ 0.1592],
        [-0.1224],
        [-0.1932],
        [-0.6874],
        [-0.5396]], device='cuda:0')