In [1]:
import warnings
from pathlib import Path

import torch
from sklearn.model_selection import train_test_split
from torch import nn
from tqdm import tqdm

warnings.filterwarnings("ignore")
torch.__version__

'2.5.1+cu124'

In [2]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

In [3]:
# Display the device string selected in the previous cell.
device

'cuda'

## Create Some Dummy Data

In [4]:
# Seed PyTorch's default random generator so the random tensors created in the
# following cells (and anything else drawn from that generator) are identical
# on every "Restart Kernel -> Run All".
torch.manual_seed(42)

# Dataset dimensions
num_features = 10   # number of columns in the feature matrix X
num_samples = 1000  # number of rows in X

In [5]:
# Feature matrix: one row per sample, one column per feature, filled with
# random values from torch.rand
X = torch.rand(num_samples, num_features, dtype=torch.float32)

In [6]:
# Ground-truth parameters used to synthesise y; these are the values the model
# is later trained to recover. Weights are drawn before the bias.
original_weights = torch.randn(num_features, 1, dtype=torch.float32)
original_bias = torch.randn(size=(1,))

In [7]:
# Noise-free linear targets: matrix product of X with the true weights, plus
# the true bias (broadcast over every row)
y = torch.matmul(X, original_weights) + original_bias

In [8]:
# Report the dimensions of the full feature matrix and target tensor
print("X shape:", X.size())
print("y shape:", y.size())

X shape: torch.Size([1000, 10])
y shape: torch.Size([1000, 1])


In [9]:
# Hold out 20% of the samples for testing. A fixed random_state makes the
# shuffle (and therefore every loss value reported below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [10]:
# Report the dimensions of each split produced by train_test_split
print("X Train shape:", X_train.size())
print("y Train shape:", y_train.size())
print("X Test shape:", X_test.size())
print("y Test shape:", y_test.size())

X Train shape: torch.Size([800, 10])
y Train shape: torch.Size([800, 1])
X Test shape: torch.Size([200, 10])
y Test shape: torch.Size([200, 1])


## Define Linear Regression Model

In [11]:
# Model is a subclass of nn.Module
class LinearRegressionModel(nn.Module):
  """Linear regression implemented as a single ``nn.Linear`` layer.

  Args:
    in_features: Number of input features per sample. When omitted, the
      notebook-level ``num_features`` is used, so ``LinearRegressionModel()``
      builds the same layer as before.
    out_features: Number of values predicted per sample (default 1).
  """

  def __init__(self, in_features=None, out_features=1):
    super().__init__()

    # Fall back to the notebook-level constant only when no explicit size is
    # given, so the class is usable on its own with an explicit in_features.
    if in_features is None:
      in_features = num_features

    # Linear layer
    self.linear_layer = nn.Linear(in_features=in_features, out_features=out_features)

  # Override forward method of nn.Module
  def forward(self, X):
    """Apply the linear layer to ``X`` and return the predictions.

    ``X`` must have ``in_features`` as its last dimension; the result has
    ``out_features`` as its last dimension.
    """
    return self.linear_layer(X)

In [12]:
# Instantiate the model with its initial (untrained) parameters
model = LinearRegressionModel()

In [13]:
# Inspect the model's weight and bias tensors before any training has happened
list(model.parameters())

[Parameter containing:
 tensor([[ 0.1064,  0.0254,  0.1777,  0.2607,  0.2963,  0.0662, -0.0234,  0.1621,
          -0.2293, -0.3072]], requires_grad=True),
 Parameter containing:
 tensor([-0.2464], requires_grad=True)]

## Move model and data to the same device

In [14]:
# The model's parameters and every data split are moved to the selected device
# so the forward pass never mixes tensors from different devices.
model = model.to(device)
X_train, y_train, X_test, y_test = (
    split.to(device) for split in (X_train, y_train, X_test, y_test)
)

## Training Loop

In [15]:
# Cost function: mean squared error between predictions and targets
loss_fn = nn.MSELoss(reduction="mean")

# Optimizer: SGD with momentum over all of the model's parameters
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [16]:
num_epochs = 1000

for epoch in range(num_epochs):
  # Set the model to training mode (the evaluation step below switches it to eval)
  model.train()

  # 1. Forward pass over the whole training set
  y_pred = model(X_train)

  # 2. Calculate loss. Argument order is (prediction, target), matching the
  #    loss-function API and the test-set call below; MSE is symmetric, but an
  #    asymmetric loss would silently misbehave with the arguments swapped.
  loss = loss_fn(y_pred, y_train)

  # 3. Clear gradients accumulated by the previous iteration
  optimizer.zero_grad()

  # 4. Backpropagate the loss
  loss.backward()

  # 5. Update the parameters
  optimizer.step()

  # 6. Every 100 epochs, evaluate on the held-out test set
  if epoch % 100 == 0:
    model.eval()
    with torch.inference_mode():
      y_test_pred = model(X_test)
      loss_test = loss_fn(y_test_pred, y_test)
    # Note: `loss` was computed before this epoch's optimizer step, while
    # `loss_test` is computed after it.
    print(f"Epoch {epoch} | Training Set Loss {loss.item()} | Test Set Loss {loss_test.item()}")

Epoch 0 | Training Set Loss 13.65917682647705 | Test Set Loss 14.222023963928223
Epoch 100 | Training Set Loss 0.4257676601409912 | Test Set Loss 0.4217972457408905
Epoch 200 | Training Set Loss 0.30123233795166016 | Test Set Loss 0.29712438583374023
Epoch 300 | Training Set Loss 0.2141295075416565 | Test Set Loss 0.21206194162368774
Epoch 400 | Training Set Loss 0.15282398462295532 | Test Set Loss 0.151864156126976
Epoch 500 | Training Set Loss 0.10957647114992142 | Test Set Loss 0.10917282104492188
Epoch 600 | Training Set Loss 0.0789899006485939 | Test Set Loss 0.0788324847817421
Epoch 700 | Training Set Loss 0.0572955422103405 | Test Set Loss 0.057219427078962326
Epoch 800 | Training Set Loss 0.041858136653900146 | Test Set Loss 0.04178304597735405
Epoch 900 | Training Set Loss 0.030832519754767418 | Test Set Loss 0.030725693330168724


## Make predictions

In [17]:
# Final predictions on the test split: model in eval mode, run under
# torch.inference_mode()
with torch.inference_mode():
  model.eval()
  y_test_pred = model(X_test)

In [18]:
# First ten true targets from the test split, for comparison with the predictions
y_test[:10]

tensor([[-3.7715],
        [-4.4885],
        [-5.3416],
        [-4.1589],
        [-3.8064],
        [-4.0408],
        [-3.6651],
        [-4.0178],
        [-2.9622],
        [-2.9253]], device='cuda:0')

In [19]:
# First ten model predictions for the test split
y_test_pred[:10]

tensor([[-3.8224],
        [-4.2661],
        [-5.1328],
        [-4.1516],
        [-3.7890],
        [-4.0706],
        [-3.8289],
        [-4.0112],
        [-2.9219],
        [-2.9771]], device='cuda:0')

## Save and Load Model

In [20]:
# 1. Create the directory that will hold saved models. `Path` comes from the
#    imports cell at the top of the notebook; mkdir is a no-op when the
#    directory already exists.
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

In [21]:
# NOTE(review): this cell repeats the previous cell statement for statement.
# Re-running it is harmless because mkdir is called with exist_ok=True, but
# one of the two cells can be deleted.
from pathlib import Path

# Create the models directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

In [22]:
# Full path of the checkpoint file inside the models directory
MODEL_NAME = "linear_regression_model.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

In [23]:
# Persist only the state dict (the model's learned parameters), not the whole
# module object
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model.state_dict(), MODEL_SAVE_PATH)

Saving model to: models/linear_regression_model.pth


In [24]:
# Instantiate a fresh instance of LinearRegressionModel to receive the saved parameters
loaded_model = LinearRegressionModel()

# Load the model state dict.
# - map_location=device remaps the stored tensors onto the current device, so a
#   checkpoint saved from a CUDA model still loads on a CPU-only machine.
# - weights_only=True restricts unpickling to tensors and plain containers
#   instead of executing arbitrary pickled objects.
loaded_model.load_state_dict(
    torch.load(MODEL_SAVE_PATH, map_location=device, weights_only=True)
)

# Put the model on the target device (the returned module is the cell output)
loaded_model.to(device)

LinearRegressionModel(
  (linear_layer): Linear(in_features=10, out_features=1, bias=True)
)