# Linear Regression Using PyTorch

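A worked example of fitting a simple (single-feature) linear regression model in PyTorch using hand-written gradient descent: salary is predicted from years of experience.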

Dataset from [Kaggle](https://www.kaggle.com/datasets/abhishek14398/salary-dataset-simple-linear-regression).

In [195]:
import torch
import numpy as np
import pandas as pd

# Read the salary CSV into a DataFrame and preview the first five rows.
# NOTE(review): the /content prefix looks like a Google Colab session path - adjust when running elsewhere.
dat = pd.read_csv(filepath_or_buffer='/content/Salary_dataset.csv')

dat.head(n=5)

Unnamed: 0.1,Unnamed: 0,YearsExperience,Salary
0,0,1.2,39344.0
1,1,1.4,46206.0
2,2,1.6,37732.0
3,3,2.1,43526.0
4,4,2.3,39892.0


In [196]:
# Pick the best available accelerator, falling back to the CPU.
# is_available is a function and has to be *called*: a bare function object is
# always truthy, so without the parentheses the first branch would be taken on
# every machine, whether or not a GPU is present.
if torch.cuda.is_available():
    device = 'cuda'  # NVIDIA GPU
elif torch.backends.mps.is_available():  # the module is torch.backends (plural)
    device = 'mps'  # Apple Silicon GPU
else:
    device = 'cpu'
print(device)

cuda


In [197]:
# Extract model inputs and targets as NumPy arrays.
# Selecting with a list of columns keeps X two-dimensional; a single label gives a 1-D y.
# NOTE: despite the "_train" suffix, both arrays contain every row of `dat`;
# they are only divided into train and test portions further down.
X_train = dat.loc[:, ['YearsExperience']].to_numpy()
y_train = dat.loc[:, 'Salary'].to_numpy()

In [198]:
# Hold out the tail of the data for testing: the first 80% of rows (in file
# order, no shuffling) become the training split and the remainder the test split.
split_ratio = 0.8
split_index = int(len(X_train) * split_ratio)
X_train_split, X_test_split = X_train[:split_index], X_train[split_index:]
y_train_split, y_test_split = y_train[:split_index], y_train[split_index:]

In [199]:
# peek at the first five training inputs (still in raw, unscaled units)
X_train_split[:5]

array([[1.2],
       [1.4],
       [1.6],
       [2.1],
       [2.3]])

In [200]:
# peek at the first five training targets (still in raw, unscaled units)
y_train_split[:5]

array([39344., 46206., 37732., 43526., 39892.])

In [215]:
# (rows, features) of the training inputs
X_train_split.shape

(24, 1)

In [202]:
# (rows, features) of the held-out test inputs
X_test_split.shape

(6, 1)

## Scale Data

In [203]:
# Standardize inputs and targets to zero mean / unit spread.
# The statistics come from the training split only, and are kept around so the
# same transform can be applied to new inputs and undone on predictions.
# (NumPy's .std() uses the population form, ddof=0, by default.)
X_mean, X_std = X_train_split.mean(), X_train_split.std()
y_mean, y_std = y_train_split.mean(), y_train_split.std()

X_scaled = (X_train_split - X_mean) / X_std
y_scaled = (y_train_split - y_mean) / y_std

# float32 tensors; the target is reshaped into a column so it lines up with X @ w + b
X_train_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_scaled, dtype=torch.float32).reshape(-1, 1)

In [204]:
# sanity check: the standardized inputs should average to zero, up to float32 rounding
X_train_tensor.mean() # mean of ~ 0

tensor(9.9341e-09)

In [205]:
# Close to, but not exactly, 1: the data was scaled with NumPy's population std
# (ddof=0), whereas torch's .std() applies Bessel's correction by default.
# With 24 training rows that gives sqrt(24/23) ≈ 1.0215.
X_train_tensor.std() # std of ~ 1 (see note above on the estimator mismatch)

tensor(1.0215)

## Regression Parameters

In [206]:
# Fix the RNG seed so the random starting weight is the same on every run.
torch.manual_seed(501)

n_features = X_train_tensor.size(1)

# Trainable parameters of y = X @ w + b: one random weight per feature (as a
# column vector) and a scalar bias starting at zero, both tracked by autograd.
w = torch.randn(n_features, 1, requires_grad=True)
b = torch.zeros((), requires_grad=True)

## Train Model

In [207]:
learning_rate = 0.1
n_epochs = 20  # number of full passes over the training tensors

# Full-batch gradient descent on mean squared error, with the update step written by hand.
for epoch in range(n_epochs):
    # forward pass on the standardized inputs
    y_pred = torch.matmul(X_train_tensor, w) + b

    # mean squared error against the standardized targets
    loss = (y_pred - y_train_tensor).pow(2).mean()

    # populate w.grad and b.grad
    loss.backward()

    # the parameter update itself must not be recorded by autograd
    with torch.no_grad():
        w.sub_(learning_rate * w.grad)
        b.sub_(learning_rate * b.grad)
        # gradients accumulate across backward() calls, so clear them every step
        w.grad.zero_()
        b.grad.zero_()

    # log every tenth epoch; the value is the loss measured before this epoch's update
    if epoch % 10 == 0:
        print(f'Epoch {epoch}: loss = {loss.item()}')

# Sanity-check one training example, reporting the prediction in dollars
# rather than in standardized units.
with torch.no_grad():
    # first standardized training row -> prediction in standardized units
    sample_input = X_train_tensor[0]
    predicted_scaled = sample_input @ w + b

    # undo the target standardization to get back to dollars
    predicted_dollars = (predicted_scaled * y_std) + y_mean

    # Ground truth for the same row. Read it from the training split (the array
    # X_train_tensor was built from) rather than the unsplit y_train, so input
    # and target stay paired even if the split stops being a plain prefix.
    real_dollars = y_train_split[0]

    print("\nExample Prediction:")
    print(f"Model Predicted: ${predicted_dollars.item():,.2f}")

    # .item() converts the NumPy scalar to a plain Python float for formatting
    val = real_dollars.item()
    print(f"Actual Salary:   ${val:,.2f}")

Epoch 0: loss = 0.08344434946775436
Epoch 10: loss = 0.08210023492574692

Example Prediction:
Model Predicted: $34,704.90
Actual Salary:   $39,344.00


## Test Set Prediction

In [213]:
# Predict the salary for one held-out row and compare it with the true value.
with torch.no_grad():
    # which test row to inspect
    index = 0
    sample_input_raw = X_test_split[index]
    sample_target_raw = y_test_split[index]

    # standardize with the training-split statistics, exactly as the training inputs were
    sample_input_scaled = (sample_input_raw - X_mean) / X_std

    # float32 tensor holding the single scaled feature value
    sample_tensor = torch.tensor(sample_input_scaled, dtype=torch.float32)

    # prediction in standardized units, then mapped back to dollars
    predicted_scaled = torch.matmul(sample_tensor, w) + b
    predicted_dollars = (predicted_scaled * y_std) + y_mean

    # signed error: positive means the model over-predicted
    diff = predicted_dollars.item() - sample_target_raw.item()

    # report
    print(f"Test Example (Index {index}):")
    print('-'*30)
    print(f"Years Experience: {sample_input_raw.item():.1f}")
    print(f"Model Predicted: ${predicted_dollars.item():,.2f}")
    print(f"Actual Salary:   ${sample_target_raw.item():,.2f}")
    print(f"Difference:      ${diff:,.2f}")

Test Example (Index 0):
------------------------------
Years Experience: 8.8
Model Predicted: $111,139.98
Actual Salary:   $109,432.00
Difference:      $1,707.98
