In [1]:
import random
import torch
import numpy as np

# Compact NumPy printing: three decimals, no scientific notation.
np.set_printoptions(precision=3, suppress=True)

# Seed the RNGs this notebook relies on -- `random` drives minibatch shuffling
# and `torch` drives the random weight initialisation -- so that
# Restart Kernel -> Run All reproduces the same training run.
SEED = 0
torch.manual_seed(SEED)
random.seed(SEED)

In [2]:
from sklearn import datasets
iris = datasets.load_iris()

# Regression setup: the first three feature columns are the inputs and the
# fourth feature column is the value to predict.
X = torch.tensor(iris.data[:, 0:3], dtype=torch.float32)
y = torch.tensor(iris.data[:, 3], dtype=torch.float32)

# Problem 2 (logistic regression): uncomment the two lines below to use all
# feature columns and a 0/1 target that is 1 where the class label equals 2.
# X = torch.tensor(iris.data, dtype=torch.float32)
# y = torch.tensor(iris.target == 2, dtype=torch.float32)

In [3]:
# Partition the data into Training and Testing (80:20 split)
from sklearn.model_selection import train_test_split
# 80% of the rows go to training; random_state pins the shuffle so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    shuffle=True,
    random_state=0,
)

# Report the size of each partition
print('X_train', X_train.shape)
print('X_test', X_test.shape)

X_train torch.Size([120, 3])
X_test torch.Size([30, 3])


In [4]:
# Reading the dataset
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    The example indices are permuted with ``random.shuffle`` when iteration
    starts and then walked in strides of ``batch_size``. The last batch is
    shorter when the number of examples is not a multiple of ``batch_size``.
    ``features`` and ``labels`` must support indexing with a list of ints.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)

    for start in range(0, total, batch_size):
        picked = order[start:start + batch_size]
        yield features[picked], labels[picked]

# Check data reader: pull only the first minibatch and inspect its shapes
X_batch, y_batch = next(data_iter(batch_size=10, features=X_train, labels=y_train))
print('X_batch', X_batch.shape, X_batch[0])
print('y_batch', y_batch.shape, y_batch[0])

X_batch torch.Size([10, 3]) tensor([7.9000, 3.8000, 6.4000])
y_batch torch.Size([10]) tensor(2.)


In [5]:
# Initializing Model Parameters
# One weight per input column. The count is read from X_train rather than
# hard-coded, so this cell keeps working when the feature set changes (e.g.
# the four-feature variant used for problem 2).
w = torch.nn.Parameter(data=torch.zeros((X_train.shape[1], 1)), requires_grad=True)
torch.nn.init.normal_(w, mean=0, std=0.01)  # overwrite in place with small random weights
b = torch.nn.Parameter(data=torch.zeros((1, 1)), requires_grad=True)  # bias starts at zero
print('w', w)
print('b', b)

w Parameter containing:
tensor([[ 0.0033],
        [-0.0109],
        [-0.0088]], requires_grad=True)
b Parameter containing:
tensor([[0.]], requires_grad=True)


In [6]:
# Defining the Model
def linreg(X, w, b):
    """Affine model: matrix product of ``X`` and ``w``, shifted by ``b``."""
    projection = X @ w
    return projection + b

# Check model: push a single minibatch through the network and inspect shapes
X_batch, y_batch = next(data_iter(batch_size=10, features=X_train, labels=y_train))
out_batch = linreg(X_batch, w, b)
print('X_batch', X_batch.shape, X_batch[0])
print('out_batch', out_batch.shape, out_batch[0])

X_batch torch.Size([10, 3]) tensor([5.1000, 2.5000, 3.0000])
out_batch torch.Size([10, 1]) tensor([-0.0370], grad_fn=<SelectBackward0>)


In [7]:
# Defining the Loss Function
def squared_loss(y_hat, y):
    """Mean of the halved squared residuals.

    ``y`` is viewed with the shape of ``y_hat`` before subtracting, so a flat
    target can be paired with column-shaped predictions.
    """
    residual = y_hat - y.view(y_hat.shape)
    halved_squares = residual ** 2 / 2
    return torch.mean(halved_squares)

def rsquare(y_hat, y):
    """Coefficient of determination, ``R^2 = 1 - SS_res / SS_tot``.

    Args:
        y_hat: Predicted values.
        y: Observed values with the same number of elements as ``y_hat``.

    Returns:
        A 0-dim tensor. ``SS_res`` is the sum of squared residuals and
        ``SS_tot`` the sum of squared deviations of ``y`` from its own mean.
        There is no guard for a constant ``y`` (``SS_tot == 0``).
    """
    # Align shapes the same way squared_loss does, so (n,) targets paired with
    # (n, 1) predictions do not silently broadcast to an (n, n) matrix.
    y = y.view(y_hat.shape)
    # Use only the arguments: both the predictions and the mean must come from
    # the data passed in, not from notebook globals.
    ss_res = torch.sum((y - y_hat) ** 2)
    ss_tot = torch.sum((y - torch.mean(y)) ** 2)
    return 1 - ss_res / ss_tot

# Check loss on a tiny example: residuals (-2, 0, 2) -> halved squares (2, 0, 2)
err = squared_loss(
    torch.tensor([1, 2, 3]),
    torch.tensor([3, 2, 1]),
)
print('err =', err)

err = tensor(1.3333)


In [8]:
# Defining the Optimization Algorithm
def sgd(params, grads, lr):
    """Minibatch stochastic gradient descent.

    Applies ``p <- p - lr * g`` in place to every parameter.

    Args:
        params: Iterable of tensors to update in place.
        grads: Iterable of gradients paired positionally with ``params``
            (``zip`` stops at the shorter of the two). A ``None`` entry means
            "no gradient" and leaves that parameter untouched.
        lr: Learning rate (step size).
    """
    # The update must not be recorded by autograd; no_grad() is the supported
    # way to modify leaf tensors in place, instead of going through `.data`.
    with torch.no_grad():
        for p, g in zip(params, grads):
            if g is None:
                continue
            p -= lr * g

**Training**

In [9]:
# Optimisation hyperparameters: step size, minibatch size, passes over the data
lr, batch_size, num_epochs = 0.01, 10, 50

# Model and objective used by the training loop
net, loss = linreg, squared_loss

In [11]:
# Initialize the parameters of the model (re-running this cell restarts training)
torch.nn.init.normal_(w, mean=0, std=0.01)
torch.nn.init.zeros_(b)

for epoch in range(num_epochs):
    # Evaluate the parameters as they stand *before* this epoch's updates.
    # No autograd graph is needed for evaluation.
    with torch.no_grad():
        yhat = net(X_test, w, b)[:, 0]
        test_Rsq = rsquare(yhat, y_test)
        test_l = loss(yhat, y_test)

        yhat = net(X_train, w, b)[:, 0]
        train_Rsq = rsquare(yhat, y_train)
        train_l = loss(yhat, y_train)

    # test_Rsq / train_Rsq are kept for inspection but are not printed.
    print(f'epoch {epoch:03d}, test loss {float(test_l):.3f}, train loss {float(train_l):.3f}')

    # Train for one epoch, honouring the `batch_size` hyperparameter
    for X_batch, y_batch in data_iter(batch_size=batch_size, features=X_train, labels=y_train):
        # Use model to compute predictions
        yhat = net(X_batch, w, b)
        l = loss(yhat, y_batch)  # Minibatch loss in `X_batch` and `y_batch`

        # Compute gradients by back propagation
        l.backward()

        # Update parameters using their gradient
        sgd([w, b], [w.grad, b.grad], lr)

        # Reset gradients so they do not accumulate into the next minibatch
        w.grad = b.grad = None

epoch 000, test loss 1.057, train loss 1.350
epoch 001, test loss 0.095, train loss 0.108
epoch 002, test loss 0.083, train loss 0.075
epoch 003, test loss 0.058, train loss 0.055
epoch 004, test loss 0.070, train loss 0.054
epoch 005, test loss 0.047, train loss 0.037
epoch 006, test loss 0.045, train loss 0.033
epoch 007, test loss 0.038, train loss 0.028
epoch 008, test loss 0.034, train loss 0.026
epoch 009, test loss 0.033, train loss 0.025
epoch 010, test loss 0.034, train loss 0.023
epoch 011, test loss 0.032, train loss 0.024
epoch 012, test loss 0.038, train loss 0.024
epoch 013, test loss 0.040, train loss 0.025
epoch 014, test loss 0.034, train loss 0.022
epoch 015, test loss 0.033, train loss 0.022
epoch 016, test loss 0.033, train loss 0.022
epoch 017, test loss 0.033, train loss 0.021
epoch 018, test loss 0.035, train loss 0.021
epoch 019, test loss 0.042, train loss 0.025
epoch 020, test loss 0.033, train loss 0.021
epoch 021, test loss 0.034, train loss 0.021
epoch 022,

In [12]:
# Report the fitted parameters, then R^2 = 1 - SS_res / SS_tot on each split
print('Intercept = ', b.detach().numpy())
print('Coefficients = \n', w.detach().numpy())

# Evaluation only: no gradients are needed for either split
with torch.no_grad():
    yhat = net(X_train, w, b)[:, 0]
    R_sq_train = rsquare(yhat, y_train)

with torch.no_grad():
    yhat = net(X_test, w, b)[:, 0]
    R_sq_test = rsquare(yhat, y_test)

print('Train R square = ', f'{float(R_sq_train):.3f}')
print('Test R square = ', f'{float(R_sq_test):.3f}')

Intercept =  [[-0.038]]
Coefficients = 
 [[-0.041]
 [-0.044]
 [ 0.426]]
Train R square =  0.938
Test R square =  0.873
