In [1]:
import torch

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

import pandas as pd 


In [2]:
dataset = load_boston()
dataset = pd.DataFrame(dataset.data, columns=dataset.feature_names)
dataset['MEDV'] = load_boston().target

dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
# since this is an experiment, picking features from this post
# https://towardsdatascience.com/linear-regression-on-boston-housing-dataset-f409b7e4a155

import numpy as np

X = pd.DataFrame(np.c_[dataset['LSTAT'], dataset['RM']], columns = ['LSTAT','RM']).values
Y = dataset['MEDV'].values.reshape(-1, 1)

In [4]:
# split into train test set
X_train, X_test, y_train, y_test = train_test_split(X, Y)

In [5]:
import torch.nn as nn

input_size = X_train.shape[1]
output_size = y_train.shape[1]

linm = nn.Linear(input_size, output_size)

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linm.parameters(), lr=1e-3)

In [6]:
epochs = 150

for e in range(epochs):
    inputs = torch.from_numpy(X_train).float()
    targets = torch.from_numpy(y_train).float()


    # forwards pass
    outputs = linm(inputs)
    loss = criterion(outputs, targets)

    # backwards, GD
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print ('Epoch [{}/{}], Loss: {:.4f}'.format(e + 1, epochs, loss.item()))

Epoch [1/150], Loss: 590.3761
Epoch [2/150], Loss: 377.9886
Epoch [3/150], Loss: 314.2489
Epoch [4/150], Loss: 289.2007
Epoch [5/150], Loss: 274.5026
Epoch [6/150], Loss: 262.8373
Epoch [7/150], Loss: 252.3034
Epoch [8/150], Loss: 242.3958
Epoch [9/150], Loss: 232.9692
Epoch [10/150], Loss: 223.9722
Epoch [11/150], Loss: 215.3779
Epoch [12/150], Loss: 207.1664
Epoch [13/150], Loss: 199.3200
Epoch [14/150], Loss: 191.8226
Epoch [15/150], Loss: 184.6584
Epoch [16/150], Loss: 177.8128
Epoch [17/150], Loss: 171.2716
Epoch [18/150], Loss: 165.0211
Epoch [19/150], Loss: 159.0485
Epoch [20/150], Loss: 153.3414
Epoch [21/150], Loss: 147.8881
Epoch [22/150], Loss: 142.6772
Epoch [23/150], Loss: 137.6980
Epoch [24/150], Loss: 132.9401
Epoch [25/150], Loss: 128.3937
Epoch [26/150], Loss: 124.0495
Epoch [27/150], Loss: 119.8984
Epoch [28/150], Loss: 115.9318
Epoch [29/150], Loss: 112.1416
Epoch [30/150], Loss: 108.5199
Epoch [31/150], Loss: 105.0591
Epoch [32/150], Loss: 101.7523
Epoch [33/150], L

In [7]:
from sklearn.metrics import r2_score

# train set
predicted = linm(torch.from_numpy(X_train).float()).detach().numpy()
acc = r2_score(y_pred=predicted, y_true=y_train)
print("R2 on train set : ", acc)


# test set
predicted = linm(torch.from_numpy(X_test).float()).detach().numpy()
acc = r2_score(y_pred=predicted, y_true=y_test)
print("R2 on test set : ", acc)


R2 on train set :  0.6428088706865092
R2 on test set :  0.603445833033869
