In [1]:
# !pip install scikit-learn==1.6 torch==2.6

In [2]:
import torch
from torch import nn
from torch import optim

# https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [3]:
data = fetch_california_housing()

In [4]:
print(data['DESCR'])

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

:Number of Instances: 20640

:Number of Attributes: 8 numeric, predictive attributes and the target

:Attribute Information:
    - MedInc        median income in block group
    - HouseAge      median house age in block group
    - AveRooms      average number of rooms per household
    - AveBedrms     average number of bedrooms per household
    - Population    block group population
    - AveOccup      average number of household members
    - Latitude      block group latitude
    - Longitude     block group longitude

:Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived from the 1990 U.S. census, using one row per ce

In [5]:
data['data']

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]], shape=(20640, 8))

In [6]:
data['target']

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894], shape=(20640,))

In [7]:
X = data.data
X.shape

(20640, 8)

In [8]:
y = data.target
y.shape

(20640,)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # 0.15, 0.2, 0.3

In [10]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((16512, 8), (4128, 8), (16512,), (4128,))

In [11]:
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [12]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([16512, 8]),
 torch.Size([4128, 8]),
 torch.Size([16512]),
 torch.Size([4128]))

In [13]:
sklearn_linear_regression = LinearRegression()
sklearn_linear_regression

In [14]:
sklearn_linear_regression.fit(X_train.numpy(), y_train.numpy())
y_pred = sklearn_linear_regression.predict(X_test)

r2 = r2_score(y_pred, y_test.numpy())
print(r2)

-7.163000106811523


In [15]:
class MyLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 1)

    def forward(self, x):
        y = self.linear(x)
        return y

In [16]:
my_linear_regression = MyLinearRegression()
my_linear_regression

MyLinearRegression(
  (linear): Linear(in_features=8, out_features=1, bias=True)
)

In [17]:
class MyNeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(8, 5)
        self.relu = nn.ReLU()
        self.linear_2 = nn.Linear(5, 1)

    def forward(self, x):
        x = self.linear_1(x)
        x = self.relu(x)
        y = self.linear_2(x)
        return y

In [18]:
my_neural_network = MyNeuralNetwork()
my_neural_network

MyNeuralNetwork(
  (linear_1): Linear(in_features=8, out_features=5, bias=True)
  (relu): ReLU()
  (linear_2): Linear(in_features=5, out_features=1, bias=True)
)

In [19]:
loss_fn = nn.L1Loss()
loss_fn

L1Loss()

In [20]:
model = my_neural_network
# model = my_linear_regression

In [21]:
# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)

In [22]:
model.train()
best_loss_value = 999999
for epoch in range(300):
    y_pred = model(X_train)
    loss_value = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss_value.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print('Epoch', epoch, 'Loss', loss_value.item())

    if loss_value < best_loss_value:
        best_loss_value = loss_value
        print('Found new best loss', loss_value.item())
        torch.save(model.state_dict(), 'best_model.pth')

  return F.l1_loss(input, target, reduction=self.reduction)


Epoch 0 Loss 101.189453125
Found new best loss 101.189453125
Found new best loss 82.69425964355469
Found new best loss 63.91596221923828
Found new best loss 44.89702224731445
Found new best loss 25.8376407623291
Found new best loss 8.925691604614258
Epoch 10 Loss 27.597209930419922
Found new best loss 6.731420993804932
Epoch 20 Loss 8.9717435836792
Found new best loss 5.393065452575684
Found new best loss 5.265041351318359
Found new best loss 4.3174872398376465
Found new best loss 3.851949453353882
Epoch 30 Loss 3.1101675033569336
Found new best loss 3.1101675033569336
Found new best loss 2.954636812210083
Found new best loss 2.7678380012512207
Epoch 40 Loss 3.153329372406006
Found new best loss 2.499972343444824
Found new best loss 2.1817545890808105
Epoch 50 Loss 1.9631808996200562
Found new best loss 1.9631808996200562
Found new best loss 1.7915363311767578
Found new best loss 1.6523257493972778
Found new best loss 1.5441852807998657
Epoch 60 Loss 1.566347599029541
Found new best lo

In [23]:
model.load_state_dict(torch.load('best_model.pth', weights_only=True))
model.eval()
with torch.no_grad():
    y_pred = model(X_test)
    loss_value = loss_fn(y_pred, y_test)
    r2 = r2_score(y_pred.numpy(), y_test.numpy())
    print(loss_value)
    print(r2)

tensor(0.9935)
-4.550994396209717


  return F.l1_loss(input, target, reduction=self.reduction)
