In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from copy import deepcopy

In [2]:
cali = fetch_california_housing()
df = pd.DataFrame(cali.data, columns=cali.feature_names)
df['target'] = cali.target
df.tail()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.17192,741.0,2.123209,39.43,-121.32,0.847
20639,2.3886,16.0,5.254717,1.162264,1387.0,2.616981,39.37,-121.24,0.894


In [3]:
scaler = StandardScaler()
scaler.fit(df.values[:, :-1])
df.values[:, :-1] = scaler.transform(df.values[:, :-1])

df.tail()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
20635,-1.216128,-0.289187,-0.155023,0.077354,-0.512592,-0.04911,1.801647,-0.758826,0.781
20636,-0.691593,-0.845393,0.276881,0.462365,-0.944405,0.005021,1.806329,-0.818722,0.771
20637,-1.142593,-0.924851,-0.090318,0.049414,-0.369537,-0.071735,1.778237,-0.823713,0.923
20638,-1.054583,-0.845393,-0.040211,0.158778,-0.604429,-0.091225,1.778237,-0.873626,0.847
20639,-0.780129,-1.004309,-0.070443,0.138403,-0.033977,-0.043682,1.750146,-0.833696,0.894


In [4]:
data = torch.from_numpy(df.values)
data = data.float()
data.dtype

torch.float32

In [5]:
x = data[:,:-1]
y = data[:, -1:]
x.shape, y.shape

(torch.Size([20640, 8]), torch.Size([20640, 1]))

In [6]:
ratio = [.6, .2, .2]

train_cnt = int(data.size(0) * ratio[0])
valid_cnt = int(data.size(0) * ratio[1])
test_cnt = int(data.size(0) * ratio[2])

cnts = [train_cnt, valid_cnt, test_cnt]

print('Train %d / Valid %d / Test %d' % (train_cnt, valid_cnt, test_cnt))

Train 12384 / Valid 4128 / Test 4128


In [7]:
ratio = [.6, .2, .2]

train_cnt = int(data.size(0) * ratio[0])
valid_cnt = int(data.size(0) * ratio[1])
test_cnt = int(data.size(0) * ratio[2])

cnts = [train_cnt, valid_cnt, test_cnt]

print('Train %d / Valid %d / Test %d' % (train_cnt, valid_cnt, test_cnt))

Train 12384 / Valid 4128 / Test 4128


In [8]:
indices = torch.randperm(data.size(0))
x = torch.index_select(x, dim=0, index=indices)
y = torch.index_select(y, dim=0, index=indices)

x = list(x.split(cnts, dim=0))
y = y.split(cnts, dim=0)

for x_i, y_i in zip(x, y):
    print(x_i.size(), y_i.size())

torch.Size([12384, 8]) torch.Size([12384, 1])
torch.Size([4128, 8]) torch.Size([4128, 1])
torch.Size([4128, 8]) torch.Size([4128, 1])


In [9]:
scaler = StandardScaler()
scaler.fit(x[0].numpy())

x[0] = torch.from_numpy(scaler.transform(x[0].numpy())).float()
x[1] = torch.from_numpy(scaler.transform(x[1].numpy())).float()
x[2] = torch.from_numpy(scaler.transform(x[2].numpy())).float()

In [10]:
model = nn.Sequential(
    nn.Linear(x[0].size(-1), 6),
    nn.LeakyReLU(),
    nn.Linear(6, 4),
    nn.LeakyReLU(),
    nn.Linear(4, 2),
    nn.LeakyReLU(),
    nn.Linear(2, y[0].size(-1)))

model

Sequential(
  (0): Linear(in_features=8, out_features=6, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=6, out_features=4, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=4, out_features=2, bias=True)
  (5): LeakyReLU(negative_slope=0.01)
  (6): Linear(in_features=2, out_features=1, bias=True)
)

In [11]:
optimizer = optim.Adam(model.parameters())
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [12]:
epochs = 1000
batch = 256
interval = 100
lr = 1e-2

In [13]:
lowest_loss = np.inf
best_model = None

early_stop = 100
lowest_epoch = np.inf

In [14]:
train_history = []
valid_history = []

In [16]:
for i in range(epochs):
    indices = torch.randperm(x[0].size(0))
    x_ = torch.index_select(x[0], dim=0, index = indices)
    y_ = torch.index_select(y[0], dim=0, index = indices)
    
    x_ = x_.split(batch, dim=0)
    y_ = y_.split(batch, dim=0)
    
    train_loss, valid_loss = 0, 0
    y_hat = []
    
    for x_i, y_i in zip(x_, y_):
        y_hat_i = model(x_i)
        loss = F.mse_loss(y_hat_i, y_i)
        
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
        train_loss += float(loss)
    
    train_loss = train_loss / len(x_)

    with torch.no_grad():
        x_ = x[1].split(batch, dim=0)
        y_ = y[1].split(batch, dim=0)

        valid_loss = 0

        for x_i, y_i in zip(x_, y_):
            y_hat_i = model(x_i)
            loss = F.mse_loss(y_hat_i, y_i)

            valid_loss += loss

            y_hat += [y_hat_i]

    valid_loss = valid_loss / len(x_)

    train_history += [train_loss]
    valid_history += [valid_loss]

    if (i+1) % interval == 0:
        print('Epoch %d: train loss=%.4e, valid loss=%.4e, lowest_loss=%.4e' % (i+1, train_loss, valid_loss, lowest_loss))

    if valid_loss <= lowest_loss:
        lowest_loss = valid_loss
        lowest_epoch = i
        best_model = deepcopy(model.state_dict())
    else:
        if early_stop > 0 and lowest_epoch + early_stop < (i+1):
            print('There is no improvement during last %d epochs' % early_stop)
            break
print('The best valid loss from epoch %d: %.4e' % (lowest_epoch + 1, lowest_loss))

model.load_state_dict(best_model)

Epoch 100: train loss=3.5174e-01, valid loss=3.5328e-01, lowest_loss=3.5265e-01
Epoch 200: train loss=3.4704e-01, valid loss=3.5166e-01, lowest_loss=3.5049e-01
Epoch 300: train loss=3.4647e-01, valid loss=3.4699e-01, lowest_loss=3.4759e-01
Epoch 400: train loss=3.4305e-01, valid loss=3.4664e-01, lowest_loss=3.4592e-01
Epoch 500: train loss=3.4150e-01, valid loss=3.4637e-01, lowest_loss=3.4546e-01
There is no improvement during last 100 epochs
The best valid loss from epoch 433: 3.4546e-01


<All keys matched successfully>