In [1]:
from sklearn.datasets import fetch_california_housing
# Load the California housing dataset. as_frame=True is requested because later
# cells read `.data.columns`, i.e. they expect the features as a pandas frame.
california_housing = fetch_california_housing(as_frame=True)

In [2]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_california_housing
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from torch import Tensor
from sklearn.preprocessing import MinMaxScaler 

In [3]:
from tqdm.notebook import tqdm

In [4]:
# Pull the feature table and the regression target out of the fetched bundle.
data, target = california_housing.data, california_housing.target

In [5]:
# Hold out a quarter of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.25,
    random_state=13,
)

In [6]:
# Learn the per-feature min/max on the training split only, then apply the same
# mapping to both splits so no test-set statistics leak into the scaling.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Targets are left unscaled; they are only converted to NumPy arrays.
y_train, y_test = np.array(y_train), np.array(y_test)

In [7]:
class RegressionDataset(Dataset):
    """Map-style dataset that pairs each feature row with its regression target.

    Args:
        X_data: sized, indexable container of feature rows.
        y_data: sized, indexable container of targets, one per row of ``X_data``.

    Raises:
        ValueError: if ``X_data`` and ``y_data`` do not have the same length.
    """

    def __init__(self, X_data, y_data):
        # __len__ only looks at X_data, so a length mismatch would otherwise surface
        # late (IndexError mid-epoch) or never (surplus targets silently ignored).
        if len(X_data) != len(y_data):
            raise ValueError(
                f"X_data and y_data must have the same length, "
                f"got {len(X_data)} and {len(y_data)}"
            )
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples (rows of ``X_data``)."""
        return len(self.X_data)
    
    
# Wrap the NumPy splits as tensors (cast with .float()) so the DataLoader can batch them.
train_dataset = RegressionDataset(
    torch.from_numpy(X_train).float(),
    torch.from_numpy(y_train).float(),
)
test_dataset = RegressionDataset(
    torch.from_numpy(X_test).float(),
    torch.from_numpy(y_test).float(),
)

In [8]:
# Training hyper-parameters shared by every optimizer run in this notebook.
EPOCHS, BATCH_SIZE = 150, 64
LEARNING_RATE = 1e-3

# Width of the network input = number of feature columns in the frame.
NUM_FEATURES = data.shape[1]

In [9]:
# Display how many feature columns the frame has (the value NUM_FEATURES is set to).
data.shape[1]

8

In [10]:
# Training batches are reshuffled every epoch; the test set is served one sample
# at a time, in dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = DataLoader(test_dataset, batch_size=1)

In [11]:
class MultipleRegression(nn.Module):
    """One-hidden-layer regressor: Linear(num_features, 16) -> ReLU -> Linear(16, 1).

    Args:
        num_features: size of the last dimension of the input tensor.
    """

    def __init__(self, num_features):
        super().__init__()

        # Layers are created in this order so that seeded initialisation stays the same.
        self.layer_1 = nn.Linear(num_features, 16)
        # NOTE(review): `bn` and `dp` are registered on the module (they show up in
        # print(model) and the state dict) but neither forward() nor predict() applies them.
        self.bn = nn.BatchNorm1d(16)
        self.dp = nn.Dropout(0.25)
        self.layer_out = nn.Linear(16, 1)

        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Map ``inputs`` through the hidden layer and ReLU to a single output per row."""
        hidden = self.layer_1(inputs)
        activated = self.relu(hidden)
        return self.layer_out(activated)

    def predict(self, test_inputs):
        """Run the same computation as ``forward`` on ``test_inputs``.

        Gradient tracking and train/eval mode are left untouched; callers handle those.
        """
        return self.forward(test_inputs)

### Adam

In [12]:
# Fresh, randomly initialised network for the Adam run.
# NOTE(review): no seed is set, so the initial weights differ on every execution.
model = MultipleRegression(NUM_FEATURES)
print(model)

# Adam minimises the mean-squared-error objective at the shared learning rate.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

MultipleRegression(
  (layer_1): Linear(in_features=8, out_features=16, bias=True)
  (bn): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dp): Dropout(p=0.25, inplace=False)
  (layer_out): Linear(in_features=16, out_features=1, bias=True)
  (relu): ReLU()
)


In [13]:
# Per-epoch loss history; each split gets its own independent list.
loss_stats = {split: [] for split in ('train', 'val')}

In [14]:
print("Begin training.")
for e in tqdm(range(1, EPOCHS+1)):

    # --- Training phase: one pass over every mini-batch in train_loader ---
    model.train()
    train_epoch_loss = 0
    for X_train_batch, y_train_batch in train_loader:
        optimizer.zero_grad()

        # unsqueeze(1) adds a trailing axis to the targets so they line up with
        # the model's one-column output.
        targets = y_train_batch.unsqueeze(1)
        train_loss = criterion(model(X_train_batch), targets)

        train_loss.backward()
        optimizer.step()

        # Accumulate the batch-mean losses; averaged over batches below.
        train_epoch_loss += train_loss.item()

    # --- Validation phase (stub) ---
    # NOTE(review): no validation batches are evaluated here, so val_epoch_loss
    # stays 0 and loss_stats['val'] is never appended to.
    model.eval()
    with torch.no_grad():
        val_epoch_loss = 0

    mean_train_loss = train_epoch_loss/len(train_loader)
    loss_stats['train'].append(mean_train_loss)

    print(f'Epoch {e:03}: | Train Loss: {mean_train_loss:.5f}')


Begin training.


  0%|          | 0/150 [00:00<?, ?it/s]

Epoch 001: | Train Loss: 2.69493
Epoch 002: | Train Loss: 1.17434
Epoch 003: | Train Loss: 1.00337
Epoch 004: | Train Loss: 0.79697
Epoch 005: | Train Loss: 0.64698
Epoch 006: | Train Loss: 0.58835
Epoch 007: | Train Loss: 0.57130
Epoch 008: | Train Loss: 0.56377
Epoch 009: | Train Loss: 0.55787
Epoch 010: | Train Loss: 0.55236
Epoch 011: | Train Loss: 0.54761
Epoch 012: | Train Loss: 0.54276
Epoch 013: | Train Loss: 0.53941
Epoch 014: | Train Loss: 0.53491
Epoch 015: | Train Loss: 0.53198
Epoch 016: | Train Loss: 0.52752
Epoch 017: | Train Loss: 0.52398
Epoch 018: | Train Loss: 0.52027
Epoch 019: | Train Loss: 0.51769
Epoch 020: | Train Loss: 0.51455
Epoch 021: | Train Loss: 0.51279
Epoch 022: | Train Loss: 0.50951
Epoch 023: | Train Loss: 0.50725
Epoch 024: | Train Loss: 0.50499
Epoch 025: | Train Loss: 0.50376
Epoch 026: | Train Loss: 0.50170
Epoch 027: | Train Loss: 0.49947
Epoch 028: | Train Loss: 0.49864
Epoch 029: | Train Loss: 0.49611
Epoch 030: | Train Loss: 0.49500
Epoch 031:

In [15]:
# Run the trained model over the held-out set and collect one prediction per sample.
model.eval()
with torch.no_grad():
    # Keep the raw per-batch outputs (one column per sample); flattening happens once below.
    batch_outputs = [model(X_batch) for X_batch, _ in test_loader]

# Concatenate along the batch axis and flatten to a plain list of floats.
# A per-batch squeeze() only yields a flat list when the loader's batch size is 1;
# concatenating first gives one float per sample for any batch size.
y_pred_list = torch.cat(batch_outputs).flatten().tolist() if batch_outputs else []

In [16]:
from sklearn.metrics import mean_squared_error, r2_score
# Score the test-set predictions with both metrics, then report them.
mse, r_square = (
    metric(y_test, y_pred_list) for metric in (mean_squared_error, r2_score)
)
for label, value in (("Mean Squared Error :", mse), ("R^2 :", r_square)):
    print(label, value)

Mean Squared Error : 0.4403075666576879
R^2 : 0.6741630321860955


### RMSprop

In [19]:
# Fresh, randomly initialised network for the RMSprop run.
# NOTE(review): no seed is set, so the initial weights differ on every execution.
model = MultipleRegression(NUM_FEATURES)
print(model)

# RMSprop minimises the mean-squared-error objective at the shared learning rate.
optimizer = optim.RMSprop(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

# Start a new per-epoch loss history for this run.
loss_stats = {split: [] for split in ('train', 'val')}



MultipleRegression(
  (layer_1): Linear(in_features=8, out_features=16, bias=True)
  (bn): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dp): Dropout(p=0.25, inplace=False)
  (layer_out): Linear(in_features=16, out_features=1, bias=True)
  (relu): ReLU()
)


In [20]:
print("Begin training.")
for e in tqdm(range(1, EPOCHS+1)):

    # --- Training phase: one pass over every mini-batch in train_loader ---
    model.train()
    train_epoch_loss = 0
    for X_train_batch, y_train_batch in train_loader:
        optimizer.zero_grad()

        # unsqueeze(1) adds a trailing axis to the targets so they line up with
        # the model's one-column output.
        targets = y_train_batch.unsqueeze(1)
        train_loss = criterion(model(X_train_batch), targets)

        train_loss.backward()
        optimizer.step()

        # Accumulate the batch-mean losses; averaged over batches below.
        train_epoch_loss += train_loss.item()

    # --- Validation phase (stub) ---
    # NOTE(review): no validation batches are evaluated here, so val_epoch_loss
    # stays 0 and loss_stats['val'] is never appended to.
    model.eval()
    with torch.no_grad():
        val_epoch_loss = 0

    mean_train_loss = train_epoch_loss/len(train_loader)
    loss_stats['train'].append(mean_train_loss)

    print(f'Epoch {e:03}: | Train Loss: {mean_train_loss:.5f}')


Begin training.


  0%|          | 0/150 [00:00<?, ?it/s]

Epoch 001: | Train Loss: 1.63153
Epoch 002: | Train Loss: 1.09556
Epoch 003: | Train Loss: 0.92228
Epoch 004: | Train Loss: 0.72390
Epoch 005: | Train Loss: 0.61845
Epoch 006: | Train Loss: 0.58950
Epoch 007: | Train Loss: 0.57814
Epoch 008: | Train Loss: 0.56904
Epoch 009: | Train Loss: 0.56127
Epoch 010: | Train Loss: 0.55377
Epoch 011: | Train Loss: 0.54717
Epoch 012: | Train Loss: 0.53945
Epoch 013: | Train Loss: 0.53274
Epoch 014: | Train Loss: 0.52735
Epoch 015: | Train Loss: 0.52265
Epoch 016: | Train Loss: 0.51765
Epoch 017: | Train Loss: 0.51339
Epoch 018: | Train Loss: 0.50911
Epoch 019: | Train Loss: 0.50487
Epoch 020: | Train Loss: 0.50113
Epoch 021: | Train Loss: 0.49734
Epoch 022: | Train Loss: 0.49454
Epoch 023: | Train Loss: 0.49184
Epoch 024: | Train Loss: 0.48830
Epoch 025: | Train Loss: 0.48544
Epoch 026: | Train Loss: 0.48295
Epoch 027: | Train Loss: 0.48057
Epoch 028: | Train Loss: 0.47833
Epoch 029: | Train Loss: 0.47712
Epoch 030: | Train Loss: 0.47486
Epoch 031:

In [21]:
# Run the trained model over the held-out set and collect one prediction per sample.
model.eval()
with torch.no_grad():
    # Keep the raw per-batch outputs (one column per sample); flattening happens once below.
    batch_outputs = [model(X_batch) for X_batch, _ in test_loader]

# Concatenate along the batch axis and flatten to a plain list of floats.
# A per-batch squeeze() only yields a flat list when the loader's batch size is 1;
# concatenating first gives one float per sample for any batch size.
y_pred_list = torch.cat(batch_outputs).flatten().tolist() if batch_outputs else []

# Score the predictions against the untouched test targets.
mse = mean_squared_error(y_test, y_pred_list)
r_square = r2_score(y_test, y_pred_list)
print("Mean Squared Error :",mse)
print("R^2 :",r_square)

Mean Squared Error : 0.4317965285878678
R^2 : 0.6804613814483395


### SGD

In [22]:
# Fresh, randomly initialised network for the SGD run.
# NOTE(review): no seed is set, so the initial weights differ on every execution.
model = MultipleRegression(NUM_FEATURES)
print(model)

# Plain SGD minimises the mean-squared-error objective at the shared learning rate.
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

# Start a new per-epoch loss history for this run.
loss_stats = {split: [] for split in ('train', 'val')}


MultipleRegression(
  (layer_1): Linear(in_features=8, out_features=16, bias=True)
  (bn): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dp): Dropout(p=0.25, inplace=False)
  (layer_out): Linear(in_features=16, out_features=1, bias=True)
  (relu): ReLU()
)


In [23]:
print("Begin training.")
for e in tqdm(range(1, EPOCHS+1)):
    
    # TRAINING
    train_epoch_loss = 0
    model.train()
    for X_train_batch, y_train_batch in train_loader:
        optimizer.zero_grad()
        
        y_train_pred = model(X_train_batch)
        
        train_loss = criterion(y_train_pred, y_train_batch.unsqueeze(1))
        
        train_loss.backward()
        optimizer.step()
        
        train_epoch_loss += train_loss.item()
        
        
    # VALIDATION    
    with torch.no_grad():
        
        val_epoch_loss = 0
        
        model.eval()

    loss_stats['train'].append(train_epoch_loss/len(train_loader))
                           
    
    print(f'Epoch {e+0:03}: | Train Loss: {train_epoch_loss/len(train_loader):.5f}')


Begin training.


  0%|          | 0/150 [00:00<?, ?it/s]

Epoch 001: | Train Loss: 3.30525
Epoch 002: | Train Loss: 1.46783
Epoch 003: | Train Loss: 1.23946
Epoch 004: | Train Loss: 1.21464
Epoch 005: | Train Loss: 1.20050
Epoch 006: | Train Loss: 1.18748
Epoch 007: | Train Loss: 1.17406
Epoch 008: | Train Loss: 1.16046
Epoch 009: | Train Loss: 1.14619
Epoch 010: | Train Loss: 1.13122
Epoch 011: | Train Loss: 1.11596
Epoch 012: | Train Loss: 1.09946
Epoch 013: | Train Loss: 1.08250
Epoch 014: | Train Loss: 1.06471
Epoch 015: | Train Loss: 1.04634
Epoch 016: | Train Loss: 1.02733
Epoch 017: | Train Loss: 1.00764
Epoch 018: | Train Loss: 0.98737
Epoch 019: | Train Loss: 0.96666
Epoch 020: | Train Loss: 0.94561
Epoch 021: | Train Loss: 0.92440
Epoch 022: | Train Loss: 0.90275
Epoch 023: | Train Loss: 0.88149
Epoch 024: | Train Loss: 0.86008
Epoch 025: | Train Loss: 0.83911
Epoch 026: | Train Loss: 0.81859
Epoch 027: | Train Loss: 0.79853
Epoch 028: | Train Loss: 0.77906
Epoch 029: | Train Loss: 0.76042
Epoch 030: | Train Loss: 0.74257
Epoch 031:

In [24]:
# Run the trained model over the held-out set and collect one prediction per sample.
model.eval()
with torch.no_grad():
    # Keep the raw per-batch outputs (one column per sample); flattening happens once below.
    batch_outputs = [model(X_batch) for X_batch, _ in test_loader]

# Concatenate along the batch axis and flatten to a plain list of floats.
# A per-batch squeeze() only yields a flat list when the loader's batch size is 1;
# concatenating first gives one float per sample for any batch size.
y_pred_list = torch.cat(batch_outputs).flatten().tolist() if batch_outputs else []

# Score the predictions against the untouched test targets.
mse = mean_squared_error(y_test, y_pred_list)
r_square = r2_score(y_test, y_pred_list)
print("Mean Squared Error :",mse)
print("R^2 :",r_square)

Mean Squared Error : 0.532564653763472
R^2 : 0.6058908247605476


### Вывод
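В этом эксперименте лучший результат на тестовой выборке показал RMSprop: $R^2 \approx 0.68$ (MSE ≈ 0.43). <br>
Второе место — Adam: $R^2 \approx 0.67$ (MSE ≈ 0.44). <br>
Третье место — SGD: $R^2 \approx 0.61$ (MSE ≈ 0.53). <br>
Разница между RMSprop и Adam невелика, а seed не фиксировался, поэтому порядок этих двух оптимизаторов может меняться от запуска к запуску.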