- Создать Dataset для загрузки данных (sklearn.datasets.fetch_california_housing)
- Обернуть его в Dataloader
- Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)
- Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели
- train-test разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25

In [2]:
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.optim import Adam, RMSprop, SGD
from torch.utils.data import DataLoader, Dataset

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [3]:
EPOCHES = 10
LR = 0.002

In [4]:
class FCHDataset(Dataset):
  def __init__(self, *init_datasets):
    assert all(init_datasets[0].size(0) == init_dataset.size(0) for init_dataset in init_datasets)
    self._base_datasets = init_datasets

  def __len__(self):
    return self._base_datasets[0].size(0)

  def __getitem__(self, idx):
      return tuple(base_dataset[idx] for base_dataset in self._base_datasets)

In [5]:
class CHNet(nn.Module):
  def __init__(self) -> None:
      super(CHNet, self).__init__()
      self.block_1 = nn.Sequential(
          nn.Linear(in_features=8, out_features=100, bias=True),
          nn.Dropout(0.5),
          nn.BatchNorm1d(100),
          nn.LeakyReLU())
      self.block_2 = nn.Sequential(
          nn.Linear(in_features=100, out_features=100, bias=True),
          nn.Dropout(0.5),
          nn.BatchNorm1d(100),
          nn.LeakyReLU())
      self.block_3 = nn.Sequential(
          nn.Linear(in_features=100, out_features=60, bias=True),
          nn.Dropout(0.5),
          nn.BatchNorm1d(60),
          nn.LeakyReLU())
      self.block_4 = nn.Sequential(
          nn.Linear(in_features=60, out_features=30, bias=True),
          nn.Dropout(0.5),
          nn.BatchNorm1d(30),
          nn.LeakyReLU())
      self.predict = nn.Sequential(
          nn.Linear(in_features=30, out_features=1, bias=True),
          nn.BatchNorm1d(1),
          nn.LeakyReLU())

  def forward(self, inp):
    out = self.block_1(inp)
    out = self.block_2(out)
    out = self.block_3(out)
    out = self.block_4(out)
    out = self.predict(out)
    return out[:, 0]

In [14]:
def train_loop(train_loader, test_loader, net, optimizer):
  loss_fn = nn.MSELoss()
  best_acc = {'train': None, 'test': None}
  net.train()
  for epoch in range(EPOCHES):
    running_loss, running_items, running_right = 0.0, 0.0, 0.0
    for i, (inputs, labels) in enumerate(train_loader):

        outputs = net(inputs)
        loss = loss_fn(outputs, labels)

        # обнуляем градиент
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)

        # выводим статистику о процессе обучения
        if i % 100 == 0 or (i + 1) == len(train_loader):    # печатаем каждые 100 mini-batches
            net.eval()

            test_loss, test_running_total, test_loss  = 0.0, 0.0, 0.0
            for y, (out_test, lbl_test) in enumerate(test_loader):
                test_outputs = net(out_test)
                test_loss += loss_fn(test_outputs, lbl_test)
                test_running_total += len(lbl_test)

            res_loss_train = running_loss / running_items
            res_loss_test = test_loss / test_running_total

            if best_acc['train'] is None or res_loss_train < best_acc['train']:
              best_acc['train'] = res_loss_train

            if best_acc['test'] is None or res_loss_test < best_acc['test']:
              best_acc['test'] = res_loss_train

            print(f'Epoch [{epoch + 1}/{EPOCHES}]. ' \
                  f'Step [{i + 1}/{len(train_loader)}]. ' \
                  f'Loss: {res_loss_train:.3f}. '\
                  f'Test acc: {res_loss_test:.3f}.')

            running_loss, running_items = 0.0, 0.0
            net.train()
  print(f"Best acc train: {1 - best_acc['train']:.3f}. Best acc test: {1 - best_acc['test']:.3f}")
  print('Training is finished!')

In [7]:
california_housing = fetch_california_housing()
# Разделим на тестовые и тренеровочные данные
X_train, X_test, y_train, y_test = train_test_split(california_housing.data, california_housing.target, test_size=0.25, random_state=13)

In [8]:
# Нормализуем данные и подготовим их для дальнейшего использования в нашем dstaset
scale = MinMaxScaler()
X_train_s = scale.fit_transform(X_train)
X_test_s = scale.transform(X_test)

In [9]:
train_xt = torch.from_numpy(X_train_s.astype(np.float32))
train_yt = torch.from_numpy(y_train.astype(np.float32))

test_xt = torch.from_numpy(X_test_s.astype(np.float32))
test_yt = torch.from_numpy(y_test.astype(np.float32))

In [10]:
train_dataset = FCHDataset(train_xt, train_yt)
test_dataset = FCHDataset(test_xt, test_yt)

In [11]:
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=True, num_workers=2, drop_last=True)

Обучение моделей с lr = 0.002

SGD

In [16]:
net = CHNet()
optimizer = SGD(net.parameters(), lr=LR)

In [17]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/120]. Loss: 0.038. Test acc: 0.042.
Epoch [1/10]. Step [101/120]. Loss: 0.032. Test acc: 0.024.
Epoch [1/10]. Step [120/120]. Loss: 0.028. Test acc: 0.022.
Epoch [2/10]. Step [1/120]. Loss: 0.025. Test acc: 0.021.
Epoch [2/10]. Step [101/120]. Loss: 0.025. Test acc: 0.014.
Epoch [2/10]. Step [120/120]. Loss: 0.021. Test acc: 0.013.
Epoch [3/10]. Step [1/120]. Loss: 0.021. Test acc: 0.013.
Epoch [3/10]. Step [101/120]. Loss: 0.018. Test acc: 0.010.
Epoch [3/10]. Step [120/120]. Loss: 0.016. Test acc: 0.010.
Epoch [4/10]. Step [1/120]. Loss: 0.014. Test acc: 0.010.
Epoch [4/10]. Step [101/120]. Loss: 0.014. Test acc: 0.008.
Epoch [4/10]. Step [120/120]. Loss: 0.013. Test acc: 0.008.
Epoch [5/10]. Step [1/120]. Loss: 0.011. Test acc: 0.008.
Epoch [5/10]. Step [101/120]. Loss: 0.012. Test acc: 0.007.
Epoch [5/10]. Step [120/120]. Loss: 0.011. Test acc: 0.008.
Epoch [6/10]. Step [1/120]. Loss: 0.009. Test acc: 0.008.
Epoch [6/10]. Step [101/120]. Loss: 0.010. Test acc:

RMSProp + momentum

In [22]:
net = CHNet()
optimizer = RMSprop(net.parameters(), lr=LR, momentum=0.98)

In [23]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/120]. Loss: 0.041. Test acc: 0.040.
Epoch [1/10]. Step [101/120]. Loss: 0.014. Test acc: 0.008.
Epoch [1/10]. Step [120/120]. Loss: 0.009. Test acc: 0.007.
Epoch [2/10]. Step [1/120]. Loss: 0.010. Test acc: 0.007.
Epoch [2/10]. Step [101/120]. Loss: 0.007. Test acc: 0.005.
Epoch [2/10]. Step [120/120]. Loss: 0.006. Test acc: 0.005.
Epoch [3/10]. Step [1/120]. Loss: 0.007. Test acc: 0.005.
Epoch [3/10]. Step [101/120]. Loss: 0.006. Test acc: 0.005.
Epoch [3/10]. Step [120/120]. Loss: 0.005. Test acc: 0.005.
Epoch [4/10]. Step [1/120]. Loss: 0.005. Test acc: 0.005.
Epoch [4/10]. Step [101/120]. Loss: 0.005. Test acc: 0.005.
Epoch [4/10]. Step [120/120]. Loss: 0.005. Test acc: 0.005.
Epoch [5/10]. Step [1/120]. Loss: 0.006. Test acc: 0.005.
Epoch [5/10]. Step [101/120]. Loss: 0.005. Test acc: 0.004.
Epoch [5/10]. Step [120/120]. Loss: 0.005. Test acc: 0.004.
Epoch [6/10]. Step [1/120]. Loss: 0.006. Test acc: 0.004.
Epoch [6/10]. Step [101/120]. Loss: 0.005. Test acc:

Adam

In [20]:
net = CHNet()
optimizer = Adam(net.parameters(), lr=LR)

In [21]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/120]. Loss: 0.033. Test acc: 0.043.
Epoch [1/10]. Step [101/120]. Loss: 0.028. Test acc: 0.013.
Epoch [1/10]. Step [120/120]. Loss: 0.023. Test acc: 0.012.
Epoch [2/10]. Step [1/120]. Loss: 0.024. Test acc: 0.012.
Epoch [2/10]. Step [101/120]. Loss: 0.020. Test acc: 0.010.
Epoch [2/10]. Step [120/120]. Loss: 0.017. Test acc: 0.010.
Epoch [3/10]. Step [1/120]. Loss: 0.017. Test acc: 0.010.
Epoch [3/10]. Step [101/120]. Loss: 0.015. Test acc: 0.009.
Epoch [3/10]. Step [120/120]. Loss: 0.013. Test acc: 0.009.
Epoch [4/10]. Step [1/120]. Loss: 0.014. Test acc: 0.009.
Epoch [4/10]. Step [101/120]. Loss: 0.012. Test acc: 0.008.
Epoch [4/10]. Step [120/120]. Loss: 0.011. Test acc: 0.007.
Epoch [5/10]. Step [1/120]. Loss: 0.009. Test acc: 0.007.
Epoch [5/10]. Step [101/120]. Loss: 0.009. Test acc: 0.007.
Epoch [5/10]. Step [120/120]. Loss: 0.009. Test acc: 0.007.
Epoch [6/10]. Step [1/120]. Loss: 0.009. Test acc: 0.006.
Epoch [6/10]. Step [101/120]. Loss: 0.008. Test acc:

Обучение моделей с lr = 0.01


In [24]:
EPOCHES = 10
LR = 0.01

SGD

In [25]:
net = CHNet()
optimizer = SGD(net.parameters(), lr=LR)

In [26]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/120]. Loss: 0.031. Test acc: 0.038.
Epoch [1/10]. Step [101/120]. Loss: 0.021. Test acc: 0.009.
Epoch [1/10]. Step [120/120]. Loss: 0.011. Test acc: 0.009.
Epoch [2/10]. Step [1/120]. Loss: 0.009. Test acc: 0.010.
Epoch [2/10]. Step [101/120]. Loss: 0.009. Test acc: 0.008.
Epoch [2/10]. Step [120/120]. Loss: 0.008. Test acc: 0.009.
Epoch [3/10]. Step [1/120]. Loss: 0.006. Test acc: 0.009.
Epoch [3/10]. Step [101/120]. Loss: 0.008. Test acc: 0.007.
Epoch [3/10]. Step [120/120]. Loss: 0.008. Test acc: 0.007.
Epoch [4/10]. Step [1/120]. Loss: 0.005. Test acc: 0.008.
Epoch [4/10]. Step [101/120]. Loss: 0.007. Test acc: 0.007.
Epoch [4/10]. Step [120/120]. Loss: 0.007. Test acc: 0.007.
Epoch [5/10]. Step [1/120]. Loss: 0.007. Test acc: 0.007.
Epoch [5/10]. Step [101/120]. Loss: 0.006. Test acc: 0.007.
Epoch [5/10]. Step [120/120]. Loss: 0.006. Test acc: 0.006.
Epoch [6/10]. Step [1/120]. Loss: 0.006. Test acc: 0.006.
Epoch [6/10]. Step [101/120]. Loss: 0.006. Test acc:

RMSProp + momentum

In [27]:
net = CHNet()
optimizer = RMSprop(net.parameters(), lr=LR, momentum=0.98)

In [28]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/120]. Loss: 0.037. Test acc: 0.045.
Epoch [1/10]. Step [101/120]. Loss: 0.014. Test acc: 0.009.
Epoch [1/10]. Step [120/120]. Loss: 0.011. Test acc: 0.009.
Epoch [2/10]. Step [1/120]. Loss: 0.009. Test acc: 0.009.
Epoch [2/10]. Step [101/120]. Loss: 0.008. Test acc: 0.006.
Epoch [2/10]. Step [120/120]. Loss: 0.007. Test acc: 0.007.
Epoch [3/10]. Step [1/120]. Loss: 0.009. Test acc: 0.007.
Epoch [3/10]. Step [101/120]. Loss: 0.006. Test acc: 0.006.
Epoch [3/10]. Step [120/120]. Loss: 0.006. Test acc: 0.005.
Epoch [4/10]. Step [1/120]. Loss: 0.006. Test acc: 0.004.
Epoch [4/10]. Step [101/120]. Loss: 0.005. Test acc: 0.005.
Epoch [4/10]. Step [120/120]. Loss: 0.005. Test acc: 0.004.
Epoch [5/10]. Step [1/120]. Loss: 0.006. Test acc: 0.004.
Epoch [5/10]. Step [101/120]. Loss: 0.005. Test acc: 0.005.
Epoch [5/10]. Step [120/120]. Loss: 0.005. Test acc: 0.005.
Epoch [6/10]. Step [1/120]. Loss: 0.007. Test acc: 0.005.
Epoch [6/10]. Step [101/120]. Loss: 0.005. Test acc:

Adam

In [29]:
net = CHNet()
optimizer = Adam(net.parameters(), lr=LR)

In [30]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [1/120]. Loss: 0.043. Test acc: 0.041.
Epoch [1/10]. Step [101/120]. Loss: 0.019. Test acc: 0.008.
Epoch [1/10]. Step [120/120]. Loss: 0.010. Test acc: 0.007.
Epoch [2/10]. Step [1/120]. Loss: 0.010. Test acc: 0.007.
Epoch [2/10]. Step [101/120]. Loss: 0.007. Test acc: 0.005.
Epoch [2/10]. Step [120/120]. Loss: 0.006. Test acc: 0.005.
Epoch [3/10]. Step [1/120]. Loss: 0.005. Test acc: 0.005.
Epoch [3/10]. Step [101/120]. Loss: 0.006. Test acc: 0.005.
Epoch [3/10]. Step [120/120]. Loss: 0.005. Test acc: 0.005.
Epoch [4/10]. Step [1/120]. Loss: 0.006. Test acc: 0.005.
Epoch [4/10]. Step [101/120]. Loss: 0.006. Test acc: 0.005.
Epoch [4/10]. Step [120/120]. Loss: 0.005. Test acc: 0.006.
Epoch [5/10]. Step [1/120]. Loss: 0.006. Test acc: 0.006.
Epoch [5/10]. Step [101/120]. Loss: 0.005. Test acc: 0.006.
Epoch [5/10]. Step [120/120]. Loss: 0.005. Test acc: 0.006.
Epoch [6/10]. Step [1/120]. Loss: 0.005. Test acc: 0.006.
Epoch [6/10]. Step [101/120]. Loss: 0.005. Test acc:

Структура модели:
* 4 скрытых посносвязных слоя + 1 выходной слой
* в качестве функии активации во всех слоях бла применена LeakyRelu
* на каждом слое применён Dropout 0.5
* во всех слоях перед функцией активации применена батч-нормализация

Выводы:

Модели были обучены с исспользованием оптимизаторов Adam, SGD, RMSProp. Lr составил в 0,002 и 0,01 для епрвого и второго прогона моделей. На данном наборе данных и выбранной архитектурой модели значимого различия между оптимизаторами и скоростью обучения не выявлено.

В каждом случае модели показывали отличные результаты как на тренировочных данных  так и на тестовых.

Наилучший результат был достигнут с применением RMSProp (lr=0.02, momentum=0.98) с разницей в 0,001 по сравнению с другими оптимизаторами.
