**Домашнее задание**


1. Создать Dataset для загрузки данных

2. Обернуть его в Dataloader

3. Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)

4. Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

Подключение библиотек

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
from torch.optim import Adam, SGD, RMSprop


import warnings
warnings.filterwarnings('ignore')

Загрузка данных

In [2]:
df = fetch_california_housing(as_frame=True).frame
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [3]:
x = df.drop(columns = 'MedHouseVal')
y= df['MedHouseVal']
x.shape, y.shape

((20640, 8), (20640,))

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 13)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((15480, 8), (5160, 8), (15480,), (5160,))

In [5]:
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(y_train.values)

x_test = torch.FloatTensor(x_test)
y_test = torch.FloatTensor(y_test.values)

**Создание Dataset и оборачивание его в DataLoader**

In [7]:
class Data(Dataset):

  def __init__(self, data, target):
    self.x = data
    self.y = target

  def __len__(self):
    return len(self.x)

  def __getitem__(self, idx):
    return [self.x[idx], self.y[idx]]

In [8]:
train_data = Data(x_train, y_train)
train_loader = DataLoader(train_data,
                          batch_size = 128,
                          shuffle = True,
                          drop_last = True,
                          num_workers = 0)

In [9]:
test_data = Data(x_test, y_test)
test_loader = DataLoader(test_data,
                          batch_size = 128,
                          shuffle = True,
                          drop_last = True,
                          num_workers = 0)

**Сборка нейронной сети**

In [10]:
class Perceptron(nn.Module):
    
    def __init__(self, input_dim, output_dim, activation='relu'):
        
        super().__init__()
        self.fc = nn.Linear(input_dim, output_dim).cuda()
        assert activation in ['relu', 'sigmoid'], 'Activation func should be "relu" or "sigmoid"!'
        self.activation = activation
        
    def forward(self, x):
        
        x = self.fc(x).cuda()
        return eval(f'F.{self.activation}')(x)     

In [11]:
class FeedForward(nn.Module):
    
    def __init__(self, input_dim, hidden_dim):
        
        super(FeedForward, self).__init__()
        
        self.fc1 = Perceptron(input_dim, 4*hidden_dim).cuda()
        self.dp1 = nn.Dropout(0.4).cuda()
        self.bn1 = nn.BatchNorm1d(4*hidden_dim).cuda()

        self.fc2 = Perceptron(4*hidden_dim, 2*hidden_dim).cuda()
        self.dp2 = nn.Dropout(0.3).cuda()
        self.bn2 = nn.BatchNorm1d(2*hidden_dim).cuda()
        
        self.fc3 = Perceptron(2*hidden_dim, hidden_dim).cuda()
        self.dp3 = nn.Dropout(0.2).cuda()
        self.bn3 = nn.BatchNorm1d(hidden_dim).cuda()
        
        self.fc4 = Perceptron(hidden_dim, 1).cuda()
        
    def forward(self, x):
        
        x = self.fc1(x).cuda()
        x = self.bn1(x).cuda()
        x = self.dp1(x).cuda()
        
        x = self.fc2(x).cuda()
        x = self.bn2(x).cuda()
        x = self.dp2(x).cuda()
        
        x = self.fc3(x).cuda()
        x = self.bn3(x).cuda()
        x = self.dp3(x).cuda()
        
        x = self.fc4(x).cuda()
        
        return x

**Обучение нейронной сети с оптимизатором SGD**

In [12]:
net = FeedForward(8, 1024)

optimizer = SGD(net.parameters(), lr=0.005)
criterion = nn.L1Loss()

In [13]:
epochs = 200

net.train()
metrics_train = []
metrics_test = []

for ep in range(epochs):
    running_loss, running_items = 0.0, 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].cuda(), data[1].cuda()
        
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        running_items += len(labels)
        
        train_res = net(inputs)
        metrics_train.append(r2_score(labels.cpu().detach().numpy(), train_res.cpu().detach().numpy().reshape(-1)))
        
    if (ep + 1)%20 == 0:  
        net.eval()

        print(f'Epoch [{ep + 1}/{epochs}] | ' \
              f'Step [{i + 1}/{len(train_loader)}] | ' \
              f'Loss: {running_loss / running_items:.3f} | ' \
              f'Train R2: {sum(metrics_train) / len(metrics_train):.3f} | ', end='')

        running_loss, running_items = 0.0, 0.0
        metrics_train = []

        for i, data in enumerate(test_loader):
            test_res = net(data[0].cuda())
            metrics_test.append(r2_score(data[1].cpu().detach().numpy(), test_res.cpu().detach().numpy().reshape(-1)))
        print(f'Test R2: {sum(metrics_test) / len(metrics_test):.3f}')
        metrics_test = []
        net.train()
print('Training is finished!')

Epoch [20/200] | Step [120/120] | Loss: 0.005 | Train R2: 0.371 | Test R2: 0.585
Epoch [40/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.584 | Test R2: 0.647
Epoch [60/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.636 | Test R2: 0.654
Epoch [80/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.659 | Test R2: 0.673
Epoch [100/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.668 | Test R2: 0.640
Epoch [120/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.672 | Test R2: 0.670
Epoch [140/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.675 | Test R2: 0.658
Epoch [160/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.678 | Test R2: 0.682
Epoch [180/200] | Step [120/120] | Loss: 0.004 | Train R2: 0.680 | Test R2: 0.676
Epoch [200/200] | Step [120/120] | Loss: 0.003 | Train R2: 0.683 | Test R2: 0.680
Training is finished!


**Обучение нейронной сети с оптимизатором Adam**

In [14]:
net = FeedForward(8, 512)

optimizer = Adam(net.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [15]:
epochs = 200

net.train()
metrics_train = []
metrics_test = []

for ep in range(epochs):
    running_loss, running_items = 0.0, 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].cuda(), data[1].cuda()
        
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        running_items += len(labels)
        
        train_res = net(inputs)
        metrics_train.append(r2_score(labels.cpu().detach().numpy(), train_res.cpu().detach().numpy().reshape(-1)))
        
    if (ep + 1)%20 == 0:  
        net.eval()

        print(f'Epoch [{ep + 1}/{epochs}] | ' \
              f'Step [{i + 1}/{len(train_loader)}] | ' \
              f'Loss: {running_loss / running_items:.3f} | ' \
              f'Train R2: {sum(metrics_train) / len(metrics_train):.3f} | ', end='')

        running_loss, running_items = 0.0, 0.0
        metrics_train = []

        for i, data in enumerate(test_loader):
            test_res = net(data[0].cuda())
            metrics_test.append(r2_score(data[1].cpu().detach().numpy(), test_res.cpu().detach().numpy().reshape(-1)))
        print(f'Test R2: {sum(metrics_test) / len(metrics_test):.3f}')
        metrics_test = []
        net.train()
print('Training is finished!')

Epoch [20/200] | Step [120/120] | Loss: 0.003 | Train R2: 0.627 | Test R2: 0.701
Epoch [40/200] | Step [120/120] | Loss: 0.003 | Train R2: 0.727 | Test R2: 0.745
Epoch [60/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.747 | Test R2: 0.402
Epoch [80/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.763 | Test R2: 0.616
Epoch [100/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.773 | Test R2: -795.055
Epoch [120/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.783 | Test R2: -184.690
Epoch [140/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.789 | Test R2: -92441.633
Epoch [160/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.795 | Test R2: -44.766
Epoch [180/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.802 | Test R2: 0.193
Epoch [200/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.808 | Test R2: -8278.775
Training is finished!


При использовании оптимизатора Adam метрика на трейне и тесте выше чем, при использовании SGD. На трейне иногда наблюдаются не понятные значения

**Обучение нейронной сети с оптимизатором RMSprop**

In [16]:
net = FeedForward(8, 1024)

optimizer = RMSprop(net.parameters(), lr=0.005)
criterion = nn.MSELoss()

In [17]:
epochs = 200

net.train()
metrics_train = []

for ep in range(epochs):
    running_loss, running_items = 0.0, 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].cuda(), data[1].cuda()
        
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        running_items += len(labels)
        
        train_res = net(inputs)
        metrics_train.append(r2_score(labels.cpu().detach().numpy(), train_res.cpu().detach().numpy().reshape(-1)))
        
    if (ep + 1)%20 == 0:  
        net.eval()

        print(f'Epoch [{ep + 1}/{epochs}] | ' \
              f'Step [{i + 1}/{len(train_loader)}] | ' \
              f'Loss: {running_loss / running_items:.3f} | ' \
              f'Train R2: {sum(metrics_train) / len(metrics_train):.3f} | ', end='')

        running_loss, running_items = 0.0, 0.0
        metrics_train = []

        metrics_test = []
        for i, data in enumerate(test_loader):
            test_res = net(data[0].cuda())
            metrics_test.append(r2_score(data[1].cpu().detach().numpy(), test_res.cpu().detach().numpy().reshape(-1)))
        print(f'Test R2: {sum(metrics_test) / len(metrics_test):.3f}')
        net.train()
print('Training is finished!')

Epoch [20/200] | Step [120/120] | Loss: 0.003 | Train R2: 0.470 | Test R2: -31.246
Epoch [40/200] | Step [120/120] | Loss: 0.003 | Train R2: 0.725 | Test R2: -22.691
Epoch [60/200] | Step [120/120] | Loss: 0.003 | Train R2: 0.769 | Test R2: 0.613
Epoch [80/200] | Step [120/120] | Loss: 0.003 | Train R2: 0.785 | Test R2: 0.713
Epoch [100/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.794 | Test R2: -89767.896
Epoch [120/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.801 | Test R2: -17028.767
Epoch [140/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.800 | Test R2: -85576156.552
Epoch [160/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.807 | Test R2: -58.040
Epoch [180/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.807 | Test R2: -113913.656
Epoch [200/200] | Step [120/120] | Loss: 0.002 | Train R2: 0.810 | Test R2: -2767629.211
Training is finished!


Если используется оптимизатор RMSprop, то на тесте на большинстве эпох возникают очень странные значения метрики R2.