# Попрактикуемся с тем, что изучили

Будем практиковаться на датасете:
https://www.kaggle.com/c/avito-demand-prediction

Ваша задача:
1. Создать Dataset для загрузки данных (используем только числовые данные)
2. Обернуть его в Dataloader
3. Написать архитектуру сети, которая предсказывает число показов на основании числовых данных (вы всегда можете сгенерировать дополнительные признаки). Сеть должна включать слои BatchNorm и Dropout (или НЕ включать, но это нужно обосновать)
4. Учить будем на функцию потерь с кагла (log RMSE) - нужно её реализовать
5. Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

Train-test разделение нужно сделать с помощью `train_test_split` из sklearn с параметрами random_state=13, test_size=0.25.

Вопросы? Пишите в личку @Kinetikm.

In [487]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

import seaborn as sns
import matplotlib.pyplot as plt

In [488]:
# Load the California housing data; as_frame=True asks for pandas-backed output.
df  = fetch_california_housing(as_frame=True)

In [489]:
# Replace the loader result with its `frame` attribute (the table previewed below).
df = df.frame
# Show the first rows as a quick sanity check.
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [490]:
# All columns but the last are features; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [491]:
# Hold out 25% of the rows as the test set (fixed seed for reproducibility).
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.25, random_state=13)

In [492]:
# Carve a 10% validation set out of the remaining train+val rows.
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, random_state=21)

In [493]:
# Fit the scaler on the training features only, then apply the same
# transformation to every split so no validation/test statistics leak in.
scaler = StandardScaler().fit(X_train)

X_train, X_val, X_test = (
    np.array(scaler.transform(features)) for features in (X_train, X_val, X_test)
)
# Convert the targets to plain NumPy arrays as well.
y_train, y_val, y_test = (
    np.array(targets) for targets in (y_train, y_val, y_test)
)

In [494]:
# Cast every target array to float.
y_train, y_test, y_val = (
    targets.astype(float) for targets in (y_train, y_test, y_val)
)

In [495]:
class HausingRegressionDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each feature row with its target value.

    Args:
        X: Indexable, sized collection of feature rows.
        y: Indexable, sized collection of targets, aligned with ``X`` by
            position.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        # Fail fast: a length mismatch would otherwise silently ignore the
        # extra targets or raise IndexError in the middle of an epoch.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

def _as_dataset(features, targets):
    """Wrap NumPy features/targets as float tensors in a regression dataset."""
    return HausingRegressionDataset(
        torch.from_numpy(features).float(),
        torch.from_numpy(targets).float(),
    )


train_data = _as_dataset(X_train, y_train)
val_data = _as_dataset(X_val, y_val)
test_data = _as_dataset(X_test, y_test)

In [496]:
# Training hyperparameters.
epochs = 30
batch_size = 64
learning_rate = 1e-3
# Network input width: one unit per feature column of X.
num_features = X.shape[1]

In [497]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True
)
# Validation and test data are served one sample at a time, in order.
val_loader = torch.utils.data.DataLoader(val_data, batch_size=1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1)

In [498]:
class LinearRegression(nn.Module):
    """Small fully connected regressor with two hidden layers.

    Each hidden stage applies Linear -> ReLU -> Dropout -> BatchNorm1d; the
    final layer maps to a single output value per sample.

    Args:
        num_features: Number of input features per sample.
    """

    def __init__(self, num_features):
        super().__init__()

        self.fc1 = nn.Linear(num_features, 32)
        self.bn1 = nn.BatchNorm1d(32)
        self.dp1 = nn.Dropout(0.20)
        self.fc2 = nn.Linear(32, 16)
        self.bn2 = nn.BatchNorm1d(16)
        self.dp2 = nn.Dropout(0.20)
        self.fc3 = nn.Linear(16, 1)
        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Compute predictions for a batch of feature rows.

        Dropout and batch-norm behaviour follows the module's current mode
        (``train()`` / ``eval()``).

        Args:
            inputs: Tensor whose last dimension has ``num_features`` entries.

        Returns:
            Tensor with one predicted value per input row (last dim of 1).
        """
        x = self.relu(self.fc1(inputs))
        x = self.dp1(x)
        x = self.bn1(x)
        x = self.relu(self.fc2(x))
        x = self.dp2(x)
        x = self.bn2(x)
        x = self.fc3(x)
        return x

    def predict(self, test_inputs):
        """Run inference with the same computation graph as ``forward``.

        The layers after each batch-norm were trained on normalised
        activations, so inference must go through the batch-norm layers too
        (using their running statistics). The module is switched to eval
        mode for the call and its previous mode is restored afterwards.

        Args:
            test_inputs: Tensor whose last dimension has ``num_features``
                entries.

        Returns:
            Tensor of predictions, detached from the autograd graph.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                return self(test_inputs)
        finally:
            # Leave the module in whatever mode the caller had it in.
            self.train(was_training)

In [499]:
# Build the network, the training loss, and the optimizer.
model = LinearRegression(num_features)
print(model)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Alternative optimizers for the convergence comparison (enable one at a time):
# optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, alpha=0.99)
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)

LinearRegression(
  (fc1): Linear(in_features=8, out_features=32, bias=True)
  (bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dp1): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=32, out_features=16, bias=True)
  (bn2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dp2): Dropout(p=0.2, inplace=False)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
  (relu): ReLU()
)


In [500]:
# Per-epoch mean losses, one list per data split.
loss_stats = {"train": [], "val": []}

In [501]:
for epoch in range(epochs):
    # --- Training pass: one optimizer step per mini-batch. ---
    model.train()
    train_epoch_loss = 0
    for X_train_batch, y_train_batch in train_loader:
        optimizer.zero_grad()
        y_train_pred = model(X_train_batch)
        # unsqueeze(1) adds a trailing dimension to the targets before the
        # loss compares them with the predictions.
        train_loss = criterion(y_train_pred, y_train_batch.unsqueeze(1))
        train_loss.backward()
        optimizer.step()
        train_epoch_loss += train_loss.item()

    # --- Validation pass: eval mode, no gradient tracking. ---
    model.eval()
    val_epoch_loss = 0
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model(X_val_batch)
            val_loss = criterion(y_val_pred, y_val_batch.unsqueeze(1))
            val_epoch_loss += val_loss.item()

    # Record and report the mean per-batch loss of this epoch.
    train_mean_loss = train_epoch_loss / len(train_loader)
    val_mean_loss = val_epoch_loss / len(val_loader)
    loss_stats['train'].append(train_mean_loss)
    loss_stats['val'].append(val_mean_loss)

    print(f'Epoch {epoch+1}: | Train Loss: {train_mean_loss:.3f} | Val Loss: {val_mean_loss:.3f}')

Epoch 1: | Train Loss: 3.524 | Val Loss: 1.617
Epoch 2: | Train Loss: 1.080 | Val Loss: 0.468
Epoch 3: | Train Loss: 0.708 | Val Loss: 0.445
Epoch 4: | Train Loss: 0.618 | Val Loss: 0.471
Epoch 5: | Train Loss: 0.571 | Val Loss: 0.514
Epoch 6: | Train Loss: 0.528 | Val Loss: 0.454
Epoch 7: | Train Loss: 0.488 | Val Loss: 0.479
Epoch 8: | Train Loss: 0.483 | Val Loss: 0.510
Epoch 9: | Train Loss: 0.457 | Val Loss: 0.428
Epoch 10: | Train Loss: 0.457 | Val Loss: 0.564
Epoch 11: | Train Loss: 0.448 | Val Loss: 0.467
Epoch 12: | Train Loss: 0.441 | Val Loss: 0.492
Epoch 13: | Train Loss: 0.437 | Val Loss: 0.521
Epoch 14: | Train Loss: 0.436 | Val Loss: 0.491
Epoch 15: | Train Loss: 0.438 | Val Loss: 0.565
Epoch 16: | Train Loss: 0.435 | Val Loss: 0.546
Epoch 17: | Train Loss: 0.425 | Val Loss: 0.478
Epoch 18: | Train Loss: 0.419 | Val Loss: 0.528
Epoch 19: | Train Loss: 0.424 | Val Loss: 0.634
Epoch 20: | Train Loss: 0.412 | Val Loss: 0.625
Epoch 21: | Train Loss: 0.417 | Val Loss: 0.627
E

In [502]:
# Collect test-set predictions as a flat list of Python floats.
y_pred_list = []
model.eval()
with torch.no_grad():
    for X_batch, _ in test_loader:
        y_test_pred = model(X_batch)
        # Flatten each batch so the result holds one float per sample for any
        # loader batch size; squeezing per batch only gave a flat list when
        # every batch contained exactly one sample.
        y_pred_list.extend(y_test_pred.flatten().tolist())

In [503]:
# Score the test-set predictions against the true targets.
mse = mean_squared_error(y_test, y_pred_list)
r_square = r2_score(y_test, y_pred_list)
print("Mean Squared Error :",mse)
print("R^2 :",r_square)

Mean Squared Error : 0.3684674596721582
R^2 : 0.7273262399076423
