# Попрактикуемся с тем, что изучили

Будем практиковаться на датасете:
https://www.kaggle.com/c/avito-demand-prediction

Ваша задача:
1. Создать Dataset для загрузки данных (используем только числовые данные)
2. Обернуть его в Dataloader
3. Написать архитектуру сети, которая предсказывает число показов на основании числовых данных (вы всегда можете нагенерить дополнительных факторов). Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)
4. Учить будем на функцию потерь с кагла (log RMSE) - нужно её реализовать
5. Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

Train-test разделение нужно сделать с помощью `train_test_split` из sklearn с параметрами `random_state=13` и `test_size=0.25`.

Вопросы? Пишите в личку @Kinetikm.

In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

In [2]:
# Load the California housing dataset; as_frame=True makes scikit-learn
# return the data as pandas objects inside the result bunch.
df = fetch_california_housing(as_frame=True)

In [3]:
# Keep only the combined DataFrame (feature columns plus the target column)
# and preview its first rows.
df = df.frame
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [4]:
# Column-wise split: everything but the trailing column is the feature matrix,
# the trailing column is the regression target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [5]:
# Sanity check: show the first three feature rows.
X.head(3)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24


In [6]:
# Sanity check: show the first three target values.
y.head(3)

0    4.526
1    3.585
2    3.521
Name: MedHouseVal, dtype: float64

In [7]:
# Hold out 25% of the rows for testing; random_state=13 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=13)

In [8]:
# Standardise the features with statistics fitted on the training split only
# (the test split is transformed with those same statistics), then make sure
# all four splits are plain NumPy arrays.
scaler = StandardScaler()
X_train = np.array(scaler.fit_transform(X_train))
X_test = np.array(scaler.transform(X_test))
y_train = np.array(y_train)
y_test = np.array(y_test)

In [9]:
# Cast both target arrays to the builtin float dtype.
y_train = y_train.astype(float)
y_test = y_test.astype(float)

In [10]:
class HousingDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each feature row with its target value.

    Args:
        X: Indexable, sized collection of feature rows.
        y: Indexable, sized collection of targets aligned with ``X`` by position.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of items.
    """

    def __init__(self, X, y):
        # Fail fast: a length mismatch would otherwise only surface later as an
        # out-of-range index while iterating over the data.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.X)

# Wrap each split in a dataset; every NumPy array is first converted to a
# float32 tensor.
train_data = HousingDataset(
    *(torch.from_numpy(array).float() for array in (X_train, y_train))
)
test_data = HousingDataset(
    *(torch.from_numpy(array).float() for array in (X_test, y_test))
)

In [11]:
# Training hyperparameters.
epochs = 30                # passes over the training set
batch_size = 64            # samples per training mini-batch
learning_rate = 0.001      # step size handed to the optimiser
num_features = X.shape[1]  # number of feature columns = network input width

In [12]:
# Training batches are reshuffled every epoch; the test set is served one
# sample at a time without shuffling.
train_data_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
)
test_data_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=1,
)

In [13]:
class LinearRegression(nn.Module):
    """Small fully connected regressor with one output value per sample.

    Despite the name this is a multi-layer perceptron: two hidden layers
    (32 and 16 units), each followed by ReLU, Dropout (p=0.2) and
    BatchNorm1d, and a final linear layer producing a single value.

    Args:
        num_features: Number of input features per sample.
    """

    def __init__(self, num_features):
        super().__init__()

        self.fc1 = nn.Linear(num_features, 32)
        self.bn1 = nn.BatchNorm1d(32)
        self.dp1 = nn.Dropout(0.20)
        self.fc2 = nn.Linear(32, 16)
        self.bn2 = nn.BatchNorm1d(16)
        self.dp2 = nn.Dropout(0.20)
        self.fc3 = nn.Linear(16, 1)
        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Run the network on a ``(batch, num_features)`` tensor.

        Returns:
            Tensor of shape ``(batch, 1)`` with the predicted values.
        """
        # Hidden block order is Linear -> ReLU -> Dropout -> BatchNorm.
        x = self.relu(self.fc1(inputs))
        x = self.dp1(x)
        x = self.bn1(x)
        x = self.relu(self.fc2(x))
        x = self.dp2(x)
        x = self.bn2(x)
        x = self.fc3(x)
        return x

    def predict(self, test_inputs):
        """Return inference-mode predictions for ``test_inputs``.

        Runs the same layers as ``forward`` with the module temporarily in
        eval mode (Dropout disabled, BatchNorm using its running statistics)
        and with gradient tracking turned off. The previous training/eval
        mode is restored before returning.

        Returns:
            Tensor of shape ``(batch, 1)`` that does not require grad.
        """
        # Skipping the BatchNorm layers here would feed fc2/fc3 activations on
        # a different scale than the one they were trained on, so reuse
        # forward() instead of duplicating a reduced layer stack.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                return self.forward(test_inputs)
        finally:
            self.train(was_training)

In [14]:
# Network, loss and optimiser for one training run.
model = LinearRegression(num_features)

# Mean squared error between predictions and targets.
criterion = nn.MSELoss()

# The optimisers being compared. Change `optimizer_name` to switch between
# them instead of (un)commenting lines; each entry is a factory, so only the
# selected optimiser is actually constructed.
optimizer_factories = {
    "Adam": lambda params: torch.optim.Adam(params, lr=learning_rate),
    "RMSprop": lambda params: torch.optim.RMSprop(params, lr=learning_rate, alpha=0.99),
    "SGD": lambda params: torch.optim.SGD(params, lr=learning_rate, momentum=0.9),
}
optimizer_name = "SGD"
optimizer = optimizer_factories[optimizer_name](model.parameters())

In [15]:
# Train for `epochs` passes over the training loader. The mean batch loss of
# every epoch is stored and printed so that the convergence of different
# optimisers can be compared.
train_loss_history = []
for epoch in range(epochs):
    model.train()  # training mode: Dropout active, BatchNorm uses batch statistics
    train_epoch_loss = 0.0

    for X_train_batch, y_train_batch in train_data_loader:
        optimizer.zero_grad()

        y_train_pred = model(X_train_batch)
        # unsqueeze(1) adds a trailing axis to the targets; presumably this is
        # to match a (batch, 1) prediction tensor.
        train_loss = criterion(y_train_pred, y_train_batch.unsqueeze(1))
        train_loss.backward()
        optimizer.step()
        train_epoch_loss += train_loss.item()

    # Average over batches; max(..., 1) guards against an empty loader.
    mean_epoch_loss = train_epoch_loss / max(len(train_data_loader), 1)
    train_loss_history.append(mean_epoch_loss)
    print(f"Epoch {epoch + 1}/{epochs} - mean train loss: {mean_epoch_loss:.4f}")

In [16]:
# Collect test-set predictions as a flat list of Python floats.
model.eval()  # inference mode: Dropout disabled, BatchNorm uses running statistics
with torch.no_grad():
    batch_predictions = [model(X_batch) for X_batch, _ in test_data_loader]

# Concatenate along the batch axis and flatten, so the result is a flat list
# for any loader batch size (not only batch_size=1); an empty loader yields [].
y_pred_list = (
    torch.cat(batch_predictions).reshape(-1).tolist() if batch_predictions else []
)

In [17]:
# Score the held-out predictions: mean squared error and the coefficient of
# determination (R^2), both computed against the true test targets.
mse = mean_squared_error(y_test, y_pred_list)
r_square = r2_score(y_test, y_pred_list)
print("Mean Squared Error :", mse)
print("R^2 :", r_square)

Mean Squared Error : 0.3786992024220274
R^2 : 0.7197545325704819


#### Adam
- MSE - 0.4167
- R^2 - 0.6916

#### RMSProp
- MSE - 0.3517
- R^2 - 0.7397

#### SGD
- MSE - 0.3787
- R^2 - 0.7198

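В данном случае лучше отработала модель с RMSProp: у неё наименьшая MSE и наибольший R^2 на тестовой выборке. Seed в ноутбуке не фиксируется, поэтому часть разницы между оптимизаторами может объясняться случайной инициализацией весов.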