<h1>Wine quality model<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Загрузка-данных" data-toc-modified-id="Загрузка-данных-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Загрузка данных</a></span></li><li><span><a href="#Выделение-признаков" data-toc-modified-id="Выделение-признаков-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Выделение признаков</a></span></li><li><span><a href="#Разбивка-на-тренировочную-и-валидационную-выборки,-нормализация-признаков" data-toc-modified-id="Разбивка-на-тренировочную-и-валидационную-выборки,-нормализация-признаков-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Разбивка на тренировочную и валидационную выборки, нормализация признаков</a></span></li><li><span><a href="#Создание-модели" data-toc-modified-id="Создание-модели-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Создание модели</a></span></li><li><span><a href="#Предсказания-модели" data-toc-modified-id="Предсказания-модели-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Предсказания модели</a></span></li></ul></div>

Третья по сложности задача физики — выбор подходящего вина для празднования открытий. Загрузите данные о винах из создайте новую модель с соответствующим количеством входных параметров

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from collections import OrderedDict

### Загрузка данных

In [2]:
wine_path = '/Users/alexfil/Desktop/PyTorch/data/winequality-white.csv'

In [3]:
wineq_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=';', skiprows=1)
wineq = torch.from_numpy(wineq_numpy)

### Выделение признаков

In [5]:
features = wineq[:, :-1]
features[:-1], features.shape

(tensor([[ 7.0000,  0.2700,  0.3600,  ...,  3.0000,  0.4500,  8.8000],
         [ 6.3000,  0.3000,  0.3400,  ...,  3.3000,  0.4900,  9.5000],
         [ 8.1000,  0.2800,  0.4000,  ...,  3.2600,  0.4400, 10.1000],
         ...,
         [ 6.6000,  0.3200,  0.3600,  ...,  3.1500,  0.4600,  9.6000],
         [ 6.5000,  0.2400,  0.1900,  ...,  2.9900,  0.4600,  9.4000],
         [ 5.5000,  0.2900,  0.3000,  ...,  3.3400,  0.3800, 12.8000]]),
 torch.Size([4898, 11]))

In [6]:
target = wineq[:, -1].unsqueeze(1)
target,  target.shape

(tensor([[6.],
         [6.],
         [6.],
         ...,
         [6.],
         [7.],
         [6.]]),
 torch.Size([4898, 1]))

### Разбивка на тренировочную и валидационную выборки, нормализация признаков

In [8]:
n_samples = features.shape[0]
n_val = int(0.2 * n_samples)
n_samples, n_val

(4898, 979)

In [9]:
features_mean = torch.mean(features, dim=0)
features_var = torch.var(features, dim=0)
features_mean, features_var

(tensor([6.8548e+00, 2.7824e-01, 3.3419e-01, 6.3914e+00, 4.5772e-02, 3.5308e+01,
         1.3836e+02, 9.9403e-01, 3.1883e+00, 4.8985e-01, 1.0514e+01]),
 tensor([7.1211e-01, 1.0160e-02, 1.4646e-02, 2.5726e+01, 4.7733e-04, 2.8924e+02,
         1.8061e+03, 8.9455e-06, 2.2801e-02, 1.3025e-02, 1.5144e+00]))

In [10]:
features_normalized = (features - features_mean) / torch.sqrt(features_var)
features_normalized, features_normalized.shape

(tensor([[ 1.7208e-01, -8.1761e-02,  2.1326e-01,  ..., -1.2468e+00,
          -3.4915e-01, -1.3930e+00],
         [-6.5743e-01,  2.1587e-01,  4.7996e-02,  ...,  7.3995e-01,
           1.3422e-03, -8.2419e-01],
         [ 1.4756e+00,  1.7450e-02,  5.4378e-01,  ...,  4.7505e-01,
          -4.3677e-01, -3.3663e-01],
         ...,
         [-4.2043e-01, -3.7940e-01, -1.1915e+00,  ..., -1.3130e+00,
          -2.6153e-01, -9.0545e-01],
         [-1.6054e+00,  1.1666e-01, -2.8253e-01,  ...,  1.0049e+00,
          -9.6251e-01,  1.8574e+00],
         [-1.0129e+00, -6.7703e-01,  3.7852e-01,  ...,  4.7505e-01,
          -1.4882e+00,  1.0448e+00]]),
 torch.Size([4898, 11]))

In [11]:
shuffled_indices = torch.randperm(n_samples)
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

In [39]:
y_train = target[train_indices]
X_train = features_normalized[train_indices]

y_val = target[val_indices]
X_val = features_normalized[val_indices]

In [40]:
y_train.shape, X_train.shape

(torch.Size([3919, 1]), torch.Size([3919, 11]))

In [41]:
y_val.shape, X_val.shape

(torch.Size([979, 1]), torch.Size([979, 11]))

### Создание модели

Модель переходит от 11 входных признаков до 100 скрытых признаков, пропускает их через функцию активации ReLU и тд и, наконец, объединяет получившиеся 25 чисел в один выходной признак

In [42]:
def training_loop(n_epochs, optimizer, model, loss_fn, X_train, X_val,
              y_train, y_val):
    for epoch in range(1, n_epochs + 1):
        pred_train = model(X_train)
        loss_train = loss_fn(pred_train, y_train)
        
        pred_val = model(X_val)
        loss_val = loss_fn(pred_val, y_val)
        
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
        if epoch == 1 or epoch % 500 == 0:
            print(f'Epoch {epoch}, Training loss {loss_train.item():.4f},'
                  f'Validation loss {loss_val.item():.4f}')

In [43]:
seq_model_wine = nn.Sequential(
    nn.Linear(11, 100),
    nn.ReLU(),
    nn.Linear(100, 50),
    nn.ReLU(),
    nn.Linear(50, 25),
    nn.ReLU(),
    nn.Linear(25, 1)
)
optimizer = optim.SGD(seq_model_wine.parameters(), lr=1e-3)

In [44]:
seq_model_wine

Sequential(
  (0): Linear(in_features=11, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=25, bias=True)
  (5): ReLU()
  (6): Linear(in_features=25, out_features=1, bias=True)
)

In [45]:
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=seq_model_wine,
    loss_fn=nn.MSELoss(),
    X_train=X_train,
    X_val=X_val,
    y_train=y_train,
    y_val=y_val
)

Epoch 1, Training loss 34.1274,Validation loss 34.2607
Epoch 500, Training loss 0.8875,Validation loss 0.9613
Epoch 1000, Training loss 0.6625,Validation loss 0.7403
Epoch 1500, Training loss 0.5811,Validation loss 0.6542
Epoch 2000, Training loss 0.5449,Validation loss 0.6127
Epoch 2500, Training loss 0.5260,Validation loss 0.5901
Epoch 3000, Training loss 0.5142,Validation loss 0.5761
Epoch 3500, Training loss 0.5057,Validation loss 0.5666
Epoch 4000, Training loss 0.4992,Validation loss 0.5597
Epoch 4500, Training loss 0.4940,Validation loss 0.5543
Epoch 5000, Training loss 0.4896,Validation loss 0.5502


### Предсказания модели

In [46]:
pred = seq_model_wine(X_val)

In [47]:
pred[:5]

tensor([[6.4918],
        [5.1732],
        [4.8201],
        [5.2175],
        [5.2648]], grad_fn=<SliceBackward0>)