In [1]:
# predict good wines based on chemical analysis
import csv
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

wine_path = '../../dlwpt-code/data/p1ch4/tabular-wine/winequality-white.csv'
wineq_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=";", skiprows=1)

col_list = next(csv.reader(open(wine_path), delimiter=";"))
print(col_list)

wineq = torch.from_numpy(wineq_numpy)
wineq.shape, wineq.dtype

['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality']


(torch.Size([4898, 12]), torch.float32)

In [3]:
# split train and validation sets
n_samples = wineq.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples) # random

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

wineq_train = wineq[train_indices]
wineq_val = wineq[val_indices]

In [9]:
# split result (quality) and data
wineq_train_expected = wineq_train[:, -1].unsqueeze(1)
wineq_train_data = wineq_train[:, :-1]

wineq_val_expected = wineq_val[:, -1].unsqueeze(1)
wineq_val_data = wineq_val[:, :-1]

print(wineq_train_expected.shape, wineq_train_data.shape)
print(wineq_val_expected.shape, wineq_val_data.shape)
#wineq_train_expected, wineq_train_data, wineq_val_expected, wineq_val_data

torch.Size([3919, 1]) torch.Size([3919, 11])
torch.Size([979, 1]) torch.Size([979, 11])


In [10]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_data, val_data, train_expected, val_expected):
    for epoch in range(1, n_epochs+1):
        train_predicted = model(train_data)
        loss_train = loss_fn(train_predicted, train_expected)

        val_predicted = model(val_data)
        loss_val = loss_fn(val_predicted, val_expected)

        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch <= 3 or epoch % 1000 == 0:
            print(f'Epoch {epoch}, Training loss {loss_train:.4f}, Validation loss {loss_val:.4f}')

In [18]:
from collections import OrderedDict

seq_model = nn.Sequential(OrderedDict(
    [('hidden_linear', nn.Linear(11, 13)),
     ('hidden_activation', nn.Tanh()),
     ('output_linear', nn.Linear(13, 1))
]))
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training_loop(n_epochs=5000,
              optimizer=optimizer,
              model=seq_model,
              loss_fn=nn.MSELoss(),
              train_data=wineq_train_data,
              val_data=wineq_val_data,
              train_expected=wineq_train_expected,
              val_expected=wineq_val_expected)

print(f'output: {seq_model(wineq_val_data[:10])}')
print(f'answer: {wineq_val_expected[:10]}')

Epoch 1, Training loss 34.6151, Validation loss 34.6046
Epoch 2, Training loss 32.7312, Validation loss 32.7120
Epoch 3, Training loss 30.9235, Validation loss 30.9056
Epoch 1000, Training loss 0.7662, Validation loss 0.7307
Epoch 2000, Training loss 0.7792, Validation loss 0.7557
Epoch 3000, Training loss 0.7701, Validation loss 0.7436
Epoch 4000, Training loss 0.7226, Validation loss 0.6949
Epoch 5000, Training loss 0.7607, Validation loss 0.7264
output: tensor([[5.9463],
        [5.9447],
        [5.9466],
        [5.9456],
        [5.9464],
        [4.9746],
        [5.9400],
        [5.9457],
        [5.9460],
        [5.9461]], grad_fn=<AddmmBackward0>)
answer: tensor([[6.],
        [6.],
        [6.],
        [6.],
        [6.],
        [5.],
        [7.],
        [5.],
        [5.],
        [6.]])
