In [1]:
%matplotlib inline
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn

In [13]:
import csv

# Semicolon-delimited wine-quality table, parsed as float32.
# The first row is skipped (presumably the column-name header — confirm against the file).
wine_path = '../data/p1ch4/tabular-wine/winequality-white.csv'
wineq_numpy = np.loadtxt(
    wine_path,
    dtype=np.float32,
    delimiter=';',
    skiprows=1,
)
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [3]:
# Turn the loaded NumPy array into a torch tensor and show its dimensions.
wineq = torch.from_numpy(wineq_numpy)
wineq.size()

torch.Size([4898, 12])

In [7]:
# Model inputs: every column of the table except the last one.
data = wineq[:, :-1]
(data, data.size())

(tensor([[ 7.0000,  0.2700,  0.3600,  ...,  3.0000,  0.4500,  8.8000],
         [ 6.3000,  0.3000,  0.3400,  ...,  3.3000,  0.4900,  9.5000],
         [ 8.1000,  0.2800,  0.4000,  ...,  3.2600,  0.4400, 10.1000],
         ...,
         [ 6.5000,  0.2400,  0.1900,  ...,  2.9900,  0.4600,  9.4000],
         [ 5.5000,  0.2900,  0.3000,  ...,  3.3400,  0.3800, 12.8000],
         [ 6.0000,  0.2100,  0.3800,  ...,  3.2600,  0.3200, 11.8000]]),
 torch.Size([4898, 11]))

In [6]:
# Regression target: the last column, given a trailing axis so it is (N, 1)
# and lines up with the single-output model predictions.
target = wineq[:, -1].unsqueeze(-1)
(target, target.shape)

(tensor([[6.],
         [6.],
         [6.],
         ...,
         [6.],
         [7.],
         [6.]]),
 torch.Size([4898, 1]))

In [15]:
# Random 80/20 train/validation split over row indices.
n_samples = data.shape[0]
n_val = int(0.2*n_samples)
shuffled_indices = torch.randperm(n_samples)

# Cut at a positive boundary instead of using negative offsets: when n_val is 0
# (fewer than 5 samples) `[0:-n_val]` would be `[0:0]` (empty training set) and
# `[-n_val:]` would be the whole permutation. For n_val > 0 the result is identical.
train_indices = shuffled_indices[:n_samples - n_val]
val_indices = shuffled_indices[n_samples - n_val:]

train_indices.shape,val_indices.shape

(torch.Size([3919]), torch.Size([979]))

In [16]:
# Gather the rows selected by each index set; inputs and targets use the
# same indices so they stay aligned.
data_train, target_train = data[train_indices], target[train_indices]
data_val, target_val = data[val_indices], target[val_indices]

In [17]:
# define model
# Small fully connected regressor: input features -> tanh hidden layer -> one output.

n_neurons = 24
input_shape = data.size(1)

seq_model = nn.Sequential(
    nn.Linear(input_shape, n_neurons),
    nn.Tanh(),
    nn.Linear(n_neurons, 1),
)

seq_model

Sequential(
  (0): Linear(in_features=11, out_features=24, bias=True)
  (1): Tanh()
  (2): Linear(in_features=24, out_features=1, bias=True)
)

In [18]:
# define loss
# A loss shipped with torch can be used directly, e.g. MSE

In [19]:
# define optimizer
# Adam over all trainable parameters of the model.
optimizer = optim.Adam(params=seq_model.parameters(), lr=1e-3)

In [20]:
# training function
def training_loop(n_epochs, model, loss_fn, optimizer, data_train, target_train, data_val, target_val):
    """Train ``model`` on the full training batch and log train/validation loss.

    Each epoch runs one forward pass on ``data_train``, one backward pass and
    one ``optimizer.step()``. The validation loss is computed on the
    parameters as they are *before* that epoch's step and is used only for
    reporting.

    Args:
        n_epochs: Number of epochs to run; epochs are numbered from 1.
        model: Callable mapping an input batch to predictions.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor supporting ``.backward()`` and ``.item()``.
        optimizer: Object exposing ``zero_grad()`` and ``step()``
            (presumably built over ``model``'s parameters — verify at the call site).
        data_train: Inputs passed to ``model`` for training.
        target_train: Targets compared against the training predictions.
        data_val: Inputs passed to ``model`` for validation.
        target_val: Targets compared against the validation predictions.

    Returns:
        None. Losses are printed at epoch 1 and at every 100th epoch.
    """
    for epoch in range(1, n_epochs + 1):
        data_p_train = model(data_train)
        loss_train = loss_fn(data_p_train, target_train)

        # The validation loss is never back-propagated, so do not record an
        # autograd graph for it (saves memory and time every epoch).
        with torch.no_grad():
            data_p_val = model(data_val)
            loss_val = loss_fn(data_p_val, target_val)

        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch == 1 or epoch % 100 == 0:
            print(f"epoch= {epoch} training loss = {loss_train.item():.4f},"
                  f"validation loss= {loss_val.item():.4f}")


In [21]:
#train
# Fit the model for 3000 full-batch epochs with mean-squared-error loss.
training_loop(
    3000,
    seq_model,
    nn.MSELoss(),
    optimizer,
    data_train=data_train,
    target_train=target_train,
    data_val=data_val,
    target_val=target_val,
)

epoch= 1 training loss = 48.9995,validation loss= 48.3369
epoch= 100 training loss = 13.4213,validation loss= 13.1191
epoch= 200 training loss = 3.4313,validation loss= 3.3064
epoch= 300 training loss = 1.3705,validation loss= 1.3204
epoch= 400 training loss = 0.8698,validation loss= 0.8606
epoch= 500 training loss = 0.7798,validation loss= 0.7885
epoch= 600 training loss = 0.7451,validation loss= 0.7669
epoch= 700 training loss = 0.6866,validation loss= 0.6931
epoch= 800 training loss = 0.6592,validation loss= 0.6734
epoch= 900 training loss = 0.6444,validation loss= 0.6609
epoch= 1000 training loss = 0.6193,validation loss= 0.6192
epoch= 1100 training loss = 0.5880,validation loss= 0.5803
epoch= 1200 training loss = 0.5713,validation loss= 0.5658
epoch= 1300 training loss = 0.5595,validation loss= 0.5521
epoch= 1400 training loss = 0.5513,validation loss= 0.5430
epoch= 1500 training loss = 0.5457,validation loss= 0.5370
epoch= 1600 training loss = 0.5419,validation loss= 0.5315
epoch