In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
from torch  import nn
from torch.utils.data import Dataset, DataLoader
from torch import optim

%matplotlib inline

In [26]:
wine = pd.read_csv('/content/winequality-red.csv', sep=';')
wine.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [27]:
wine.shape

(1599, 12)

In [28]:
wine.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [29]:
wine.dtypes

fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object

In [30]:
wine['quality'].value_counts()

5    681
6    638
7    199
4     53
8     18
3     10
Name: quality, dtype: int64

In [31]:
features = wine.drop('quality', axis=1)
print(features.shape)
print(type(features))

(1599, 11)
<class 'pandas.core.frame.DataFrame'>


In [32]:
features = features.to_numpy()
print(type(features))

<class 'numpy.ndarray'>


In [33]:
target = wine['quality']
print(target.shape)
print(type(target))

(1599,)
<class 'pandas.core.series.Series'>


In [34]:
target = target.to_numpy()
print(type(target))

<class 'numpy.ndarray'>


In [35]:
scalar = StandardScaler()
features = scalar.fit_transform(features)

In [36]:
class TabDataset(Dataset):
  def __init__(self,x,y):
    self.x = x
    self.y = y
    self.x = torch.from_numpy(self.x).type(torch.float32)
    self.y = torch.from_numpy(self.y)

  def __len__(self): return len(self.x)

  def __getitem__(self, idx): return self.x[idx], self.y[idx]

In [37]:
def get_dataloader(features, target, bs):

  X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2) 
  
  #Dataset
  train_ds = TabDataset(X_train, y_train)
  test_ds = TabDataset(X_test, y_test)

  #DataLoader
  train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
  test_dl = DataLoader(test_ds,  batch_size=bs, shuffle=True)

  return train_dl, test_dl

In [38]:
def first(x):
  try: return next(iter(x))
  except StopIteration: return None

In [39]:
bs = 125  #batch size
train_dl, test_dl = get_dataloader(features, target, bs)

xb, yb = first(train_dl)
xb.shape, yb.shape

(torch.Size([125, 11]), torch.Size([125]))

In [40]:
def get_model(n_in, n_out):
  return nn.Sequential(
      nn.Linear(n_in, 18),
      nn.ReLU(),
      nn.Linear(18, n_out)
      )

In [41]:
model = get_model(11, 1)
model

Sequential(
  (0): Linear(in_features=11, out_features=18, bias=True)
  (1): ReLU()
  (2): Linear(in_features=18, out_features=1, bias=True)
)

In [42]:
def validate_epoch(model, metric):
  accuracy = [metric(model(xb.float()), yb) for xb, yb in test_dl]
  return round(torch.stack(accuracy).mean().item(), 4)

In [43]:
def loss_func(inputs, targets):
  loss = nn.MSELoss()
  return loss(inputs.squeeze(), targets)

In [44]:
def train_model(model, metric, epochs):
  for i in range(epochs):
    for xb, yb  in train_dl:
      preds = model(xb.float())
      #print(preds.shape)
      #print(yb.shape)
      loss = loss_func(preds, yb.float())
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    print(validate_epoch(model, metric), end=' ')


In [45]:
optimizer  = optim.Adam(model.parameters(), lr = 0.01)
train_model(model, metric = loss_func, epochs=5)

29.7529 20.5618 10.4897 3.9586 2.5445 

In [46]:
test_data = features[1]
out_predict = model(torch.Tensor(test_data).float())
out_predict.argmax()

tensor(0)