## Fully Connected Neural Net to Qualify Wine According to Chemical Analysis. ##

Wine data-set downloaded from [csv-file](http://mng.bz/90Ol)

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

data_path = Path("./winequality-white.csv")
wine_df = pd.read_csv(data_path, header=0, names=["fixed acidity", "volatile acidity", "citric acid", "residual sugar", "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density", 
"pH", "sulphates", "alcohol", "quality"], sep=";")

# data overview:
#print(wine_df.describe())

# which quality classes do we have? :
qualities = wine_df["quality"].unique()
print(f"Number of unique 'qualities': {len(qualities)}")
print(f"Qualities: {sorted(qualities)}")
binc = np.bincount([q for q in wine_df["quality"]])
no_inst = len(wine_df)
print(f"\nClass counts: {binc}")
print(f"\nNumber of instances: {no_inst} ")
print(f"\nClass fractions: {np.round(binc/no_inst,4) * 100}")


Number of unique 'qualities': 7
Qualities: [3, 4, 5, 6, 7, 8, 9]

Class counts: [   0    0    0   20  163 1457 2198  880  175    5]

Number of instances: 4898 

Class fractions: [ 0.    0.    0.    0.41  3.33 29.75 44.88 17.97  3.57  0.1 ]


In [2]:
import torch
from torch import nn

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

# Define model
class WineNetwork(nn.Module):
    def __init__(self):
        #super(WineNetwork, self).__init__()
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(11, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(p=0.2),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 10),
            nn.ReLU()
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

model = WineNetwork().to(device)
print(model)

Using cuda device
WineNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=11, out_features=64, bias=True)
    (1): ReLU()
    (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=64, out_features=128, bias=True)
    (5): ReLU()
    (6): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Linear(in_features=128, out_features=256, bias=True)
    (8): ReLU()
    (9): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): Linear(in_features=256, out_features=10, bias=True)
    (11): ReLU()
  )
)


In [3]:
# define torch.dataset: __init__(), __len__(), __getitem__()
from torch.utils.data import Dataset

class WineDataSet(Dataset):
    def __init__(self, data_df, transform=None, target_transform=None):
        self.data_df = data_df
        self.transform = transform
        self.target_transform = target_transform
        self.X = torch.tensor(self.data_df.iloc[:,:-1].values, dtype=torch.float32)
        self.Y = torch.tensor(self.data_df.iloc[:,-1].values, dtype= torch.long)

    def __len__(self):
        return len(self.Y)
        
    def __getitem__(self,idx):
        self.x = self.X[idx,:]
        self.y = self.Y[idx]
        if self.transform != None:
            self.x = self.transform(self.x)
        if self.target_transform != None:
            self.y = self.target_transform(self.y)
        return self.x, self.y
            

In [5]:
from sklearn.preprocessing import StandardScaler

def scale_dataframe(data_df, exempt_last_column=False, column_names_to_scale=None):
    """
        Scales columns of a given data frame with a StandardScaler from Sklearn. 
        Input:
            data_df : dataframe with numerical values to normalize
            exempt_last_column : if true, the parameter column_names_to_scale will be ignored and all but the last column will be scaled.
            column_names_to_scale : list of the names of the columns to be scaled

        Output:
            dataframe with columns scaled
    """
    scaler = StandardScaler()
    if(exempt_last_column & (column_names_to_scale != None)):
        raise UserWarning("exempt_last_column=True : your column_names_to_scale will be ignored!")
    if(exempt_last_column):
        print("scaler exempting last column")
        data = data_df.to_numpy()
        data_to_scale = data[:,:-1]
        last_column = np.expand_dims(data[:,-1].astype(np.int_), axis=1)
        data_scaled = scaler.fit_transform(data_to_scale)
        data_scaled = np.append(data_scaled, last_column, axis=1)
        data_df = pd.DataFrame(data_scaled)
    elif(column_names_to_scale):
        print("scaler scaling designated columns")
        data_to_scale = data_df[column_names_to_scale].to_numpy()
        data_scaled = scaler.fit_transform(data_to_scale)
        df_temp = pd.DataFrame(data_scaled, columns=column_names_to_scale, index=data_df.index)
        data_df[column_names_to_scale]= df_temp
    else:
        print("scaler scaling all columns")
        data_to_scale = data_df.to_numpy()
        data_scaled = scaler.fit_transform(data_to_scale)
        data_df = pd.DataFrame(data_scaled)
    
    return data_df


In [None]:
"""
# old scaler: 

from sklearn.preprocessing import StandardScaler

def scale_dataframe(data_df, exempt_last_column=False, column_names_to_scale=None):
    """
        Scales columns of a given data frame with a StandardScaler from Sklearn. 
        Input:
            data_df : dataframe with numerical values to normalize
            exempt_last_column : if true, column_names_to_scale will be ignored and all but the last column will be scaled.
            column_names_to_scale : list of the names of the columns to be scaled

        Output:
            dataframe with columns scaled
    """
    scaler = StandardScaler()

    if(exempt_last_column & (column_names_to_scale != None)):
        raise UserWarning("exempt_last_column=True : your column_names_to_scale will be ignored!")
    if(exempt_last_column):
        data = data_df.to_numpy()
        data_to_scale = data[:,:-1]
        last_column = np.expand_dims(data[:,-1].astype(np.int_), axis=1)
        data_scaled = np.append(scaler.fit_transform(data_to_scale), last_column, axis=1)
        return pd.DataFrame(data_scaled)
    elif(column_names_to_scale):
        data_to_scale = data_df[column_names_to_scale].to_numpy()
        data_scaled = scaler.fit_transform(data_to_scale)
        df_temp = pd.DataFrame(data_scaled, columns=column_names_to_scale, index=data_df.index)
        data_df[column_names_to_scale]= df_temp
    else:
        data_to_scale = data_df.to_numpy()
        data_scaled = scaler.fit_transform(data_to_scale)
        data_df = pd.DataFrame(data_scaled)
    
    return data_df

"""

In [None]:
# IS PROBABLY NOT NEEDED - SEE PYTORCH'S DEFINITION OF CROSSENTROPYLOSS:

# OHE encoding of the labels: 
labels = np.sort(wine_df["quality"].unique())
ohe=torch.nn.functional.one_hot(torch.tensor(labels))

# make a target_transform for the dataloaders:
target_transform = {l:oh for l,oh in zip(labels,ohe)}.get

# test the target_transform:
target_transform(4)

In [6]:
# test and train loops:

def train_loop(dataloader, model, loss_fn, optimizer):
    losses, no_correct = 0, 0
    for n_batch, (X, y) in enumerate(dataloader):
        pred = model(X)
        loss = loss_fn(pred, y)
        losses += loss.item()
        no_correct +=(pred.argmax(1)==y).sum().item()

        optimizer.zero_grad()        
        loss.backward()
        optimizer.step()
    
    return losses, no_correct
        

def test_loop(dataloader, model, loss_fn):
    losses, no_correct = 0, 0
    with torch.no_grad():
        for (X,y) in dataloader:
            pred = model(X)
            losses += loss_fn(pred, y).item()
            no_correct += (pred.argmax(1)== y).sum().item()
     
    return losses, no_correct
    

In [7]:
# LOAD AND PREPARE DATA:

import pandas as pd
from torch.utils.data import DataLoader
import numpy as np
from sklearn.model_selection import train_test_split

"""
# load data from winequality-white.csv:

from pathlib import Path
data_path = Path("./winequality-white.csv")
column_names = ["fixed acidity", "volatile acidity", "citric acid", "residual sugar", "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density", 
"pH", "sulphates", "alcohol", "quality"]
column_names_to_normalize = column_names[:-1]
wine_df = pd.read_csv(data_path, header=0, names=column_names, sep=";")

test_size=0.2
train_df, test_df = train_test_split(wine_df, test_size=test_size)
train_df = scale_dataframe(train_df, column_names_to_scale=column_names_to_normalize, exempt_last_column=False)
test_df = scale_dataframe(test_df, column_names_to_scale=column_names_to_normalize, exempt_last_column=False)
train_ds = WineDataSet(data_df=train_df)
test_ds = WineDataSet(data_df=test_df)

batch_size=64
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True)
"""

# load data from the separate csv files - for stack- and blender training
train_df = pd.read_csv("./train.csv")
print(f"train_df.shape : {train_df.shape}")
test_df = pd.read_csv("./test.csv")
print(f"test_df.shape : {test_df.shape}")

# PREPARE DATA:
train_df = scale_dataframe(train_df, column_names_to_scale=None, exempt_last_column=True)
test_df = scale_dataframe(test_df, column_names_to_scale=None, exempt_last_column=True)

train_ds = WineDataSet(data_df=train_df)
test_ds = WineDataSet(data_df=test_df)

BATCH_SIZE=64
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=True)


train_df.shape : (3917, 12)
test_df.shape : (979, 12)
scaler exempting last column
scaler exempting last column


In [8]:
# sanity check after scaling:

#train_df.head()
test_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-1.683692,2.629722,-1.430366,-0.72068,-0.44985,-0.11745,-0.266898,-1.851571,0.102075,0.343974,2.145643,8.0
1,0.33968,-0.244747,0.397739,-0.846029,-0.44985,-0.336727,-0.35746,-1.068214,0.620864,-0.516181,1.340366,8.0
2,0.577724,0.063232,0.079808,-0.950486,0.366131,-0.281908,0.18591,-0.725495,-0.027622,-0.172119,0.13245,5.0
3,-0.374451,0.47387,0.000325,-0.093937,-0.087192,-0.501185,-1.104593,-0.704513,0.620864,0.946083,1.179311,7.0
4,-0.612495,0.268551,4.530844,0.428349,-0.540515,0.759658,1.340571,0.029885,1.334198,-8.8e-05,0.937727,6.0


In [12]:
# Train the model:
import os
from torch.utils.tensorboard import SummaryWriter

# writer for tensorboard:
writer = SummaryWriter("./tensorbd_logs")

# create new model instance:
#net_model = WineNetwork().to(device) 
net_model = WineNetwork()

# loss function:
# cross-entropy:
loss_fn = nn.CrossEntropyLoss()

# optimizer:
# adam:
#OPTIMIZER_NAME = "ADAM"
#LEARNING_RATE = 1e-3
#OPTIMIZER = torch.optim.Adam(net_model.parameters(), lr=LEARNING_RATE)
# sgd:
OPTIMIZER_NAME = "SGD"
LEARNING_RATE = 1e-3
OPTIMIZER = torch.optim.SGD(net_model.parameters(), lr= LEARNING_RATE, momentum=0.9)

# training parameters:
EPOCHS = 400
WRITE_LOG_AFTER_EPOCHS = 50

best_model_name = ""
max_correct = float("-inf")

for ep in range(1, EPOCHS+1):        
       
        # put model in train mode:
        net_model.train()
        (train_loss, train_no_correct) = train_loop(train_dl, net_model, loss_fn, OPTIMIZER)
              
        # switch model to to evaluation mode:
        net_model.eval()
        (test_loss, test_no_correct) = test_loop(test_dl, net_model, loss_fn)

        if(test_no_correct > max_correct):
            max_correct = test_no_correct
            if(best_model_name):
                os.remove(best_model_name)
            best_model_name = "./net_model_" + str(test_no_correct) + "_" + str(LEARNING_RATE) + "_" + str(ep) + "_" + str(BATCH_SIZE) + "_" + OPTIMIZER_NAME + ".pt"
            torch.save(model.state_dict(), best_model_name)

        writer.add_scalar("Loss/test", test_loss/ len(test_ds), ep)
        writer.add_scalar("Accuracy/test", test_no_correct/ len(test_ds), ep)
        writer.add_scalar("Loss/train", train_loss/ len(train_ds), global_step=ep)
        writer.add_scalar("Accuracy/train", train_no_correct/ len(train_ds), global_step=ep)
       
        if ep % WRITE_LOG_AFTER_EPOCHS == 0:
            print(f"\n----- Epoch: {ep} -----")
            print(f"Epoch loss: {test_loss/ len(test_ds)}")
            print(f"Epoch accuracy: {test_no_correct/ len(test_ds)}")
            


----- Epoch: 50 -----
Epoch loss: 0.018122082163291517
Epoch accuracy: 0.5587334014300307

----- Epoch: 100 -----
Epoch loss: 0.01689246232955278
Epoch accuracy: 0.5903983656792645

----- Epoch: 150 -----
Epoch loss: 0.016465491610479305
Epoch accuracy: 0.6108273748723186

----- Epoch: 200 -----
Epoch loss: 0.01655414222575062
Epoch accuracy: 0.6404494382022472

----- Epoch: 250 -----
Epoch loss: 0.016574289995998113
Epoch accuracy: 0.6435137895812053

----- Epoch: 300 -----
Epoch loss: 0.01676062507454051
Epoch accuracy: 0.6618998978549541

----- Epoch: 350 -----
Epoch loss: 0.01747330283485467
Epoch accuracy: 0.6292134831460674

----- Epoch: 400 -----
Epoch loss: 0.017354104199862455
Epoch accuracy: 0.6414708886618999


### Try and Improve - best Model ###

model: model_640_0.001_369_64_SGD.pt <br>
<br>
Training parameters:  <br>
optimizer_name = "SGD" <br>
learning_rate = 1e-3 <br>
optimizer = torch.optim.SGD(model.parameters(), lr= learning_rate, momentum=0.9) <br>
epochs = 400 <br>
test_size=0.2 <br>
batch_size=64 <br>


net_model_660_0.001_299_64_SGD.pt <br>

OPTIMIZER_NAME = "SGD" <br>
LEARNING_RATE = 1e-3 <br>
OPTIMIZER = torch.optim.SGD(net_model.parameters(), lr= LEARNING_RATE, momentum=0.9) <br>

training parameters: <br>
EPOCHS = 400 <br>
BATCH_SIZE=64 <br>
test_size=0.2