In [None]:
#importing libraries
import torch
import pandas as pd
import torch.nn as nn
import numpy as np
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [None]:
# Assuming that the data is in a CSV file
def Data_set(filename, input_features, output_features):

    input_data = pd.read_csv(filename, usecols= input_features).astype(np.float32)

    output_data = pd.read_csv(filename, usecols=output_features).astype(np.float32)

    # Convert data to tensors
    input_tensor = torch.tensor(input_data.values)
    output_tensor = torch.tensor(output_data.values)

    return input_tensor, output_tensor


In [None]:
def Data_set_split(input_tensor, output_tensor, test_size=0.2):
    #Convert to numpy for compatibility with train_test_split
    input_numpy = input_tensor.numpy()
    output_numpy = output_tensor.numpy()

    # Split the data (80% training, 20% testing)
    X_train, X_test, y_train, y_test = train_test_split(input_numpy, output_numpy, test_size=test_size, random_state=42)

    # Convert back to tensors
    input_train = torch.tensor(X_train, requires_grad=True)
    output_train = torch.tensor(y_train, requires_grad=True)
    input_test = torch.tensor(X_test, requires_grad=True)
    output_test = torch.tensor(y_test, requires_grad=True)

    return input_train,output_train,input_test,output_test



In [None]:
# Define the neural network
class NeuralNet(nn.Module):
    def __init__(self, architecture, activation_function):
        super(NeuralNet, self).__init__()
        self.layers = nn.ModuleList()
        self.activation_function = activation_function  # Store the activation function as an attribute

        for i in range(len(architecture)-1):
            self.layers.append(nn.Linear(architecture[i], architecture[i+1]))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # Apply activation to all but last layer
            if i < len(self.layers) - 1:
                x = self.activation_function(x)
        return x

In [None]:
def Create_model(architecture,activation_function):
    # Instantiate the network
    return NeuralNet(architecture,activation_function)

In [15]:
def Train_model(train_input, train_output, optimizer_type, model, epochs, learning_rate):

    # Define mean squared error loss
    criterion = nn.MSELoss()

    # Define optimizer
    optimizer = optimizer_type(model.parameters(), learning_rate)

    # Training loop
    for epoch in range(epochs):  # epochs
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        predicted_output = model(input_training)

        # Calculate loss
        loss = criterion(predicted_output, output_training)

        # Backpropagation
        loss.backward()

        # Update weights
        optimizer.step()

        if epoch == epochs:
            print(f"Epoch {epoch}/{epochs}, Training Loss: {loss.item()}")

In [16]:
def Test_loss(model,input_testing,output_testing, criterion):
    test_predicted = model(input_testing)
    test_loss = criterion(test_predicted, output_testing)
    return test_loss.item()

In [17]:
def Prediction(model, input_testing):
    with torch.no_grad():
        # We don't need gradients for prediction
        predicted = model(input_testing)
        # Convert tensor to float
        return predicted.item() if predicted.numel() == 1 else predicted.numpy()

In [18]:
# Setting the inputs/outputs and splitting the data

input_features = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar','chlorides',
                  'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH','sulphates', 'alcohol']

output_features = ['quality']

torch.manual_seed(42)  # Set seed (used so that we don't have each time we run the code a different test error)

input_tensor,output_tensor = Data_set('WineQT.csv', input_features, output_features)
input_training, output_training, input_testing, output_testing = Data_set_split(input_tensor, output_tensor)

###Case 1:
No hidden layers (11 inputs are directly connected to the output). The output loss and error are NaN, which indicates that they have diverged to very large values. This issue likely arises from the network's inability to learn effectively due to the absence of hidden layers, resulting in the accumulation of errors.

In [23]:
# Case 1
architecture = [11,1]

model = Create_model(architecture,nn.ReLU())
Train_model(input_training, output_training, optim.SGD, model, 1000, 0.001)

testLoss = Test_loss(model,input_testing,output_testing,nn.MSELoss())
print("Test loss:", testLoss)


x = torch.tensor([8.3, 0.655,0.12,2.3,0.083,15,113,0.9966,3.17,0.66,9.8])
pred = Prediction(model, x)
print(pred)

Test loss: nan
nan


### Case 2:
The additional hidden layer with 8 nodes helps the model capture more complex relationships in the data, reducing the error from very large (like in case 1, the output is NaN), to a reasonable test error of 0.40830376744270325. <br>
The example provided has an expected output of 5, but the prediction found was 4.885446071624756

In [26]:
# Case 2
torch.manual_seed(42)  # Set seed (used so that we don't have each time we run the code a different test error)
architecture = [11,8,1]

model = Create_model(architecture,nn.ReLU())
Train_model(input_training, output_training, optim.SGD, model, 1000, 0.001)

testLoss = Test_loss(model,input_testing,output_testing,nn.MSELoss())
print("Test loss:", testLoss)


x = torch.tensor([8.3, 0.655,0.12,2.3,0.083,15,113,0.9966,3.17,0.66,9.8])
pred = Prediction(model, x)
print(pred)

Test loss: 0.4199908971786499
5.317503929138184


### Case 3:
Adding an additional hidden layer with 5 nodes helps the model capture more complex relationships in the data. <br>
But we can see that the test loss increased. This is due to the increased complexity of the neural network with not enough epochs to reduce this error. <br>
This is why increasing the complexity but keeping the same number of epochs increased the test loss to 0.45804181694984436

In [30]:
# Case 3

architecture = [11,8,5,1]

model = Create_model(architecture,nn.ReLU())
Train_model(input_training, output_training, optim.SGD, model, 1000, 0.001)

testLoss = Test_loss(model,input_testing,output_testing,nn.MSELoss())
print("Test loss:", testLoss)


x = torch.tensor([8.3, 0.655,0.12,2.3,0.083,15,113,0.9966,3.17,0.66,9.8])
pred = Prediction(model, x)
print(pred)

Test loss: 0.45804181694984436
5.518387794494629


## Case 4
In this case, we increased the number of epochs from 1000 to 5000 to account for the additional complexity of the neural network. <br>
The test loss is reduced to 0.38506579399108887

In [31]:
# Case 4

architecture = [11,8,5,1]

model = Create_model(architecture,nn.ReLU())
Train_model(input_training, output_training, optim.SGD, model, 5000, 0.001)

testLoss = Test_loss(model,input_testing,output_testing,nn.MSELoss())
print("Test loss:", testLoss)


x = torch.tensor([8.3, 0.655,0.12,2.3,0.083,15,113,0.9966,3.17,0.66,9.8])
pred = Prediction(model, x)
print(pred)

Test loss: 0.38506579399108887
5.1290411949157715


## Case 5
In this case, we changed the activation function to Tanh. <br>
The loss increased to 0.5583304762840271. <br>
This is mainly due to the fact that ReLU converges faster, arriving at a lower test loss faster than Tanh

In [34]:
# Case 5

architecture = [11,8,5,1]

model = Create_model(architecture,nn.Tanh())
Train_model(input_training, output_training, optim.SGD, model, 5000, 0.001)

testLoss = Test_loss(model,input_testing,output_testing,nn.MSELoss())
print("Test loss:", testLoss)


x = torch.tensor([8.3, 0.655,0.12,2.3,0.083,15,113,0.9966,3.17,0.66,9.8])
pred = Prediction(model, x)
print(pred)

Test loss: 0.5583304762840271
5.6222076416015625


## Case 6
Using Adam optimizer instead of Stochastic Gradient Descent to train the model reduced the test loss to 0.3514178395271301. <br>


In [36]:
# Case 6

architecture = [11,8,5,1]

model = Create_model(architecture,nn.Tanh())
Train_model(input_training, output_training, optim.Adam, model, 5000, 0.001)

testLoss = Test_loss(model,input_testing,output_testing,nn.MSELoss())
print("Test loss:", testLoss)


x = torch.tensor([8.3, 0.655,0.12,2.3,0.083,15,113,0.9966,3.17,0.66,9.8])
pred = Prediction(model, x)
print(pred)

Test loss: 0.3514178395271301
5.195265293121338


## Case 7
Using ASGD (Averaged Stochastic Gradient Descent) as optimizer instead of Adam and SGD increases the test loss to 0.5557608604431152

In [37]:
# Case 7

architecture = [11,8,5,1]

model = Create_model(architecture,nn.Tanh())
Train_model(input_training, output_training, optim.ASGD , model, 5000, 0.001)

testLoss = Test_loss(model,input_testing,output_testing,nn.MSELoss())
print("Test loss:", testLoss)


x = torch.tensor([8.3, 0.655,0.12,2.3,0.083,15,113,0.9966,3.17,0.66,9.8])
pred = Prediction(model, x)
print(pred)

Test loss: 0.5557608604431152
5.660046577453613
