### Neural Network

##### Import Cleaned Dataset

In [1]:
#Import cleaned dataset using load_sets function defined in src.data.sets
import sys
sys.path.insert(1, '..')
from src.data.sets import load_sets
X_train, y_train, X_val, y_val, X_test, y_test = load_sets(path='../data/processed/beer_type')

##### Define Architecture

In [2]:
#Import `torch`, `torch.nn` as `nn` and `torch.nn.functional` as `F`# Solution:

import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
#Instantiate `PytorchBinary` with the correct number of input feature and save it into a variable called `model`

# Solution:
import sys
sys.path.insert(1, '..')

from src.models.pytorch import PytorchMultiClass

model = PytorchMultiClass(X_train.shape[1])

In [4]:
#Import `get_device()` from `src.models.pytorch` and set `model` to use the device available

import sys
sys.path.insert(1,'..')

from src.models.pytorch import get_device

device = get_device()
model.to(device)

PytorchMultiClass(
  (layer_1): Linear(in_features=9, out_features=32, bias=True)
  (layer_out): Linear(in_features=32, out_features=104, bias=True)
)

In [5]:
#Print the architecture of `model`

print(model)

PytorchMultiClass(
  (layer_1): Linear(in_features=9, out_features=32, bias=True)
  (layer_out): Linear(in_features=32, out_features=104, bias=True)
)


##### Train Model

In [5]:
#Instantiate a `nn.BCELoss()` and save it into a variable called `criterion` 

criterion = nn.CrossEntropyLoss()

In [6]:
#Instantiate a `torch.optim.Adam()` optimizer with the model's parameters and 0.001 as learning rate and save it into a variable called `optimizer`

optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [7]:
#Instantiate a torch.optim.lr_scheduler.StepLR() scheduler that will decrease the learning rate by a coefficient of 0.9 for each epoch

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

In [8]:
#Create 2 variables called N_EPOCHS and BATCH_SIZE that will take respectively 5 and 32 as values

N_EPOCHS = 5
BATCH_SIZE = 32


In [9]:
#Import Dataset and DataLoader from torch.utils.data

from torch.utils.data import Dataset, DataLoader

#Import this class from `src/models/pytorch` and convert all sets to PytorchDataset
from src.models.pytorch import PytorchDataset

train_dataset = PytorchDataset(X=X_train, y=y_train)
val_dataset = PytorchDataset(X=X_val, y=y_val)
test_dataset = PytorchDataset(X=X_test, y=y_test)


In [10]:
#Create a for loop that will iterate through the specified number of epochs and will train the model with the training set and assess the performance on the validation set and print their scores

from src.models.pytorch import train_classification, test_classification

for epoch in range(N_EPOCHS):
    train_loss, train_acc = train_classification(train_dataset, model=model, criterion=criterion, optimizer=optimizer, batch_size=BATCH_SIZE, device=device)
    valid_loss, valid_acc = test_classification(val_dataset, model=model, criterion=criterion, batch_size=BATCH_SIZE, device=device)

    print(f'Epoch: {epoch}')
    print(f'\t(train)\t|\tLoss: {train_loss:.4f}\t|\tAcc: {train_acc * 100:.1f}%')
    print(f'\t(valid)\t|\tLoss: {valid_loss:.4f}\t|\tAcc: {valid_acc * 100:.1f}%')

Epoch: 0
	(train)	|	Loss: 0.1306	|	Acc: 7.4%
	(valid)	|	Loss: 0.1309	|	Acc: 7.4%
Epoch: 1
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1307	|	Acc: 7.4%
Epoch: 2
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1311	|	Acc: 7.4%
Epoch: 3
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1310	|	Acc: 7.4%
Epoch: 4
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1307	|	Acc: 7.4%
Epoch: 5
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1307	|	Acc: 7.4%
Epoch: 6
	(train)	|	Loss: 0.1309	|	Acc: 7.2%
	(valid)	|	Loss: 0.1307	|	Acc: 7.4%
Epoch: 7
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1311	|	Acc: 7.4%
Epoch: 8
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1311	|	Acc: 7.4%
Epoch: 9
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1310	|	Acc: 7.4%
Epoch: 10
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1309	|	Acc: 5.4%
Epoch: 11
	(train)	|	Loss: 0.1309	|	Acc: 7.1%
	(valid)	|	Loss: 0.1307	|	Acc: 7.4%
Epoch: 12
	(train)	|	Loss:

Our baseline model and NN perform similarly and poorly. This is likely due to the large number of classes that need to be predicted (104 for the target variable). **Note: If permitted by Anthony/William, configure to top 20-30 classes by frequency and run the model again to see if performance has improved**

In [11]:
#Save the model into the models folder

torch.save(model, "../models/neural_network")

In [12]:
#Assess the model performance on the testing set and print its scores

test_loss, test_acc = test_classification(test_dataset, model=model, criterion=criterion, batch_size=BATCH_SIZE, device=device)
print(f'\tLoss: {test_loss:.4f}\t|\tAccuracy: {test_acc:.1f}')

	Loss: 0.1312	|	Accuracy: 0.1
