In [31]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch import optim
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import tqdm
from copy import deepcopy

In [32]:
data = load_iris()
X = data['data']
y = [[t] for t in data['target']]
ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False).fit(y)
y = ohe.transform(y)
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

In [33]:
xtr, xts, ytr, yts = train_test_split(X, y, train_size=0.7, shuffle=True)

In [34]:
class ClassicDense(nn.Module):
    def __init__(self, layer_sizes):
        super().__init__()
        input, output = layer_sizes[0], layer_sizes[-1]
        self.hidden = nn.ModuleList([nn.Linear(layer_sizes[i], layer_sizes[i+1]) for i in range(len(layer_sizes)-2)])
        self.act = nn.ModuleList([nn.ReLU() for i in range(len(layer_sizes)-2)])
        self.output = nn.Linear(layer_sizes[-2], layer_sizes[-1])

    def forward(self, x):
        for hidden, act in zip(self.hidden, self.act):
            x = act(hidden(x))
        x = self.output(x)
        return x

In [35]:
def get_batched(data, batch_size):
    if batch_size is None:
        return data
    dc = deepcopy(data)
    while dc.numel():
        batch, dc = dc[:batch_size], dc[batch_size:]
        yield batch

def format_stats(loss, acc):
    return f"Loss={loss:.2f}, Accuracy={acc*100:.1f}%"
        
def train(model, xtr, ytr, batch_size = None):
    cum_loss, cum_acc = [], []
    for xtrb, ytrb in zip(get_batched(xtr, batch_size), get_batched(ytr, batch_size)):
        ypred = model(xtrb)
        loss = loss_fn(ypred, ytrb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = (torch.argmax(ypred, 1) == torch.argmax(ytrb, 1)).float().mean()
        cum_loss.append(float(loss))
        cum_acc.append(float(acc))
    return cum_loss, cum_acc

def test(model, xts, yts):
    ypred = model(xts)
    loss = loss_fn(ypred, yts)
    acc = (torch.argmax(ypred, 1) == torch.argmax(xts, 1)).float().mean()
    return float(loss), float(acc)


def train_epochs(model, xtr, ytr, xts, yts, n_epochs = 100, batch_size = 5):
    best_acc = - np.inf
    best_weights = None
    train_loss_hist = []
    train_acc_hist = []
    
    for epoch in range(n_epochs):
        epoch_loss = []
        epoch_acc = []
        model.train()
        cum_loss, cum_acc = train(model, xtr, ytr, batch_size)
        model.eval()
        mloss, macc = np.mean(cum_loss), np.mean(cum_acc)
        train_loss_hist.append(mloss)
        train_acc_hist.append(macc)
        if macc > best_acc:
            best_acc = macc
            best_weights = deepcopy(model.state_dict())
        # print(f"Epoch {epoch} validation: Cross-entropy={mloss:.2f}, Accuracy={macc*100:.1f}%")
    model.load_state_dict(best_weights)

In [40]:
model = ClassicDense(layer_sizes=[4,8,3])
print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
pre_loss, pre_acc = test(model, xts, yts)
train_epochs(model, xtr, ytr, xts, yts)
post_loss, post_acc = test(model, xts, yts)
print(format_stats(pre_loss, pre_acc), format_stats(post_loss, post_acc))

model = ClassicDense(layer_sizes=[4,8,8,3])
print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
pre_loss, pre_acc = test(model, xts, yts)
train_epochs(model, xtr, ytr, xts, yts)
post_loss, post_acc = test(model, xts, yts)
print(format_stats(pre_loss, pre_acc), format_stats(post_loss, post_acc))

model = ClassicDense(layer_sizes=[4,8,16,8,3])
print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
pre_loss, pre_acc = test(model, xts, yts)
train_epochs(model, xtr, ytr, xts, yts)
post_loss, post_acc = test(model, xts, yts)
print(format_stats(pre_loss, pre_acc), format_stats(post_loss, post_acc))

ClassicDense(
  (hidden): ModuleList(
    (0): Linear(in_features=4, out_features=8, bias=True)
  )
  (act): ModuleList(
    (0): ReLU()
  )
  (output): Linear(in_features=8, out_features=3, bias=True)
)
Loss=1.10, Accuracy=0.0% Loss=0.34, Accuracy=31.1%
ClassicDense(
  (hidden): ModuleList(
    (0): Linear(in_features=4, out_features=8, bias=True)
    (1): Linear(in_features=8, out_features=8, bias=True)
  )
  (act): ModuleList(
    (0-1): 2 x ReLU()
  )
  (output): Linear(in_features=8, out_features=3, bias=True)
)
Loss=1.04, Accuracy=0.0% Loss=0.43, Accuracy=31.1%
ClassicDense(
  (hidden): ModuleList(
    (0): Linear(in_features=4, out_features=8, bias=True)
    (1): Linear(in_features=8, out_features=16, bias=True)
    (2): Linear(in_features=16, out_features=8, bias=True)
  )
  (act): ModuleList(
    (0-2): 3 x ReLU()
  )
  (output): Linear(in_features=8, out_features=3, bias=True)
)
Loss=1.07, Accuracy=0.0% Loss=0.15, Accuracy=31.1%


0