In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from allison.preprocessing import StandardScaler,ColumnTransformer
from allison.datasets import Dataset,DataLoader,train_test_split
from allison.nn import Tensor, NeuralNetwork, CrossEntropyLoss,Linear,Relu,SGDMomentum,no_grad

In [4]:
df = pd.read_csv('../data/Iris.csv')
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
labels2id = {label: idx for idx, label in enumerate(df.Species.unique())}
id2labels = {idx: label for idx, label in labels2id.items()}
df.Species = df.Species.map(labels2id)
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,2
146,147,6.3,2.5,5.0,1.9,2
147,148,6.5,3.0,5.2,2.0,2
148,149,6.2,3.4,5.4,2.3,2


In [6]:
df_train, df_test = train_test_split(df, test_size=0.2)

In [7]:
numeric_features = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
labels = ['Species']


preprocessor = ColumnTransformer([
    ('numeric', StandardScaler(), numeric_features)
])

X_train = preprocessor.fit_transform(df_train)
y_train = df_train[labels].values

X_test = preprocessor.transform(df_test)
y_test = df_test[labels].values


In [8]:
class MyDataset(Dataset):

    def __getitem__(self, idx):
        return Tensor(self.X[idx]), Tensor(self.y[idx])

In [9]:
train_dataset = MyDataset(X_train, y_train)
test_dataset = MyDataset(X_test, y_test)

In [10]:
batch_size = 16

In [11]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [12]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

for batch in train_dataloader:
    X, y = batch
    print(X)
    print(y)
    break

Tensor(
[[ 0.35853593 -0.62179708  0.57995336  0.03656201]
 [-0.1248833   1.64615581 -1.10786563 -1.12481962]
 [ 2.29221285 -1.07538766  1.81768728  1.45602846]
 [ 0.23768112 -0.3950018   0.46743209  0.42368923]
 [ 1.20451958 -0.62179708  0.63621399  0.29464682]
 [-0.48744772  0.73897465 -1.2203869  -0.99577722]
 [-0.24573811 -0.3950018  -0.0389136   0.16560442]
 [ 1.32537439  0.05858878  0.80499589  1.45602846]
 [ 0.47939074 -1.98256882  0.46743209  0.42368923]
 [-0.00402849 -0.84859237  0.12986829  0.03656201]
 [ 0.84195516  0.28538407  0.80499589  1.06890125]
 [-1.21257657 -0.16820651 -1.27664753 -1.38290443]
 [-0.36659292  0.96576994 -1.33290816 -1.25386203]
 [-1.09172176  0.05858878 -1.2203869  -1.38290443]
 [-0.85001215  1.64615581 -1.2203869  -1.12481962]
 [ 0.60024555  0.51217936  0.57995336  0.55273163]], shape=(16, 4))
Tensor(
[[1]
 [0]
 [2]
 [1]
 [1]
 [0]
 [1]
 [2]
 [1]
 [1]
 [2]
 [0]
 [0]
 [0]
 [0]
 [1]], shape=(16, 1))


In [13]:
class Net(NeuralNetwork):

    def __init__(self):
        super().__init__()
        self.fc1 = Linear(4, 8)
        self.fc2 = Linear(8, 4)
        self.fc3 = Linear(4, 3)
        self.relu = Relu()

    def forward(self, x: Tensor) -> Tensor:
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x


net = Net()
criterion = CrossEntropyLoss()
optimizer = SGDMomentum(net.parameters(), learning_rate=1e-3, momentum=0.9)

In [14]:

epochs = 50

for epoch in range(epochs):
    train_loss = 0

    for batch in train_dataloader:
        X, y = batch
        y_pred = net(X)
        loss = criterion(y, y_pred)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.data
    

    if epoch % 10 == 0:
        train_loss /= len(train_dataloader)

        with no_grad():
            test_acc = 0
            for batch in test_dataloader:
                X, y = batch
                y_pred = net(X)
                test_acc+=np.mean(y_pred.data.argmax(axis=1, keepdims=True) == y.data)
            test_acc /= len(test_dataloader)
            print(f"Epoch {epoch:5d} | Train Loss: {train_loss:.4f} | Test Acc: {100 * test_acc:.2f}%")

Epoch     0 | Train Loss: 1.3055 | Test Acc: 47.32%
Epoch    10 | Train Loss: 0.6230 | Test Acc: 83.93%
Epoch    20 | Train Loss: 0.4904 | Test Acc: 87.05%
Epoch    30 | Train Loss: 0.4070 | Test Acc: 93.30%
Epoch    40 | Train Loss: 0.3499 | Test Acc: 96.43%
