In [None]:
import sys
# Add the parent directory to the module search path. The entry is relative,
# so it is resolved against the kernel's current working directory rather than
# the notebook file's location.
# NOTE(review): presumably this is what lets the `mframework` imports in the
# following cells resolve — confirm against the repository layout.
sys.path.append("../")

In [None]:
import pandas as pd
import numpy as np
from PIL import Image
import io

from mframework.data import ArrayDataset

def frame_to_arrays(frame):
    """Turn a parquet frame of encoded images into model-ready arrays.

    Every entry of the ``image`` column must be a mapping whose ``"bytes"``
    value holds an encoded image (PNG, per this notebook's data). Each image
    is decoded, converted to 8-bit grayscale, scaled by 1/255 and flattened
    into a 1-D ``float32`` vector.

    Args:
        frame: DataFrame with an ``image`` column as described above and a
            ``label`` column convertible to ``int64``.

    Returns:
        Tuple ``(X, y)`` where ``X`` stacks one flattened image per row and
        ``y`` is the ``int64`` label array.

    Raises:
        ValueError: If the ``image`` column is empty.
    """
    vectors = []
    for raw in frame["image"]:
        # Release the source image as soon as the grayscale copy exists.
        with Image.open(io.BytesIO(raw["bytes"])) as img:
            gray = img.convert("L")
        # Normalise pixel values, then flatten the image into a vector.
        arr = np.array(gray, dtype=np.float32) / 255.0
        vectors.append(arr.reshape(-1))

    if not vectors:
        # np.stack would also raise ValueError here, but with an opaque message.
        raise ValueError("no images found in the 'image' column")

    return np.stack(vectors), frame["label"].to_numpy(dtype=np.int64)


# Load the training split and decode it into arrays.
df = pd.read_parquet("data/mnist_train.parquet")
X, y = frame_to_arrays(df)

# Wrap in ArrayDataset
train_ds = ArrayDataset(X, y)

In [None]:
# Read the test split from parquet.
df = pd.read_parquet("data/mnist_test.parquet")

# Decode each record's image bytes, convert to grayscale, scale by 1/255 and
# flatten into a 1-D float32 vector.
images = [
    (
        np.array(
            Image.open(io.BytesIO(record["bytes"])).convert("L"),
            dtype=np.float32,
        )
        / 255.0
    ).reshape(-1)
    for record in df["image"]
]

# One row per image, plus the matching int64 labels.
X = np.stack(images)
y = df["label"].to_numpy(dtype=np.int64)

test_ds = ArrayDataset(X, y)

In [None]:
from mframework.autograd.tensor import Tensor
from mframework.optim.sgd import SGD
from mframework.nn import Linear, Sequential, CrossEntropyLoss, ReLU
from mframework.data import DataLoader, SequentialSampler, BatchSampler
from mframework.dtypes import DType
from tqdm import tqdm
import numpy as np

# Training hyperparameters.
lr = 0.01
epochs = 20

# Basic feedforward classifier: 784 -> 128 -> 64 -> 10 with ReLU between the
# linear layers.
# NOTE(review): the third Linear argument is presumably the bias flag, and 784
# presumably matches the flattened image length — confirm against mframework
# and the data.
layer_stack = (
    Linear(784, 128, True),
    ReLU(),
    Linear(128, 64, True),
    ReLU(),
    Linear(64, 10, True),
)
model = Sequential(*layer_stack)

# Loss and optimiser over all model parameters.
criterion = CrossEntropyLoss()
optim = SGD(model.parameters(), lr)

# Batches of 64 over the training set; drop_last=False keeps the final batch
# even when it is not full.
seq_sampler = SequentialSampler(train_ds)
batch_sampler = BatchSampler(seq_sampler, batch_size=64, drop_last=False)
train_dl = DataLoader(train_ds, batch_sampler)

for epoch in range(epochs):
    # Running sum of the per-batch loss values for this epoch.
    epoch_loss = 0.0
    for batch in tqdm(train_dl, desc=f"epoch {epoch+1}"):
        # DataLoader yields (X_list, y_list) because of collate implementation
        X_list, y_list = batch

        # Stack / convert to numpy arrays
        X_np = np.stack(X_list).astype(np.float32)
        y_np = np.array(y_list, dtype=np.intp).reshape(-1)

        # Wrap inputs/targets into Tensors
        X_t = Tensor(X_np)
        y_t = Tensor(y_np, dtype=DType.INT64)

        # Forward pass
        logits = model(X_t)
        loss = criterion(logits, y_t)

        # Read the scalar loss through `.data`, the same way the evaluation
        # cell does, so the sum is always a plain Python float.
        epoch_loss += float(loss.data)

        # Backward pass, parameter update, then clear gradients for the next batch
        loss.backward()
        optim.step()
        optim.zero_grad()

    # Unweighted mean of the per-batch losses: a final batch that is not full
    # (drop_last=False) counts as much as a full one.
    avg = epoch_loss / len(train_dl)
    print(f"Epoch {epoch+1}/{epochs} avg loss: {avg:.4f}")


In [None]:
# Loader over the test split: batches of 64, final partial batch kept.
seq_sampler_test = SequentialSampler(test_ds)
batch_sampler_test = BatchSampler(seq_sampler_test, batch_size=64, drop_last=False)
test_dl = DataLoader(test_ds, batch_sampler_test)

# Running totals accumulated over all test batches.
correct, total, test_loss = 0, 0, 0.0

for X_list, y_list in test_dl:
    # Convert the collated batch into arrays, then into Tensors.
    X_np = np.stack(X_list).astype(np.float32)
    y_np = np.array(y_list, dtype=np.intp).reshape(-1)
    X_t = Tensor(X_np)
    y_t = Tensor(y_np, dtype=DType.INT64)

    # Forward pass and batch loss.
    logits = model(X_t)
    batch_loss = criterion(logits, y_t)
    test_loss += float(batch_loss.data)

    # Predicted class is the index of the largest logit in each row.
    preds = np.argmax(logits.data, axis=1)
    correct += (preds == y_np).sum()
    total += y_np.shape[0]

# Mean of per-batch losses and overall fraction of correct predictions.
avg_loss = test_loss / len(test_dl)
accuracy = correct / total
print(f"Test loss: {avg_loss:.4f}, Test accuracy: {accuracy*100:.2f}%")
