In [1]:
from my_neural_network import nn, optim, data, training
from keras.datasets import fashion_mnist, mnist, cifar10
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [2]:
#get data
print("Loading dataset...")
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Reshape and normalize
X_train = X_train.reshape(60_000, 28*28).astype('float32') / 255.0
X_test = X_test.reshape(10_000, 28*28).astype('float32') / 255.0

# Standardization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode labels
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# PCA for dimensionality reduction
print("Applying PCA...")
pca = PCA(n_components=0.99)  # Keep 99% of variance
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
comps = pca.n_components_

print(f"Original features: 784, Reduced features: {X_train.shape[1]}")
print(f"Explained variance ratio: {pca.explained_variance_ratio_.sum():.4f}")

Loading dataset...
Applying PCA...
Original features: 784, Reduced features: 527
Explained variance ratio: 0.9901


In [3]:

# Create validation split
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42, stratify=y_train.argmax(axis=1)
)

print(f"Train samples: {X_train.shape[0]}")
print(f"Validation samples: {X_val.shape[0]}")
print(f"Test samples: {X_test.shape[0]}")

# Create data loaders
train_loader = data.DataLoader(
    X_train, y_train, 
    batch_size=128, 
    shuffle=True, 
    seed=42
)

val_loader = data.DataLoader(
    X_val, y_val, 
    batch_size=128, 
    shuffle=False
)


test_loader = data.DataLoader(
    X_test, y_test, 
    batch_size=128, 
    shuffle=False
)



Train samples: 54000
Validation samples: 6000
Test samples: 10000


In [4]:

# Build model with PyTorch-like syntax
model = [
    nn.Linear(comps, 512, init_method='he_normal'),
    nn.BatchNorm1d(512),
    nn.LeakyReLU(),
    nn.Linear(512, 256, init_method="he_normal"), 
    nn.BatchNorm1d(256),
    nn.LeakyReLU(), 
    nn.Linear(256, 10), 
    nn.Softmax()
]


# Create optimizer and loss
optimizer = optim.AdamW(learning_rate=0.001, weight_decay=0.01)
lr_scheduler = optim.LRScheduler(optimizer, mode = "cosine", T_max = 30, eta_min = 1e-5)
loss_fn = nn.SoftmaxCrossEntropyLoss()

In [5]:

# Create trainer
trainer = training.Trainer(model, loss_fn, optimizer)
history = trainer.fit(train_loader, val_loader, epochs=30, lr_scheduler=lr_scheduler)


Starting training for 30 epochs...
Train batches: 422
Validation batches: 47
Batch 100/422 - Loss: 1.571787 - Acc: 0.9062
Batch 200/422 - Loss: 1.607434 - Acc: 0.8594
Batch 300/422 - Loss: 1.652285 - Acc: 0.8203
Batch 400/422 - Loss: 1.559498 - Acc: 0.8906
Epoch 1 (16.35s) - Loss: 1.632551 - Accuracy: 0.8382 - Val Loss: 1.5890 - Val Accuracy: 0.8745 - LR: 0.001000
Batch 100/422 - Loss: 1.542586 - Acc: 0.9297
Batch 200/422 - Loss: 1.607609 - Acc: 0.8594
Batch 300/422 - Loss: 1.638720 - Acc: 0.8125
Batch 400/422 - Loss: 1.592380 - Acc: 0.8750
Epoch 2 (16.27s) - Loss: 1.580405 - Accuracy: 0.8842 - Val Loss: 1.5787 - Val Accuracy: 0.8857 - LR: 0.001000
Batch 100/422 - Loss: 1.542629 - Acc: 0.9297
Batch 200/422 - Loss: 1.532891 - Acc: 0.9297
Batch 300/422 - Loss: 1.530573 - Acc: 0.9297
Batch 400/422 - Loss: 1.601599 - Acc: 0.8594
Epoch 3 (16.66s) - Loss: 1.564707 - Accuracy: 0.8995 - Val Loss: 1.5729 - Val Accuracy: 0.8882 - LR: 0.000997
Batch 100/422 - Loss: 1.548985 - Acc: 0.9141
Batch 20

In [6]:
# Final evaluation
print(f"\nFinal Evaluation")
test_results = trainer.evaluate(test_loader, verbose=True)


Final Evaluation
Evaluation - Loss: 1.568466 - Accuracy: 0.8930
