In [2]:
import sys
import os
from sklearn.metrics import confusion_matrix
import numpy as np
import time
import json

sys.path.append(os.path.abspath(".."))
from layers.dense import Dense
from layers.conv2d import Conv2D
from layers.ReLU import ReLU
from layers.utils import AdamOptimizer, Sequential, compute_accuracy
from layers.softmaxcrossentropyloss import SoftmaxCrossEntropyLoss
from layers.maxpool2d import MaxPool2D
from layers.flatten import Flatten
from layers.dropout import Dropout
from data.data_loader import Data

In [3]:
# Result stores shared across cells; the training cell fills each one with
# an entry keyed by batch size.
all_train_losses, all_val_losses = {}, {}
all_train_accuracies, all_val_accuracies = {}, {}
training_times, confusion_matrices = {}, {}

In [4]:
# Mini-batch sizes to compare; the training cell runs one full experiment per entry.
batch_sizes = [
    32,
    64,
    128,
    256,
]

In [7]:
# Batch-size experiment: for every entry in `batch_sizes`, train a fresh CNN
# with early stopping, record per-epoch metrics, compute a confusion matrix on
# the validation set and dump everything to a JSON file.

# Dataset and output locations, named once so they are easy to retarget.
TRAIN_DIR = "D:/studia/SieciNeuronowe/dataset/train"
VAL_DIR = "D:/studia/SieciNeuronowe/dataset/test"
RESULTS_DIR = "../DOCS"

num_epochs = 50  # upper bound on epochs per batch size
patience = 5  # consecutive epochs without a better validation loss before stopping

# Number of recorded epochs per batch size. Created before the loop so it keeps
# one entry per run instead of being reset on every iteration.
epochs_done_all = {}

# Fail early (before any training) if the output directory cannot be created.
os.makedirs(RESULTS_DIR, exist_ok=True)

for batch_size in batch_sizes:
    # load data
    print(f"[{batch_size}] Loading data...")
    train_loader = Data(
        path=TRAIN_DIR,
        batch_size=batch_size,
        use_cupy=True,
        shuffle=True,
    )
    val_loader = Data(
        path=VAL_DIR,
        batch_size=batch_size,
        use_cupy=True,
        shuffle=False,
    )
    print("Data loaded successfully.")

    # MODEL: a new, untrained network for every batch size so runs are independent.
    model = Sequential(
        [
            Conv2D(
                input_channels=1, output_channels=8, kernel_size=2, stride=1, padding=1
            ),
            ReLU(),
            MaxPool2D(kernel_size=2, stride=2),
            Dropout(0.3),  # Dropout layer with rate 0.3
            Flatten(),
            # 8 channels x 14 x 14 after pooling (consistent with 28x28
            # single-channel inputs -- TODO confirm against the dataset).
            Dense(input_size=8 * 14 * 14, output_size=10),
        ]
    )
    loss_fn = SoftmaxCrossEntropyLoss()
    optimizer = AdamOptimizer(learning_rate=0.005)

    # Per-epoch history for this batch size.
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    # Early-stopping state, reset explicitly for every run so nothing leaks
    # over from the previous batch size (or from an earlier execution of the cell).
    best_val_loss = float("inf")
    patience_counter = 0
    epochs_done = 0

    start_time = time.time()
    for epoch in range(num_epochs):
        epoch_start_time = time.time()
        print(f"[Batch size: {batch_size}] Epoch {epoch + 1}/{num_epochs}")
        train_loss = 0.0
        train_acc = 0.0
        num_batches = 0

        for x_batch, y_batch in train_loader:
            logits = model.forward(x_batch)
            loss = loss_fn.forward(logits, y_batch)
            grad = loss_fn.backward()
            model.backward(grad)
            model.update(optimizer)

            train_loss += float(loss)
            train_acc += float(compute_accuracy(logits, y_batch))
            num_batches += 1

        if num_batches == 0:
            raise ValueError(
                f"Training loader for batch size {batch_size} yielded no batches."
            )
        # Mean over batches (each batch weighted equally).
        train_loss /= num_batches
        train_acc /= num_batches

        # Validation on the whole validation set in a single forward pass.
        # NOTE(review): the same model.forward is used as in training; if Dropout
        # has no separate inference mode, these metrics are computed with dropout
        # active -- confirm against layers.dropout.
        val_logits = model.forward(val_loader.X)
        # Convert once to Python floats so the early-stopping comparison and the
        # stored history do not hold on to array objects.
        val_loss = float(loss_fn.forward(val_logits, val_loader.y))
        val_acc = float(compute_accuracy(val_logits, val_loader.y))

        print(
            f"Loss (train): {train_loss:.4f}, Acc (train): {train_acc:.4f}, "
            f"Loss (val): {val_loss:.4f}, Acc (val): {val_acc:.4f}, "
            f"Epoch completed in {time.time() - epoch_start_time:.2f} seconds."
        )

        # Record the epoch BEFORE the early-stopping check: the epoch that
        # triggers the stop was still trained and timed, so it belongs in the
        # history and in `epochs_done`.
        epochs_done += 1
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        # Early stopping on validation loss. A NaN loss never compares as
        # smaller, so it counts as "no improvement".
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1}.")
                break

    end_time = time.time()
    training_time = end_time - start_time
    # Print the same duration that is stored in the results.
    print(
        f"[Batch size: {batch_size}] Training completed in {training_time:.2f} seconds."
    )

    epochs_done_all[batch_size] = epochs_done
    training_times[batch_size] = training_time
    all_train_losses[batch_size] = train_losses
    all_val_losses[batch_size] = val_losses
    all_train_accuracies[batch_size] = train_accuracies
    all_val_accuracies[batch_size] = val_accuracies

    # Confusion matrix of the final model on the validation set. `.get()` is
    # called on the loader labels and the logits before handing them to
    # NumPy / scikit-learn.
    true_labels = val_loader.y.get()
    val_logits = model.forward(val_loader.X)
    val_preds = np.argmax(val_logits.get(), axis=1)

    cm = confusion_matrix(true_labels, val_preds)
    confusion_matrices[batch_size] = cm
    results = {
        "train_loss": train_losses,
        "val_loss": val_losses,
        "train_accuracy": train_accuracies,
        "val_accuracy": val_accuracies,
        "training_time_seconds": training_time,
        "confusion_matrix": cm.tolist(),  # Convert to list for JSON serialization
        "epochs_done": epochs_done,
    }

    # Save results to JSON file
    with open(f"{RESULTS_DIR}/[{batch_size}]custom_cnn_results.json", "w") as f:
        json.dump(results, f)

    print(f"Results saved: [{batch_size}]custom_cnn_results.json")

[32] Loading data...
Loading cached dataset from D:/studia/SieciNeuronowe/dataset/train\cached_data_cupy.npz
Loading cached dataset from D:/studia/SieciNeuronowe/dataset/test\cached_data_cupy.npz
Data loaded successfully.
[Batch size: 32] Epoch 1/50
Loss (train): 0.3589, Acc (train): 0.8920, Loss (val): 0.2573, Acc (val): 0.9223, Epoch completed in 13.62 seconds.
[Batch size: 32] Epoch 2/50
Loss (train): 0.2157, Acc (train): 0.9347, Loss (val): 0.1823, Acc (val): 0.9451, Epoch completed in 11.96 seconds.
[Batch size: 32] Epoch 3/50
Loss (train): 0.1745, Acc (train): 0.9473, Loss (val): 0.1539, Acc (val): 0.9518, Epoch completed in 11.90 seconds.
[Batch size: 32] Epoch 4/50
Loss (train): 0.1513, Acc (train): 0.9539, Loss (val): 0.1371, Acc (val): 0.9565, Epoch completed in 11.96 seconds.
[Batch size: 32] Epoch 5/50
Loss (train): 0.1353, Acc (train): 0.9581, Loss (val): 0.1330, Acc (val): 0.9592, Epoch completed in 11.85 seconds.
[Batch size: 32] Epoch 6/50
Loss (train): 0.1221, Acc (tra