# 👁️‍🗨️ Computer Vision with CIFAR-10 Dataset

## ⚙️ Setup and imports

In this section, we import all required libraries, enable Plotly's dark theme, and define some global constants such as the input shape and class names for CIFAR-10.

In [17]:
from __future__ import annotations

import os
from pathlib import Path
from typing import Final
from typing import Optional
from typing import Tuple

import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.graph_objs import Figure
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras import layers, models, optimizers

# Apply Plotly's dark template to every figure created from here on
pio.templates.default = "plotly_dark"

# Seed NumPy's global RNG so random sampling is repeatable between runs
np.random.seed(42)

# CIFAR-10 metadata: ten classes, 32x32 images with 3 channels
NUM_CLASSES: int = 10
INPUT_SHAPE: Tuple[int, int, int] = (32, 32, 3)

# The position of a name in this list is the integer label it belongs to
CLASS_NAMES = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

In [18]:
# Output locations for exported figures, relative to the working directory
PLOTS_DIR: Final[Path] = Path("../plots")
DOCS_DIR: Final[Path] = Path("../docs")


def save_fig(fig: Figure, name: str, scale: int = 2) -> None:
    """
    Export a Plotly figure as an interactive HTML page and a static PNG.

    The HTML file goes to ``DOCS_DIR`` and the PNG to ``PLOTS_DIR``; both
    directories are created if they do not exist yet. The two target paths
    are printed once both files have been written.

    Parameters
    ----------
    fig : Figure
        Figure to export.
    name : str
        File stem shared by both outputs (no extension).
    scale : int, default 2
        Scale factor passed to the PNG export (higher = higher resolution).
    """
    for out_dir in (PLOTS_DIR, DOCS_DIR):
        out_dir.mkdir(parents=True, exist_ok=True)

    html_target = DOCS_DIR / f"{name}.html"
    png_target = PLOTS_DIR / f"{name}.png"

    # Interactive version; plotly.js is referenced from the CDN, not embedded
    fig.write_html(str(html_target), include_plotlyjs="cdn")

    # Static version (requires kaleido)
    fig.write_image(str(png_target), scale=scale)

    print(f"Saved HTML to {html_target}")
    print(f"Saved PNG to {png_target}")

## 📥 Loading the CIFAR-10 dataset

In this section, we load the CIFAR-10 dataset using Keras, optionally limit the number of training samples for faster experimentation,
and reshape the labels to a one-dimensional format.
We also print the shapes of the arrays and the mapping from class indices to human-readable class names.

In [19]:
# Optional cap on the number of training samples for faster experiments
# (e.g. 10_000); None keeps every sample.
TRAIN_LIMIT: Optional[int] = None

# Load the CIFAR-10 train/test splits through Keras
(train_images_raw, train_labels_raw), (test_images_raw, test_labels_raw) = keras.datasets.cifar10.load_data()

# Keep only the first TRAIN_LIMIT training samples when a limit is set
if TRAIN_LIMIT is not None:
    train_images_raw = train_images_raw[:TRAIN_LIMIT]
    train_labels_raw = train_labels_raw[:TRAIN_LIMIT]

# Flatten label arrays to shape (N,)
train_labels = train_labels_raw.reshape(-1)
test_labels = test_labels_raw.reshape(-1)

# For now the working names point at the raw images (uint8, 0–255).
# Normalized float32 versions are created later in the preprocessing step.
train_images = train_images_raw
test_images = test_images_raw

print(f"Train images: {train_images.shape}, Train labels: {train_labels.shape}")
print(f"Test images:  {test_images.shape}, Test labels:  {test_labels.shape}")

# The arrow was mis-encoded in the original string literal; restored here
print("\nClass index → name mapping:")
for idx, name in enumerate(CLASS_NAMES):
    print(f"  {idx}: {name}")

Train images: (50000, 32, 32, 3), Train labels: (50000,)
Test images:  (10000, 32, 32, 3), Test labels:  (10000,)

Class index → name mapping:
  0: airplane
  1: automobile
  2: bird
  3: cat
  4: deer
  5: dog
  6: frog
  7: horse
  8: ship
  9: truck


## 📊 Class distribution in the training set

In this section, we visualize the class distribution of the CIFAR-10 training set using Plotly with a dark theme.
This helps us confirm that the dataset is balanced across all classes.

In [20]:
# Count how many training samples carry each label value
unique_labels, label_counts = np.unique(train_labels, return_counts=True)

# Translate the integer labels into readable names for the x axis
class_names_for_plot = [CLASS_NAMES[int(label)] for label in unique_labels]

fig_class_dist = px.bar(
    x=class_names_for_plot,
    y=label_counts,
    title="CIFAR-10 training set class distribution",
    labels=dict(x="Class", y="Count"),
)
# Tilt the class names so they do not overlap
fig_class_dist.update_layout(xaxis_tickangle=-45)
fig_class_dist.show()

save_fig(fig_class_dist, "class_distribution")

Saved HTML to ../docs/class_distribution.html
Saved PNG to ../plots/class_distribution.png


## 🖼️ Example images per class

In this section, we visualize multiple example images for each CIFAR-10 class using Plotly.
This helps to build an intuitive understanding of what the model will see during training
and how the different classes look in practice.

In [21]:
# Number of example images to display per class
EXAMPLES_PER_CLASS: int = 10  # you can increase this if you want

# One grid row per class, one column per example
rows = NUM_CLASSES
cols = EXAMPLES_PER_CLASS

fig = make_subplots(
    rows=rows,
    cols=cols,
    horizontal_spacing=0.01,
    vertical_spacing=0.01,
)

for class_idx, class_name in enumerate(CLASS_NAMES):
    row = class_idx + 1  # subplot rows are 1-based

    # Positions of all training images belonging to this class
    class_indices = np.where(train_labels == class_idx)[0]

    if len(class_indices) == 0:
        # This can happen if TRAIN_LIMIT is very small and some classes are missing
        continue

    # Randomly pick up to EXAMPLES_PER_CLASS distinct images
    n_examples = min(EXAMPLES_PER_CLASS, len(class_indices))
    selected_indices = np.random.choice(
        class_indices,
        size=n_examples,
        replace=False,
    )

    for col, img_idx in enumerate(selected_indices, start=1):
        fig.add_trace(go.Image(z=train_images[img_idx]), row=row, col=col)

    # Use the class name as the y-axis title of the row's first cell
    fig.update_yaxes(title_text=class_name, row=row, col=1)

# Hide tick labels for a cleaner look. One figure-wide call replaces the
# per-cell updates that used to run inside the inner loop, and also covers
# cells that stayed empty.
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False)

fig.update_layout(
    title="Example CIFAR-10 images per class",
    height=150 * rows,
    width=150 * cols,
    showlegend=False,
)
fig.show()

save_fig(fig, "examples_per_class")

Saved HTML to ../docs/examples_per_class.html
Saved PNG to ../plots/examples_per_class.png


## 🧪 Preprocessing

In this section, we prepare the CIFAR-10 data for training:

- Convert image data from `uint8` to `float32`
- Normalize pixel values to the range `[0, 1]`
- One-hot encode the class labels for use with `categorical_crossentropy`

In [22]:
from typing import Tuple

def preprocess_images(images: np.ndarray) -> np.ndarray:
    """
    Scale raw pixel values into the unit interval as float32.

    Parameters
    ----------
    images : np.ndarray
        Raw image array; the notebook passes uint8 data of shape
        (N, 32, 32, 3) with values in 0..255.

    Returns
    -------
    np.ndarray
        New float32 array of the same shape holding ``images / 255``.
        The input array is left untouched.
    """
    # astype always returns a copy, so the caller's array is never modified
    as_float = images.astype(np.float32)
    return np.true_divide(as_float, 255.0)


def one_hot_encode_labels(labels: np.ndarray, num_classes: int) -> np.ndarray:
    """
    Turn integer class labels into one-hot rows.

    Thin wrapper around ``keras.utils.to_categorical``.

    Parameters
    ----------
    labels : np.ndarray
        Integer labels of shape (N,).
    num_classes : int
        Total number of classes, i.e. the width of each one-hot row.

    Returns
    -------
    np.ndarray
        One-hot encoded labels of shape (N, num_classes).
    """
    encoded = keras.utils.to_categorical(labels, num_classes=num_classes)
    return encoded


# Normalized float32 copies of both image splits (train first, then test)
train_images_preprocessed, test_images_preprocessed = (
    preprocess_images(split) for split in (train_images, test_images)
)

# One-hot label matrices for both splits
train_labels_one_hot, test_labels_one_hot = (
    one_hot_encode_labels(split, NUM_CLASSES) for split in (train_labels, test_labels)
)

# Sanity check: shapes (and dtypes for the images) of everything just created
print("train_images_preprocessed:", train_images_preprocessed.shape, train_images_preprocessed.dtype)
print("test_images_preprocessed: ", test_images_preprocessed.shape, test_images_preprocessed.dtype)
print("train_labels_one_hot:     ", train_labels_one_hot.shape)
print("test_labels_one_hot:      ", test_labels_one_hot.shape)

train_images_preprocessed: (50000, 32, 32, 3) float32
test_images_preprocessed:  (10000, 32, 32, 3) float32
train_labels_one_hot:      (50000, 10)
test_labels_one_hot:       (10000, 10)


## 🧠 Model architecture

In this section, we define a simple convolutional neural network (CNN) for CIFAR-10.
The model consists of two convolutional blocks followed by a dense classification head.

## 🚂 Training the model

We now train the CNN on the preprocessed CIFAR-10 images using
`categorical_crossentropy` as the loss function and accuracy as a metric.
A small validation split is used to monitor generalization during training.

## ✅ Evaluation and recall

In this section, we evaluate the trained model on the test set and compute
classification metrics including per-class recall and macro / weighted recall
using `sklearn.metrics.classification_report` and `recall_score`.

## 🧪 Simple full-data experiment runner

In this section, we define a compact helper function that trains a model
on the full CIFAR-10 training set and returns a small set of key metrics:

- test accuracy
- macro recall
- training time
- number of parameters
- overfitting gap (train_acc - val_acc at last epoch)

We only expose a few strong hyperparameters that we really care about:
learning rate, model capacity (filters and dense units), and dropout.

In [23]:
def build_cnn_model(
    input_shape: Tuple[int, int, int],
    num_classes: int,
    filters_block1: int = 32,
    filters_block2: int = 64,
    dense_units: int = 256,
    dropout_conv: float = 0.25,
    dropout_dense: float = 0.5,
) -> keras.Model:
    """
    Assemble a small two-block CNN classifier.

    Each convolutional block is Conv -> Conv -> MaxPool -> Dropout; the blocks
    are followed by a Flatten / Dense / Dropout / softmax head.

    Parameters
    ----------
    input_shape : tuple[int, int, int]
        Shape of a single input image, e.g. (32, 32, 3).
    num_classes : int
        Number of output classes (units of the softmax layer).
    filters_block1 : int, default 32
        Filters of both convolutions in the first block.
    filters_block2 : int, default 64
        Filters of both convolutions in the second block.
    dense_units : int, default 256
        Units of the hidden dense layer in the head.
    dropout_conv : float, default 0.25
        Dropout rate applied at the end of each convolutional block.
    dropout_dense : float, default 0.5
        Dropout rate applied between the hidden dense layer and the softmax layer.

    Returns
    -------
    keras.Model
        Uncompiled model named ``"cifar10_cnn"``.
    """
    inputs = keras.Input(shape=input_shape)

    # Both convolutional blocks share one layout and differ only in width
    features = inputs
    for n_filters in (filters_block1, filters_block2):
        for _ in range(2):
            features = layers.Conv2D(
                n_filters, (3, 3), padding="same", activation="relu"
            )(features)
        features = layers.MaxPooling2D((2, 2))(features)
        features = layers.Dropout(dropout_conv)(features)

    # Classification head
    features = layers.Flatten()(features)
    features = layers.Dense(dense_units, activation="relu")(features)
    features = layers.Dropout(dropout_dense)(features)
    outputs = layers.Dense(num_classes, activation="softmax")(features)

    return models.Model(inputs=inputs, outputs=outputs, name="cifar10_cnn")

In [24]:
import time
from typing import Any, Dict

import numpy as np
import tensorflow as tf
from sklearn.metrics import recall_score


def set_global_seed(seed: int) -> None:
    """
    Seed the global random number generators of Python, NumPy and TensorFlow.

    Python's built-in ``random`` module is seeded as well, so that any code
    relying on it is covered by the same seed.

    Parameters
    ----------
    seed : int
        Seed value applied to all three generators.
    """
    import random  # local import: only this helper needs it

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


def run_full_experiment(
    name: str,
    *,
    learning_rate: float,
    filters_block1: int = 32,
    filters_block2: int = 64,
    dense_units: int = 256,
    dropout_conv: float = 0.25,
    dropout_dense: float = 0.5,
    epochs: int = 10,
    batch_size: int = 64,
    seed: int = 42,
    validation_split: float = 0.2,
) -> Dict[str, Any]:
    """
    Train and evaluate one CNN configuration on CIFAR-10.

    The model is fitted on the module-level ``train_images_preprocessed`` /
    ``train_labels_one_hot`` arrays, with a ``validation_split`` fraction of
    them held out for validation, and is then scored on the module-level
    test arrays.

    Parameters
    ----------
    name : str
        Human-readable name for this experiment (e.g. 'baseline_default').
    learning_rate : float
        Learning rate for the Adam optimizer (key hyperparameter).
    filters_block1 : int, default 32
        Number of filters in the first convolutional block.
    filters_block2 : int, default 64
        Number of filters in the second convolutional block.
    dense_units : int, default 256
        Number of units in the dense layer before the softmax head.
    dropout_conv : float, default 0.25
        Dropout rate after convolutional blocks (regularization).
    dropout_dense : float, default 0.5
        Dropout rate before the final dense layer.
    epochs : int, default 10
        Number of training epochs.
    batch_size : int, default 64
        Batch size used during training.
    seed : int, default 42
        Random seed for reproducibility.
    validation_split : float, default 0.2
        Fraction of the training data passed to ``model.fit`` as
        ``validation_split``.

    Returns
    -------
    dict
        The hyperparameters plus ``num_parameters``, ``train_acc_last``,
        ``val_acc_last``, ``overfit_gap_acc`` (train minus validation accuracy
        of the last epoch), ``test_loss``, ``test_accuracy``,
        ``macro_recall`` and ``train_time_sec``.
    """
    # Release Keras state left over from earlier experiments so that memory
    # does not grow with every model built in a loop. This runs before
    # seeding so that the reset cannot discard the seed.
    keras.backend.clear_session()
    set_global_seed(seed)

    # Build and compile model
    model = build_cnn_model(
        input_shape=INPUT_SHAPE,
        num_classes=NUM_CLASSES,
        filters_block1=filters_block1,
        filters_block2=filters_block2,
        dense_units=dense_units,
        dropout_conv=dropout_conv,
        dropout_dense=dropout_dense,
    )

    model.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    num_parameters = int(model.count_params())

    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments during a long training run.
    start_time = time.perf_counter()
    history = model.fit(
        train_images_preprocessed,
        train_labels_one_hot,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,  # set to 1 if you want to see the training progress
    )
    train_time_sec = float(time.perf_counter() - start_time)

    # Last-epoch accuracies and their difference as a simple overfitting signal
    train_acc_last = float(history.history["accuracy"][-1])
    val_acc_last = float(history.history["val_accuracy"][-1])
    overfit_gap_acc = train_acc_last - val_acc_last

    # Loss and accuracy on the test set
    test_loss, test_accuracy = model.evaluate(
        test_images_preprocessed,
        test_labels_one_hot,
        verbose=0,
    )

    # Hard class predictions, needed for the macro-averaged recall
    y_test_pred_probs = model.predict(
        test_images_preprocessed,
        batch_size=128,
        verbose=0,
    )
    y_test_pred = np.argmax(y_test_pred_probs, axis=1)

    macro_recall = float(recall_score(test_labels, y_test_pred, average="macro"))

    return {
        "name": name,
        "learning_rate": learning_rate,
        "filters_block1": filters_block1,
        "filters_block2": filters_block2,
        "dense_units": dense_units,
        "dropout_conv": dropout_conv,
        "dropout_dense": dropout_dense,
        "epochs": epochs,
        "batch_size": batch_size,
        "num_parameters": num_parameters,
        "train_acc_last": train_acc_last,
        "val_acc_last": val_acc_last,
        "overfit_gap_acc": overfit_gap_acc,
        "test_loss": float(test_loss),
        "test_accuracy": float(test_accuracy),
        "macro_recall": macro_recall,
        "train_time_sec": train_time_sec,
    }

## 🧪 Running a small set of full-data experiments

We now run a small set of carefully chosen configurations:

- `baseline_default`: a reasonable starting point
- `lower_lr`: same capacity, smaller learning rate
- `more_filters`: higher capacity (more filters and dense units)
- `stronger_dropout`: more regularization to reduce overfitting

All experiments are trained on the full CIFAR-10 training set.

In [25]:
import pandas as pd

# Settings shared by all four runs; each configuration overrides only the
# keys it wants to vary (an overridden key keeps its position in the dict).
_baseline_settings = {
    "learning_rate": 1e-3,
    "filters_block1": 32,
    "filters_block2": 64,
    "dense_units": 256,
    "dropout_conv": 0.25,
    "dropout_dense": 0.5,
    "epochs": 10,
    "batch_size": 64,
}

experiment_configs_full = [
    {"name": "baseline_default", **_baseline_settings},
    {"name": "lower_lr", **_baseline_settings, "learning_rate": 5e-4},
    {
        "name": "more_filters",
        **_baseline_settings,
        "filters_block1": 64,
        "filters_block2": 128,
    },
    {
        "name": "stronger_dropout",
        **_baseline_settings,
        "dropout_conv": 0.35,
        "dropout_dense": 0.6,
    },
]

# Run the configurations one after another and collect their result dicts
full_results_list: list[Dict[str, Any]] = []

for cfg in experiment_configs_full:
    print(f"Running experiment: {cfg['name']}")
    full_results_list.append(run_full_experiment(**cfg))

# One row per experiment
full_results_df = pd.DataFrame(full_results_list)
full_results_df

Running experiment: baseline_default
Running experiment: lower_lr
Running experiment: more_filters
Running experiment: stronger_dropout


Unnamed: 0,name,learning_rate,filters_block1,filters_block2,dense_units,dropout_conv,dropout_dense,epochs,batch_size,num_parameters,train_acc_last,val_acc_last,overfit_gap_acc,test_loss,test_accuracy,macro_recall,train_time_sec
0,baseline_default,0.001,32,64,256,0.25,0.5,10,64,1116970,0.74735,0.7511,-0.00375,0.736946,0.7447,0.7447,350.573883
1,lower_lr,0.0005,32,64,256,0.25,0.5,10,64,1116970,0.761675,0.7573,0.004375,0.704815,0.7542,0.7542,398.076555
2,more_filters,0.001,64,128,256,0.25,0.5,10,64,2360138,0.780125,0.7689,0.011225,0.693662,0.7646,0.7646,1527.367284
3,stronger_dropout,0.001,32,64,256,0.35,0.6,10,64,1116970,0.718075,0.7476,-0.029525,0.745375,0.7393,0.7393,371.960836


In [26]:
from pathlib import Path

# Persist the first round of results; the folder is created on first use
full_results_csv = Path("results/experiments_full.csv")
full_results_csv.parent.mkdir(exist_ok=True)
full_results_df.to_csv(full_results_csv, index=False)

## 📊 Comparing full-data experiments

We now compare the experiments side by side:
- test accuracy vs macro recall
- training time vs test accuracy

This helps us see which configuration gives the best trade-off between
performance, overfitting and training time.

In [27]:
import plotly.graph_objects as go

# Grouped bars: one bar per metric for every experiment
fig_full_scores = go.Figure()

for metric_column, legend_label in (
    ("test_accuracy", "Test accuracy"),
    ("macro_recall", "Macro recall"),
):
    fig_full_scores.add_trace(
        go.Bar(
            x=full_results_df["name"],
            y=full_results_df[metric_column],
            name=legend_label,
        )
    )

fig_full_scores.update_layout(
    title="Test accuracy and macro recall (full-data experiments)",
    barmode="group",
    xaxis_title="Experiment",
    yaxis_title="Score",
)

fig_full_scores.show()

# Optional: export for README / GitHub Pages
save_fig(fig_full_scores, "full_exp_scores")

Saved HTML to ../docs/full_exp_scores.html
Saved PNG to ../plots/full_exp_scores.png


In [28]:
# Readable axis titles for the two plotted columns
_time_plot_labels = dict(
    train_time_sec="Training time (seconds)",
    test_accuracy="Test accuracy",
)

# One marker per experiment, sized by the model's parameter count
fig_full_time = px.scatter(
    full_results_df,
    x="train_time_sec",
    y="test_accuracy",
    size="num_parameters",
    text="name",
    labels=_time_plot_labels,
    title="Training time vs test accuracy (full-data experiments)",
)

# Put the experiment name above its marker
fig_full_time.update_traces(textposition="top center")
fig_full_time.show()

save_fig(fig_full_time, "full_exp_time_vs_accuracy")

Saved HTML to ../docs/full_exp_time_vs_accuracy.html
Saved PNG to ../plots/full_exp_time_vs_accuracy.png


In [29]:
# Columns shown in the compact comparison table
summary_columns = [
    "name",
    "learning_rate",
    "filters_block1",
    "filters_block2",
    "dense_units",
    "num_parameters",
    "train_time_sec",
    "test_accuracy",
    "macro_recall",
    "overfit_gap_acc",
]

# Best macro recall first
full_results_df.loc[:, summary_columns].sort_values(by="macro_recall", ascending=False)

Unnamed: 0,name,learning_rate,filters_block1,filters_block2,dense_units,num_parameters,train_time_sec,test_accuracy,macro_recall,overfit_gap_acc
2,more_filters,0.001,64,128,256,2360138,1527.367284,0.7646,0.7646,0.011225
1,lower_lr,0.0005,32,64,256,1116970,398.076555,0.7542,0.7542,0.004375
0,baseline_default,0.001,32,64,256,1116970,350.573883,0.7447,0.7447,-0.00375
3,stronger_dropout,0.001,32,64,256,1116970,371.960836,0.7393,0.7393,-0.029525


In [30]:
import pandas as pd

# Reload the stored results so the plots below also work without retraining
experiments_full_df = pd.read_csv("results/experiments_full.csv")
n_experiments = len(experiments_full_df)
print(f"Number of experiments: {n_experiments}")
experiments_full_df

Number of experiments: 4


Unnamed: 0,name,learning_rate,filters_block1,filters_block2,dense_units,dropout_conv,dropout_dense,epochs,batch_size,num_parameters,train_acc_last,val_acc_last,overfit_gap_acc,test_loss,test_accuracy,macro_recall,train_time_sec
0,baseline_default,0.001,32,64,256,0.25,0.5,10,64,1116970,0.74735,0.7511,-0.00375,0.736946,0.7447,0.7447,350.573883
1,lower_lr,0.0005,32,64,256,0.25,0.5,10,64,1116970,0.761675,0.7573,0.004375,0.704815,0.7542,0.7542,398.076555
2,more_filters,0.001,64,128,256,0.25,0.5,10,64,2360138,0.780125,0.7689,0.011225,0.693662,0.7646,0.7646,1527.367284
3,stronger_dropout,0.001,32,64,256,0.35,0.6,10,64,1116970,0.718075,0.7476,-0.029525,0.745375,0.7393,0.7393,371.960836


In [31]:
import plotly.graph_objects as go

# Same grouped bar chart as before, built from the results reloaded from CSV
fig_full_scores = go.Figure()

for metric_column, legend_label in (
    ("test_accuracy", "Test accuracy"),
    ("macro_recall", "Macro recall"),
):
    fig_full_scores.add_trace(
        go.Bar(
            x=experiments_full_df["name"],
            y=experiments_full_df[metric_column],
            name=legend_label,
        )
    )

fig_full_scores.update_layout(
    title="Test accuracy and macro recall (full-data experiments)",
    barmode="group",
    xaxis_title="Experiment",
    yaxis_title="Score",
)

fig_full_scores.show()

# Optional: save for README / GitHub Pages
save_fig(fig_full_scores, "full_exp_scores")

Saved HTML to ../docs/full_exp_scores.html
Saved PNG to ../plots/full_exp_scores.png


In [32]:
import plotly.express as px

# Readable axis titles for the two plotted columns
_time_plot_labels = dict(
    train_time_sec="Training time (seconds)",
    test_accuracy="Test accuracy",
)

# Same scatter as before, built from the results reloaded from CSV
fig_full_time = px.scatter(
    experiments_full_df,
    x="train_time_sec",
    y="test_accuracy",
    size="num_parameters",
    text="name",
    labels=_time_plot_labels,
    title="Training time vs test accuracy (full-data experiments)",
)

# Put the experiment name above its marker
fig_full_time.update_traces(textposition="top center")
fig_full_time.show()

save_fig(fig_full_time, "full_exp_time_vs_accuracy")

Saved HTML to ../docs/full_exp_time_vs_accuracy.html
Saved PNG to ../plots/full_exp_time_vs_accuracy.png


In [33]:
# Follow-up configurations that sit between the settings tried so far

balanced_plus = dict(
    name="balanced_plus",
    learning_rate=1e-3,
    filters_block1=32,
    filters_block2=64,
    dense_units=256,
    dropout_conv=0.30,  # between baseline and stronger_dropout
    dropout_dense=0.55,
    epochs=10,
    batch_size=64,
)

light_capacity = dict(
    name="light_capacity",
    learning_rate=1e-3,
    filters_block1=48,  # between 32 and 64
    filters_block2=96,  # between 64 and 128
    dense_units=256,
    dropout_conv=0.30,
    dropout_dense=0.50,
    epochs=10,
    batch_size=64,
)

small_efficient = dict(
    name="small_efficient",
    learning_rate=1e-3,
    filters_block1=32,
    filters_block2=64,
    dense_units=128,  # fewer units in dense layer
    dropout_conv=0.30,
    dropout_dense=0.50,
    epochs=12,  # a bit more epochs to compensate smaller head
    batch_size=64,
)

new_experiment_configs = [balanced_plus, light_capacity, small_efficient]

In [34]:
# Run the follow-up configurations and collect one result dict per run
new_results_list: list[dict] = []

for experiment_cfg in new_experiment_configs:
    print(f"Running experiment: {experiment_cfg['name']}")
    new_results_list.append(run_full_experiment(**experiment_cfg))

# One row per follow-up experiment
new_results_df = pd.DataFrame(new_results_list)
new_results_df

Running experiment: balanced_plus
Running experiment: light_capacity
Running experiment: small_efficient


Unnamed: 0,name,learning_rate,filters_block1,filters_block2,dense_units,dropout_conv,dropout_dense,epochs,batch_size,num_parameters,train_acc_last,val_acc_last,overfit_gap_acc,test_loss,test_accuracy,macro_recall,train_time_sec
0,balanced_plus,0.001,32,64,256,0.3,0.55,10,64,1116970,0.73855,0.7505,-0.01195,0.74851,0.7427,0.7427,399.891654
1,light_capacity,0.001,48,96,256,0.3,0.5,10,64,1722426,0.761925,0.7631,-0.001175,0.709582,0.7574,0.7574,808.793646
2,small_efficient,0.001,32,64,128,0.3,0.5,12,64,591274,0.742775,0.7604,-0.017625,0.716527,0.7555,0.7555,519.632603


In [35]:
import pandas as pd
from pathlib import Path

# Make sure the results folder exists before reading from / writing to it
Path("results").mkdir(exist_ok=True)

# Load the results of the first experiment round
old_results_df = pd.read_csv("results/experiments_full.csv")

# Stack earlier and new results into one table with a fresh running index
combined_results_df = pd.concat(
    (old_results_df, new_results_df),
    ignore_index=True,
)

combined_results_df.to_csv("results/experiments_full_extended.csv", index=False)
combined_results_df

Unnamed: 0,name,learning_rate,filters_block1,filters_block2,dense_units,dropout_conv,dropout_dense,epochs,batch_size,num_parameters,train_acc_last,val_acc_last,overfit_gap_acc,test_loss,test_accuracy,macro_recall,train_time_sec
0,baseline_default,0.001,32,64,256,0.25,0.5,10,64,1116970,0.74735,0.7511,-0.00375,0.736946,0.7447,0.7447,350.573883
1,lower_lr,0.0005,32,64,256,0.25,0.5,10,64,1116970,0.761675,0.7573,0.004375,0.704815,0.7542,0.7542,398.076555
2,more_filters,0.001,64,128,256,0.25,0.5,10,64,2360138,0.780125,0.7689,0.011225,0.693662,0.7646,0.7646,1527.367284
3,stronger_dropout,0.001,32,64,256,0.35,0.6,10,64,1116970,0.718075,0.7476,-0.029525,0.745375,0.7393,0.7393,371.960836
4,balanced_plus,0.001,32,64,256,0.3,0.55,10,64,1116970,0.73855,0.7505,-0.01195,0.74851,0.7427,0.7427,399.891654
5,light_capacity,0.001,48,96,256,0.3,0.5,10,64,1722426,0.761925,0.7631,-0.001175,0.709582,0.7574,0.7574,808.793646
6,small_efficient,0.001,32,64,128,0.3,0.5,12,64,591274,0.742775,0.7604,-0.017625,0.716527,0.7555,0.7555,519.632603
