In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import os

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# IN_COLAB is True exactly when the `google.colab` package can be imported.
try:
  import google.colab
  IN_COLAB = True
except ImportError:
  # Catch only the failed import: a bare `except` would also swallow
  # KeyboardInterrupt / SystemExit and hide unrelated errors.
  IN_COLAB = False

In [None]:
import os
# When running in Colab: mount Google Drive and make the project folder on the
# drive the working directory, so the relative paths used further down
# ("experiments_stats", "visualizations/...") resolve inside that folder.
if IN_COLAB:
    print("running in google colab")
    from google.colab import drive
    drive.mount("/content/drive")
    # chdir has to come after the mount: the target lies below the mount point.
    os.chdir("/content/drive/My Drive/seminar_dl_workspace")
    print("switched workspace:", os.getcwd())
    
from create_visualizations import get_filenames_in_dir

In [None]:
from IPython.display import SVG, display
def show_svg(filename):
    """Display the SVG file stored at ``filename`` as rich notebook output."""
    svg_image = SVG(filename=filename)
    display(svg_image)

Boilerplate done; the analysis starts below.

In [None]:
# Hyper-parameter grid of the experiment series. The position of a value in
# these lists is its row (batch size) / column (learning rate) in the heat maps.
# Learning rates are looked up by exact float equality, so the literals here
# must match what the file names parse to.
batch_sizes = [4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 10000]
learning_rates = [0.0001, 0.001, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]

def transform_experiment_name_to_heat_map_idx(name):
    """Map an experiment stats file name to its heat map cell.

    The name has to end in ``<batch_size>_<learning_rate>.csv``, for example
    ``..._64_0.01.csv``.

    Args:
        name: File name (or path) of an experiment stats csv file.

    Returns:
        Tuple ``(row, column)``: the index of the batch size in ``batch_sizes``
        and the index of the learning rate in ``learning_rates``.

    Raises:
        ValueError: If the name does not end in the expected pattern, or if the
            parsed batch size / learning rate is not part of the grid.
    """
    matches = re.search(r"(\d+)_([0-9]*\.?[0-9]+)\.csv$", name)
    if matches is None:
        # Without this guard the failure is an opaque AttributeError on None.
        raise ValueError(
            f"cannot parse batch size and learning rate from experiment name {name!r}"
        )
    batch_size = int(matches.group(1))
    lr = float(matches.group(2))
    try:
        return batch_sizes.index(batch_size), learning_rates.index(lr)
    except ValueError:
        raise ValueError(
            f"experiment {name!r} (batch_size={batch_size}, lr={lr}) "
            "is not part of the batch size / learning rate grid"
        ) from None

def get_latest_experiment_df(csv_path, expected_epochs=20):
    """Load an experiment stats csv and return the rows of its latest training session.

    The file may contain several training sessions, told apart by the
    ``experiment_id`` column; the session with the largest id is the latest.
    Rows are counted as epochs (one row per epoch is assumed, as in the
    printed messages).

    Args:
        csv_path: Path of the stats csv file; must have an ``experiment_id`` column.
        expected_epochs: Number of epochs a complete training session has.

    Returns:
        DataFrame with the rows of the latest training session.
    """
    experiment_stats = pd.read_csv(csv_path)
    experiment_stats_grouped = experiment_stats.groupby("experiment_id")
    latest_training_session = experiment_stats_grouped.get_group(experiment_stats.experiment_id.max())
    num_epochs = len(latest_training_session)
    # Judge completeness by the session that is actually returned, not by the
    # row count of the whole file: e.g. 20 + 5 rows must still be reported.
    if num_epochs < expected_epochs:
        print(f"WARNING: file {csv_path}: latest training session has only {num_epochs} epochs but should have {expected_epochs}")
    # Report every file holding more than one session, whatever its total length.
    if experiment_stats_grouped.ngroups > 1:
        print(f"INFO: file {csv_path} has {experiment_stats_grouped.ngroups} training sessions. Using latest training session with {num_epochs} epochs and experiment_id {latest_training_session.iloc[0]['experiment_id']}")
    return latest_training_session

In [None]:
# One heat map cell per (batch size, learning rate) pair: rows follow
# batch_sizes, columns follow learning_rates.
heat_map_dimensions = len(batch_sizes), len(learning_rates)
# Grid cells without a stats file keep these fill values (accuracy 0, loss 2.5).
heat_map_test_acc = np.zeros(shape=heat_map_dimensions)
heat_map_train_acc = np.zeros(shape=heat_map_dimensions)
heat_map_test_avg_loss = np.full(shape=heat_map_dimensions, fill_value=2.5)
heat_map_train_avg_loss = np.full(shape=heat_map_dimensions, fill_value=2.5)

experiments_stats_filenames = get_filenames_in_dir(
    "experiments_stats",
    lambda x: "SGD_with_variable_batch_size_" in x,
)
print(f"found {len(experiments_stats_filenames)} experiments for SGD_with_variable_batch_size")
for filename in experiments_stats_filenames:
    i, k = transform_experiment_name_to_heat_map_idx(filename)
    df = get_latest_experiment_df(os.path.join("experiments_stats", filename))
    # Each cell stores the best value found in the returned session:
    # highest accuracy, lowest average loss.
    heat_map_test_acc[i, k] = df["test_acc"].max()
    heat_map_train_acc[i, k] = df["train_acc"].max()
    heat_map_test_avg_loss[i, k] = df["test_avg_loss"].min()
    heat_map_train_avg_loss[i, k] = df["train_avg_loss"].min()

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(28, 15))
fig.suptitle("SGD batch size vs. learning rate after 20 epochs")
# Tick labels for the learning rate axis; the three smallest rates are written
# as powers of ten.
xticklabels = list(learning_rates)
xticklabels[:3] = [r"$10^{-4}$", r"$10^{-3}$", r"$10^{-2}$"]

def draw_subplot_batch_size_lr_heatmap(ax, heat_map, title, mark_max=False, mark_min=False):
    """Draw ``heat_map`` as an annotated batch size vs. learning rate heat map on ``ax``.

    Rows are labelled with the module-level ``batch_sizes`` and columns with the
    module-level ``xticklabels``; every cell is annotated with its value rounded
    to two decimals.

    Args:
        ax: Matplotlib axes to draw on.
        heat_map: 2-D numpy array indexed as ``heat_map[batch_size_idx, lr_idx]``.
        title: Title of the subplot.
        mark_max: Print the annotation of the largest value bold and white.
        mark_min: Print the annotation of the smallest value bold and white.

    Returns:
        ``((max_i, max_k), (min_i, min_k))``: (row, column) of the first largest
        and the first smallest value in row-major order. NaN cells are ignored.

    Raises:
        ValueError: If ``heat_map`` contains only NaN values.
    """
    im = ax.imshow(heat_map, cmap="rainbow", aspect="auto")
    ax.set_title(title)
    ax.set_xticks(range(len(learning_rates)))
    ax.set_xticklabels(xticklabels)
    ax.set_xlabel("learning rate")
    ax.set_yticks(range(len(batch_sizes)))
    ax.set_yticklabels(batch_sizes)
    ax.set_ylabel("batch size")
    plt.colorbar(im, ax=ax, fraction=0.025, pad=0.01)

    # Locate the extremes with the NaN-aware arg functions. Comparing against
    # np.amax / np.min finds nothing once a single cell is NaN, and indexing the
    # empty result then fails with an IndexError.
    max_i, max_k = (int(idx) for idx in np.unravel_index(np.nanargmax(heat_map), heat_map.shape))
    min_i, min_k = (int(idx) for idx in np.unravel_index(np.nanargmin(heat_map), heat_map.shape))

    # annotate heat map and mark max/min values
    marked_cells = set()
    if mark_max:
        marked_cells.add((max_i, max_k))
    if mark_min:
        marked_cells.add((min_i, min_k))
    for i in range(len(batch_sizes)):
        for k in range(len(learning_rates)):
            is_marked = (i, k) in marked_cells
            ax.text(
                k,
                i,
                round(heat_map[i, k], 2),
                ha="center",
                va="center",
                fontweight="bold" if is_marked else "normal",
                color="white" if is_marked else "black",
                fontsize=12,
            )

    # fix cutting off top and bottom row bug: https://github.com/matplotlib/matplotlib/issues/14751#issuecomment-511017375
    ax.set_ylim(len(batch_sizes) - 0.5, -0.5)
    return (max_i, max_k), (min_i, min_k)

# Draw the four heat maps. Every call returns ((max_i, max_k), (min_i, min_k));
# only the extreme that gets marked in the plot is kept.
max_train_acc_idx, _ = draw_subplot_batch_size_lr_heatmap(
    axs[0, 0], heat_map_train_acc, "train accuracy", mark_max=True
)
max_test_acc_idx, _ = draw_subplot_batch_size_lr_heatmap(
    axs[0, 1], heat_map_test_acc, "test accuracy", mark_max=True
)
_, min_train_avg_loss_idx = draw_subplot_batch_size_lr_heatmap(
    axs[1, 0], heat_map_train_avg_loss, "train avg loss", mark_min=True
)
_, min_test_avg_loss_idx = draw_subplot_batch_size_lr_heatmap(
    axs[1, 1], heat_map_test_avg_loss, "test avg loss", mark_min=True
)

# Report the best grid cell of every heat map.
print(f"max train accuracy: batch_size={batch_sizes[max_train_acc_idx[0]]} lr={learning_rates[max_train_acc_idx[1]]} --> {heat_map_train_acc[max_train_acc_idx]}")
print(f"max test accuracy: batch_size={batch_sizes[max_test_acc_idx[0]]} lr={learning_rates[max_test_acc_idx[1]]} --> {heat_map_test_acc[max_test_acc_idx]}")
print(f"min train avg loss: batch_size={batch_sizes[min_train_avg_loss_idx[0]]} lr={learning_rates[min_train_avg_loss_idx[1]]} --> {heat_map_train_avg_loss[min_train_avg_loss_idx]}")
print(f"min test avg loss: batch_size={batch_sizes[min_test_avg_loss_idx[0]]} lr={learning_rates[min_test_avg_loss_idx[1]]} --> {heat_map_test_avg_loss[min_test_avg_loss_idx]}")

plt.savefig("visualizations/sgd_batch_size_vs_learning_rate_heat_maps.png")

In [None]:
def draw_subplot_batch_size_lr_lines(ax, data, title, ylabel="", ylim=None, legend_is_outside=False, make_line_idx_fat=None):
    """Plot ``data`` on ``ax`` as lines over the learning rate axis.

    The lines returned by ``ax.plot(data)`` are labelled with the module-level
    ``batch_sizes`` in the legend; the x ticks are labelled with the
    module-level ``xticklabels``.

    Args:
        ax: Matplotlib axes to draw on.
        data: Values handed to ``ax.plot``; with a 2-D array every column
            becomes one line.
        title: Title of the subplot.
        ylabel: Label of the y axis.
        ylim: Optional y axis limits passed to ``ax.set_ylim``.
        legend_is_outside: Anchor the legend at the right edge of the axes
            instead of using the default placement.
        make_line_idx_fat: Optional index of a line to highlight (thicker and
            drawn on top of the others).
    """
    line_objects = ax.plot(data)
    if make_line_idx_fat is not None:
        highlighted_line = line_objects[make_line_idx_fat]
        highlighted_line.set_linewidth(3.0)
        # Go through the Artist setter rather than assigning `.zorder`
        # directly, so matplotlib is told that the artist changed.
        highlighted_line.set_zorder(99)

    legend_kwargs = {}
    if legend_is_outside:
        legend_kwargs = {"loc": "center left", "bbox_to_anchor": (0.96, 0.5)}
    ax.legend(line_objects, batch_sizes, **legend_kwargs)

    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xticks(range(len(learning_rates)))
    ax.set_xticklabels(xticklabels)
    ax.set_xlabel("learning rate")
    ax.grid()
    if ylim is not None:
        ax.set_ylim(ylim)

Plot average losses and accuracies against the learning rate, with one line per batch size.

In [None]:
# Full-range line plots: one panel per metric, one line per batch size.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(28, 15), gridspec_kw={"hspace": 0.35})
fig.suptitle("SGD batch size vs learning rate after 20 epochs")

# The heat maps are transposed so that the learning rate runs along the x axis
# and every plotted column belongs to one batch size.
draw_subplot_batch_size_lr_lines(
    axs[0, 0],
    heat_map_train_acc.T,
    "Train Accuracy vs learning rate for different batch sizes",
    ylabel="accuracy",
    legend_is_outside=True,
)
draw_subplot_batch_size_lr_lines(
    axs[0, 1],
    heat_map_test_acc.T,
    "Test Accuracy vs learning rate for different batch sizes",
    ylabel="accuracy",
    legend_is_outside=True,
)
draw_subplot_batch_size_lr_lines(
    axs[1, 0],
    heat_map_train_avg_loss.T,
    "Train avg loss vs learning rate for different batch sizes",
    ylabel="loss",
    legend_is_outside=True,
)
draw_subplot_batch_size_lr_lines(
    axs[1, 1],
    heat_map_test_avg_loss.T,
    "Test avg loss vs learning rate for different batch sizes",
    ylabel="loss",
    legend_is_outside=True,
)
plt.savefig("visualizations/sgd_batch_size_vs_learning_rate_lines_big.png")

In [None]:
# Zoomed line plots: same panels as the full-range figure, but with fixed
# y limits per panel and the line at index 2 (batch size 16 in batch_sizes)
# drawn thicker and on top.
fig, axs = plt.subplots(
    nrows=2,
    ncols=2,
    figsize=(28, 15),
    gridspec_kw={"hspace": 0.35, "wspace": 0.2},
)
fig.suptitle("SGD batch size vs learning rate after 20 epochs")

draw_subplot_batch_size_lr_lines(
    axs[0, 0],
    heat_map_train_acc.T,
    "Train Accuracy vs learning rate for different batch sizes",
    ylabel="accuracy",
    ylim=(0.9, 0.975),
    legend_is_outside=True,
    make_line_idx_fat=2,
)
draw_subplot_batch_size_lr_lines(
    axs[0, 1],
    heat_map_test_acc.T,
    "Test Accuracy vs learning rate for different batch sizes",
    ylabel="accuracy",
    ylim=(0.7, 0.84),
    legend_is_outside=True,
    make_line_idx_fat=2,
)
draw_subplot_batch_size_lr_lines(
    axs[1, 0],
    heat_map_train_avg_loss.T,
    "Train avg loss vs learning rate for different batch sizes",
    ylabel="loss",
    ylim=(0.08, 0.25),
    legend_is_outside=True,
    make_line_idx_fat=2,
)
draw_subplot_batch_size_lr_lines(
    axs[1, 1],
    heat_map_test_avg_loss.T,
    "Test avg loss vs learning rate for different batch sizes",
    ylabel="loss",
    ylim=(0.5, 1.0),
    legend_is_outside=True,
    make_line_idx_fat=2,
)
plt.savefig("visualizations/sgd_batch_size_vs_learning_rate_lines_zoomed.png")