In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import os

In [None]:
# Enable the autoreload extension in mode 2 so edits to imported modules are
# picked up automatically when a cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Detect whether the notebook runs on Google Colab by probing for its package.
# Only ImportError is caught: a bare `except` would also hide unrelated
# failures such as KeyboardInterrupt.
try:
    import google.colab  # noqa: F401  (imported only to test availability)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

In [None]:
import os
# On Colab: mount Google Drive and make the seminar workspace folder the
# current working directory, so the relative paths used below resolve there.
if IN_COLAB:
    print("running in google colab")
    from google.colab import drive
    drive.mount("/content/drive")
    os.chdir("/content/drive/My Drive/seminar_dl_workspace")
    print("switched workspace:", os.getcwd())

# Project-local helper. It is imported after the optional chdir above
# (presumably so it resolves from the workspace folder on Colab; TODO confirm).
from create_visualizations import get_filenames_in_dir

In [None]:
from IPython.display import SVG, display
def show_svg(filename):
    """Display the SVG file at `filename` in the notebook output."""
    svg_image = SVG(filename=filename)
    display(svg_image)

boilerplate done...

In [None]:
# Hyperparameter values to analyse. Stats files whose parsed batch size,
# learning rate or momentum is not listed here are skipped when results are
# collected.
batch_sizes = [16]
# Row order of the result grids.
learning_rates = [0.01, 0.04, 0.35]
# Column order of the result grids; also used as x-axis tick labels in the plots.
momentums = [0.7, 0.725, 0.75, 0.775, 0.8, 0.825, 0.85, 0.875, 0.9, 0.905, 0.91, 0.915, 0.92, 0.925, 0.95, 0.975, 1.0]

def get_lr_and_momentum_from_experiment_name(name):
    """Parse the hyperparameters encoded at the end of a stats file name.

    The name must end in ``<momentum>_<batch_size>_<lr>.csv``, for example
    ``"SGD_with_momentum_0.9_16_0.01.csv"``.

    Args:
        name: File name (or path) of an experiment stats CSV.

    Returns:
        Tuple ``(lr, momentum, batch_size)`` as ``(float, float, int)``.
        Note that this order differs from the order in the file name.

    Raises:
        ValueError: If ``name`` does not end with the expected pattern.
    """
    matches = re.search(r"([0-9]*\.?[0-9]+)_(\d+)_([0-9]*\.?[0-9]+)\.csv$", name)
    if matches is None:
        # Fail with a message that names the offending file instead of an
        # opaque AttributeError on None.
        raise ValueError(
            f"cannot parse momentum, batch size and learning rate from experiment name {name!r}; "
            "expected it to end in '<momentum>_<batch_size>_<lr>.csv'"
        )
    momentum, batch_size, lr = matches.groups()
    return float(lr), float(momentum), int(batch_size)

def get_latest_experiment_df(csv_path, expected_epochs=20):
    """Load a stats CSV and return only the rows of its latest training session.

    Rows are grouped by the ``experiment_id`` column; the group with the
    largest id is treated as the latest session. Each row is taken to be one
    epoch (the printed messages rely on this; confirm against the code that
    writes the file).

    Args:
        csv_path: Path of the CSV file to read.
        expected_epochs: Number of rows the latest session should have.
            A warning is printed when it has fewer.

    Returns:
        DataFrame with the rows whose ``experiment_id`` equals the maximum id.
    """
    experiment_stats = pd.read_csv(csv_path)
    experiment_stats_grouped = experiment_stats.groupby("experiment_id")
    latest_experiment_id = experiment_stats.experiment_id.max()
    latest_training_session = experiment_stats_grouped.get_group(latest_experiment_id)

    # Check the session that is actually returned, not the whole file:
    # earlier sessions must not hide a truncated latest one.
    n_epochs = len(latest_training_session)
    if n_epochs < expected_epochs:
        print(f"WARNING: file {csv_path} has only {n_epochs} epochs but should have {expected_epochs}")
    if experiment_stats_grouped.ngroups > 1:
        print(f"INFO: file {csv_path} has {experiment_stats_grouped.ngroups} training sessions. Using latest training session with {n_epochs} epochs and experiment_id {latest_experiment_id}")
    return latest_training_session

In [None]:
# Result grids indexed as [learning-rate index, momentum index]. Each cell holds
# the best value (max accuracy / min avg loss) of the latest training session.
dims = (len(learning_rates), len(momentums))
results_test_acc = np.zeros(dims)
results_train_acc = np.zeros(dims)
results_test_avg_loss = np.zeros(dims)
results_train_avg_loss = np.zeros(dims)
# True for every cell that was filled from a stats file. Cells that stay False
# still hold the initial 0.0 and must not take part in the best-value search.
results_loaded = np.zeros(dims, dtype=bool)

experiments_stats_filenames = get_filenames_in_dir("experiments_stats", lambda x: "SGD_with_momentum_" in x)
print(f"found {len(experiments_stats_filenames)} experiments for SGD_with_momentum_")
for filename in experiments_stats_filenames:
    lr, momentum, batch_size = get_lr_and_momentum_from_experiment_name(filename)
    # Ignore experiments outside the configured hyperparameter grid.
    if batch_size not in batch_sizes or lr not in learning_rates or momentum not in momentums:
        continue
    i = learning_rates.index(lr)
    k = momentums.index(momentum)
    df = get_latest_experiment_df(os.path.join("experiments_stats", filename))
    results_test_acc[i, k] = df["test_acc"].max()
    results_train_acc[i, k] = df["train_acc"].max()
    results_test_avg_loss[i, k] = df["test_avg_loss"].min()
    results_train_avg_loss[i, k] = df["train_avg_loss"].min()
    results_loaded[i, k] = True

missing_combinations = [
    (learning_rates[row], momentums[col]) for row, col in zip(*np.nonzero(~results_loaded))
]
if missing_combinations:
    print(f"WARNING: no stats file found for {len(missing_combinations)} (lr, momentum) combinations: {missing_combinations}")


def _best_cell(grid, pick_max):
    """Return the (row, col) index of the best loaded cell of `grid`.

    Cells without a loaded experiment are masked out so that their initial 0.0
    cannot win (this matters for the loss minima). Ties resolve to the first
    cell in row-major order. If nothing was loaded, (0, 0) is returned.
    """
    masked = np.where(results_loaded, grid, -np.inf if pick_max else np.inf)
    flat_index = np.argmax(masked) if pick_max else np.argmin(masked)
    return np.unravel_index(flat_index, grid.shape)


best_train_acc_cell = _best_cell(results_train_acc, pick_max=True)
best_test_acc_cell = _best_cell(results_test_acc, pick_max=True)
best_train_loss_cell = _best_cell(results_train_avg_loss, pick_max=False)
best_test_loss_cell = _best_cell(results_test_avg_loss, pick_max=False)

max_train_acc = results_train_acc[best_train_acc_cell]
max_test_acc = results_test_acc[best_test_acc_cell]
min_train_loss = results_train_avg_loss[best_train_loss_cell]
min_test_loss = results_test_avg_loss[best_test_loss_cell]

print(f"max train accuracy: lr={learning_rates[best_train_acc_cell[0]]} momentum={momentums[best_train_acc_cell[1]]} --> {max_train_acc}")
print(f"max test accuracy: lr={learning_rates[best_test_acc_cell[0]]} momentum={momentums[best_test_acc_cell[1]]} --> {max_test_acc}")
print(f"min train avg loss: lr={learning_rates[best_train_loss_cell[0]]} momentum={momentums[best_train_loss_cell[1]]} --> {min_train_loss}")
print(f"min test avg loss:  lr={learning_rates[best_test_loss_cell[0]]} momentum={momentums[best_test_loss_cell[1]]} --> {min_test_loss}")

2: Check the results of the more fine-grained experiments

In [None]:
def draw_subplot_bar(ax, data, title, ylabel="", ylim=None, bar_label_y_padding_frac=0.025):
    """Draw a grouped bar chart (one group per momentum, one bar per learning rate).

    Args:
        ax: Matplotlib axes to draw on.
        data: 2-D array indexed as ``data[series, momentum_index]``. The first
            three rows are plotted with the legend labels lr=0.01, lr=0.04 and
            lr=0.35.
        title: Title of the subplot.
        ylabel: Y-axis label. It also selects which bars get a value label:
            ``"accuracy"`` labels only each series' maximum, ``"loss"`` only
            each series' minimum, and any other value labels every bar.
        ylim: Optional ``(lower, upper)`` y-axis limits.
        bar_label_y_padding_frac: Vertical offset of the value labels above the
            bar top, as a fraction of the ``ylim`` span (negative values move
            the label down). Has no effect when ``ylim`` is None.

    The x tick labels are taken from the module-level ``momentums`` list.
    """
    bar_width = 0.25
    # Per series: (legend label, bar color or None for the default, value-label color).
    series_styles = (
        ("lr=0.01", None, "blue"),
        ("lr=0.04", "olive", "olive"),
        ("lr=0.35", "sienna", "sienna"),
    )
    group_positions = np.arange(len(data[0, :]))

    for series_idx, (legend_label, bar_color, _) in enumerate(series_styles):
        bar_kwargs = {"width": bar_width, "label": legend_label}
        if bar_color is not None:
            bar_kwargs["color"] = bar_color
        ax.bar(group_positions + series_idx * bar_width, data[series_idx, :], **bar_kwargs)

    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("momentum")
    # Put the tick under the middle bar of each group.
    ax.set_xticks(group_positions + bar_width)
    ax.set_xticklabels([str(m) for m in momentums])
    ax.legend()

    # The label padding is only defined relative to an explicit y range.
    bar_label_y_padding = 0
    if ylim is not None:
        ax.set_ylim(ylim)
        bar_label_y_padding = (ylim[1] - ylim[0]) * bar_label_y_padding_frac

    for series_idx, (_, _, text_color) in enumerate(series_styles):
        series_values = data[series_idx, :]
        # Decide which bars get a label (only max or min for acc or loss respectively).
        if ylabel == "accuracy":
            labelled_value = series_values.max()
        elif ylabel == "loss":
            labelled_value = series_values.min()
        else:
            labelled_value = None
        # Half a bar width left of the centre of this series' bar. Each series
        # gets its own offset so labels of neighbouring series do not share a spot.
        text_x_offset = (2 * series_idx - 1) * bar_width / 2
        for i, v in enumerate(series_values):
            if labelled_value is not None and v != labelled_value:
                continue
            ax.text(i + text_x_offset, v + bar_label_y_padding, round(v, 3), fontsize=16, color=text_color)

Plot avg losses and accuracies for different momentums and learning rates

In [None]:
# One entry per panel: (axes index, result grid, title, y-axis label, extra keyword arguments).
# Accuracies sit in the top row and average losses in the bottom row, with
# train on the left and test on the right.
panel_specs = [
    ((0, 0), results_train_acc, "Train Accuracy for different momentums and learning rates", "accuracy", {"bar_label_y_padding_frac": -1.05}),
    ((0, 1), results_test_acc, "Test Accuracy for different momentums and learning rates", "accuracy", {}),
    ((1, 0), results_train_avg_loss, "Train avg loss for different momentums and learning rates", "loss", {"bar_label_y_padding_frac": -0.225}),
    ((1, 1), results_test_avg_loss, "Test avg loss for different momentums and learning rates", "loss", {"bar_label_y_padding_frac": -0.65}),
]

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(28, 15), gridspec_kw={"hspace": 0.35})
fig.suptitle("SGD with momentum for different momentums and learning rates after 20 epochs (max/min acc/loss for each lr below each graph)")

for panel_index, panel_grid, panel_title, panel_ylabel, panel_kwargs in panel_specs:
    draw_subplot_bar(axs[panel_index], panel_grid, panel_title, ylabel=panel_ylabel, ylim=(0.0, 1.0), **panel_kwargs)

plt.savefig("visualizations/sgd_with_momentum_batch_sizes.png")