In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import os

In [None]:
# Load the autoreload extension and set it to mode 2
# (per the IPython docs: re-import all modules before executing each cell).
%load_ext autoreload
%autoreload 2
# Use matplotlib's inline backend so figures appear in the cell output.
%matplotlib inline

In [None]:
# Detect whether this notebook is running inside Google Colab by probing for
# the `google.colab` package.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
    IN_COLAB = True
except ImportError:
    # Only a missing package means "not on Colab". A bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated failures.
    IN_COLAB = False

In [None]:
import os
# On Colab: mount Google Drive and make the project folder the working directory.
if IN_COLAB:
    print("running in google colab")
    from google.colab import drive
    drive.mount("/content/drive")
    # NOTE(review): the chdir presumably has to happen before the project-local
    # import below so that the module is found on Colab -- confirm.
    os.chdir("/content/drive/My Drive/seminar_dl_workspace")
    print("switched workspace:", os.getcwd())
    
# Project-local helper; used further down to list the experiment stats files.
from create_visualizations import get_filenames_in_dir

In [None]:
from IPython.display import SVG, display
def show_svg(filename):
    """Display the SVG file located at ``filename`` as output of the current cell."""
    svg_image = SVG(filename=filename)
    display(svg_image)

boilerplate done...

1: In these first experiments, a 3-dimensional grid search over batch_size, lr and momentum was run to find a suitable combination for further experiments.

In [None]:
# Hyper-parameter values covered by the grid search. The position of a value in
# its list is used below as the index along the matching axis of the result arrays.
batch_sizes = [16, 128, 256]
learning_rates = [0.01, 0.04, 0.45]
momentums = [0.875, 0.9, 0.925]

def get_batch_size_lr_and_momentum_from_experiment_name(name):
    """Parse the hyper-parameters encoded at the end of an experiment stats file name.

    The name is expected to end in ``<momentum>_<batch_size>_<lr>.csv``, e.g.
    ``SGD_with_momentum_0.9_16_0.01.csv``.

    Args:
        name: file name (or path) of the experiment stats CSV.

    Returns:
        Tuple ``(batch_size, lr, momentum)`` as ``(int, float, float)``.

    Raises:
        ValueError: if ``name`` does not end in the expected pattern.
    """
    matches = re.search(r"([0-9]*\.?[0-9]+)_(\d+)_([0-9]*\.?[0-9]+)\.csv$", name)
    if matches is None:
        # Fail with a message that names the offending file instead of the
        # opaque "'NoneType' object has no attribute 'group'".
        raise ValueError(
            f"experiment name {name!r} does not end in '<momentum>_<batch_size>_<lr>.csv'"
        )
    momentum = float(matches.group(1))
    batch_size = int(matches.group(2))
    lr = float(matches.group(3))
    return batch_size, lr, momentum

def get_latest_experiment_df(csv_path, expected_epochs=20):
    """Load an experiment stats CSV and return the rows of its latest training session.

    Rows are grouped by the ``experiment_id`` column; the group with the largest
    id is treated as the latest training session. Each row is counted as one epoch.

    Args:
        csv_path: path of the CSV file to read; it must have an ``experiment_id`` column.
        expected_epochs: number of epochs a complete training session should have.
            A warning is printed when the latest session has fewer.

    Returns:
        DataFrame containing only the rows of the latest training session.

    Raises:
        ValueError: if the file contains no rows.
    """
    experiment_stats = pd.read_csv(csv_path)
    if experiment_stats.empty:
        raise ValueError(f"file {csv_path} contains no experiment rows")
    experiment_stats_grouped = experiment_stats.groupby("experiment_id")
    latest_training_session = experiment_stats_grouped.get_group(experiment_stats["experiment_id"].max())
    n_epochs = len(latest_training_session)
    # Check the session that is actually returned, not the row count of the whole
    # file: a file holding a complete old session plus a short new one must still warn.
    if n_epochs < expected_epochs:
        print(f"WARNING: file {csv_path} has only {n_epochs} epochs in its latest training session but should have {expected_epochs}")
    # Report multiple sessions whenever they exist, independent of the total row count.
    if experiment_stats_grouped.ngroups > 1:
        print(f"INFO: file {csv_path} has {experiment_stats_grouped.ngroups} training sessions. Using latest training session with {n_epochs} epochs and experiment_id {latest_training_session.iloc[0]['experiment_id']}")
    return latest_training_session

Find the best batch size and learning rate from these first experiments, in order to do a more fine-grained experiment run with more momentum values but with fixed lr and batch size.

In [None]:
# Result grids indexed as [batch_size, learning_rate, momentum]. Cells start out
# as NaN so that combinations without a stats file are ignored by the reductions
# below instead of posing as an accuracy or loss of 0.0.
dims = (len(batch_sizes), len(learning_rates), len(momentums))
results_test_acc = np.full(dims, np.nan)
results_train_acc = np.full(dims, np.nan)
results_test_avg_loss = np.full(dims, np.nan)
results_train_avg_loss = np.full(dims, np.nan)

experiments_stats_filenames = get_filenames_in_dir("experiments_stats", lambda x: "SGD_with_momentum_" in x)
print(f"found {len(experiments_stats_filenames)} experiments for SGD_with_momentum_")
for filename in experiments_stats_filenames:
    batch_size, lr, momentum = get_batch_size_lr_and_momentum_from_experiment_name(filename)
    # Skip runs that are not part of this grid. batch_size is checked as well,
    # otherwise batch_sizes.index() below raises ValueError for an off-grid run.
    if batch_size not in batch_sizes or lr not in learning_rates or momentum not in momentums:
        continue
    i = batch_sizes.index(batch_size)
    j = learning_rates.index(lr)
    k = momentums.index(momentum)
    df = get_latest_experiment_df(os.path.join("experiments_stats", filename))
    # Keep the best value reached in any epoch of the latest training session.
    results_test_acc[i, j, k] = df["test_acc"].max()
    results_train_acc[i, j, k] = df["train_acc"].max()
    results_test_avg_loss[i, j, k] = df["test_avg_loss"].min()
    results_train_avg_loss[i, j, k] = df["train_avg_loss"].min()


def _report_best(label, results, find_best_flat_index):
    """Print the grid point holding the best value of ``results`` and return that value.

    ``find_best_flat_index`` is ``np.nanargmax`` or ``np.nanargmin``; both return the
    first best entry in row-major order. Returns NaN when the grid has no data.
    """
    if np.isnan(results).all():
        print(f"{label}: no matching experiments found")
        return np.nan
    i, j, k = np.unravel_index(find_best_flat_index(results), results.shape)
    best = results[i, j, k]
    print(f"{label}: batch_size={batch_sizes[i]} lr={learning_rates[j]} momentum={momentums[k]} --> {best}")
    return best


max_train_acc = _report_best("max train accuracy", results_train_acc, np.nanargmax)
max_test_acc = _report_best("max test accuracy", results_test_acc, np.nanargmax)
min_train_loss = _report_best("min train avg loss", results_train_avg_loss, np.nanargmin)
min_test_loss = _report_best("min test avg loss", results_test_avg_loss, np.nanargmin)

--> Choose batch_size=16 with lr=0.01 and lr=0.04 for further experiments, and additionally lr=0.35, because that learning rate had the best test accuracy at batch size 16 in the previous regular SGD experiments.

2: Check the results of the more fine-grained experiments: see notebook_experiments_momentum_results.ipynb