In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib
import csv
import os
import sys
import time
import logging
import scipy.interpolate
import re
import struct

In [None]:
import tensorflow as tf

# Turn on memory growth for every physical GPU TensorFlow can see and report
# how many physical / logical GPU devices are available.
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("No compatible GPUs found")
else:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

# Stałe i ustawienia

In [None]:
# Directory that is scanned for experiment log sub-directories.
# NOTE: the trailing slash is relied upon by the cell that builds
# `pastExperiments` (it concatenates `root + d` without a separator).
LOG_DIR = "siamese_logs/" 
# Directory prefix prepended to every `saveName` when figures are written
# (absolute, machine-specific path).
SAVE_PATH = "/qarr/studia/magister/tekst/graphs/"

In [None]:
# Upper bound on the colormap index step between consecutively plotted experiments.
COLOR_STEP=90
# Colormaps used by the plotting helpers: "cool" for training curves and
# "autumn" for validation curves (the variable names are historical).
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9; plt.get_cmap works on old and new versions.
winterCmap = plt.get_cmap("cool")
wistiaCmap = plt.get_cmap("autumn")
# Global figure styling shared by all plots in this notebook.
matplotlib.rcParams['axes.xmargin'] = 0
matplotlib.rcParams.update({'figure.autolayout': True})
matplotlib.rcParams['legend.fontsize'] = 'medium'
matplotlib.rcParams['font.size'] = 12.0

# Deklaracje

In [None]:
def merge_tbevents(dirpath, tag):
    """Gather all values logged under `tag` from the event files in `dirpath`.

    Every entry of `dirpath` is opened with the TensorFlow summary iterator.
    For each summary value whose tag equals `tag`, the event step and the
    value decoded from `tensor.tensor_content` are recorded.

    Parameters
    ----------
    dirpath : str
        Directory whose entries are all read as event files.
    tag : str
        Summary tag to extract.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Steps and the matching values, both ordered by ascending step.

    Raises
    ------
    struct.error
        If a matching value's `tensor_content` is not exactly one packed float.
    """
    steps = []
    values = []
    for filename in os.listdir(dirpath):
        event_path = dirpath + "/" + filename
        for event in tf.compat.v1.train.summary_iterator(event_path):
            if not (event.summary and event.summary.value):
                continue
            for entry in event.summary.value:
                if entry.tag != tag:
                    continue
                # The payload is decoded as a single native-format C float.
                (value,) = struct.unpack('f', entry.tensor.tensor_content)
                steps.append(event.step)
                values.append(value)

    steps = np.array(steps)
    values = np.array(values)
    # Entries from different files are interleaved, so order them by step.
    order = np.argsort(steps)
    return steps[order], values[order]

def discover_tags(dirpath):
    """Return the set of summary tags present in the event files of `dirpath`.

    Every entry of `dirpath` is opened with the TensorFlow summary iterator
    and the tag of each summary value is collected.

    Parameters
    ----------
    dirpath : str
        Directory whose entries are all read as event files.

    Returns
    -------
    set of str
        All distinct tags encountered.
    """
    found = set()
    for filename in os.listdir(dirpath):
        for event in tf.compat.v1.train.summary_iterator(dirpath + "/" + filename):
            if event.summary and event.summary.value:
                found.update(entry.tag for entry in event.summary.value)
    return found

In [None]:
def smooth(x, series, weight, points=0):
    """Exponentially smooth `series`, optionally resampling it with a cubic spline.

    The smoothing recurrence is
    ``smoothed[i] = (1 - weight) * series[i] + weight * smoothed[i - 1]``
    with ``smoothed[0] = series[0]``.

    Parameters
    ----------
    x : numpy.ndarray
        Abscissa values matching `series`.
    series : sequence of float
        Values to smooth.
    weight : float
        Weight of the previous smoothed value; 0 leaves the series unchanged.
    points : int, optional
        When non-zero, the smoothed curve is interpolated with a cubic spline
        and evaluated on `points` evenly spaced positions between ``x.min()``
        and ``x.max()``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(x, smoothed)`` when `points` is 0, otherwise the resampled
        ``(xrange, yrange)``. An empty `series` yields ``(x, empty array)``
        instead of raising.
    """
    smoothed = np.zeros(len(series))
    if len(series) == 0:
        # Nothing to smooth or interpolate; avoid indexing series[0].
        return (x, smoothed)

    smoothed[0] = series[0]
    for i in range(1, len(series)):
        smoothed[i] = series[i]*(1-weight) + weight*smoothed[i-1]

    if not points:
        return (x, smoothed)

    spline = scipy.interpolate.interp1d(x, smoothed, kind="cubic")
    xrange = np.linspace(x.min(), x.max(), num=points)
    return (xrange, spline(xrange))

In [None]:
def numpy_ewma_vectorized_v2(data, window):
    """Vectorised exponentially weighted moving average.

    Adapted from
    https://stackoverflow.com/questions/42869495/numpy-version-of-exponential-weighted-moving-average-equivalent-to-pandas-ewm
    The smoothing factor is ``alpha = 2 / (window + 1)``. Has trouble with
    large datasets because of the high powers of ``1 - alpha`` involved.

    Parameters
    ----------
    data : numpy.ndarray
        One-dimensional input series (must be non-empty).
    window : int or float
        Span from which the smoothing factor is derived.

    Returns
    -------
    numpy.ndarray
        The smoothed series, same length as `data`.
    """
    n = data.shape[0]
    alpha = 2 /(window + 1.0)
    decay = 1-alpha

    # decay**0 .. decay**n
    decay_powers = decay**(np.arange(n+1))
    inv_powers = 1/decay_powers[:-1]

    # Contribution of the first sample to every output position.
    seed_term = data[0]*decay_powers[1:]
    last_weight = alpha*decay**(n-1)

    weighted_cumsum = (data*last_weight*inv_powers).cumsum()
    return seed_term + weighted_cumsum*inv_powers[::-1]

def window_size(alpha, sum_proportion):
    """Solve ``(1 - alpha)**window = 1 - sum_proportion`` for the window length.

    The result grows with a larger `sum_proportion` and a smaller `alpha`;
    it is truncated to an integer.
    """
    log_remaining = np.log(1-sum_proportion)
    log_decay = np.log(1-alpha)
    return int(log_remaining / log_decay)

def smooth_ewma(x, series, w):
    """Smooth `series` with the vectorised EWMA, using `w` to pick the window.

    `w` is mapped linearly onto the window length: ``w = 0`` gives a window of
    1 sample and ``w = 1`` gives a window of about ``len(series) / 2``.

    Returns
    -------
    tuple
        ``(x, smoothed)`` with `x` passed through unchanged.
    """
    half_span = len(series)/2 - 1
    window = int(half_span*w) + 1
    return (x, numpy_ewma_vectorized_v2(series, window))
    
    

In [None]:
def mean_loss_graph(experiments, figure=None, title="", alpha=1.0, legend=None, drawRaw=True, saveName=None, smoothFn=smooth, smoothParams=None):
    """Plot the "mean_loss" curves of several experiments on a single axes.

    Parameters
    ----------
    experiments : list of str
        Experiment paths. Everything up to and including the last "//" is
        stripped; the remainder must look like ``<name>_<suffix>/<dsType>``.
        A `dsType` of "validation" is drawn with the validation colormap,
        anything else is treated as a training run.
    figure : tuple, optional
        Existing ``(fig, axes)`` pair to draw on; a new 8x6 figure is created
        when omitted.
    title : str
        Axes title.
    alpha : float
        Opacity of the smoothed curves; raw curves use ``0.3 * alpha``.
    legend : list of str, optional
        Labels passed straight to ``axes.legend`` (assigned to the lines in
        plotting order). When omitted, one label per experiment is attached to
        its smoothed curve.
    drawRaw : bool
        Whether the unsmoothed curve is drawn underneath the smoothed one.
    saveName : str, optional
        File name appended to ``SAVE_PATH``; the figure is saved when given.
    smoothFn : callable
        ``smoothFn(x, y, *smoothParams)`` returning the ``(x, y)`` to plot.
    smoothParams : list, optional
        Extra positional arguments for `smoothFn`; defaults to ``[0.8]``.

    Raises
    ------
    ValueError
        If `experiments` is empty (previously an opaque ZeroDivisionError).
    """
    if not experiments:
        raise ValueError("mean_loss_graph: no experiments to plot")
    if smoothParams is None:
        smoothParams = [0.8]

    exps = []
    for ex in experiments:
        s = re.sub(r".*//", "" , ex)
        nameString, dsType = s.split("/")
        expName = re.sub(r"_.*$", "", nameString)
        exps.append((dsType, expName, ex))

    colorStep = min(256/len(experiments)*2, COLOR_STEP)

    if figure is None:
        fig, axs = plt.subplots(1,1, figsize=(8,6))
    else:
        fig, axs = figure
    axs.set_title(title)
    axs.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(5))

    validationCount = 0
    trainCount = 0
    defaultLegend = []
    smoothedLines = []
    for dsType, expName, expPath in exps:
        print(dsType, expName, expPath)
        if dsType == "validation":
            color = wistiaCmap(int(colorStep*validationCount))
            defaultLegend.append(expName + " - valid")
            validationCount += 1
        else:
            color = winterCmap(256-int(colorStep*trainCount))
            defaultLegend.append(expName + " - train")
            trainCount += 1

        xx, yy = merge_tbevents(expPath, "mean_loss")
        if drawRaw:
            axs.plot(xx,yy, alpha=0.3*alpha, color=color)
        # Keep the smoothed line handles so that the default legend labels the
        # smoothed curves even when raw curves are drawn as well.
        smoothedLines.extend(axs.plot(*smoothFn(xx, yy, *smoothParams), alpha=alpha, color=color))

    axs.set_xlabel("numer epoki")
    axs.set_ylabel("uśredniona funkcja straty z epoki")
    if legend is not None:
        axs.legend(legend)
    else:
        axs.legend(smoothedLines, defaultLegend)
    if saveName:
        fig.savefig(SAVE_PATH + saveName)
        # Report only after the file has actually been written.
        print(f"Saved {SAVE_PATH + saveName}")

In [None]:
def normalised_ranking_graph(experiments, title="", legend=None, alpha=1.0, saveName=None, figure=None):
    """Plot the "rank_normalised" curves of the non-validation experiments.

    Parameters
    ----------
    experiments : list of str
        Experiment paths. Everything up to and including the last "//" is
        stripped; the remainder must look like ``<name>_<suffix>/<dsType>``.
        Entries whose `dsType` is "validation" are skipped with a message.
    title : str
        Axes title.
    legend : list of str, optional
        Labels passed to ``axes.legend``; defaults to the experiment names.
    alpha : float
        Opacity of the plotted curves.
    saveName : str, optional
        File name appended to ``SAVE_PATH``; the figure is saved when given.
    figure : tuple, optional
        Existing ``(fig, axes)`` pair to draw on; a new 8x6 figure is created
        when omitted.

    Raises
    ------
    ValueError
        If no experiment remains after skipping the validation entries
        (previously an opaque ZeroDivisionError).
    """
    exps = []
    for ex in experiments:
        s = re.sub(r".*//", "" , ex)
        nameString, dsType = s.split("/")
        expName = re.sub(r"_.*$", "", nameString)
        if dsType == "validation":
            print(f"Skipping {ex}, no data")
            continue
        exps.append((dsType, expName, ex))

    if not exps:
        raise ValueError("normalised_ranking_graph: no non-validation experiments to plot")

    colorStep = min(256/len(exps), COLOR_STEP)
    defaultLegend = []

    if figure is None:
        fig, axs = plt.subplots(1,1, figsize=(8,6))
    else:
        fig, axs = figure
    axs.set_title(title)
    axs.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(5))

    for count, (dsType, expName, expPath) in enumerate(exps):
        print(dsType, expName, expPath)
        color = winterCmap(256-int(colorStep*count))
        defaultLegend.append(expName)

        xx, yy = merge_tbevents(expPath, "rank_normalised")
        axs.plot(xx,yy, alpha=alpha, color=color)

    axs.set_xlabel("numer epoki")
    axs.set_ylabel("znormalizowany ranking")
    if legend is None:
        axs.legend(defaultLegend)
    else:
        axs.legend(legend)
    if saveName:
        fig.savefig(SAVE_PATH + saveName)
        # Report only after the file has actually been written.
        print(f"Saved {SAVE_PATH + saveName}")



In [None]:
def filter_experiments(patterns, experimentsList):
    """Select the experiments matching each regular expression in `patterns`.

    Results are grouped by pattern (in the order of `patterns`) and keep the
    order of `experimentsList` within each group. An experiment matching
    several patterns appears once per matching pattern.
    """
    matched = []
    for pattern in patterns:
        for experiment in experimentsList:
            if re.search(pattern, experiment) is None:
                continue
            matched.append(experiment)
    return matched
    

# Przygotowanie ścieżek eksperymentów

In [None]:
# Take only the first os.walk() result: the top directory itself and the names
# of its immediate sub-directories (the file list is discarded).
root, dirs, _ = next(os.walk(LOG_DIR))

In [None]:
# Collect the log directories of all past experiments as path strings.
# NOTE: with LOG_DIR ending in "/", every path built here contains a "//"
# (either `root` + "/" in the flat layout, or the explicit "//" in the nested
# layout). The plotting helpers strip everything up to the last "//" to
# recover "<experiment>/<dataset type>", so do not normalise these separators.
pastExperiments = list()
for d in dirs:
    subdirs = os.listdir(root+d)
    if "validation" in subdirs:
        # Flat layout: the experiment directory holds the dataset directories.
        for sd in subdirs:
            pastExperiments.append(f"{root}/{d}/{sd}")
    elif "train" not in subdirs:
        # Neither "validation" nor "train" here: look one level deeper.
        for dd in subdirs:
            if os.path.isdir(f"{root}/{d}/{dd}"):
                subsubdirs = os.listdir(root +  d + "/" + dd)
                if "validation" in subsubdirs:
                    for sd in subsubdirs:
                        pastExperiments.append(root + d + "//" + dd + "/" + sd)
    else:
        # "train" exists but "validation" does not.
        print(f"Omitting {d} experiment - no validation data")

In [None]:
# chronoOrder = np.argsort([e.split('/')[-2].split('_')[1] for e in pastExperiments])

# with open("experiments.list", "w") as file:
#     writer = csv.writer(file)
#     for chord in chronoOrder:
#         splits = pastExperiments[chord].split('/')
#         name, date = splits[-2].split('_')
#         comment = ""
#         writer.writerow((date, name, pastExperiments[chord], comment))

In [None]:

# Display the discovered experiment paths.
pastExperiments

# Baseline

In [None]:
# Baseline run: raw and smoothed loss curves (drawRaw defaults to True, so two
# lines are drawn per experiment and the four labels are assigned in plotting
# order). No title is passed.
# NOTE(review): the label order assumes the training run is listed before the
# validation run in pastExperiments — confirm.
mean_loss_graph(filter_experiments(["20210424-044333"], pastExperiments), 
               legend=["błąd zbioru trenującego", "wygładzony błąd zb. trenującego", "błąd zbioru walidacyjnego", "wygładzony błąd zb. walidacyjnego"],
               saveName="baseline_meanError_01")

In [None]:
# Baseline run: normalised ranking curve, drawn semi-transparent.
normalised_ranking_graph(filter_experiments(["20210424-044333"], pastExperiments),
                         legend=["ranking baseline"],
                         alpha=0.3,
                         saveName="baseline_normRanking_01")

In [None]:
# Experiments whose path matches "baselineLong": normalised ranking curve.
normalised_ranking_graph(filter_experiments(["baselineLong"], pastExperiments),
                         legend=["ranking baseline"],
                         alpha=0.3,
                         saveName="baseline_normRanking_02")

In [None]:
# Experiments whose path matches "baselineLong": raw and smoothed loss curves
# (two lines per experiment, labelled in plotting order). No title is passed.
# NOTE(review): the label order assumes the training run is listed before the
# validation run in pastExperiments — confirm.
mean_loss_graph(filter_experiments(["baselineLong"], pastExperiments), 
               legend=["błąd zbioru trenującego", "wygładzony błąd zb. trenującego", "błąd zbioru walidacyjnego", "wygładzony błąd zb. walidacyjnego"],
               saveName="baseline_meanError_02")

# Eksperyment 1.

In [None]:
# Experiment 1: smoothed loss curves of the baseline and two other runs.
# title/legend are passed by keyword: positionally, the second and third
# arguments of mean_loss_graph are `figure` and `title`, so the former call
# bound "" to `figure` and failed when unpacking it into (fig, axs).
mean_loss_graph(filter_experiments(["20210424-044333", "20210423-232523", "20210424-005223"], pastExperiments),
                title="",
                # Candidate explicit labels: ['baseline - training', 'baseline - valid.', 'output norm 3 - train', 'output norm 3 - valid.', 'output norm 2 - train', 'output norm 2 - valid.']
                legend=None,
                drawRaw=False,
                saveName="output-normalisations")

In [None]:
# Experiment 1: normalised ranking curves of the same three runs, with the
# default (experiment-name) legend.
normalised_ranking_graph(
    filter_experiments(["20210424-044333", "20210423-232523", "20210424-005223"], pastExperiments),
    title="",
    legend=None,  # candidate explicit labels: ['baseline', 'output norm 3', 'output norm 2']
    alpha=1,
    saveName="output-normalisations-ranks",
)

# Eksperyment 2.

In [None]:
# Experiment 2: smoothed loss curves of four runs.
# title/legend are passed by keyword: positionally, the second and third
# arguments of mean_loss_graph are `figure` and `title`, so the former call
# bound "" to `figure` and failed when unpacking it into (fig, axs).
mean_loss_graph(filter_experiments(["20210424-044333", "20210424-023006", "20210424-031711", "20210424-001214"], pastExperiments),
                title="",
                legend=None,
                drawRaw=False,
                saveName="different-sizes")

In [None]:
# Experiment 2: normalised ranking curves of the same four runs.
normalised_ranking_graph(filter_experiments(["20210424-044333", "20210424-023006", "20210424-031711", "20210424-001214"], pastExperiments),
                                            alpha=1, saveName="different-sizes-ranks.png")

# Eksperyment 3.

In [None]:
# Experiment 3: smoothed loss curves of four runs (default legend).
mean_loss_graph(filter_experiments(["20210424-001214", "20210520-194658", "20210520-190857", "20210520-183105"], pastExperiments),
                drawRaw=False,
                saveName="different-sizes-v2.png")

In [None]:
# Experiment 3: normalised ranking curves of the same four runs.
normalised_ranking_graph(filter_experiments(["20210424-001214", "20210520-194658", "20210520-190857", "20210520-183105"], pastExperiments),
                        alpha=1.0,
                        saveName="different-sizes-v2-ranks.png")

# Eksperyment 4.

In [None]:
# Experiment 4: pick the paths that contain "baseline_" and one of the
# batch_03 / batch_04 / batch_05 markers.
selection = [e for e in pastExperiments if ('batch_03' in e or 'batch_04' in e or 'batch_05' in e) and "baseline_" in e]

In [None]:
# Last logged "mean_loss" value (highest step) of every selected path.
errors = [merge_tbevents(s, "mean_loss")[-1][-1] for s in selection]

In [None]:
# Split the selection by dataset type using the path suffix instead of the list
# position: os.listdir returns entries in arbitrary order, so "every other
# element" is not guaranteed to be a validation run.
# NOTE(review): selectionV[i] and selectionT[i] are later treated as the same
# experiment; this holds when each experiment contributes exactly one
# "validation" and one "train" directory — confirm.
selectionV = [s for s in selection if s.endswith("/validation")]
selectionT = [s for s in selection if s.endswith("/train")]
# Final validation loss per run, recomputed from selectionV so that re-running
# this cell is idempotent (the former `errors = errors[::2]` halved the list on
# every execution). This re-reads the validation event files.
errors = [merge_tbevents(s, "mean_loss")[-1][-1] for s in selectionV]

In [None]:
# Indices that sort the runs by ascending final error.
order = np.argsort(errors)

In [None]:
# Rebuild `selection`: validation runs first, then training runs, both in the
# error-sorted order. NOTE: this overwrites the `selection` defined earlier.
selection = [selectionV[x] for x in order] + [selectionT[x] for x in order]

In [None]:
# Spread of the repeated baseline runs: smoothed loss curves of the whole
# selection, with the reference baseline's validation loss overlaid as a dotted
# black line.
figpair = plt.subplots(1,1, figsize=(8,6))
# legend=[] passes an empty label list; the legend is replaced below.
mean_loss_graph(selection, figure = figpair, legend=[], drawRaw=False)
baseline = filter_experiments(["20210424-044333/validation"], pastExperiments)
# `baseline` is rebound from the list of matching paths to the (steps, values) pair.
baseline = merge_tbevents(baseline[0], "mean_loss")
line = figpair[1].plot(*smooth(baseline[0], baseline[1], 0.8), color=[0,0,0], linestyle="dotted")
# Only the reference line gets a legend entry.
figpair[1].legend(line, ["baseline - referencyjny"])
figpair[0].savefig(SAVE_PATH +"baselines-spread.png")

In [None]:
# Spread of the repeated baseline runs: normalised ranking curves of the
# selection, with the reference baseline's ranking overlaid as a dotted black line.
figpair = plt.subplots(1,1, figsize=(8,6))
baseline = filter_experiments(["20210424-044333/train"], pastExperiments)
# `baseline` is rebound from the list of matching paths to the (steps, values) pair.
baseline = merge_tbevents(baseline[0], "rank_normalised")
# legend=[] passes an empty label list; the legend is replaced below.
normalised_ranking_graph(selection, figure=figpair, legend=[], alpha=1)
line = figpair[1].plot(*baseline, color=[0,0,0], linestyle="dotted")
# Only the reference line gets a legend entry.
figpair[1].legend(line, ["baseline - referencyjny"])
figpair[0].savefig(SAVE_PATH +"baselines-spread-ranks.png")

In [None]:
# Mean normalised ranking over the repeated baseline training runs, with a band
# of +/- 1.96 standard deviations around the mean.
# NOTE(review): stacking into one array assumes every run logged the same
# number of points, and the steps of the first run are used as the common
# x axis — confirm.
x = merge_tbevents(selectionT[0], "rank_normalised")[0]
rankings = [merge_tbevents(s, "rank_normalised")[1] for s in selectionT]
rankings = np.array(rankings)
mean_ranking = np.mean(rankings, axis=0)
confidence95 = 1.96*np.sqrt(np.var(rankings, axis=0))

figpair = plt.subplots(1,1, figsize=(8,6))
l1 = figpair[1].plot(x, mean_ranking, color=winterCmap(128), linewidth=4)
l2 = figpair[1].fill_between(x, mean_ranking - confidence95, mean_ranking + confidence95, alpha=0.2, color=winterCmap(128))
# The plotted quantity is the normalised ranking, not the loss; the label
# previously read "averaged mean error" (copied from the loss plot).
figpair[1].legend([l1[0], l2], ["uśredniony znormalizowany ranking eksp. baseline", "przedział ufności 95%"])
figpair[0].savefig(SAVE_PATH +"baselines-spread-ranks-confidence95.png")

In [None]:
# Mean loss over the repeated baseline runs with a band of +/- 1.96 standard
# deviations, drawn separately for the validation and the training runs.
# NOTE(review): stacking into one array assumes every run logged the same
# number of points; the steps of the first run serve as the x axis — confirm.

# Validation runs.
x = merge_tbevents(selectionV[0], "mean_loss")[0]
values = [merge_tbevents(s, "mean_loss")[1] for s in selectionV]
values = np.array(values)
meanValues = np.mean(values, axis=0)
confidence95 = 1.96*np.sqrt(np.var(values, axis=0))

figpair = plt.subplots(1,1, figsize=(8,6))
l1 = figpair[1].plot(x, meanValues, color=wistiaCmap(128), linewidth=4)
l2 = figpair[1].fill_between(x, meanValues - confidence95, meanValues + confidence95, alpha=0.2, color=wistiaCmap(128))

# Training runs; x, values, meanValues and confidence95 are reused (overwritten).
x = merge_tbevents(selectionT[0], "mean_loss")[0]
values = [merge_tbevents(s, "mean_loss")[1] for s in selectionT]
values = np.array(values)
meanValues = np.mean(values, axis=0)
confidence95 = 1.96*np.sqrt(np.var(values, axis=0))

l3 = figpair[1].plot(x, meanValues, color=winterCmap(128), linewidth=4)
l4 = figpair[1].fill_between(x, meanValues - confidence95, meanValues + confidence95, alpha=0.2, color=winterCmap(128))

# Explicit handles keep the labels paired with the right line / band.
figpair[1].legend([l1[0],l2,l3[0],l4], ["średni błąd walidacyjny", "przedział ufności 95% błędu walidacyjnego", "średni błąd uczący", "przedział ufności 95% błędu uczącego"])
figpair[0].savefig(SAVE_PATH + "baselines-spread-confidence95.png")

In [None]:
# Number of runs stacked in `rankings`.
len(rankings)

# Eksperyment 5.

In [None]:
# Experiment 5: smoothed loss curves of four runs (default legend).
mean_loss_graph(filter_experiments(["20210424-044333", "20210424-001214", "20210520-202513", "20210424-005223"], pastExperiments),
               drawRaw=False,
               saveName="baseline-improvements.png")

In [None]:
# Experiment 5: normalised ranking curves of the same four runs.
normalised_ranking_graph(filter_experiments(["20210424-044333", "20210424-001214", "20210520-202513", "20210424-005223"], pastExperiments),
               alpha=1.0,
               saveName="baseline-improvements-ranks.png")

# Eksperyment 6. - dodanie dropout

In [None]:
# Experiment 6 (dropout added): smoothed loss curves of the baseline and one other run.
mean_loss_graph(filter_experiments(["20210424-044333", "20210521-031306"], pastExperiments),
               drawRaw=False,
               saveName="baseline-with-dropout.png")

In [None]:
# Experiment 6 (dropout added): normalised ranking curves of the same two runs.
normalised_ranking_graph(filter_experiments(["20210424-044333", "20210521-031306"], pastExperiments),
               saveName="baseline-with-dropout-ranks.png")

# Eksperyment 7. - uczenie całej sieci 

In [None]:
# Experiment 7 (training the whole network): smoothed loss curves of the
# experiments whose path matches "wholeNet".
mean_loss_graph(filter_experiments(["wholeNet"], pastExperiments),
               drawRaw=False,
               saveName="whole-net.png")

In [None]:
# Experiment 7 (training the whole network): normalised ranking curves of the
# experiments whose path matches "wholeNet".
normalised_ranking_graph(filter_experiments(["wholeNet"], pastExperiments),
               saveName="whole-net-ranks.png")