In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib
import csv
import os
import sys
import time
import logging
import scipy.interpolate
import re
import struct

In [None]:
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("No compatible GPUs found")
else:
    try:
        # The memory-growth setting currently has to be identical on every GPU.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is configured after the GPUs were initialised.
        print(err)

In [None]:
# Directory whose immediate sub-directories are treated as experiments.
# NOTE: paths derived from it are built as root + "/" + name, so the trailing slash
# yields a double slash ("//") in them; keep it unless every consumer of those paths
# has been checked.
LOG_DIR = "siamese_logs/batch_03/" 
# Prefix that figure file names are appended to by plain string concatenation,
# so it has to end with a path separator.
SAVE_PATH = "/qarr/studia/magister/tekst/graphs/"

In [None]:
# Plot styling shared by every figure in this notebook.
# COLOR_STEP is the distance, in integer colormap indices, between the colours given to
# consecutive experiments drawn on the same axes.
COLOR_STEP=64
# plt.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in Matplotlib 3.7
# and removed in 3.9; the pyplot function is available in old and new releases alike.
# NOTE: despite their names these variables hold the "cool" and "autumn" colormaps.
winterCmap = plt.get_cmap("cool")
wistiaCmap = plt.get_cmap("autumn")
matplotlib.rcParams.update({
    'axes.xmargin': 0,            # no horizontal padding around the plotted data
    'figure.autolayout': True,    # apply tight_layout automatically
    'legend.fontsize': 'medium',
    'font.size': 12.0,
})

In [None]:
def merge_tbevents(dirpath, tag):
    """Merge one scalar series from all TensorBoard event files in a directory.

    Every regular file directly inside ``dirpath`` is read as an event file and each
    summary value whose tag equals ``tag`` contributes one ``(step, value)`` sample.

    Args:
        dirpath: Directory containing the event files.
        tag: Summary tag to extract, e.g. ``"mean_loss"``.

    Returns:
        Tuple ``(xx, yy)`` of NumPy arrays holding the steps and the matching values,
        sorted by step. Both arrays are empty when the tag never occurs.
    """
    # Sorted names give a reproducible read order; os.listdir itself returns the
    # entries in arbitrary order.
    event_paths = [os.path.join(dirpath, name) for name in sorted(os.listdir(dirpath))]
    steps = []
    values = []
    for event_path in event_paths:
        # A sub-directory cannot be read as an event file, so it is skipped.
        if not os.path.isfile(event_path):
            continue
        for event in tf.compat.v1.train.summary_iterator(event_path):
            for value in event.summary.value:
                if value.tag != tag:
                    continue
                if value.HasField("simple_value"):
                    # Scalar stored directly in the summary value.
                    scalar = value.simple_value
                else:
                    # Scalar stored as a tensor; make_ndarray decodes it whether the
                    # payload sits in tensor_content or in the typed value fields.
                    scalar = tf.make_ndarray(value.tensor).item()
                steps.append(event.step)
                values.append(scalar)
    xx = np.array(steps)
    yy = np.array(values)
    # A stable sort keeps samples that share a step in the order they were read.
    order = np.argsort(xx, kind="stable")
    return xx[order], yy[order]

def discover_tags(dirpath):
    """Return the set of all summary tags found in the event files under ``dirpath``.

    Every entry of the directory is opened as a TensorBoard event file.
    """
    found = set()
    event_paths = [dirpath + "/" + entry for entry in os.listdir(dirpath)]
    for event_path in event_paths:
        for event in tf.compat.v1.train.summary_iterator(event_path):
            summary = event.summary
            # Events without summary values carry no tags.
            if not (summary and summary.value):
                continue
            found.update(value.tag for value in summary.value)
    return found

In [None]:
#os.listdir("/home/zenfur/magister/jupyter/siamese_logs/")
# Take only the first os.walk entry: `root` is LOG_DIR as given and `dirs` lists the
# names of its immediate sub-directories (one per experiment); plain files are ignored.
# NOTE(review): `dirs` comes back in whatever order the filesystem reports, while later
# cells pick experiments by hard-coded list position — re-check those indices whenever
# the log directory or the machine changes.
root, dirs, _ = next(os.walk(LOG_DIR))

In [None]:
# Gather the path of every sub-directory of each experiment that contains a
# "validation" directory; experiments without one are reported and left out.
pastExperiments = []
for d in dirs:
    experimentDir = root + "/" + d
    subdirs = os.listdir(experimentDir)
    if "validation" not in subdirs:
        print(f"Omitting {d} experiment - no validation data")
        continue
    pastExperiments.extend(experimentDir + "/" + sd for sd in subdirs)

In [None]:
# chronoOrder = np.argsort([e.split('/')[-2].split('_')[1] for e in pastExperiments])

# with open("experiments.list", "w") as file:
#     writer = csv.writer(file)
#     for chord in chronoOrder:
#         splits = pastExperiments[chord].split('/')
#         name, date = splits[-2].split('_')
#         comment = ""
#         writer.writerow((date, name, pastExperiments[chord], comment))

In [None]:
# Show the collected experiment paths as the cell output.
pastExperiments

In [None]:
def smooth(x, series, weight, points=0):
    """Exponentially smooth ``series`` and optionally resample it on a denser grid.

    The smoothed sequence follows ``s[0] = series[0]`` and
    ``s[i] = (1 - weight) * series[i] + weight * s[i - 1]``; ``weight=0`` leaves the
    data unchanged and larger weights keep more of the previous smoothed value.

    Args:
        x: 1-D NumPy array of x coordinates, one per element of ``series``.
        series: 1-D sequence of values to smooth.
        weight: Fraction of the previous smoothed value carried into each step.
        points: When non-zero, a cubic interpolant of the smoothed curve is evaluated
            at this many evenly spaced positions between ``x.min()`` and ``x.max()``.

    Returns:
        ``(x, smoothed)`` when ``points`` is 0 or the series is empty, otherwise
        ``(xrange, yrange)`` with the resampled coordinates and values.

    Note:
        The interpolation is delegated to ``scipy.interpolate.interp1d``; it is expected
        to reject inputs with too few samples for a cubic fit (behaviour defined by
        SciPy, not by this function).
    """
    smoothed = np.zeros(len(series))
    if len(series) == 0:
        # Nothing to smooth, e.g. when a tag is missing from the logs.
        return (x, smoothed)
    smoothed[0] = series[0]
    for i in range(1, len(series)):
        smoothed[i] = series[i]*(1-weight) + weight*smoothed[i-1]
    if points:
        spline = scipy.interpolate.interp1d(x, smoothed, kind="cubic")
        xrange = np.linspace(x.min(), x.max(), num=points)
        return (xrange, spline(xrange))
    return (x, smoothed)

In [None]:
def numpy_ewma_vectorized_v2(data, window):
    """Vectorised exponentially weighted moving average of a 1-D NumPy array.

    Adapted from
    https://stackoverflow.com/questions/42869495/numpy-version-of-exponential-weighted-moving-average-equivalent-to-pandas-ewm
    The smoothing factor is ``2 / (window + 1)``. Large inputs are problematic because
    the decay factor is raised to powers as high as the length of ``data``.
    """
    smoothing = 2 /(window + 1.0)
    decay = 1-smoothing
    length = data.shape[0]

    # decay**0 ... decay**length, shared by the offset and the rescaling terms.
    decay_powers = decay**(np.arange(length+1))
    inverse_powers = 1/decay_powers[:-1]
    last_weight = smoothing*decay**(length-1)

    weighted_cumsum = (data*last_weight*inverse_powers).cumsum()
    # Contribution of the first sample plus the rescaled running sum.
    return data[0]*decay_powers[1:] + weighted_cumsum*inverse_powers[::-1]

def window_size(alpha, sum_proportion):
    """Solve ``(1 - alpha)**w == 1 - sum_proportion`` for ``w``, truncated to an int.

    The result grows as ``sum_proportion`` increases and as ``alpha`` decreases.
    """
    log_remaining = np.log(1-sum_proportion)
    log_decay = np.log(1-alpha)
    return int(log_remaining / log_decay)

def smooth_ewma(x, series, w):
    """Smooth ``series`` with an EWMA whose window is chosen by ``w``.

    ``w = 0`` gives a window of 1 and ``w = 1`` a window of half the series length
    (plus or minus integer truncation). ``x`` is returned unchanged alongside the
    smoothed values.
    """
    half_length = len(series)/2
    # Interpolate the window between 1 (w = 0) and n/2 (w = 1).
    window = 1 + int((half_length-1)*w)
    return (x, numpy_ewma_vectorized_v2(series, window))
    
    

In [None]:
def mean_loss_graph(experiments, title, legend, drawRaw=True, saveName=None, smoothFn=smooth, smoothParams=None):
    """Plot the "mean_loss" curves of several experiment runs on a single figure.

    Args:
        experiments: Paths shaped like ``.../<name>_<suffix>/<dataset>``. The last
            component is the data-set type ("validation" or anything else, which is
            treated as training data); the component before it names the experiment,
            with everything from the first underscore on stripped.
        title: Axes title.
        legend: Labels handed to ``Axes.legend`` in line-creation order (raw line, then
            smoothed line, per experiment when ``drawRaw`` is true). Pass ``None`` to
            label only the smoothed curves as "<name> - train" / "<name> - valid".
        drawRaw: Also draw the unsmoothed series, semi-transparent, in the same colour.
        saveName: When truthy, the figure is saved to ``SAVE_PATH + saveName``.
        smoothFn: Callable invoked as ``smoothFn(x, y, *smoothParams)`` that returns the
            ``(x, y)`` pair to plot.
        smoothParams: Extra positional arguments for ``smoothFn``; defaults to ``[0.8]``.
    """
    if smoothParams is None:
        smoothParams = [0.8]

    exps = []
    for ex in experiments:
        # Read the last two path components directly, so parsing does not depend on
        # the path containing a "//" marker or on how many directories precede it.
        normalised = os.path.normpath(ex)
        dsType = os.path.basename(normalised)
        nameString = os.path.basename(os.path.dirname(normalised))
        expName = re.sub(r"_.*$", "", nameString)
        exps.append((dsType, expName, ex))

    fig, axs = plt.subplots(1,1, figsize=(8,6))
    axs.set_title(title)
    axs.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(5))

    validationCount = 0
    trainCount = 0
    defaultLegend = []
    smoothedLines = []
    for dsType, expName, expPath in exps:
        print(dsType, expName, expPath)
        if dsType == "validation":
            # Validation curves walk down the warm colormap from its top end.
            color = wistiaCmap(256-COLOR_STEP*validationCount)
            defaultLegend.append(expName + " - valid")
            validationCount += 1
        else:
            # Training curves walk up the cool colormap from its bottom end.
            color = winterCmap(COLOR_STEP*trainCount)
            defaultLegend.append(expName + " - train")
            trainCount += 1

        xx, yy = merge_tbevents(expPath, "mean_loss")
        if drawRaw:
            axs.plot(xx,yy, alpha=0.3, color=color)
        smoothedLines.extend(axs.plot(*smoothFn(xx, yy, *smoothParams), alpha=1, color=color))

    axs.set_xlabel("numer epoki")
    axs.set_ylabel("uśredniona funkcja straty z epoki")
    if legend is not None:
        axs.legend(legend)
    else:
        # Attach the automatic labels to the smoothed lines explicitly; matching them
        # positionally would mislabel the curves whenever raw lines are drawn as well.
        axs.legend(smoothedLines, defaultLegend)
    if saveName:
        fig.savefig(SAVE_PATH + saveName)

In [None]:
def normalised_ranking_graph(experiments, title, legend, alpha=0.5, saveName=None):
    """Plot the "rank_normalised" series of several experiment runs on one figure.

    Args:
        experiments: Paths shaped like ``.../<name>_<suffix>/<dataset>``. Entries whose
            last component is "validation" are skipped with a message; for the others
            the component before the last names the experiment, with everything from
            the first underscore on stripped.
        title: Axes title.
        legend: Labels for the plotted curves in plotting order; when falsy, the
            experiment names are used instead.
        alpha: Opacity of the plotted lines.
        saveName: When truthy, the figure is saved to ``SAVE_PATH + saveName``.
    """
    exps = []
    for ex in experiments:
        # Read the last two path components directly, so parsing does not depend on
        # the path containing a "//" marker or on how many directories precede it.
        normalised = os.path.normpath(ex)
        dsType = os.path.basename(normalised)
        nameString = os.path.basename(os.path.dirname(normalised))
        expName = re.sub(r"_.*$", "", nameString)
        if dsType == "validation":
            print(f"Skipping {ex}, no data")
            continue
        exps.append((dsType, expName, ex))

    fig, axs = plt.subplots(1,1, figsize=(8,6))
    axs.set_title(title)
    axs.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(5))

    defaultLegend = []
    trainCount = 0
    for dsType, expName, expPath in exps:
        print(dsType, expName, expPath)
        # One colour per plotted experiment, stepping through the cool colormap.
        color = winterCmap(COLOR_STEP*trainCount)
        defaultLegend.append(expName)
        trainCount += 1

        xx, yy = merge_tbevents(expPath, "rank_normalised")
        axs.plot(xx,yy, alpha=alpha, color=color)

    axs.set_xlabel("numer epoki")
    axs.set_ylabel("znormalizowany ranking")
    axs.legend(legend if legend else defaultLegend)
    if saveName:
        fig.savefig(SAVE_PATH + saveName)



In [None]:
# Baseline experiment: raw and EWMA-smoothed mean loss for the training run
# (pastExperiments[1]) and the validation run (pastExperiments[0]).
xxt, yyt = merge_tbevents(pastExperiments[1], "mean_loss")
xxv, yyv = merge_tbevents(pastExperiments[0], "mean_loss")
print(pastExperiments[0])

fig, axs = plt.subplots(1, 1, figsize=(8, 6))
fig.tight_layout()
axs.set_title("Uśredniona funkcja straty w przebiegu uczenia eks. 'baseline'")
axs.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(5))

# Each series is drawn twice in one colour: faint raw data, then the smoothed curve.
for xs, ys, curveColor in ((xxt, yyt, winterCmap(0)), (xxv, yyv, wistiaCmap(256))):
    axs.plot(xs, ys, alpha=0.5, color=curveColor)
    axs.plot(*smooth_ewma(xs, ys, 0.3), alpha=1, color=curveColor)

axs.set_xlabel("numer epoki")
axs.set_ylabel("funkcja straty z epoki")

In [None]:
# Baseline experiment: raw and exponentially smoothed mean loss for the training run
# (pastExperiments[1]) and the validation run (pastExperiments[0]), saved to disk.
xxt, yyt = merge_tbevents(pastExperiments[1], "mean_loss")
xxv, yyv = merge_tbevents(pastExperiments[0], "mean_loss")
print(pastExperiments[0])

fig, axs = plt.subplots(1, 1, figsize=(8, 6))
fig.tight_layout()
axs.set_title("Uśredniona funkcja straty w przebiegu uczenia eks. 'baseline'")
axs.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(5))

# Plot order (raw, smoothed) per series must match the order of the legend labels.
for xs, ys, curveColor in ((xxt, yyt, winterCmap(0)), (xxv, yyv, wistiaCmap(256))):
    axs.plot(xs, ys, alpha=0.5, color=curveColor)
    axs.plot(*smooth(xs, ys, 0.8), alpha=1, color=curveColor)

axs.set_xlabel("numer epoki")
axs.set_ylabel("funkcja straty z epoki")
axs.legend([
    "błąd zbioru trenującego",
    "wygładzony błąd zb. trenującego",
    "błąd zbioru walidacyjnego",
    "wygładzony błąd zb. walidacyjnego",
])
fig.savefig(SAVE_PATH + "baseline_meanError_01")

In [None]:
# Baseline mean loss (training run first, then validation) drawn with the shared helper.
mean_loss_graph(
    experiments=[pastExperiments[1], pastExperiments[0]],
    title="Uśredniona funkcja straty eksperymentu 'baseline'",
    legend=[
        "błąd zbioru trenującego",
        "wygładzony błąd zb. trenującego",
        "błąd zbioru walidacyjnego",
        "wygładzony błąd zb. walidacyjnego",
    ],
    saveName="baseline_meanError_01",
)

In [None]:
# Normalised ranking of the baseline training run.
normalised_ranking_graph(
    experiments=[pastExperiments[1]],
    title="Znormalizowany ranking eksperymentu 'baseline'",
    legend=["ranking baseline"],
    saveName="baseline_normRanking_01",
)

In [None]:
# Inspect the raw (steps, values) pair behind the baseline ranking plot.
merge_tbevents(dirpath=pastExperiments[1], tag="rank_normalised")

In [None]:
# Smoothed mean loss of the baseline and the two output-normalisation experiments;
# the label order follows the order of the experiment paths.
mean_loss_graph(
    experiments=[
        pastExperiments[1], pastExperiments[0],
        pastExperiments[-1], pastExperiments[-2],
        pastExperiments[7], pastExperiments[6],
    ],
    title="",
    legend=[
        'baseline - training', 'baseline - valid.',
        'output norm 3 - train', 'output norm 3 - valid.',
        'output norm 2 - train', 'output norm 2 - valid.',
    ],
    drawRaw=False,
    saveName="output-normalisations",
)

In [None]:
# Normalised ranking of the baseline and the two output-normalisation experiments.
normalised_ranking_graph(
    experiments=[pastExperiments[1], pastExperiments[-1], pastExperiments[7]],
    title="",
    legend=['baseline', 'output norm 3', 'output norm 2'],
    alpha=1,
    saveName="output-normalisations-ranks",
)

In [None]:
# Smoothed mean loss of the experiments at the listed positions, using the
# automatically generated legend.
mean_loss_graph(
    experiments=[pastExperiments[i] for i in (1, 0, 5, 4, 9, 8, 13, 12)],
    title="",
    legend=None,
    drawRaw=False,
    saveName="different-sizes",
)

In [None]:
# Preview of the ranking curves for the experiments at the listed positions (not saved).
# normalised_ranking_graph takes no drawRaw argument (it draws one curve per experiment),
# so the keyword that used to be passed here raised a TypeError and has been removed.
normalised_ranking_graph([pastExperiments[i] for i in [1,0,5,4,9,8,13,12]],
                "",
               None,
               )

In [None]:
# Print each collected path next to its list position; other cells select
# experiments by these positions.
for i, p in enumerate(pastExperiments):
    print(f"{i} {p}")
    

In [None]:
# Normalised ranking of the experiments at the listed positions, drawn fully opaque
# and saved to disk.
normalised_ranking_graph(
    experiments=[pastExperiments[i] for i in (1, 0, 5, 4, 9, 8, 13, 12)],
    title="",
    legend=None,
    alpha=1,
    saveName="different-sizes-ranks",
)