## Parser for .res files

Directory should look like this:
```
results
| plots.ipynb
└─final_json
   └ json files created by this notebook
└─final_res
   └ res file outputs from server testing
     should have distinct names
└─plots
   └ pdf output of plots
└─presentation
   └ complete presentation
```

In [None]:
# Reading in data and creating a dataframe
import json
import glob
import matplotlib.pyplot as plt
import pandas as pd
from math import log
import numpy as np
from matplotlib import cm
from matplotlib.pyplot import figure
from matplotlib import colors
from matplotlib.patches import Ellipse
# Create a 12x6-inch matplotlib figure up front.
# NOTE(review): nothing in this cell draws on it, and the plotting cells
# below open their own figures — confirm whether this call is still needed.
figure(figsize=(12, 6));

In [None]:
# Convert every raw .res benchmark log in ./final_res into a JSON file in
# ./final_json that maps instance name -> parsed result record.


def parse_time(stamp):
    """Convert a ``minutes:seconds`` string such as ``'1:02.50'`` to seconds.

    Raises ``ValueError`` when *stamp* has no ``':'`` or a non-numeric part.
    """
    minutes, _, seconds = stamp.partition(':')
    return float(seconds) + 60 * float(minutes)


def parse_res_line(line, timeout=180.0):
    """Parse a single line of a .res file.

    Parameters
    ----------
    line : str
        Raw line as read from the file (including its trailing newline).
    timeout : float
        Time in seconds recorded for lines that report a ``timelimit``.

    Returns
    -------
    tuple or None
        ``(name, record)`` where ``record`` holds ``'c'`` (instance class),
        ``'rec'`` (recursive step count), ``'t'`` (time in seconds) and
        ``'k'``; ``None`` if the line is not a recognised result line.

    Raises
    ------
    ValueError, IndexError
        If a recognised result line does not have the expected layout.

    Notes
    -----
    The time is always read from the second tab-separated field of *this*
    line. NOTE(review): this assumes that 'error' and 'recursive steps'
    lines use the same tab-separated layout as 'correct' lines — confirm
    against a real .res file.
    """
    marker = 'instances/'
    start = line.find(marker)
    if start == -1:
        # No instance path on this line, so there is nothing to record.
        return None
    line = line[start + len(marker):]

    if 'complex' in line:
        category = 'complex'
    elif 'synthetic' in line:
        category = 'synthetic'
    else:
        category = 'unknown'
    record = {'c': category}

    # Instance name: everything between the first '/' and the first space.
    name = line[line.index('/') + 1:line.index(' ')]
    fields = line.split('\t')

    def elapsed():
        # First space-separated token of the second tab-separated field.
        return parse_time(fields[1].strip().split(' ')[0])

    if 'correct' in line:
        rec_steps = fields[3].strip().split(' ')
        # Third token of the fourth field is the step count, when present.
        record['rec'] = int(rec_steps[2]) if len(rec_steps) >= 3 else 0
        record['t'] = elapsed()
        record['k'] = int(fields[2][1:fields[2].index(',')])
    elif 'timelimit' in line:
        record['rec'] = 0
        record['t'] = timeout
        record['k'] = 0
    elif 'error' in line and line[-2] == "0":
        # Only error lines whose second-to-last character is "0" are kept.
        # NOTE(review): presumably a status code before the newline — confirm.
        record['rec'] = 0
        record['t'] = elapsed()
        head = line[:line.index(',')]
        # k is the last space-separated token before the first comma.
        record['k'] = int(head[head.rindex(' '):])
    elif 'recursive steps' in line:
        # NOTE(review): 'rec' is stored as 0 here even though the line
        # mentions recursive steps — kept as in the original; confirm.
        record['rec'] = 0
        record['t'] = elapsed()
        record['k'] = int(fields[2][1:len(fields[2]) - 1])
    else:
        return None

    return name, record


for file in glob.glob('./final_res/*.res'):
    data = {}
    with open(file, 'r') as f:
        for line in f:
            parsed = parse_res_line(line)
            if parsed is None:
                continue
            name, record = parsed
            # A later line for the same instance overwrites an earlier one.
            data[name] = record

    # file[12:] strips the leading './final_res/' so only the file name remains.
    with open('./final_json/' + file[12:] + '.json', 'w') as f:
        json.dump(data, f)

In [None]:
# Merge all JSON result files into a single pandas DataFrame:
# one row per instance, one time column per result file.

max_time = 180  # time recorded when a result file has no entry for an instance

# Load every result file, keyed by the part of its file name that follows
# './final_json/' and precedes '.res'.
data = {}
for json_file in glob.glob('./final_json/*.json'):
    with open(json_file, 'r') as f:
        payload = json.loads(f.read())
        run_name = json_file[13:json_file.index('.res')]
        data[run_name] = payload

# Build one row per instance. 'k' and 'c' come from the first result file
# in which the instance appears.
complete = {}
for run_results in data.values():
    for instance, first_seen in run_results.items():
        if instance in complete:
            continue
        row = complete[instance] = {'k': first_seen['k'], 'c': first_seen['c']}
        for run_name, results in data.items():
            row[run_name] = results[instance]['t'] if instance in results else max_time

df = pd.DataFrame(complete).transpose()

In [None]:
# Display the complete merged DataFrame
df

In [None]:
# Head-to-head runtime comparison of two models on log-log axes

model_1 = 'abgabe_woche03'  # df column shown on the x-axis
model_2 = 'week4topo_all_mulitthread.jar'  # df column shown on the y-axis

model_1_name = 'Woche 3'
model_2_name = 'TOPO All'

plt.figure(figsize=(12, 10))

# scatter(<model on x>, <model on y>)
plt.scatter(df[model_1], df[model_2], marker="x")
plt.yscale('log')
plt.xscale('log')

full_span = [10**-4, 10**3]

# Diagonal: both models need the same time
plt.plot(full_span, full_span, c='red', ls='--')

# Timeout lines at 180 seconds for each axis
plt.plot(full_span, [180, 180], c='black', ls='--')
plt.plot([180, 180], full_span, c='black', ls='--')

# Guide lines for a factor of 10, 100 and 1000 between the two models:
# (label above diagonal, label below diagonal, label offset,
#  end points of the smaller coordinate, end points of the larger coordinate)
guides = [
    ('x10', 'x0.1', 0.75, [0.001, 100], [0.01, 1000]),
    ('x100', 'x0.01', 7.5, [0.001, 10], [0.1, 1000]),
    ('x1000', 'x0.001', 75, [0.001, 1], [1, 1000]),
]
for above_label, below_label, offset, small, large in guides:
    plt.text(0.06, offset, above_label, rotation=45)
    plt.text(offset, 0.06, below_label, rotation=45)
    plt.plot(small, large, color='green', ls='--', lw=0.5)
    plt.plot(large, small, color='green', ls='--', lw=0.5)

axis_limits = [5*10**-2, 300]
plt.xlim(axis_limits)
plt.ylim(axis_limits)
plt.title(f'Comparisson of {model_1_name} and {model_2_name}')
plt.xlabel(f'{model_1_name} | time in seconds')
plt.ylabel(f'{model_2_name} | time in seconds')
plt.show()

In [None]:
# Instances on which model_2 took more than ten times as long as model_1
df[df[model_1]*10 < df[model_2]]

In [None]:
# Solving time of one model against the number of vertices, coloured by k.
# First figure: complex and synthetic instances; second: synthetic only.
# NOTE(review): relies on df having an 'n' column, which the merge cell shown
# in this notebook does not create — confirm where it is added.
model = 'woche3'

# Keep only instances that finished below the 180 s mark.
filt = df[model] < 180
complex_filt = np.logical_and(filt, df['c'] == 'complex')
synth_filt = np.logical_and(filt, df['c'] == 'synthetic')

# Both point sets must share one colour scale, otherwise the single colour
# bar (built from the first scatter) would not describe the second one.
shown = np.logical_or(complex_filt, synth_filt)
k_range = {}
if shown.any():
    k_shown = pd.to_numeric(df.loc[shown, 'k'])
    k_range = {'vmin': k_shown.min(), 'vmax': k_shown.max()}

fig, ax = plt.subplots(figsize=(8, 4))
im = ax.scatter(df.loc[complex_filt, 'n'], df.loc[complex_filt, model],
                c=df.loc[complex_filt, 'k'], cmap='turbo', marker='+', s=18,
                **k_range)
# Drawn directly on the axes; Artist.add_callback expects a callable and is
# not the way to add a second scatter.
ax.scatter(df.loc[synth_filt, 'n'], df.loc[synth_filt, model],
           c=df.loc[synth_filt, 'k'], cmap='turbo', marker='x', s=16,
           **k_range)

ax.set_yscale('log')
ax.set_xlabel('number of vertices')
ax.set_ylabel('time in seconds')
ax.legend(['complex', 'synthetic'])

color_bar = fig.colorbar(im, orientation="vertical")
color_bar.ax.set_ylabel('k')
plt.savefig(model + '_n-k-t-plot.pdf')
plt.show()

# Second figure: synthetic instances only, with their own colour scale.
fig, ax = plt.subplots(figsize=(11, 6))
filt = np.logical_and(filt, df['c'] == 'synthetic')
synth_filt = filt  # filt is already restricted to synthetic instances

im = ax.scatter(df.loc[synth_filt, 'n'], df.loc[synth_filt, model],
                c=df.loc[synth_filt, 'k'], cmap='turbo', marker='x', s=16)
ax.set_yscale('log')
ax.set_xlabel('number of vertices')
ax.set_ylabel('time in seconds')
ax.legend(['synthetic'])

color_bar = fig.colorbar(im, orientation="vertical")
color_bar.ax.set_ylabel('k')
plt.savefig(model + '_n-cutted_k-t-plot.pdf')
plt.show()

In [None]:
# Scatter plot of #edges/#vertices against k, coloured by solving time.
# Sheet 2, lower figure.
model = 'woche3'

# Synthetic instances that took more than 0.1 s but stayed below 180 s.
within_limit = df[model] < 180
above_floor = df[model] > 0.1
filt = np.logical_and(within_limit, above_floor)
filt = np.logical_and(filt, df['c'] == 'synthetic')
# Optional extra restrictions (disabled):
#filt = np.logical_and(filt, df['n'] < 301)
#filt = np.logical_and(filt, df['k'] < 31)
selected = df[filt]

fig, ax = plt.subplots(figsize=(11, 6))
im = ax.scatter(
    selected['m'] / selected['n'],
    selected['k'],
    c=selected[model],
    cmap='turbo',
    norm=colors.LogNorm(),  # logarithmic colour scale for the time
    s=30,
    marker='x',
)
ax.set_xlabel('#edges / #vertices')
ax.set_ylabel('k')
ax.set_title('Correlation between #edges, #vertices, k and solving time in synthetic instances')

# Dashed red ellipse marking a region of the plot
highlight = Ellipse([7.5, 24], height=18, width=9, angle=-15, edgecolor='r', fc='None', lw=2, ls='--')
ax.add_patch(highlight)

color_bar = fig.colorbar(im, orientation="vertical")
color_bar.ax.set_ylabel('time in seconds')
plt.savefig('m_n_k_t_correlation.pdf');

In [None]:
# Plots for the cleaning step: relative reduction of n, and the lower bound
# for k after cleaning, each against the number of vertices.
with open('cleaning_all.json', 'r') as f:
    cleaning = pd.DataFrame(json.load(f)).transpose()
cleaning['t'] = cleaning['t'] / 10**6  # transform nanoseconds into milliseconds

# Fraction of vertices removed by cleaning (0 = nothing removed).
reduction = 1 - cleaning['n_after'] / cleaning['n_prev']
plt.figure(figsize=(12, 10))
plt.scatter(cleaning['n_prev'], reduction, marker='+')
plt.xlabel('Number of vertices (n)')
plt.ylabel('Relative reduction in n after cleaning')
plt.show()

plt.figure(figsize=(12, 10))
plt.scatter(cleaning['n_prev'], cleaning['k'], marker='+')
plt.xlabel('Number of vertices (n)')
plt.ylabel('Lower bound for k after cleaning')
# Reference diagonal k = n.
# NOTE(review): it starts at 0, which has no position on a log axis —
# confirm the line renders as intended.
plt.plot([0, 250], [0, 250], 'r--')
plt.yscale('log')
plt.xscale('log')
# The original had `plt.show;`, which only names the function without calling it.
plt.show()

# Week 4 Plots

In [None]:
# Peek at the first two rows of the merged DataFrame
df.head(2)

In [None]:
# Bar chart: for every df column whose name contains '_topo', the number of
# instances whose recorded time equals 180 (the timeout value).
data = {column: sum(df[column] == 180) for column in df.columns if '_topo' in column}

# print(data)

# One bar every two x-units; labels alternate between the upper and lower
# tick-label row via a leading '\n'.
bar_positions = [2 * slot for slot in range(len(data))]
bar_labels = [
    '\nnaive topo',
    'simple reduction',
    '\nfull reduction',
    'digraph constraints',
    '\n10sec',
    '\n20sec',
    '                     Adding edge-disjoint Circle constraints for up to ... sec\n30sec',
    '\n60sec',
    '\n120sec',
    '\n180sec',
]

plt.figure(figsize=(12, 8))
for x_pos, timeouts in zip(bar_positions, data.values()):
    plt.bar(x_pos, timeouts, width=0.5, color='steelblue')
    # Write the count just above its bar
    plt.annotate(xy=(x_pos, timeouts + 1), text=str(timeouts), ha='center')
plt.xticks(bar_positions, bar_labels)
plt.ylabel('Number of Timeout Instances')
# Disabled reference line for the solution file timeouts:
#plt.plot([-1, 20], [138, 138], ls='--', lw=2, c='black')
plt.xlim([-0.75, 18.75])
plt.xlabel('Solver version', size=15)
plt.ylim([90, 205])
plt.title('Comparisson of different solvers based on topological sorting')
plt.savefig('topo_version_comparisson.pdf')
plt.show();