## Parser for .res files

Directory should look like this:
```
results
| plots.ipynb
└─final_json
   └ json files created by this notebook
└─final_res
   └ res file outputs from server testing
     should have distinct names
└─plots
   └ pdf output of plots
└─presentation
   └ complete presentation
```

In [None]:
# Imports for reading the data, building the DataFrame and plotting
import json
import glob
import matplotlib.pyplot as plt
import pandas as pd
from math import log
import numpy as np
from matplotlib import cm
from matplotlib.pyplot import figure
from matplotlib import colors
from matplotlib.patches import Ellipse
# Open a matplotlib figure with figsize (12, 6).
# NOTE(review): presumably meant as a default plot size — confirm; most later
# cells create their own figures with explicit sizes.
figure(figsize=(12, 6));

In [None]:
# Load the graph sizes. Each row of the file is "<name> <n> <m>" separated by
# single spaces, with n the number of vertices and m the number of edges.
size_table = pd.read_csv('Graphdaten.txt', sep=' ', header=None, index_col=0)
size_table.columns = ['n', 'm']

# Flip the table so each graph name becomes a column holding its 'n' and 'm';
# this allows lookups of the form graph_data[name]['n'].
graph_data = size_table.T

In [None]:
import os


def parse_res_line(line, graph_data):
    """Parse one line of a .res file into ``(name, record)``.

    Only lines containing the substring 'correct' are treated as results.
    NOTE(review): that substring test also matches 'incorrect' — confirm the
    .res format never emits it.

    A result line is split on tabs and read as follows:

    * field 1: its leading token is a ``<minutes>:<seconds>`` time,
    * field 2: the text between its first character and its first comma is k,
    * field 3: its third space-separated token, when present, is stored as
      'rec' (presumably the number of recursive steps); otherwise 'rec' is 0.

    The instance name is the text between ``<category>/`` and the first space
    of the line, where category is 'complex', 'synthetic' or 'unknown'.

    Args:
        line: One raw line of a .res file.
        graph_data: Object supporting ``graph_data[name]['n']`` and
            ``graph_data[name]['m']`` and raising KeyError for unknown names.

    Returns:
        ``(name, record)`` where record has the keys 'rec', 't' (seconds),
        'k', 'c', 'n' and 'm'; or ``None`` when the line is not a result
        line, the instance name cannot be located, or the graph is not
        listed in ``graph_data``.

    Raises:
        ValueError, IndexError: If the time, k or rec fields of a result line
            are malformed.
    """
    if 'correct' not in line:
        return None

    fields = line.split('\t')
    clock = fields[1].strip().split(' ')[0]
    rec_tokens = fields[3].strip().split(' ')
    minutes, _, seconds = clock.partition(':')

    record = {
        'rec': int(rec_tokens[2]) if len(rec_tokens) >= 3 else 0,
        't': float(seconds) + 60*float(minutes),
        'k': int(fields[2][1:fields[2].index(',')]),
    }

    if 'complex' in line:
        record['c'] = 'complex'
    elif 'synthetic' in line:
        record['c'] = 'synthetic'
    else:
        record['c'] = 'unknown'

    # Without a "<category>/" marker (always the case for unrecognised
    # categories unless the path literally contains "unknown/") there is no
    # way to tell which instance the line belongs to, so skip it instead of
    # aborting the whole run with a ValueError.
    try:
        name_start = line.index(record['c'] + '/') + len(record['c']) + 1
        name = line[name_start:line.index(' ')]
    except ValueError:
        return None

    # Results for graphs without known sizes are dropped.
    try:
        record['n'] = int(graph_data[name]['n'])
        record['m'] = int(graph_data[name]['m'])
    except KeyError:
        return None

    return name, record


# Convert every .res file into a JSON file of the same name (plus '.json')
# that maps instance name -> parsed record.
for res_path in glob.glob('./final_res/*.res'):
    data = {}
    with open(res_path, 'r') as f:
        for line in f:
            parsed = parse_res_line(line, graph_data)
            if parsed is None:
                continue
            instance_name, instance_record = parsed
            # A later line for the same instance replaces an earlier one.
            data[instance_name] = instance_record

    # The '<name>.res.json' naming is relied upon when the JSON files are
    # merged, so only the directory part of the path is replaced.
    with open('./final_json/' + os.path.basename(res_path) + '.json', 'w') as f:
        json.dump(data, f)

In [None]:
# Combine every JSON file in ./final_json into a single pandas DataFrame with
# one row per instance and one runtime column per model.

max_time = 180  # runtime entered for instances a model has no result for

# data: model key -> {instance name -> record}
data = {}
for json_path in glob.glob('./final_json/*.json'):
    with open(json_path, 'r') as handle:
        records = json.loads(handle.read())
    # The model key is the file name without directory and '.res.json' suffix.
    data[json_path[13:json_path.index('.res')]] = records

# complete: instance name -> row with k, n, m, c and one runtime per model.
complete = {}
for model_records in data.values():
    for instance, record in model_records.items():
        if instance in complete:
            continue
        # k, n, m and c are taken from the first model that lists the instance.
        row = {key: record[key] for key in ('k', 'n', 'm', 'c')}
        for model_key, other_records in data.items():
            if instance in other_records:
                row[model_key] = other_records[instance]['t']
            else:
                row[model_key] = max_time
        complete[instance] = row

df = pd.DataFrame(complete).transpose()

In [None]:
# Complete DataFrame: one row per instance with k, n, m, c and one runtime column per model
df

In [None]:
# Head-to-head runtime comparison of two models on log-log axes.

model_1 = 'woche3'  # column shown on the x-axis
model_2 = 'week2fixed.jar'  # column shown on the y-axis

model_1_name = 'Week 3'
model_2_name = 'Week 2'

axis_limit = max_time*1.2

# One marker per instance: x = runtime of model_1, y = runtime of model_2
plt.scatter(df[model_1], df[model_2], marker = "x")
plt.yscale('log')
plt.xlim(0.07, axis_limit)
plt.ylim(0.07, axis_limit)
plt.xscale('log')

# Diagonal on which both models need the same time
plt.plot([0, axis_limit], [0, axis_limit], color='red', ls='--', lw=1)

# Guide lines along which one model is 10x (first pair) or 100x (second pair)
# slower than the other
guide_lines = (
    ([0.01, 100], [0.1, 1000]),
    ([0.1, 1000], [0.01, 100]),
    ([0.01, 10], [1, 1000]),
    ([1, 1000], [0.01, 10]),
)
for guide_x, guide_y in guide_lines:
    plt.plot(guide_x, guide_y, color='green', ls='--', lw=0.5)

plt.title(f'Comparisson of {model_1_name} and {model_2_name}')
plt.xlabel(f'{model_1_name} | time in seconds')
plt.ylabel(f'{model_2_name} | time in seconds')
plt.show()

In [None]:
# Instances on which model_1 needed more than ten times as long as model_2
df.loc[df[model_1] > df[model_2]*10]

In [None]:
# Runtime of one model against the number of vertices, colored by k.
# First figure: complex and synthetic instances; second figure: synthetic only.
model = 'woche3'

# Keep only instances solved within the time limit; instances without a
# result were entered with max_time when the DataFrame was built.
filt = df[model] < max_time

complex_filt = np.logical_and(filt, df['c'] == 'complex')
synth_filt = np.logical_and(filt, df['c'] == 'synthetic')

# Both scatters must share one color normalization: the colorbar is built
# from the first scatter only, so independently auto-scaled colors would
# make the synthetic markers disagree with it.
k_shown = df[np.logical_or(complex_filt, synth_filt)]['k'].astype(float)
k_norm = None if k_shown.empty else colors.Normalize(vmin=k_shown.min(), vmax=k_shown.max())

fig, ax = plt.subplots(figsize=(8,4))

im = ax.scatter(df[complex_filt]['n'], df[complex_filt][model], c=df[complex_filt]['k'], cmap='turbo', norm=k_norm, marker='+', s=18)
# Drawn directly on the axes; Artist.add_callback expects a callable, not
# another artist, so the scatter must not be passed to it.
ax.scatter(df[synth_filt]['n'], df[synth_filt][model], c=df[synth_filt]['k'], cmap='turbo', norm=k_norm, marker='x', s=16)

ax.set_yscale('log')
ax.set_xlabel('number of vertices')
ax.set_ylabel('time in seconds')
ax.legend(['complex', 'synthetic'])

color_bar = fig.colorbar(im, orientation="vertical")
color_bar.ax.set_ylabel('k')
plt.savefig(model + '_n-k-t-plot.pdf')
plt.show()

# Second figure: the synthetic instances on their own, with a color scale
# fitted to their k values only.
fig, ax = plt.subplots(figsize=(11,6))
filt = synth_filt

im = ax.scatter(df[synth_filt]['n'], df[synth_filt][model], c=df[synth_filt]['k'], cmap='turbo', marker='x', s=16)
ax.set_yscale('log')
ax.set_xlabel('number of vertices')
ax.set_ylabel('time in seconds')
ax.legend(['synthetic'])

color_bar = fig.colorbar(im, orientation="vertical")
color_bar.ax.set_ylabel('k')
plt.savefig(model + '_n-cutted_k-t-plot.pdf')
plt.show()

In [None]:
# Scatter plot of edge density (m/n) against k, colored by solving time.
# Sheet 2, lower figure.
model = 'woche3'

# Synthetic instances that took more than 0.1 s and less than 180 s
filt = (df[model] < 180) & (df[model] > 0.1) & (df['c'] == 'synthetic')
# Optional further restrictions:
#filt = filt & (df['n'] < 301)
#filt = filt & (df['k'] < 31)
selected = df[filt]

fig, ax = plt.subplots(figsize=(11,6))

im = ax.scatter(
    selected['m']/selected['n'],
    selected['k'],
    c=selected[model],
    cmap='turbo',
    norm=colors.LogNorm(),
    s = 30,
    marker='x',
)
ax.set_xlabel('#edges / #vertices')
ax.set_ylabel('k')
ax.set_title('Correlation between #edges, #vertices, k and solving time in synthetic instances')

# Dashed red ellipse highlighting a region of the plot
highlight = Ellipse([7.5,24], height=18, width=9, angle=-15,edgecolor='r', fc='None', lw=2, ls='--')
ax.add_patch(highlight)

color_bar = fig.colorbar(im, orientation="vertical")
color_bar.ax.set_ylabel('time in seconds')
plt.savefig('m_n_k_t_correlation.pdf');

In [None]:
# Effect of the cleaning step, read from cleaning_all.json.
# The file is transposed after loading so that its top-level keys become
# rows and the per-entry fields ('t', 'n_prev', 'n_after', 'k') become columns.
with open('cleaning_all.json', 'r') as f:
    cleaning = pd.DataFrame(json.load(f)).transpose()
cleaning['t'] = cleaning['t']/10**6 # transform nanoseconds into milliseconds

# Fraction of vertices removed by cleaning: 0 = nothing removed, 1 = all removed
reduction = 1 - cleaning['n_after']/cleaning['n_prev']

# Figure 1: relative reduction against the original number of vertices
plt.figure(figsize=(12,10))
plt.scatter(cleaning['n_prev'], reduction, marker='+')
plt.xlabel('Number of vertices (n)')
plt.ylabel('Relative reduction in n after cleaning')
plt.show()

# Figure 2: k after cleaning against the original number of vertices
plt.figure(figsize=(12,10))
plt.scatter(cleaning['n_prev'], cleaning['k'], marker='+')
plt.xlabel('Number of vertices (n)')
plt.ylabel('Lower bound for k after cleaning')
# Reference line k = n.
# NOTE(review): its (0, 0) endpoint has no position on a log axis and is
# clipped by matplotlib — confirm the line looks as intended.
plt.plot([0,250], [0, 250], 'r--')
plt.yscale('log')
plt.xscale('log')
# Call show(); the bare attribute `plt.show` is a no-op expression.
plt.show()