In [205]:
# %matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import json
from os.path import join

In [206]:
# Experiment selection: dataset, reduction level and seed of the run to analyse.
# Level and seed are kept as strings because they are used as path components.
dataset_name = 'proteins'
level_reduction = '100'
seed = '1860'

# The statistics file is read from <folder>/<level_reduction>/<seed>/graph_stats.json
folder = f'../results/{dataset_name}/analysis_refused'
filename = join(folder, level_reduction, seed, 'graph_stats.json')

# Parse the JSON document into `graph_stats`.
with open(filename) as file:
    graph_stats = json.loads(file.read())

In [207]:
# Set matplotlib's style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and deprecated the bare 'seaborn' name, which newer releases no longer
# resolve. Use whichever spelling this installation actually provides; if
# neither is available the default style is left untouched.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

In [208]:
# NOTE(review): `a` is not referenced by any other visible cell — this looks
# like leftover scratch state. Confirm nothing outside this view reads it,
# then delete the cell.
a = 4

In [209]:
def plot_bar(values, title, xlabel='--', max_bar_value=50):
    """Plot how a per-graph statistic is distributed and show the figure.

    Small non-negative integer data is drawn as a bar chart with one bar per
    integer value. Integer data with a wider range is drawn as a histogram
    with one unit-wide bin per integer. Non-integer data (e.g. mean degrees)
    is drawn as a histogram with automatically chosen bins.

    Parameters
    ----------
    values : array-like of numbers
        One value per graph.
    title : str
        Figure title.
    xlabel : str, optional
        Label of the x axis.
    max_bar_value : int, optional
        Largest maximum value for which the bar chart is used; beyond it a
        histogram is drawn instead.

    Raises
    ------
    ValueError
        If ``values`` is empty.
    """
    data = np.asarray(values)
    if data.size == 0:
        raise ValueError('plot_bar() needs at least one value')

    lowest, highest = data.min(), data.max()
    # np.bincount only accepts non-negative integers, so check before using it
    # instead of crashing on float statistics.
    whole_numbers = bool(np.all(data == np.floor(data)))

    fig, ax = plt.subplots()

    if whole_numbers and lowest >= 0 and highest <= max_bar_value:
        counts = np.bincount(data.astype(np.int64))
        ax.bar(range(counts.size),
               counts,
               width=0.3,
               align='center',
               linewidth=0.9,
               edgecolor="white")
        ax.set(
            xticks=range(counts.size),
            # Bars are centred on the integers: pad half a unit on each side
            # so the bar at 0 is not clipped by the axis limit.
            xlim=[-0.5, counts.size - 0.5],
        )
    elif whole_numbers:
        # Edges first, ..., highest + 1 give every integer its own bin;
        # stopping one edge earlier would silently drop the maximum values.
        first_edge = min(0, int(lowest))
        ax.hist(data, bins=np.arange(first_edge, int(highest) + 2))
    else:
        ax.hist(data, bins='auto')

    ax.set(
        title=title,
        xlabel=xlabel,
        ylabel='# graphs'
    )

    plt.show()


def print_statistics(values, name):
    """Return a text block summarising ``values`` under the label ``name``.

    Despite the function's name nothing is printed; the caller receives the
    text. The block is a 20-dash separator line followed by three lines:
    mean±standard deviation, median, and min-max, each formatted with two
    decimals and terminated by a newline.
    """
    separator = '-' * 20
    lines = [
        separator,
        f'Mean {name}: {np.mean(values):.2f}±{np.std(values):.2f}',
        f'Median {name}: {np.median(values):.2f}',
        f'Min-Max {name}: {np.min(values):.2f}-{np.max(values):.2f}',
    ]
    return '\n'.join(lines) + '\n'

In [210]:
# Build the text report: one summary block per graph-level statistic, in the
# order they should appear in the output.
n_nodes = np.array(graph_stats['n_nodes'])
n_edges = np.array(graph_stats['n_edges'])

report_sections = [
    (graph_stats['n_connected_components'], 'num connected components'),
    (graph_stats['n_isolated_nodes'], 'num isolated nodes'),
    (graph_stats['n_nodes'], 'num nodes'),
    (graph_stats['n_edges'], 'num edges'),
    # Labelled "edge density" but computed as the edges-per-node ratio.
    (n_edges / n_nodes, 'edge density'),
    (graph_stats['mean_degrees'], 'mean degree'),
    (graph_stats['max_degrees'], 'max degrees'),
]

resume = f'-- statistics - {level_reduction} --\n'
resume += ''.join(print_statistics(section_values, section_label)
                  for section_values, section_label in report_sections)

# NOTE: this rebinds `filename`, which previously held the input JSON path.
filename = join(folder,
                f'resume_stats_seed_{seed}_lvl_{level_reduction}.txt')
# The report contains the non-ASCII character '±'. Pin the encoding so the
# write neither fails nor produces different bytes depending on the platform's
# default locale encoding.
with open(filename, 'w', encoding='utf-8') as output:
    output.write(resume)
print(resume)

-- statistics - 100 --
--------------------
Mean num connected components: 1.10±0.64
Median num connected components: 1.00
Min-Max num connected components: 1.00-9.00
--------------------
Mean num isolated nodes: 0.01±0.08
Median num isolated nodes: 0.00
Min-Max num isolated nodes: 0.00-1.00
--------------------
Mean num nodes: 39.35±47.29
Median num nodes: 25.00
Min-Max num nodes: 4.00-620.00
--------------------
Mean num edges: 73.33±88.72
Median num edges: 48.00
Min-Max num edges: 5.00-1049.00
--------------------
Mean edge density: 1.86±0.22
Median edge density: 1.83
Min-Max edge density: 0.86-2.50
--------------------
Mean mean degree: 3.72±0.44
Median mean degree: 3.67
Min-Max mean degree: 1.71-5.00
--------------------
Mean max degrees: 5.76±1.11
Median max degrees: 6.00
Min-Max max degrees: 3.00-12.00



In [211]:
## Connected Components
# plot_bar(graph_stats['n_connected_components'],
#          title=f'{dataset_name.upper()} - Analysis Connected Components',
#          xlabel='# Connected Components / graph')

In [212]:
# Isolated nodes
# plot_bar(graph_stats['n_isolated_nodes'],
#          title=f'{dataset_name.upper()} - Analysis Isolated Nodes',
#          xlabel='# isolated nodes / graph')

In [213]:
# plot_bar(graph_stats['n_nodes'],
#          title=f'{dataset_name.upper()} - Analysis Number of Nodes',
#          xlabel='# nodes / graph')

In [214]:
# plot_bar(graph_stats['n_edges'],
#          title=f'{dataset_name.upper()} - Analysis Number of Edges',
#          xlabel='# edges / graph')

In [215]:
# plot_bar(graph_stats['mean_degrees'],
#          title=f'{dataset_name.upper()} - Analysis Mean Degrees',
#          xlabel='# mean degrees / graph')

In [216]:

# plot_bar(graph_stats['max_degrees'],
#          title=f'{dataset_name.upper()} - Analysis Max Degree',
#          xlabel='# max degrees / graph')