# FAIR assessment report notebook from F-uji responses

This notebook provides a basic analysis and visualization of one or several F-uji JSON responses.

Authors: Jens Bröder (j.broeder@fz-juelich.de)

Hint: The notebook can be exported as PDF, LaTeX or HTML; we recommend hiding the input cells,
for example by executing
```
$jupyter nbconvert --no-input --to html Create_FAIR_assessment_report.ipynb
```

In [None]:
# Directory that is scanned for F-uji responses; every entry found there is read as a JSON file further below.
results_path = './results/' # We assume that all (and only) JSON response files from a F-uji server are under this path

In [None]:
import os
import json
import pandas as pd
from collections.abc import MutableMapping
import numpy as np

from bokeh.layouts import gridplot
from bokeh.io import output_file, show, output_notebook
from bokeh.plotting import figure as bokeh_figure

In [None]:
# Configure bokeh to display the figures passed to show() inline in the notebook output
output_notebook()

In [None]:
# helpers
def flatten_dict(d: MutableMapping, sep: str= '.') -> MutableMapping:
    """Flatten a given nested python dict and return a dict

    The keys of nested dicts are joined with ``sep``, e.g. ``{'a': {'b': 1}}``
    becomes ``{'a.b': 1}``.

    :param d: the (possibly nested) dict to flatten
    :param sep: separator placed between the key levels
    :return: a flat dict, or an empty dict if ``d`` is empty
    """
    if not d:
        # An empty input appears to yield no record at all, so the single-element
        # unpacking below would fail with an unhelpful ValueError.
        return {}
    # json_normalize does the flattening; a single dict gives exactly one record.
    [flat_dict] = pd.json_normalize(d, sep=sep).to_dict(orient='records')
    return flat_dict

In [None]:
def bokeh_histogram_plot(hist, edges, x=None, pdf=None, cdf=None, title=None, xlabel='x', ylabel='y', figure=None):
    """Draw an interactive bokeh histogram, optionally overlaid with a PDF and/or CDF curve.

    :param hist: bar heights, one per bin
    :param edges: bin edges; ``edges[:-1]`` / ``edges[1:]`` are used as left / right bar borders
    :param x: x values for the overlay curves; without it no curve is drawn
    :param pdf: y values of the curve labelled "PDF" (optional)
    :param cdf: y values of the curve labelled "CDF" (optional)
    :param title: title of a newly created figure (not applied to a given ``figure``)
    :param xlabel: label of the x axis
    :param ylabel: label of the y axis
    :param figure: existing figure to draw on; a new one is created if None
    :return: the figure that was drawn on
    """
    if figure is not None:
        plot = figure
    else:
        plot = bokeh_figure(title=title, tools='hover,pan,wheel_zoom,box_zoom,reset, save')

    # The histogram bars: one quad per bin, standing on the x axis.
    left_borders, right_borders = edges[:-1], edges[1:]
    plot.quad(top=hist, bottom=0, left=left_borders, right=right_borders,
              fill_color="navy", line_color="white", alpha=0.5)

    # Optional curves on top of the bars, drawn in this order: PDF, then CDF.
    overlays = (
        (pdf, dict(line_color="#ff8888", line_width=4, alpha=0.7, legend_label="PDF")),
        (cdf, dict(line_color="orange", line_width=2, alpha=0.7, legend_label="CDF")),
    )
    if x is not None:
        for curve, line_style in overlays:
            if curve is not None:
                plot.line(x, curve, **line_style)

    plot.y_range.start = 0
    plot.xaxis.axis_label = xlabel
    plot.yaxis.axis_label = ylabel
    plot.grid.grid_line_color = "white"
    return plot

# Read JSON responses

In [None]:
# os.listdir returns the entries in arbitrary order and also lists sub-directories.
# Sort the names so the row order of the later DataFrame is reproducible, and keep
# regular files only, since a directory cannot be opened as a response file.
all_files = sorted(
    name for name in os.listdir(results_path)
    if os.path.isfile(os.path.join(results_path, name))
)
print('Reading results from this path {}'.format(results_path))

In [None]:
# Parse every response file into a python object; the list order follows all_files.
all_data = []
for filename in all_files:
    with open(os.path.join(results_path, filename), 'r', encoding='utf-8') as handle:
        all_data.append(json.load(handle))
        


In [None]:
# Build a column-wise table: all_data_dict maps each flattened key to a list
# holding one value per response, in the order of all_data.
all_keys = []
all_data_dict = {}
to_remove = ['results']
for i, data in enumerate(all_data):
    # Flatten the top level of the response; the 'results' list is expanded separately below.
    datat = flatten_dict(data)
    for key in to_remove:
        datat.pop(key, None)
    # Expand the metric results of *this* response (not of the first one),
    # suffixing every key with the id of the result entry it belongs to.
    for res in data.get('results') or []:
        results = flatten_dict(res)
        id_t = results['id']
        for key, val in results.items():
            datat['{}_id{}'.format(key, id_t)] = val
    # request.normalized_object_identifier not always there
    datat.setdefault('request.normalized_object_identifier', None)
    # Keep all columns the same length, otherwise pd.DataFrame cannot be built:
    # a key seen for the first time is back-filled with None for the earlier responses ...
    for key in datat:
        if key not in all_data_dict:
            all_data_dict[key] = [None] * i
    # ... and a key missing in this response gets None.
    for key, column in all_data_dict.items():
        column.append(datat.get(key))
    

#print(list(all_data_dict.keys()))

In [None]:
# One column per flattened key, one row per response that was read
df = pd.DataFrame(data=all_data_dict)

In [None]:
# Overview table with pandas' summary statistics of the collected data
df.describe()

# Visualize different FAIR metrics

In [None]:
# Text summary that the following cells extend and the last cell prints
sumup_string = 'Sum up of evaluation:\n'

In [None]:
# Distribution of the overall FAIR score (in percent) over all responses:
# add mean and standard deviation to the text summary and show a histogram.
key = 'summary.score_percent.FAIR'
measured = df[key]
sumup_string += 'Total FAIR score: {} +- {}\n'.format(measured.mean(), measured.std())
hist, edges = np.histogram(measured, bins=50, range=(0, 100), density=False)
fig = bokeh_histogram_plot(
    hist,
    edges,
    title='Summary FAIR score percent',
    xlabel='Total score %',
    ylabel='# data sets',
)
print(measured.describe())
show(fig)

In [None]:
# Group the 'summary.score_percent.*' columns by FAIR principle (F, A, I, R).
# The overall 'FAIR' column is excluded; it is visualized on its own.
score_prefix = 'summary.score_percent.'
keys = {'F': [], 'A': [], 'I': [], 'R': []}
for key in df.keys():
    if key == 'summary.score_percent.FAIR' or not key.startswith(score_prefix):
        continue
    # Take everything after the prefix as the metric name. Splitting on '.' would
    # truncate names that contain a dot themselves (e.g. 'R1.1' would become '1')
    # and such columns would be silently dropped.
    end = key[len(score_prefix):]
    for k in keys:
        if end.startswith(k):
            keys[k].append(key)
            break

In [None]:
# For every FAIR principle: extend the text summary with mean and standard deviation
# of each of its score columns and show a histogram per column.
score_prefix = 'summary.score_percent.'
for principle in ('F', 'A', 'I', 'R'):
    sumup_string = sumup_string + '\n' + '\n' + '{} scores:\n'.format(principle)
    for key in keys[principle]:
        measured = df[key]
        # Label with the full metric name; splitting on '.' alone would cut
        # names that contain a dot (e.g. 'R1.1') down to their last part.
        if key.startswith(score_prefix):
            metric = key[len(score_prefix):]
        else:
            metric = key.split('.')[-1]
        sumup_string = sumup_string + 'Total {} score: {} +- {}\n'.format(metric, measured.mean(), measured.std())
        hist, edges = np.histogram(measured, density=False, range=(0, 100), bins=50)
        fig = bokeh_histogram_plot(hist, edges, title=key, xlabel='Total score %', ylabel='# data sets')
        print(measured.describe())
        show(fig)

In [None]:
# TODO nicer sum up, piecharts, plots as we see on the client side 

In [None]:
# Print the text summary collected in the cells above
print(sumup_string)