This notebook creates [this Confluence page](https://confluence.dhigroupinc.com/display/MT/Composite+Model+Results).

>**Note:** You need to set up a [Personal Access Token](https://confluence.dhigroupinc.com/display/MT/Writing+and+editing+Confluence+pages+with+Python) and provide it in the last cell of this notebook. In the future, we should read the credentials from a file instead.

In [None]:
%pip install atlassian-python-api

In [None]:
import os
import re
import json
from itertools import chain

from IPython.display import HTML, Markdown as md
import pandas as pd
import yaml

# Parse the notebook settings from config.yml (path is relative to the working directory).
with open('config.yml', mode='r') as config_file:
    params = yaml.load(config_file, Loader=yaml.FullLoader)
# Echo the parsed settings as the cell output.
params

In [None]:
# List the model version directories, newest version first.
#
# Only directories named exactly MAJOR.MINOR.PATCH are kept. The dots are escaped and the whole name
# must match, so a name such as '1a2b3' is rejected here instead of crashing int() below.
version_pattern = re.compile(r'\d+\.\d+\.\d+')


def _version_key(dirname):
    """Return the (major, minor, patch) integer tuple used to order a version directory name."""
    return tuple(int(part) for part in dirname.split('.'))


# Tuples compare major first, then minor, then patch, so reverse=True yields a true descending
# version order (the previous key compared the patch number first). The original directory names
# are kept as-is so they can still be joined onto params['model_dir'] later.
model_dirs = sorted(
    (entry for entry in os.listdir(params['model_dir'])
     if version_pattern.fullmatch(entry)
     and os.path.isdir(os.path.join(params['model_dir'], entry))),
    key=_version_key,
    reverse=True,
)
model_dirs

In [None]:
def get_xtab_html(xtab, column='overall'):
    """Render a cross-tabulation as a left-floated HTML table.

    Args:
        xtab: Data accepted by ``pd.DataFrame``; its column labels are strings that evaluate to numbers.
        column: Name shown for the column axis (the index axis is always named 'pred').

    Returns:
        str: The table HTML, with the default pandas ``<table>`` tag swapped for a floated one.
    """
    frame = pd.DataFrame(xtab)
    # NOTE(review): eval() executes whatever the column labels contain. The labels presumably come
    # from results.json -- confirm that source is trusted, or switch to a numeric parser.
    frame.columns = [int(eval(c)) for c in frame.columns]
    labeled = frame.rename_axis(index='pred', columns=column)
    pandas_open_tag = '<table border="1" class="dataframe">'
    floated_open_tag = '<table border="1" style="float: left; margin-top:0; margin-right:10px">'
    return labeled.to_html().replace(pandas_open_tag, floated_open_tag)

In [None]:
def get_stats_table_html(d):
    """Render the known statistics found in ``d`` as a left-floated HTML table.

    Args:
        d: Results dict. Each statistic listed below gets a row when its key is present; a 'timing'
            entry adds a final row showing ``d['timing']['median']`` with no decimals.

    Returns:
        str: The ``<table>`` HTML. Integers are printed as-is, lists as comma-separated fixed-point
        numbers, and any other value as a fixed-point number with the statistic's precision.
    """
    # (statistic key, decimal places), in the order the rows are displayed.
    precision_by_stat = (
        ('gaussian_accuracy', 3),
        ('one_half_accuracy', 3),
        ('absolute_accuracy', 3),
        ('n_correct', 0),
        ('n_records', 0),
        ('prediction_thresholds', 3),
        ('importances', 2),
        ('disk_mb', 2),
    )

    pieces = ['\n<table border="1" style="float: left; margin-right:10px">']
    for stat, digits in precision_by_stat:
        if stat not in d:
            continue
        value = d[stat]
        if isinstance(value, int):
            cell = f'{value}'
        elif isinstance(value, list):
            cell = ', '.join(f"{item:0.{digits}f}" for item in value)
        else:
            cell = f'{value:.{digits}f}'
        pieces.append(f'\n  <tr>\n    <th>{stat}</th>\n      <td>{cell}</td>\n  </tr>')

    if 'timing' in d:
        median = d['timing']['median']
        pieces.append(f'\n  <tr>\n    <th>timing median ms</th><td>{median:.0f}</td>\n  </tr>')

    pieces.append('\n</table>')
    return ''.join(pieces)


In [None]:
def get_html_results(d, name=None):
    """Render one model's results, and recursively its submodels, as an HTML fragment.

    Args:
        d: Results dict. Keys read here: 'name', 'config' (its 'version' is required when the name is
            'composite'), and optionally 'xtab', 'skills', 'skills_xtab' and 'submodels'. The dict
            is also passed to ``get_stats_table_html`` for the statistics table.
        name: Fallback section name, used only when ``d`` has no 'name' key.

    Returns:
        str: The HTML fragment. A 'composite' entry gets an ``<H1>`` with the model version followed
        by an ``<H2>`` results heading; any other entry gets an ``<H3>`` results heading.

    Raises:
        KeyError: If ``d`` has no 'name' key and no ``name`` argument was given.
    """
    # The name stored with the results wins; the argument is only a fallback. Previously the
    # argument was accepted but unconditionally overwritten, so it could never take effect.
    if 'name' in d:
        name = d['name']
    elif name is None:
        raise KeyError('name')

    htmlstr = ''
    if name == 'composite':
        tag = 'H2'
        htmlstr += f"""<H1>Model {d['config']['version']}</H1>"""
    else:
        tag = 'H3'

    htmlstr += f"""
    <{tag}>{name} results<br/></{tag}><br/>"""

    htmlstr += get_stats_table_html(d)

    if 'xtab' in d:
        htmlstr += get_xtab_html(d['xtab'])

    # The skills statistics and their cross-tab share one inline block. The wrapper <div> is opened
    # and closed only when 'skills' is present; a lone 'skills_xtab' is emitted without it.
    has_skills = 'skills' in d
    if has_skills:
        htmlstr += '<div style="display: inline-table;margin-left: 30px">'
        htmlstr += '<H4>Skills subset</H4>'
        htmlstr += get_stats_table_html(d['skills'])
    if 'skills_xtab' in d:
        htmlstr += get_xtab_html(d['skills_xtab'], 'skills')
    if has_skills:
        htmlstr += """
            </div>"""

    if 'submodels' in d:
        htmlstr += '<hr style="width:50%;text-align:left;margin-left:0"></hr>'
        last_index = len(d['submodels']) - 1
        for i, sub in enumerate(d['submodels']):
            htmlstr += get_html_results(sub) + '<p></p>'
            # Separator between submodels, but not after the last one.
            if i < last_index:
                htmlstr += '<hr style="width:25%;text-align:left;margin-left:0"></hr>'
    return htmlstr


In [None]:
# Build the page: one detailed section per model version plus a summary table of all versions.
summary_stat_keys = ['disk_mb', 'n_records', 'n_correct', 'absolute_accuracy', 'one_half_accuracy',
                     'gaussian_accuracy']
summary_config_keys = ['train_data', 'labeled_data', 'skills_labeled_data']

htmlstr = """
"""
recs = []
for m in model_dirs:
    # Close the results file as soon as it is parsed; the rendering below does not need the handle.
    with open(os.path.join(params['model_dir'], m, 'results.json'), 'rb') as f:
        d = json.load(f)
    # Results saved without a config block fall back to the directory name as their version.
    d.setdefault('config', {'version': m})

    # One summary row per model; only the fields actually present in the results are copied.
    rec = {'model': m}
    if 'dsmatch_version' in d:
        rec['dsmatch_version'] = d['dsmatch_version']
    if 'timing' in d:
        rec['timing median ms'] = int(d['timing']['median'])
    rec.update({c: d[c] for c in summary_stat_keys if c in d})
    rec.update({c: d['config'][c] for c in summary_config_keys if c in d['config']})
    recs.append(rec)

    htmlstr += get_html_results(d)
    # Full-width rule between model versions. The style used to read "height=5px", which is not a
    # valid CSS declaration (they are written "property: value"), so it is now "height:5px".
    htmlstr += '<hr style="width:100%;height:5px;text-align:left;margin-left:0"></hr>'

if not recs:
    # Without any model there is no 'model' column to index the summary table by; fail clearly.
    raise RuntimeError(f"No model version directories found under {params['model_dir']!r}")

header = """
This page summarizes various composite models. For particular model descriptions, please see the 
<a href="https://bitbucket.org/dhigroupinc/dhi-match-model-composite/src/3.0.x/changelog.md">changelog</a>.
This page is automatically created when running <a href="https://bitbucket.org/dhigroupinc/dhi-match-model-composite/src/3.0.x/scripts/compile_results.ipynb">
this notebook</a>.
<H1>Summary Results</H1>
"""
# Summary table: one row per model version; statistics a model does not report are left blank.
header += f"""
{pd.DataFrame.from_records(recs).fillna('').set_index('model').to_html()}
"""

htmlstr = '<html>' + header + htmlstr + '</html>'

In [None]:
# print(htmlstr)

In [None]:
# HTML(htmlstr)

In [None]:
# Publish the generated HTML to Confluence, updating the page or creating it if it does not exist.
import getpass

from atlassian import Confluence

# The token must never be pasted into the notebook (it would be saved with it). It is read from the
# CONFLUENCE_TOKEN environment variable, or prompted for with hidden input when that is not set.
# Previously `auth_token` was only assigned in a commented-out line, so a fresh run hit a NameError.
auth_token = os.environ.get('CONFLUENCE_TOKEN') or getpass.getpass('Confluence personal access token: ')
confluence = Confluence(url='https://confluence.dhigroupinc.com', token=auth_token)
space = 'MT'
parent_title = 'Unsupervised Composite Match Model'
title = 'Composite Model Results'
parent_id = confluence.get_page_id(space, parent_title)  # This actually returns a string that can be cast to an int.
# NOTE(review): get_page_id appears to return None when the page is not found -- confirm against
# the installed atlassian-python-api version. Stop here rather than publish under a missing parent.
if not parent_id:
    raise RuntimeError(f"Parent page {parent_title!r} not found in Confluence space {space!r}")
# Update page or create it if it does not exist
confluence.update_or_create(parent_id, title, body=htmlstr, representation='storage');