In [1]:
import os
import glob
import json
import re
from pathlib import Path
import io
import time

In [2]:
def get_type(line):
    '''Classify a source line as the start of a top-level definition.

    Returns 'def' or 'class' when `line` starts (at column 0) with that
    keyword as a whole word, otherwise None. Indented lines are never
    matched, so methods and nested definitions are ignored.
    '''
    # `\b` keeps ordinary statements that merely begin with the same letters
    # (e.g. `default = 1`, `classes = []`) from being treated as definitions.
    match = re.match(r'(def|class)\b', line)
    return match.group(1) if match else None

# Header of a top-level definition: keyword, whitespace, identifier.
_HEADER_RE = re.compile(r'(def|class)\s+(\w+)')
# Class header: identifier plus an optional parenthesised base list.
_CLASS_RE = re.compile(r'class\s+(\w+)\s*(\([^)]*\))?')
# Docstring delimiters that are recognised.
_DOC_QUOTES = ("'''", '"""')


def _signature_end(lines, start):
    '''Return the index of the line on which the header at `start` closes its parentheses'''
    depth = 0
    opened = False
    for idx in range(start, len(lines)):
        # Naive count: parentheses inside strings or comments are counted too.
        for char in lines[idx]:
            if char == '(':
                depth += 1
                opened = True
            elif char == ')':
                depth -= 1
        if not opened or depth <= 0:
            # No parenthesis at all (e.g. `class Foo:`) or all of them closed.
            return idx
    return len(lines) - 1


def _split_params(signature):
    '''Return the parameters between the first '(' and the last ')' of `signature`'''
    if '(' not in signature or ')' not in signature:
        return []
    inner = signature.split('(', 1)[1].rsplit(')', 1)[0]
    inner = inner.replace(' ', '').replace('\n', '')
    pieces = []
    current = ''
    depth = 0
    for char in inner:
        if char in '([{':
            depth += 1
        elif char in ')]}':
            depth -= 1
        if char == ',' and depth == 0:
            # Only top-level commas separate parameters; commas nested in a
            # default such as `b=(1, 2)` stay inside that parameter.
            pieces.append(current)
            current = ''
        else:
            current += char
    pieces.append(current)
    if len(pieces) > 1:
        # A trailing comma leaves an empty piece behind; drop it. A lone ''
        # (no parameters at all) is kept, as doc2html checks for that shape.
        pieces = [piece for piece in pieces if piece]
    return pieces


def _read_comment(lines, idx):
    '''Return the first text line of the docstring opening at or after `idx`, or '' if there is none'''
    # Blank lines and comment lines may sit between the header and the docstring.
    while idx < len(lines):
        stripped = lines[idx].strip()
        if stripped and not stripped.startswith('#'):
            break
        idx += 1
    else:
        return ''
    if not lines[idx][:1].isspace():
        # Back at column 0: this line belongs to the next top-level statement.
        return ''
    quote = next((q for q in _DOC_QUOTES if stripped.startswith(q)), None)
    if quote is None:
        return ''
    rest = stripped[len(quote):]
    while True:
        body, closing, _ = rest.partition(quote)
        if closing or body.strip():
            # Either the docstring closes on this line or this is its first
            # line of text; both cases give the summary.
            return body
        idx += 1
        if idx >= len(lines):
            return ''
        rest = lines[idx].strip()


def _init_params(lines, idx):
    '''Return the __init__ parameters (without self) of the class whose body starts at `idx`'''
    while idx < len(lines):
        line = lines[idx]
        stripped = line.strip()
        if stripped and not line[0].isspace() and not stripped.startswith('#'):
            # A non-comment statement at column 0 ends the class body.
            break
        if stripped.startswith('def __init__'):
            end = _signature_end(lines, idx)
            return _split_params(''.join(lines[idx:end + 1]))[1:]
        idx += 1
    return []  # no __init__


def script2doc(f_name):
    '''Parse single script to doc

    Reads `f_name` as UTF-8 text and collects one entry for every top-level
    `def` and `class` (as classified by get_type).

    Returns a list of tuples `(type, name, params, comment)`:
      * type    -- 'def' or 'class'
      * name    -- function name, or class name followed by its base list
                   when the header has one (e.g. 'Foo(Bar)', 'Foo()', 'Foo')
      * params  -- parameter strings with spaces removed; for a class these
                   are the __init__ parameters without the first one, and []
                   when the class body has no __init__. A function without
                   parameters yields ['']
      * comment -- first text line of the docstring, '' when there is none

    The header line and the parsed fields are printed for every entry.
    Parsing is line based: parentheses and commas inside string literals or
    comments are not recognised as such.
    '''
    with open(f_name, 'r', encoding='utf-8') as source:
        lines = list(source)

    docs = []
    idx = 0
    while idx < len(lines):
        type_ = get_type(lines[idx])
        header = _HEADER_RE.match(lines[idx]) if type_ else None
        if header is None:
            # Not a definition, or a statement that only shares a keyword prefix.
            idx += 1
            continue

        print(lines[idx].rstrip('\n'))
        sig_end = _signature_end(lines, idx)
        signature = ''.join(lines[idx:sig_end + 1])
        comment = _read_comment(lines, sig_end + 1)

        if type_ == 'def':
            name = header.group(2)
            params = _split_params(signature)
        else:
            # Collapse a multi-line header before extracting the base list.
            class_header = _CLASS_RE.match(' '.join(signature.split()))
            name = class_header.group(1) + (class_header.group(2) or '')
            params = _init_params(lines, sig_end + 1)

        print(f'name   : {name}')
        print(f'params : {params}')
        print(f'comment: {comment}')
        print()
        docs.append((type_, name, params, comment))
        # Resume right after the header so that a definition on the very next
        # top-level line is never skipped.
        idx = sig_end + 1
    return docs

In [3]:
def clean():
    '''Delete generated files from documentation/assets/htmls

    Every entry in that directory whose path ends with 'txt' is removed;
    all other entries are left in place.
    '''
    # NOTE(review): store_htmls writes '.html' files into this directory,
    # while only names ending in 'txt' are deleted here -- confirm which
    # extension is intended.
    for path in glob.glob('documentation/assets/htmls/*'):
        if not path.endswith('txt'):
            continue
        os.remove(path)

def scripts2docs():
    '''Parse all scripts and return a mapping from script name to doc

    Every path matched by 'scripts/*' that carries the '.py' extension is
    passed to script2doc; the returned dict maps that path (as produced by
    glob) to the parsed doc.
    '''
    all_docs = {}
    for name in glob.glob('scripts/*'):
        # Require the extension itself: a bare 'py' suffix would also match
        # entries such as 'scripts/copy'.
        if name.endswith('.py'):
            all_docs[name] = script2doc(name)
    return all_docs

In [4]:
# HTML block for an entry that is rendered without a parameter list.
# str.format slots, in order: label, name, joined params, description
# (doc2html fills them with 'DEF'/'CLS', the name, ', '.join(params) and the comment).
template1 = '''
                                <div class="section-block-small">
                                    <div class="doc-block">
                                        <table>
                                           <td><span class="label">&nbsp;{}</span></td>
                                            <td><span class="name">{}</span></td>
                                            <td class="expand"><span class="params">({})</span></td>
                                        </table>
                                        <p><span class="desc">{}</span></p>
                                    </div>
                                </div>'''


# Opening part of the block for an entry that has parameters. Same four
# format slots as template1; it ends with an open <ul class="list"> to which
# doc2html appends one <li> per parameter before adding template2_tail.
template2_head = '''                                <div class="section-block-small">
                                    <div class="doc-block">
                                        <table>
                                           <td><span class="label">&nbsp;{}</span></td>
                                            <td><span class="name">{}</span></td>
                                            <td class="expand"><span class="params">({})</span></td>
                                        </table>
                                        <p><span class="desc">{}</span></p>
                                        <ul class="list">'''

# Closing part matching template2_head: closes the <ul> and both <div>s.
template2_tail = '''
                                        </ul>
                                    </div>
                                </div>'''

In [5]:
def doc2html(doc):
    '''Convert single doc to html

    `doc` is an iterable of `(type, name, params, comment)` tuples. Each
    entry becomes one HTML block: template1 when it has no parameters
    (`params` is empty or holds a single empty string), otherwise
    template2_head followed by one `<li>` per parameter (the text before
    any '=') and template2_tail. The blocks are joined with newlines.

    Name, parameters and comment come straight from source text, so they
    are HTML-escaped before being inserted (a docstring such as
    'training <-> evaluation' would otherwise produce broken markup).
    '''
    # Local import: several functions in this file use `html` as a
    # local variable name, so only `escape` is brought into scope here.
    from html import escape

    blocks = []
    for type_, name, params, comment in doc:
        label = 'DEF' if type_ == 'def' else 'CLS'
        fields = (label, escape(name), escape(', '.join(params)), escape(comment))
        if not params or len(params) == 1 and not params[0]:
            blocks.append(template1.format(*fields))
        else:
            cur = template2_head.format(*fields)
            for p in params:
                cur += f'\n                                           <li>{escape(p.split("=")[0])}: </li>'
            cur += template2_tail
            blocks.append(cur)
    return '\n'.join(blocks)

def docs2htmls(all_docs):
    '''Render every parsed doc to html

    Takes a mapping from file name to doc and returns a new mapping with
    the same keys whose values are the doc2html output for each doc.
    '''
    return {script: doc2html(entries) for script, entries in all_docs.items()}

def store_htmls(htmls):
    '''Store htmls to documentation/assets/htmls

    `htmls` maps a script path to its html text. Each entry is written to
    'documentation/assets/htmls/<stem>.html', where <stem> is the script's
    file name without its directory and extension
    (e.g. 'scripts/utils.py' -> 'utils.html').
    '''
    for name, html in htmls.items():
        # basename/splitext instead of slicing on '/' and a fixed width, so
        # nested paths, other separators and other extension lengths still
        # yield the plain file stem.
        stem = os.path.splitext(os.path.basename(name))[0]
        target = 'documentation/assets/htmls/' + stem + '.html'
        with open(target, 'w') as f:
            f.write(html)

In [6]:
def scripts2htmls():
    '''Run the whole pipeline: parse the scripts, render html, write it out

    Returns the mapping of script name to html produced by docs2htmls,
    after it has been handed to store_htmls.
    '''
    rendered = docs2htmls(scripts2docs())
    store_htmls(rendered)
    return rendered

In [7]:
# Remove files ending in 'txt' from documentation/assets/htmls before regenerating.
clean()
# Parse scripts/*, render the html and write it to disk; keep the result for inspection.
htmls = scripts2htmls()

def camel2snake(name):
name   : camel2snake
params : ['name']
comment: camel2snake case with regex

class Callback():
name   : Callback()
params : []
comment: Callback class with order

class TrainEval(Callback):
name   : TrainEval(Callback)
params : []
comment: Basic training <-> evaluation callback

class Dataset():
name   : Dataset()
params : ['x_data', 'y_data']
comment: Dataset class to store data and labels

class Sampler():
name   : Sampler()
params : ['size', 'batch_size', 'shuffle']
comment: Simple indices generator with option to randomly sample input data

def collate(batch):
name   : collate
params : ['batch']
comment: Util function to stack batches of x and y data

class DataLoader():
name   : DataLoader()
params : ['dataset', 'sampler', 'collate_fn=collate']
comment: Data loader class with data/label data and sampler to batch generation

class DataBunch():
name   : DataBunch()
params : ['train_dl', 'valid_dl']
comment: Data bunch class with both training and validation da