In [1]:
from jinja2 import Environment, PackageLoader, select_autoescape, meta
import pandas as pd, boto3, re, pytz
from datetime import datetime
from tabulate import tabulate
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
# Raise pandas' display limit so up to 1000 rows are shown when a frame is rendered.
pd.set_option("display.max_rows",1000)

In [2]:
# Load the per-model and the per-class metadata sheets.
all_models_meta = pd.read_csv("docs_module/metadata/models_metadata_all.csv")
all_classes_meta = pd.read_csv("docs_module/metadata/class_metadata_all.csv")

# Keep only the models flagged with include == 1 and attach the class-level
# columns via a left join on `model_class` (every included model is kept).
# Column names present on both sides keep the model value unchanged and get the
# suffix "class" for the class-level value.
full_meta = all_models_meta.loc[all_models_meta["include"] == 1].merge(
    all_classes_meta,
    how="left",
    on="model_class",
    suffixes=("", "class"),
)
full_meta.shape

(87, 48)

In [3]:
def tabulate_row(x):
    """Render one metadata row as a GitHub-flavoured Markdown table.

    Args:
        x: The data for a single model (a row handed over by
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        str: The text produced by ``tabulate`` with ``tablefmt="github"``
        after entries holding missing values have been dropped.
    """
    populated_fields = pd.DataFrame(x).dropna()
    return tabulate(populated_fields, tablefmt="github")
# Build the per-model summary table from the selected metadata columns,
# one rendered table string per row.
full_meta["table"] = full_meta[
    [
        "name",
        "model_class",
        "compatibility",
        "license",
        "edition",
        "inputs",
        "output",
        "language",
        "dimension",
        "case_sensitive",
        "upstream_deps",
    ]
].apply(tabulate_row, axis=1)

In [4]:
# Jinja2 environment that loads templates from the `templates` folder of the
# `docs_module` package. Autoescaping is enabled only for templates whose
# names end in html / xml.
env = Environment(
    loader=PackageLoader('docs_module', 'templates'),
    autoescape=select_autoescape(['html', 'xml'])
)

In [5]:
# Fetch the raw text of the `model.md` template (first element of the tuple
# returned by the loader) and parse it into a Jinja2 syntax tree.
mdsrc = env.loader.get_source(env, 'model.md')[0]
parsed_content = env.parse(mdsrc)
# Uncomment to list the variables the template expects from the render context:
#meta.find_undeclared_variables(parsed_content)

In [6]:
# Compiled `model.md` template; rendered once per model further down.
mdmd = env.get_template("model.md")

In [7]:
# Replace every missing value with "" (in place). The code below calls
# `.split(",")` on `inputs` and relies on empty URL fields being falsy,
# neither of which works with NaN.
full_meta.fillna("",inplace=True)

In [8]:
def generate_code(x, scala=False):
    """Build the usage snippet that loads a pretrained model.

    Args:
        x: One metadata row. Reads ``model_class``, ``name``, ``language``,
            ``repo``, ``inputs`` (comma-separated column names) and
            ``output``.
        scala: When True, emit the Scala flavour (prefixed with ``val ``);
            otherwise emit the Python flavour.

    Returns:
        str: A three-line snippet: the ``pretrained(...)`` call followed by
        ``.setInputCols(...)`` and ``.setOutputCol(...)`` on tab-indented
        continuation lines.
    """
    # Quote each input column, trimming whitespace around the commas.
    input_cols = ",".join(f'"{col.strip()}"' for col in x.inputs.split(","))

    # ``x["name"]`` rather than ``x.name``: on a pandas row the attribute is
    # the row label, not the value of the "name" column.
    calls = [
        f'model = {x.model_class}.pretrained("{x["name"]}","{x.language}","{x.repo}")',
        f".setInputCols({input_cols})",
        f'.setOutputCol("{x.output}")',
    ]

    # Scala accepts a continuation line that starts with ".", Python does not:
    # without an explicit backslash the indented ".setInputCols" line is a
    # syntax error, so the Python sample joins the lines with " \".
    line_join = "\n\t" if scala else " \\\n\t"
    snippet = line_join.join(calls)
    return "val " + snippet if scala else snippet

def generate_buttons(x):
    """Build the Markdown button row for one model.

    Args:
        x: One metadata row. Reads ``demo_url``, ``colab_url`` and
            ``download_url``; a falsy value means "no such button".

    Returns:
        str: ``""`` when no URL is set. Otherwise the ``{:.btn-box}`` marker
        line followed by one link per available URL, each immediately
        followed by its attribute list, concatenated without separators in
        the order demo, colab, download.
    """
    button_specs = (
        ("Live Demo", x.demo_url, "{:.button.button-orange}"),
        ("Open in Colab", x.colab_url, "{:.button.button-orange.button-orange-trans.co.button-icon}"),
        ("Download", x.download_url, "{:.button.button-orange.button-orange-trans.arr.button-icon}"),
    )
    rendered = [
        f"[{label}]({url})" + css_classes
        for label, url, css_classes in button_specs
        if url
    ]
    if not rendered:
        return ""
    return "{:.btn-box}\n" + "".join(rendered)
                                   
# Disabled: SEO title column.
#full_meta["model_title_seo"] = full_meta.class_description + ": " +full_meta.name

# Derived per-model columns consumed by the template: code samples in both
# languages and the button row. Extra keyword arguments given to
# DataFrame.apply are forwarded to the applied function.
full_meta["python_sample"] = full_meta.apply(generate_code, axis=1)
full_meta["scala_sample"] = full_meta.apply(generate_code, axis=1, scala=True)
full_meta["buttons"] = full_meta.apply(generate_buttons, axis=1)

# Disabled: reading the label set from each pretrained NerDLModel.
# full_meta["real_labels"] = full_meta.apply(lambda r: None if r.model_class!="NerDLModel" else NerDLModel.pretrained(r["name"],r.language,r.repo).getClasses(), axis=1)

In [9]:
# for i, r in full_meta.iterrows():
#     with open(f"docs_module/output/{r.latest_date.replace('-','')}_{r.model_class}_{r['name']}_{r.language}.md","w") as f:
#         f.write(mdmd.render(**r))

In [10]:
# Render one Markdown page per model into docs_module/output/, named
# "<latest_date>-<name>_<language>.md". Each row is unpacked into the template
# context, so every column is available to the template by name.
for i, r in full_meta.iterrows():
    # Pin UTF-8: without an explicit encoding open() falls back to the
    # platform default, which can fail on (or mis-encode) non-ASCII metadata.
    with open(f"docs_module/output/{r.latest_date}-{r['name']}_{r.language}.md", "w", encoding="utf-8") as f:
        f.write(mdmd.render(**r))
