In [1]:
from jinja2 import Environment, PackageLoader, select_autoescape, meta
import pandas as pd, boto3, re, pytz, numpy as np
from datetime import datetime
from tabulate import tabulate
from sparknlp.base import *
from sparknlp.pretrained import *
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
# Allow up to 1000 rows to be shown when a DataFrame is displayed.
pd.options.display.max_rows = 1000

In [2]:
# Disabled alternative, kept for reference: read the per-model and per-class
# metadata CSVs and left-join the models flagged include == 1 with the class
# metadata on "Model Class".
#all_models_meta = pd.read_csv("docs_module/metadata/models_metadata_all.csv")
#all_classes_meta = pd.read_csv("docs_module/metadata/class_metadata_all.csv")
#full_meta = pd.merge(all_models_meta[all_models_meta.include==1], all_classes_meta, on="Model Class", how=("left"), suffixes=("","class"))

# Load the metadata table that drives the rest of the notebook.
# NOTE(review): presumably this CSV is the pre-merged equivalent of the join
# above -- confirm, and confirm its column names match what later cells expect.
full_meta = pd.read_csv("docs_module/metadata/all_models_metadata.csv")


In [3]:
def tabulate_row(x):
    """Render one metadata row as a GitHub-flavoured Markdown table.

    Parameters
    ----------
    x : pandas.Series
        A single row (field name -> value), as passed by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    str
        The output of ``tabulate(..., tablefmt="github")`` for the row, one
        table line per field that has a non-missing value.

    Notes
    -----
    The dimension field is normalised before rendering: a missing or falsy
    value becomes ``None`` (and is then dropped with the other missing
    fields), anything else is shown as an integer string. Both the
    ``"dimension"`` and ``"Dimension"`` spellings are recognised, because the
    caller in this notebook selects the capitalised column.
    """
    # Work on a copy: mutating the row object handed over by apply() is unsafe.
    row = x.copy()
    for key in ("dimension", "Dimension"):
        if key not in row.index:
            continue
        value = row[key]
        # Test isna first: truth-testing pd.NA raises TypeError.
        if pd.isna(value) or not value:
            row[key] = None
            continue
        try:
            row[key] = str(int(value))
        except (TypeError, ValueError):
            # Best effort: leave a non-numeric dimension exactly as it was.
            pass
    df = pd.DataFrame(row).dropna()
    return tabulate(df, tablefmt="github")
# Build the per-model summary table from the descriptive metadata columns.
# NOTE(review): the recorded run of this cell raised KeyError because none of
# these columns were present in full_meta -- confirm the CSV header spelling.
full_meta["table"] = full_meta[
    [
        "Model Name",
        "Model Class",
        "Spark Compatibility",
        "Spark NLP Compatibility",
        "License",
        "Edition",
        "Input Labels",
        "Output Labels",
        "Language",
        "Dimension",
        "Case Sensitive",
        "Upstream Dependencies",
    ]
].apply(tabulate_row, axis=1)

KeyError: "None of [Index(['Model Name', 'Model Class', 'Spark Compatibility',\n       'Spark NLP Compatibility', 'License', 'Edition', 'Input Labels',\n       'Output Labels', 'Language', 'Dimension', 'Case Sensitive',\n       'Upstream Dependencies'],\n      dtype='object')] are in the [columns]"

In [4]:
# Jinja2 environment whose templates are loaded from the `templates` folder of
# the `docs_module` package.
env = Environment(
    loader=PackageLoader('docs_module', 'templates'),
    # NOTE(review): per the Jinja2 docs select_autoescape matches on the
    # template-name extension, so only html/xml templates would be escaped;
    # templates with other extensions presumably render unescaped -- confirm
    # that is intended.
    autoescape=select_autoescape(['html', 'xml'])
)

In [242]:
# Fetch the raw text of the model.md template (element 0 of the tuple returned
# by the loader's get_source) and parse it with the environment.
mdsrc = env.loader.get_source(env, 'model.md')[0]
parsed_content = env.parse(mdsrc)
# Disabled inspection helper: would list the variables the template expects.
# parsed_content is only consumed by this commented-out call.
#meta.find_undeclared_variables(parsed_content)

In [243]:
# Template object for model.md; rendered once per metadata row further down.
mdmd = env.get_template("model.md")

In [244]:
# Replace every missing value with an empty string, so that truthiness checks
# on optional fields (e.g. the URL columns) treat "missing" as absent.
full_meta = full_meta.fillna("")

In [245]:
def generate_code(x, scala=False):
    """Build a usage snippet (Python by default, Scala on request) for a model row.

    Parameters
    ----------
    x : row-like
        Must expose ``inputs`` (comma-separated column names), ``model_class``,
        ``language``, ``repo`` and ``output`` as attributes, and ``"name"`` via
        item access.
    scala : bool, default False
        When true, the snippet is prefixed with ``val `` and every backslash
        (the Python line continuations) is removed.

    Returns
    -------
    str
        For ``model_class == "PipelineModel"`` a ``PretrainedPipeline(...)``
        construction followed by a sample ``annotate`` call; otherwise a
        ``<model_class>.pretrained(...)`` call chained with ``setInputCols`` and
        ``setOutputCol``.
    """
    # Each input column name is trimmed and wrapped in double quotes.
    quoted_inputs = ",".join('"' + col.strip() + '"' for col in x.inputs.split(","))
    lang = x.language

    if x.model_class in ["PipelineModel"]:
        snippet = f'model = PretrainedPipeline("{x["name"]}","{lang}","{x.repo}")'
        snippet += '\n\nmodel.annotate("Include a healthcare document here. Can be a prescription, medical note, anything...")'
    else:
        snippet = f'model = {x.model_class}.pretrained("{x["name"]}","{lang}","{x.repo}")'
        snippet += f'\\\n\t.setInputCols({quoted_inputs})\\\n\t.setOutputCol("{x.output}")'

    if scala:
        return "val " + snippet.replace("\\", "")
    return snippet

def generate_buttons(x):
    """Build the button-box markup (Live Demo / Open in Colab / Download) for a row.

    Parameters
    ----------
    x : row-like
        Must expose ``demo_url``, ``colab_url`` and ``download_url`` as
        attributes. A falsy value means the corresponding link is unavailable.

    Returns
    -------
    str
        ``"{:.btn-box}\\n"`` followed by one entry per button, in a fixed order:
        a Markdown link with its attribute-list suffix when the URL is truthy,
        otherwise a disabled HTML ``<button>``. Every entry ends with ``<br/>``.
    """
    # (label, url, attribute-list suffix appended to the active link)
    button_specs = (
        ("Live Demo", x.demo_url, "{:.button.button-orange}"),
        ("Open in Colab", x.colab_url, "{:.button.button-orange.button-orange-trans.co.button-icon}"),
        ("Download", x.download_url, "{:.button.button-orange.button-orange-trans.arr.button-icon}"),
    )

    parts = ["{:.btn-box}\n"]
    for label, url, link_attrs in button_specs:
        if url:
            parts.append(f"[{label}]({url}){link_attrs}<br/>")
        else:
            parts.append(f'<button class="button button-orange" disabled>{label}</button><br/>')
    return "".join(parts)
                                   
# Code samples are currently disabled: both sample columns are set to empty
# strings, and the generate_code calls that would fill them stay commented out.
# full_meta["python_sample"] = full_meta.apply(generate_code, axis=1)
full_meta["python_sample"] =""
# full_meta["scala_sample"] = full_meta.apply(lambda x: generate_code(x, True), axis=1)
full_meta["scala_sample"]=""
# Button-box markup for every row.
full_meta["buttons"] = full_meta.apply(generate_buttons, axis=1)
# Disabled: would call NerDLModel.pretrained(...) for each NerDLModel row and
# store the result of getClasses() as the row's real labels.
# full_meta["real_labels"] = full_meta.apply(lambda r: None if r.model_class!="NerDLModel" else NerDLModel.pretrained(r["name"],r.language,r.repo).getClasses(), axis=1)

In [246]:
# Render one Markdown page per metadata row. Every column of the row is passed
# to the template as a keyword argument, and the file name is built from the
# row's latest_date, "Model Name" and Language fields.
for i, r in full_meta.iterrows():
    # Explicit UTF-8: without it open() falls back to the locale's preferred
    # encoding, which can fail on or mangle non-ASCII text on some platforms.
    with open(f"docs_module/output/{r.latest_date}-{r['Model Name']}_{r.Language}.md","w",encoding="utf-8") as f:
        f.write(mdmd.render(**r))
