In [2]:
import pandas as pd
import json
import glob
from collections import Counter
from typing import List, Dict

In [3]:
# Lift the column-width limit (None) for displayed DataFrame cells.
pd.options.display.max_colwidth = None

def df_to_latex(df: pd.DataFrame) -> None:
    """Print the LaTeX rendering of *df*, leaving out its index."""
    latex_source = df.to_latex(index=False)
    print(latex_source)

In [4]:
def get_module(name, data):
    """Return the first entry in *data* whose ``"name"`` value equals *name*.

    *data* is iterated lazily and the scan stops at the first match.

    Raises:
        StopIteration: if no entry in *data* has the requested name.
    """
    matching_entries = (entry for entry in data if name == entry["name"])
    return next(matching_entries)

def _tally_usage_params(usage_params, library_module_params):
    """Return ``(set_count, default_count)`` for one recorded class usage.

    Entries named ``"variable"`` or ``"params"`` are bookkeeping fields and are
    not counted. A parameter counts as "default" when it is known to the
    library and its recorded ``"value"`` equals the library default rendered
    as a string with single quotes removed.
    """
    set_count = 0
    default_count = 0
    for name, value in usage_params.items():
        if name in ("variable", "params"):
            continue
        set_count += 1
        if name not in library_module_params:
            # Not a known parameter of this class: counted as set, not default.
            continue
        if str(library_module_params[name]).replace("'", "") == value["value"]:
            default_count += 1
    return set_count, default_count


def count_parameters(library_name: str, library_dir: str, files: str) -> pd.DataFrame:
    """Count available, set, default and customised parameters for a library.

    Args:
        library_name: Key under which the library's usages are stored in each
            project statistics file (e.g. ``"sklearn"``).
        library_dir: Path to the JSON *file* (despite the name) holding the
            library's entries; each entry is read via its ``"name"`` and
            ``"params"`` (parameter name -> default value) fields.
        files: Glob pattern selecting the project statistics JSON files.

    Returns:
        A single-row DataFrame with the columns ``Library``, ``Available``,
        ``Set``, ``Default`` and ``Custom``, where ``Custom`` is
        ``Set - Default``.

    Raises:
        StopIteration: propagated from ``get_module`` when a used class has no
            entry in the library file.
    """
    with open(library_dir, "r", encoding="utf-8") as library_file:
        library_data = json.load(library_file)

    # Each distinct class is looked up once; get_module scans library_data
    # linearly, so repeating it for every usage is wasted work.
    params_by_class: Dict[str, dict] = {}

    total_params_set = 0
    total_params_available = 0
    default_params = 0

    for project in glob.glob(files):
        # Only hold the file open for the duration of the load.
        with open(project, "r", encoding="utf-8") as project_file:
            project_data = json.load(project_file)

        for file_data in project_data.values():
            if library_name not in file_data:
                continue

            for key, usage_params in file_data[library_name].items():
                # Only keys starting with an uppercase letter are treated as
                # class usages. Slicing (key[:1]) keeps an empty key from
                # raising IndexError; it is simply skipped.
                if not key[:1].isupper():
                    continue

                # The class name is the part of the key before the first "_".
                class_name = key.split("_")[0]
                if class_name not in params_by_class:
                    params_by_class[class_name] = get_module(class_name, library_data)["params"]
                library_module_params = params_by_class[class_name]

                total_params_available += len(library_module_params)
                set_count, default_count = _tally_usage_params(usage_params, library_module_params)
                total_params_set += set_count
                default_params += default_count

    # Every set parameter is either default or customised.
    customized_params = total_params_set - default_params

    return pd.DataFrame(
        {
            "Library": [library_name],
            "Available": [total_params_available],
            "Set": [total_params_set],
            "Default": [default_params],
            "Custom": [customized_params],
        }
    )

# Per-library parameter counts, all computed over the same statistics files.
df_sklearn = count_parameters(
    library_name="sklearn",
    library_dir="modules/sklearn_default_values.json",
    files="data/statistics/*",
)
df_tf = count_parameters(
    library_name="tensorflow",
    library_dir="modules/tensorflow_default_values.json",
    files="data/statistics/*",
)
df_pytorch = count_parameters(
    library_name="torch",
    library_dir="modules/torch_default_values.json",
    files="data/statistics/*",
)

# Stack the three single-row frames and emit the combined table as LaTeX.
per_library_frames = [df_sklearn, df_tf, df_pytorch]
df_all = pd.concat(per_library_frames)

df_to_latex(df_all)



\begin{tabular}{lrrrr}
\toprule
   Library &  Available &    Set &  Default &  Custom \\
\midrule
   sklearn &      11134 &   1969 &      327 &    1642 \\
tensorflow &      94557 &  22670 &      878 &   21792 \\
     torch &     397964 & 149286 &     9262 &  140024 \\
\bottomrule
\end{tabular}

