In [16]:
import pandas as pd
import json
import glob
from collections import Counter

In [17]:
def get_library_classes(library_name: str, library_data_dir, project_dir: str) -> pd.DataFrame:
    """Count how often each class of ``library_name`` occurs in the project statistics.

    Every path matched by the glob pattern ``project_dir`` is parsed as JSON with
    the nesting ``{file: {library: {key: value}}}``. Within the entries of
    ``library_name``, a key whose first character is upper case is counted as a
    class usage; the class name is the part of the key before the first ``_``.
    Entries whose value contains ``"base_class_0"`` are skipped (presumably
    user-defined subclasses rather than direct usages -- confirm against the
    code that produces the statistics files).

    Args:
        library_name: Library key to look up inside each file entry.
        library_data_dir: Path to the library's JSON description file. Despite
            the name it is opened as a single file, not a directory.
        project_dir: Glob pattern selecting the project statistics JSON files.

    Returns:
        DataFrame with the columns ``Class`` and ``Count``, sorted by ``Count``
        in descending order. It is empty (but keeps both columns) when no class
        usage is found.

    Raises:
        OSError: If the library file or a matched project file cannot be opened.
        json.JSONDecodeError: If one of those files is not valid JSON.
    """
    # The parsed library description is not used below; the read is kept so a
    # missing or malformed file still fails exactly as it did before.
    with open(library_data_dir, "r", encoding="utf-8") as library_file:
        json.load(library_file)

    class_counts = Counter()

    # glob returns matches in arbitrary order; sorting keeps the first-seen
    # order of classes (and therefore the order of tied rows) reproducible.
    for project_path in sorted(glob.glob(project_dir)):
        # Close the file as soon as it is parsed instead of holding it open
        # for the whole traversal.
        with open(project_path, "r", encoding="utf-8") as project_file:
            project_data = json.load(project_file)

        for file_data in project_data.values():
            if library_name not in file_data:
                continue
            for key, value in file_data[library_name].items():
                # key[:1] rather than key[0]: an empty key is skipped instead
                # of raising IndexError.
                if not key[:1].isupper():
                    continue
                if "base_class_0" in value:
                    continue
                class_counts[key.split("_")[0]] += 1

    # Naming the columns explicitly keeps "Class"/"Count" present even when
    # nothing matched, so the sort below cannot raise KeyError.
    df_classes = pd.DataFrame(list(class_counts.items()), columns=["Class", "Count"])
    # mergesort is stable, so classes with equal counts keep first-seen order.
    return df_classes.sort_values(by=["Count"], ascending=False, kind="mergesort")


In [18]:
def get_library_methods(library_name: str, data_dir: str) -> pd.DataFrame:
    """Placeholder for a per-library method count; not implemented yet.

    The body does nothing, so every call currently returns ``None`` despite
    the ``pd.DataFrame`` return annotation.
    """
    return None

In [19]:
def df_to_latex(df: pd.DataFrame, row_number) -> None:
    """Print the leading rows of ``df`` as LaTeX source.

    Args:
        df: Frame to render.
        row_number: Upper bound of the row slice ``df[:row_number]``.

    The index is left out of the rendered table, and the LaTeX text is written
    to standard output rather than returned.
    """
    leading_rows = df[:row_number]
    latex_source = leading_rows.to_latex(index=False)
    print(latex_source)

In [20]:
# Tabulate the 30 most frequent "torch" classes found in the statistics files.
# NOTE(review): the library data path names tensorflow although the library
# queried is "torch" -- confirm this is the intended file.
df = get_library_classes(
    library_name="torch",
    library_data_dir="modules/tensorflow_default_values.json",
    project_dir="statistics/*",
)
df_to_latex(df, 30)

\begin{tabular}{lr}
\toprule
                  Class &  Count \\
\midrule
                 Conv2d &   1474 \\
                 Linear &   1116 \\
                   ReLU &   1110 \\
             Sequential &    923 \\
             DataLoader &    888 \\
            BatchNorm2d &    496 \\
                   Adam &    353 \\
             ModuleList &    342 \\
       CrossEntropyLoss &    342 \\
              Parameter &    339 \\
                 Tensor &    305 \\
                Dropout &    273 \\
              LeakyReLU &    223 \\
           DataParallel &    195 \\
              MaxPool2d &    173 \\
                    SGD &    153 \\
              Embedding &    136 \\
            BatchNorm1d &    134 \\
        ConvTranspose2d &    121 \\
                MSELoss &    121 \\
          TensorDataset &    103 \\
                Sigmoid &     95 \\
                Softmax &     90 \\
               Upsample &     87 \\
     DistributedSampler &     85 \\
              LayerNorm & 