In [6]:
import pandas as pd
import json
import glob
from collections import Counter

In [7]:
def get_library_classes(library_name: str, library_data_dir, project_dir: str) -> pd.DataFrame:
    """Count how often each class of a library occurs in the project statistics files.

    Every path matched by the ``project_dir`` glob pattern is parsed as JSON and
    walked as ``{file: {library: {key: value}}}``. Inside the entry for
    ``library_name``, a key that starts with an uppercase letter is counted as
    one class usage; the class name is the part of the key before the first
    underscore. Entries whose value contains ``"base_class_0"`` are skipped.

    Args:
        library_name: Library key to look up inside each per-file entry.
        library_data_dir: Path to a JSON file for the library. The file is
            parsed (so a missing or malformed file still raises), but its
            content does not influence the counts.
        project_dir: Glob pattern selecting the project JSON files.

    Returns:
        DataFrame with the columns ``Class`` and ``Count``, sorted by ``Count``
        in descending order (ties keep first-seen order). When nothing matches,
        an empty DataFrame with the same two columns is returned.
    """
    # Kept only for its validation side effect: the parsed content is unused.
    with open(library_data_dir, "r", encoding="utf-8") as library_file:
        json.load(library_file)

    class_counts = Counter()

    # Sorted so that first-seen order (and therefore tie order) is reproducible.
    for project_path in sorted(glob.glob(project_dir)):
        with open(project_path, "r", encoding="utf-8") as project_file:
            project_data = json.load(project_file)

        for file_data in project_data.values():
            if library_name not in file_data:
                continue
            for key, value in file_data[library_name].items():
                # key[:1] instead of key[0]: an empty key must not raise.
                if not key[:1].isupper():
                    continue
                # NOTE(review): presumably "base_class_0" marks a user-defined
                # subclass rather than a direct library class - confirm.
                if "base_class_0" in value:
                    continue
                class_counts[key.split("_")[0]] += 1

    # Passing explicit columns keeps "Count" present even without any match,
    # so the sort below cannot fail with a KeyError.
    df_classes = pd.DataFrame(list(class_counts.items()), columns=["Class", "Count"])
    # mergesort is stable, so equal counts keep their first-seen order.
    df_classes = df_classes.sort_values(by=["Count"], ascending=False, kind="mergesort")

    return df_classes


In [8]:
def get_library_methods(library_name: str, data_dir: str) -> pd.DataFrame:
    """Placeholder: not implemented yet.

    The body does no work, so the call returns ``None`` for any arguments
    even though the signature is annotated as returning a DataFrame.

    Args:
        library_name: Currently unused.
        data_dir: Currently unused.
    """
    # TODO: implement; until then callers receive None.
    return None

In [9]:
def df_to_latex(df: pd.DataFrame, row_number) -> None:
    """Print the leading rows of ``df`` as LaTeX source.

    Args:
        df: Table to render.
        row_number: Upper bound of the row slice ``df[:row_number]``.

    Returns:
        None. The LaTeX text, rendered without the index, is written to
        standard output.
    """
    latex_source = df[:row_number].to_latex(index=False)
    print(latex_source)

In [10]:
# Build the class-usage table for one library and print its top 30 rows as LaTeX.
# NOTE(review): the saved output of this cell lists names such as StandardScaler
# and LogisticRegression, which look like scikit-learn classes rather than
# tensorflow ones - it may stem from an earlier run with other arguments.
# Re-run the cell to refresh the output.
df = get_library_classes(
    library_name="tensorflow",
    library_data_dir="modules/tensorflow_default_values.json",
    project_dir="statistics/*",
)
df_to_latex(df, 30)

\begin{tabular}{lr}
\toprule
                 Class &  Count \\
\midrule
        StandardScaler &     44 \\
    LogisticRegression &     40 \\
                KMeans &     30 \\
          MinMaxScaler &     25 \\
              Pipeline &     21 \\
          LabelEncoder &     17 \\
      LinearRegression &     13 \\
                   PCA &     12 \\
         OneHotEncoder &     11 \\
          GridSearchCV &     10 \\
                 KFold &     10 \\
   MultiLabelBinarizer &     10 \\
       CountVectorizer &      9 \\
                  TSNE &      9 \\
                   RFE &      8 \\
      NearestNeighbors &      7 \\
                   SVC &      7 \\
       TfidfVectorizer &      6 \\
RandomForestClassifier &      6 \\
       StratifiedKFold &      5 \\
         SimpleImputer &      5 \\
   FunctionTransformer &      5 \\
       GaussianMixture &      5 \\
 DecisionTreeRegressor &      5 \\
DecisionTreeClassifier &      4 \\
          MaxAbsScaler &      4 \\
            Group