In [14]:
import glob
import json
import os
from collections import Counter

import pandas as pd

In [15]:
# Load the repository names under study. Only len() and membership tests are
# applied to `repos` below, so either a JSON array or a JSON object works.
with open("repos.json", "r", encoding="utf-8") as src:
    repos = json.load(src)
print("Number repos: ", len(repos))

# Keep the statistics files whose base name is a known repository.
# os.path.basename follows the platform's path rules; splitting on "\\" only
# worked on Windows and left the full path (never found in `repos`) elsewhere.
repo_files = {
    path
    for path in glob.glob("../data/statistics/*")
    if os.path.basename(path) in repos
}

print("Number files: ", len(repo_files))

Number repos:  982
Number files:  978


In [16]:
def df_to_latex(df: pd.DataFrame) -> None:
    """Print the LaTeX rendering of ``df`` with the index column left out."""
    latex_source = df.to_latex(index=False)
    print(latex_source)

def get_module(name, data):
    """
    Return the first entry of ``data`` whose ``"name"`` field equals ``name``.

    ``data`` is iterated in order and every visited entry is indexed with
    ``["name"]``. When no entry matches, the ``StopIteration`` raised by
    ``next`` propagates to the caller.
    """
    matches = (entry for entry in data if name == entry["name"])
    return next(matches)

In [26]:
def get_parameters(
    library_name: str,
    library_dir: str,
    files: set,
    project_path: str = "../data/statistics/3PU_params.json",
) -> pd.DataFrame:
    """
    Collect the parameters passed to classes of one library and flag defaults.

    Every record is printed (as a tuple) and the records are also returned as
    a DataFrame with one row per parameter usage:

    class_name | parameter | value | status
    ---------------------------------------
    Variable   | trainable | False | custom

    ``status`` is "default" when the used value equals the library default
    rendered with ``str`` and with single quotes removed; otherwise, or when
    the library data lists no such parameter, it is "custom".

    NOTE(review): an earlier docstring sketched an aggregated table
    (Name | Count | Set | Default | Customized). That aggregation is not
    implemented here -- presumably it is meant to be derived from the
    returned frame; confirm.

    Args:
        library_name: Key of the library to inspect inside each file entry of
            the project data (e.g. "tensorflow").
        library_dir: Path to a JSON file holding an array of objects that
            each provide a "name" and a "params" mapping.
        files: Currently unused; kept so existing calls keep working.
        project_path: Path to the JSON file mapping
            file -> library -> usage key -> parameter data.

    Returns:
        DataFrame with columns class_name, parameter, value and status; empty
        (but with those columns) when nothing matches.

    Raises:
        Whatever ``get_module`` raises when a used class is missing from the
        library data, plus the usual file and JSON decoding errors.
    """
    with open(library_dir, "r", encoding="utf-8") as library_file:
        library_data = json.load(library_file)

    with open(project_path, "r", encoding="utf-8") as project_file:
        project_data = json.load(project_file)

    parameters = []

    for file_data in project_data.values():
        if library_name not in file_data:
            continue

        for key, data in file_data[library_name].items():
            # Only keys starting with an upper-case letter are treated as
            # class usages; slicing keeps an empty key from raising.
            if not key[:1].isupper():
                continue

            # Drop the trailing "_<suffix>" of the key, if there is one.
            class_name = key.rsplit("_", 1)[0]
            library_module_params = get_module(class_name, library_data)["params"]

            for name, value in data.items():
                # These two entries are bookkeeping, not call parameters.
                if name in ("variable", "params"):
                    continue

                used_value = value["value"]
                is_default = (
                    name in library_module_params
                    and str(library_module_params[name]).replace("'", "") == used_value
                )
                status = "default" if is_default else "custom"
                parameters.append((class_name, name, used_value, status))

    for record in parameters:
        print(record)

    return pd.DataFrame(
        parameters, columns=["class_name", "parameter", "value", "status"]
    )

# Inspect the TensorFlow usages against the TensorFlow default-value table.
get_parameters(
    library_name="tensorflow",
    library_dir="../modules/tensorflow_default_values.json",
    files=repo_files,
)


('Variable', 'initial_value', 'step_ratio', 'custom')
('Variable', 'name', 'ratio', 'custom')
('Variable', 'trainable', 'False', 'custom')
('Variable', 'initial_value', 'False', 'custom')
('Variable', 'name', 'is_combined', 'custom')
('Variable', 'trainable', 'False', 'custom')
('Variable', 'initial_value', 'step_ratio', 'custom')
('Variable', 'name', 'ratio', 'custom')
('Variable', 'trainable', 'False', 'custom')
('Variable', 'initial_value', 'False', 'custom')
('Variable', 'name', 'is_combined', 'custom')
('Variable', 'trainable', 'False', 'custom')
('TFRecordDataset', 'filenames', 'record_paths', 'custom')
('Categorical', 'probs', 'tf.range(1, max_idx + 2) / tf.reduce_sum(tf.range(1, max_idx + 2))', 'custom')
('Categorical', 'name', 'ratio_sample_weight', 'custom')
('FixedLenFeature', 'shape', '[size, 3]', 'custom')
('FixedLenFeature', 'dtype', 'tf.float32', 'custom')
('Saver', 'max_to_keep', 'None', 'custom')
('AdamOptimizer', 'learning_rate', 'self.learning_rate', 'custom')
('Adam