In [8]:
import glob
import json
import re
import plotly.graph_objs as go
from typing import List
from sklearn_object import SklearnObject
from sklearn_object import get_key_value_pairs, check_for_default_values

# Directory holding the statistics files (not referenced by the cells visible here).
STATISTICS_DIR = "data/statistics/"
# JSON file that is loaded into `skl_modules_json` and handed to check_for_default_values.
SKL_MODULES_FILE = "data/sklearn/modules/sklearn_modules.json"
# Glob pattern passed to glob.glob to enumerate the statistics files, one per project.
ALL_PROJECTS = "data/statistics/*"


In [9]:
# Parse the sklearn modules description once; later cells pass it to
# check_for_default_values.
with open(SKL_MODULES_FILE) as modules_file:
    skl_modules_json = json.load(modules_file)

In [10]:
# Collect one SklearnObject per module entry found in every project statistics file.
all_objects: List[SklearnObject] = []

for project in glob.glob(ALL_PROJECTS):

    # Only the parsing needs the file handle; close it before processing.
    with open(project) as f:
        project_json = json.load(f)

    # Each top-level key is a source-file path; its value maps module entries
    # to their raw option data.
    for key in project_json:
        file = project_json[key]
        if not file:
            # Files without any recorded module entries contribute nothing.
            continue

        for module in file:
            # Entry keys are split on "_": first part is the object name,
            # second part is the line number.
            # NOTE(review): names that themselves contain "_" would be truncated
            # here — confirm the key format produced upstream.
            sklearn_object_parts = module.split("_")
            name = sklearn_object_parts[0]
            line_nr = sklearn_object_parts[1]

            # Diagnostic output: lists every project/file that uses LinearRegression.
            if name == "LinearRegression":
                print(project)
                print(key)

            option_data = get_key_value_pairs(file[module])
            final_option_data = check_for_default_values(name, option_data, skl_modules_json)

            # Pass the file path (`key`) as file_name; the original passed `file`,
            # i.e. the whole dict of module entries for that path.
            all_objects.append(
                SklearnObject(
                    name=name,
                    file_name=key,
                    line_nr=line_nr,
                    options=final_option_data,
                )
            )


data/statistics/statistics_100DaysofMLCodeChallenge.json
Regression/Multiple-Linear-Regression/multiple_linear_regression.py
data/statistics/statistics_100DaysofMLCodeChallenge.json
Regression/Polynomial-Regression/polynomial_regression.py
data/statistics/statistics_100DaysofMLCodeChallenge.json
Regression/Polynomial-Regression/polynomial_regression.py
data/statistics/statistics_100DaysofMLCodeChallenge.json
Regression/Simple-Linear-Regression/simple_linear_regression.py
data/statistics/statistics_ECBM6040-Project.json
utils/lane.py
data/statistics/statistics_paper_boltzmann_generators.json
software/deep_boltzmann/networks/plot.py


In [11]:
# Per object name: how many options were recorded in total and how many carry
# each of the status labels "default", "custom", "required" and "unknown".
results = {}

modules = {module.name for module in all_objects}

for module_name in modules:
    # Flatten the status label (third field of every option) across all
    # collected objects that share this name.
    statuses = [
        option[2]
        for sklearn_object in all_objects
        if sklearn_object.name == module_name
        for option in sklearn_object.options
    ]

    results[module_name] = {
        "total": len(statuses),
        "default": statuses.count("default"),
        "custom": statuses.count("custom"),
        "required": statuses.count("required"),
        "unknown": statuses.count("unknown"),
    }

# Persist the tallies; keys are sorted so the file layout is stable.
with open("data/modules_option_number.json", "w") as out_file:
    json.dump(results, out_file, sort_keys=True, indent=4)
   

In [12]:
# Names of all objects for which not a single option was recorded.
modules_without_options = [
    name for name, counts in results.items() if counts["total"] == 0
]

print(len(modules_without_options))
print(modules_without_options)

7
['GaussianNB', 'MaxAbsScaler', 'MultiLabelBinarizer', 'LabelBinarizer', 'LeaveOneGroupOut', 'LinearRegression', 'LabelEncoder']
