In [58]:
import glob
import json
import re

# JSON description of the scikit-learn modules. The counting cell loads it and
# reads the "name" and "params" keys of each entry.
SKLEARN_DATA = "../data/sklearn/modules/sklearn_modules.json"

# Glob patterns for the per-project statistics JSON files, one pattern per
# library. Only the sklearn pattern is consumed by the cells visible in this
# notebook section; the tensorflow/pytorch patterns are not referenced here.
ALL_SKLEARN_PROJECTS = "statistics/sklearn/statistics/*"
ALL_TENSORFLOW_PROJECTS = "statistics/tensorflow/statistics/*"
ALL_PYTORCH_PROJECTS = "statistics/pytorch/statistics/*"
# Path to a single sklearn statistics file.
# NOTE(review): presumably kept for quick trial runs on one project; it is not
# referenced in the visible cells -- confirm before removing.
TEST_PROJECT = "statistics/sklearn/statistics/statistics_100DaysofMLCodeChallenge.json"

# Pattern for "<something>=<something>" text (at least one character on each
# side of an "="). NOTE(review): not used in the visible cells -- confirm
# whether it is still needed.
DEFAULT_REGEX = re.compile(r".+=.+")

Calculate distribution of options

In [59]:
def _uses_default_value(option, module_data, params):
    """Return True when the value a project passed for ``option`` is its default.

    ``params`` is the "params" list of one sklearn module description; each
    entry is indexed as ``entry[0]`` (option name) and ``entry[1]`` (a string
    whose text after the last "=" is taken as the default value).
    ``module_data`` maps the options a project used to the values it passed.

    An option that is not listed in ``params``, or whose description cannot be
    parsed, is reported as not-default (i.e. it is counted as custom). Only
    lookup/parsing errors are handled; anything else propagates instead of
    being silently swallowed.
    """
    try:
        param = next(entry for entry in params if entry[0] == option)
        default_value = param[1].split("=")[-1]
        option_value = module_data[option]
    except (StopIteration, AttributeError, IndexError, KeyError, TypeError):
        return False
    # NOTE(review): default_value is always a str, so this is a textual
    # comparison; it assumes the statistics files store option values as
    # strings -- confirm against the files' producer.
    return default_value == option_value


# Reset the counters on every run so that re-executing the cell is idempotent.
used_default_counter = 0
used_custom_counter = 0
used_total_counter = 0

total_options_counter = 0

with open(SKLEARN_DATA, "r", encoding="utf-8") as sklearn_file:
    sklearn_data = json.load(sklearn_file)

# Index the module descriptions once instead of scanning the list for every
# module occurrence. setdefault keeps the first entry for a duplicated name,
# which is the entry a front-to-back scan would have returned.
sklearn_modules_by_name = {}
for sklearn_entry in sklearn_data:
    sklearn_modules_by_name.setdefault(sklearn_entry["name"], sklearn_entry)

for project in glob.glob(ALL_SKLEARN_PROJECTS):
    with open(project, "r", encoding="utf-8") as project_file:
        project_data = json.load(project_file)

    for file_data in project_data.values():
        for module, module_data in file_data.items():
            # Keys look like "<module name>_<suffix>"; only the part before
            # the first underscore identifies the sklearn module.
            module_name = module.split("_")[0]
            try:
                sklearn_module = sklearn_modules_by_name[module_name]
            except KeyError:
                # An unknown module still stops the run, but with a message
                # that says which module and which project file are involved.
                raise KeyError(
                    "module {!r} used in {} is not described in {}".format(
                        module_name, project, SKLEARN_DATA
                    )
                ) from None

            # Every occurrence of a module contributes all options it offers.
            total_options_counter += len(sklearn_module["params"])

            if not module_data:
                continue

            for option in module_data:
                used_total_counter += 1
                if _uses_default_value(option, module_data, sklearn_module["params"]):
                    used_default_counter += 1
                else:
                    used_custom_counter += 1


print("Total: ", used_total_counter)
print("Default: ", used_default_counter)
print("Custom: ", used_custom_counter)


Total:  252
Default:  44
Custom:  208


Visualize distribution of options

In [60]:
import plotly.graph_objs as go

# Bar chart comparing the number of options the sklearn modules offer with the
# number of option assignments counted in the analysed projects.
bar_heights = {
    "total options provided": total_options_counter,
    "total options used": used_total_counter,
}
labels = list(bar_heights)
values = list(bar_heights.values())

data = [go.Bar(x=labels, y=values)]

fig = go.Figure(data=data)
# update_layout returns the figure, so keeping this call as the last
# expression of the cell is what renders the chart in the notebook.
fig.update_layout(autosize=False, width=800, height=600)

In [61]:
# Pie chart splitting the used options into those left at their default value
# and those set to a custom value.
pie_slices = {
    "total default options used": used_default_counter,
    "total custom options used": used_custom_counter,
}
labels = list(pie_slices)
values = list(pie_slices.values())

pie_trace = go.Pie(labels=labels, values=values, textinfo="percent+value")
fig = go.Figure(data=[pie_trace])
fig.update_layout(
    title="Distribution of total options used", autosize=False, width=800, height=600
)
fig.show()