In [75]:
import glob
import json
import os

import pandas as pd

# JSON file with the scikit-learn reference data. The analysis cell loads it and
# looks up entries by their "name" field, then reads each entry's "params" list.
# The path is relative to the notebook's working directory.
SKLEARN_DATA = "../data/sklearn/modules/sklearn_modules.json"

# Glob patterns matching one statistics file per analysed project, grouped by
# library. Only the sklearn pattern is consumed in the cells visible here;
# the TensorFlow and PyTorch patterns are presumably used by analogous cells
# elsewhere in the notebook (TODO confirm).
ALL_SKLEARN_PROJECTS = "statistics/sklearn/statistics/*"
ALL_TENSORFLOW_PROJECTS = "statistics/tensorflow/statistics/*"
ALL_PYTORCH_PROJECTS = "statistics/pytorch/statistics/*"

Calculate the number of default and custom options for each project

In [76]:
# Per-project option statistics for scikit-learn.
#
# For every statistics file matched by ALL_SKLEARN_PROJECTS, count how many
# recorded options equal the default listed in the sklearn reference data
# ("default") and how many do not ("custom"). The module-level lists below are
# read by later cells (plot and summaries), so their names are kept stable.
projects = []                # one single-row DataFrame per project
project_names = []
project_total_default = []
project_total_custom = []

with open(SKLEARN_DATA, "r", encoding="utf-8") as sklearn_file:
    sklearn_data = json.load(sklearn_file)

for project in glob.glob(ALL_SKLEARN_PROJECTS):
    default_counter = 0
    custom_counter = 0
    total_counter = 0

    # Derive the project name from the file name alone. os.path handles the
    # platform's separator (the previous split on "\\" only worked on Windows),
    # splitext keeps dots that are part of the project name, and only the first
    # "statistics_" is removed so a project whose own name contains that text
    # is not mangled.
    file_stem = os.path.splitext(os.path.basename(project))[0]
    project_name = file_stem.replace("statistics_", "", 1)

    with open(project, "r", encoding="utf-8") as project_file:
        project_data = json.load(project_file)

    for file_data in project_data.values():
        for module, module_data in file_data.items():
            if not module_data:
                continue

            # Keys look like "<module name>_<suffix>"; only the part before the
            # first underscore is matched against the reference data.
            module_name = module.split("_")[0]
            # None (instead of an uncaught StopIteration) when the module is
            # missing from the reference data; its options then count as
            # custom, the same way unknown parameters are treated below.
            sklearn_module = next(
                (entry for entry in sklearn_data if entry["name"] == module_name),
                None,
            )

            for option in module_data:
                total_counter += 1
                is_default = False
                try:
                    known_params = sklearn_module["params"] if sklearn_module is not None else ()
                    param = next((p for p in known_params if p[0] == option), None)
                    if param is not None:
                        # Reference entries carry the default after the last "=".
                        default_value = param[1].split("=")[-1]
                        # NOTE(review): the reference default is a string, so this
                        # only matches if the statistics files also store option
                        # values as strings -- confirm against the extractor.
                        is_default = bool(default_value == module_data[option])
                except (AttributeError, IndexError, KeyError, TypeError):
                    # Malformed reference or statistics entry: keep the previous
                    # best-effort behaviour of counting the option as custom,
                    # without hiding unrelated errors or KeyboardInterrupt.
                    is_default = False

                if is_default:
                    default_counter += 1
                else:
                    custom_counter += 1

    if total_counter == 0:
        # Avoid dividing by zero for projects without any recorded option.
        percent_default = 0
        percent_custom = 0
    else:
        percent_default = round((default_counter / total_counter) * 100, 1)
        percent_custom = round((custom_counter / total_counter) * 100, 1)

    project_names.append(project_name)
    project_total_default.append(default_counter)
    project_total_custom.append(custom_counter)

    summary = {
        "total": total_counter,
        "total_default": default_counter,
        "total_custom": custom_counter,
        "percent_default": percent_default,
        "percent_custom": percent_custom,
    }
    projects.append(pd.DataFrame([summary], index=[project_name]))

# One row per project, indexed by project name.
result = pd.concat(projects)
result

Unnamed: 0,total,total_default,total_custom,percent_default,percent_custom
100DaysofMLCodeChallenge,52,15,37,28.8,71.2
ade-code,0,0,0,0.0,0.0
Adversarial-Robust-Supervised-Sparse-Coding,0,0,0,0.0,0.0
AI-Feynman,6,0,6,0.0,100.0
AIED_2021_TRMRC_code,0,0,0,0.0,0.0
...,...,...,...,...,...
VGG_Paper,0,0,0,0.0,0.0
VLight,0,0,0,0.0,0.0
vln-bert,0,0,0,0.0,0.0
x-deeplearning,6,0,6,0.0,100.0


In [77]:
import plotly.graph_objects as go

# Stacked bars: one bar per project, split into options left at their default
# value and options set to a custom value.
fig = go.Figure(data=[
    go.Bar(name='default', x=project_names, y=project_total_default),
    go.Bar(name='custom', x=project_names, y=project_total_custom)
])
# Stack the two series and label the figure so it can be read on its own.
fig.update_layout(
    barmode='stack',
    title_text='Default vs. custom sklearn options per project',
    xaxis_title='Project',
    yaxis_title='Number of options',
)
fig.show()

How many projects do not use ML algorithms?

In [78]:
# Projects for which no sklearn option was counted at all.
# NOTE(review): "total" counts options, not modules, so a project that uses
# modules without any recorded option would also land here -- confirm that this
# matches the intended meaning of "does not use ML algorithms".
df_without_ml_algorithms = result[result['total'] == 0]
# A bare len(...) in the middle of a cell is evaluated and thrown away; print it
# so the count that answers the question above is actually shown.
print("Projects without ML algorithms:", len(df_without_ml_algorithms))
df_without_ml_algorithms


Unnamed: 0,total,total_default,total_custom,percent_default,percent_custom
ade-code,0,0,0,0.0,0.0
Adversarial-Robust-Supervised-Sparse-Coding,0,0,0,0.0,0.0
AIED_2021_TRMRC_code,0,0,0,0.0,0.0
amortized-rsa,0,0,0,0.0,0.0
ApproPO,0,0,0,0.0,0.0
...,...,...,...,...,...
TuiGAN-PyTorch,0,0,0,0.0,0.0
VGG_Paper,0,0,0,0.0,0.0
VLight,0,0,0,0.0,0.0
vln-bert,0,0,0,0.0,0.0


In [79]:
# Projects with at least one counted sklearn option.
df_with_ml_algorithms = result[result['total'] > 0]
# A bare len(...) in the middle of a cell is evaluated and thrown away; print it
# so the number of projects is visible next to the table.
print("Projects with ML algorithms:", len(df_with_ml_algorithms))
df_with_ml_algorithms

Unnamed: 0,total,total_default,total_custom,percent_default,percent_custom
100DaysofMLCodeChallenge,52,15,37,28.8,71.2
AI-Feynman,6,0,6,0.0,100.0
alignarr,6,2,4,33.3,66.7
AMINN,3,0,3,0.0,100.0
CE3,2,0,2,0.0,100.0
CGS,3,0,3,0.0,100.0
cloud-masking-SOMs,2,2,0,100.0,0.0
COMS4995_Team_4_Zero_Shot_Classifier,1,1,0,100.0,0.0
contact-human-dynamics,2,0,2,0.0,100.0
covid_19_hate_speech,23,4,19,17.4,82.6


In [84]:
# Headline numbers over all analysed projects: how many projects there are,
# how many options were counted, and how many of those equal the default.
number_of_projects = len(result)
options_overall = result["total"].sum()
options_at_default = result["total_default"].sum()

print(number_of_projects)
print("Total: ", options_overall)
print("Default: ", options_at_default)

141
Total:  252
Default:  44
