In [166]:
import pandas as pd
import json
import glob
from collections import Counter
from typing import List, Dict
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [167]:
def assign_category(type_name: str) -> str:
    """Collapse a fine-grained type name into a coarser category label.

    The comparison is case-insensitive. A name that belongs to none of the
    known groups is returned exactly as it was passed in (original casing
    preserved).

    Args:
        type_name: Name of the value type to classify.

    Returns:
        The category label for ``type_name``, or ``type_name`` itself when it
        is not a member of any group.
    """
    # Category label -> lower-cased type names that belong to it.
    members_by_category = {
        "String": ("str", "joinedstr"),
        "Number": ("int", "float", "double"),
        "Sequence": (
            "list", "dict", "set", "tuple",
            "listcomp", "dictcomp", "setcomp", "generatorexp",
        ),
        "Call": ("lambda", "subscript", "starred"),
        "Operation": ("binop", "boolop", "unaryop", "compare", "ifexp"),
        "None Type": ("none",),
        "Argument": ("methodargument",),
    }

    lowered = type_name.lower()
    for category, members in members_by_category.items():
        if lowered in members:
            return category

    # Unknown names pass through untouched.
    return type_name

In [168]:
def get_value_types(library_name: str, project_dir: str):
    """Tally the value-type categories recorded for one library.

    Every path matched by ``project_dir`` is loaded as a JSON document. In
    each file entry of a document, the section keyed by ``library_name`` is
    scanned: entries whose name starts with an uppercase letter are inspected
    (presumably class/constructor names -- confirm against the data producer),
    and for each of their keys other than ``"variable"`` and ``"params"`` the
    ``"type"`` field is mapped through ``assign_category`` and counted.

    Args:
        library_name: Library key to look for inside each file entry.
        project_dir: Glob pattern selecting the JSON files to read (despite
            the name, it is passed straight to ``glob.glob``).

    Returns:
        ``pandas.DataFrame`` with the columns ``Type`` and ``Count``, one row
        per category in order of first appearance. Both columns are present
        even when nothing matched, so callers can always index
        ``df["Type"]`` / ``df["Count"]``.

    Raises:
        KeyError: If an inspected parameter entry has no ``"type"`` field.
    """
    type_counts = Counter()

    # glob returns paths in arbitrary order; sorting keeps the row order of
    # the result independent of the filesystem's listing order.
    for project_path in sorted(glob.glob(project_dir)):
        with open(project_path, "r", encoding="utf-8") as project_file:
            project_data = json.load(project_file)

        for file_data in project_data.values():
            if library_name not in file_data:
                continue

            for key, data in file_data[library_name].items():
                # key[:1] rather than key[0]: an empty key is skipped
                # instead of raising IndexError.
                if not key[:1].isupper():
                    continue

                for param, param_data in data.items():
                    if param in ("variable", "params"):
                        continue
                    type_counts[assign_category(str(param_data["type"]))] += 1

    # Explicit column names guarantee the schema for an empty tally too;
    # DataFrame.from_dict({}, orient="index") would yield no "Count" column.
    return pd.DataFrame(list(type_counts.items()), columns=["Type", "Count"])


# Per-library tallies of value categories; each call scans the same file set.
df_sklearn = get_value_types("sklearn", "data/statistics/*")
df_tf = get_value_types("tensorflow", "data/statistics/*")
df_torch = get_value_types("torch", "data/statistics/*")

# One row of three "domain" cells, one pie per library.
fig = make_subplots(
    rows=1,
    cols=3,
    specs=[[{"type": "domain"}, {"type": "domain"}, {"type": "domain"}]],
)
fig.add_traces(
    [
        go.Pie(labels=frame["Type"], values=frame["Count"], title=caption)
        for caption, frame in (
            ("scikit learn", df_sklearn),
            ("Tensorflow", df_tf),
            ("Pytorch", df_torch),
        )
    ],
    rows=[1, 1, 1],
    cols=[1, 2, 3],
)

fig.update_traces(textposition="inside")

# No figure-level title is set; each pie is labelled through its own trace
# title. Fixed 800x400 canvas with a bordered legend.
fig.update_layout(
    autosize=False,
    width=800,
    height=400,
    uniformtext_minsize=12,
    uniformtext_mode="hide",
    legend_title="Types of Values",
    legend={"bordercolor": "Black", "borderwidth": 1},
)
fig.show()

# Static export next to the notebook; plotly needs an image-export engine
# (e.g. kaleido) installed for this call to succeed.
fig.write_image("value_types.pdf")
