In [14]:
import draco as drc
import pandas as pd
from vega_datasets import data as vega_data
import altair as alt
from IPython.display import display, Markdown
import json
import numpy as np
from draco.renderer import AltairRenderer
# alt.renderers.enable("png")
import pdb
from draco import Draco
# Module-level Draco instance, created as soon as this cell runs.
# NOTE(review): nothing in the visible cells references `dr_check` — confirm it is still needed.
dr_check=Draco()
from itertools import permutations


# Handles serialization of common numpy datatypes
class NpEncoder(json.JSONEncoder):
    """JSON encoder that understands common numpy scalar and array types.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Convert numpy values to their plain-Python equivalents.

        Args:
            obj: The object the standard encoder could not serialize.

        Returns:
            A ``bool``, ``int``, ``float`` or ``list`` that ``json`` can encode.

        Raises:
            TypeError: For any type that is not handled here (raised by the
                base class implementation).
        """
        # np.bool_ is not a subclass of Python's bool, so without this branch
        # numpy booleans fall through to the base class and raise TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


def md(markdown: str):
    """Render a Markdown-formatted string as rich output in the notebook."""
    rendered = Markdown(markdown)
    display(rendered)


def pprint(obj):
    """Display ``obj`` as an indented JSON code block rendered via Markdown.

    numpy values are serialized through ``NpEncoder``.
    """
    serialized = json.dumps(obj, indent=2, cls=NpEncoder)
    md(f"```json\n{serialized}\n```")

def localpprint(obj):
    """Print ``obj`` to stdout as indented JSON (plain text, no Markdown).

    numpy values are serialized through ``NpEncoder``.
    """
    text = json.dumps(obj, indent=2, cls=NpEncoder)
    print(text)

def recommend_charts(
    spec: list[str], draco: drc.Draco, df: pd.DataFrame, num: int = 5, labeler=lambda i: f"CHART {i+1}"
) -> dict[str, dict]:
    """Complete a partial Draco spec and render the resulting charts.

    Args:
        spec: Partial specification, as a list of ASP facts/constraints.
        draco: Draco instance whose ``complete_spec`` produces the models.
        df: Data passed to the Altair renderer.
        num: Number of models requested from ``draco.complete_spec``.
        labeler: Maps the 0-based model index to the chart's dictionary key.

    Returns:
        A dict keyed by chart label. Each value is
        ``{'chart': <chart.to_json() output>, 'cost': model.cost[0]}``.
        Charts rendered as ``alt.FacetChart`` or ``alt.LayerChart`` are
        skipped, so the labels may have gaps in their numbering.
    """
    renderer = AltairRenderer()
    chart_vega_specs = {}
    for i, model in enumerate(draco.complete_spec(spec, num)):
        chart_name = labeler(i)
        # Use a separate name so the `spec` argument is not shadowed.
        chart_spec = drc.answer_set_to_dict(model.answer_set)
        chart = renderer.render(spec=chart_spec, data=df)
        # Faceted and layered charts are left out of the result.
        if isinstance(chart, (alt.FacetChart, alt.LayerChart)):
            continue
        chart_vega_specs[chart_name] = {'chart': chart.to_json(), 'cost': model.cost[0]}

    return chart_vega_specs



In [35]:
def rec_from_generated_spec(
    marks: list[str],
    fields: list[str],
    encoding_channels: list[str],
    draco: drc.Draco,
    input_spec_base: list[str],
    data: pd.DataFrame,
    num: int = 5, config=None
) -> dict[str, str]:
    """Generate recommendations for every ordering of ``fields``.

    For each permutation of ``fields`` a partial spec is built that attaches
    one encoding per field to mark ``m0`` and pins that encoding's field. The
    mark type is left for Draco to choose. Each spec is completed with
    ``recommend_charts`` and the results are merged.

    Args:
        marks: Accepted for interface compatibility; not used here.
        fields: Field names to force into the chart's encodings.
        encoding_channels: Accepted for interface compatibility; not used here.
        draco: Draco instance forwarded to ``recommend_charts``.
        input_spec_base: Base facts (data schema, view, mark) that every
            generated spec starts with.
        data: DataFrame forwarded to ``recommend_charts`` for rendering.
        num: Number of models requested per generated spec.
        config: When not ``None`` no specs are generated and an empty dict is
            returned.

    Returns:
        A dict ordered by ascending cost. Each key is the chart label with
        the cost appended, and each value is the chart's JSON string.
    """
    # Start with an empty result so the sort below also works when config is
    # given (the name would otherwise be unbound and raise UnboundLocalError).
    recs = {}

    if config is None:
        # Try every ordering of the requested fields.
        for field_order in permutations(fields):
            force_attributes = []
            for index, item in enumerate(field_order):
                # Attach encoding e<index> to mark m0 and pin its field.
                force_attributes.append(f'entity(encoding,m0,e{index}).')
                force_attributes.append(f'attribute((encoding,field),e{index},{item}).')

            spec = (
                input_spec_base
                + force_attributes
                # Reject models that have no field encoding at all.
                + [":- {attribute((encoding,field),_,_)} < 1."]
            )

            cfg = (str(field_order), 'any-mark')
            label_suffix = ' | '.join(cfg)
            # Bind the suffix as a default so the label cannot pick up a
            # later iteration's value.
            labeler = lambda i, suffix=label_suffix: f"CHART {i + 1} ({suffix})"

            try:
                new_recs = recommend_charts(spec=spec, draco=draco, df=data, num=num, labeler=labeler)
            except Exception as exc:
                # Best effort: report the failure and move on to the next
                # ordering. Catching Exception (not a bare except) lets
                # KeyboardInterrupt stop a long-running cell.
                print(f'Altair went wrong ({label_suffix}): {exc!r}')
                continue
            recs.update(new_recs)

    # Cheapest recommendations first.
    ranked = sorted(recs.items(), key=lambda item: item[1]['cost'])

    # Fold the cost into the key and keep only the chart JSON as the value.
    return {f"{name}{entry['cost']}": entry['chart'] for name, entry in ranked}

In [36]:
def start_draco(fields,datasetname='birdstrikes',config=None):
    """Load a vega dataset, derive its Draco schema and request recommendations.

    Args:
        fields: Field names forwarded to ``rec_from_generated_spec``.
        datasetname: ``'movies'`` or ``'seattle'`` select those vega datasets;
            any other value loads birdstrikes, sampled down to 500 rows with
            ``random_state=1``.
        config: Forwarded unchanged to ``rec_from_generated_spec``.

    Returns:
        Whatever ``rec_from_generated_spec`` returns for the prepared inputs.
    """
    engine = drc.Draco()

    # Pick the dataset to explore.
    if datasetname == 'movies':
        df: pd.DataFrame = vega_data.movies()
    elif datasetname == 'seattle':
        df = vega_data.seattle_weather()
    else:
        df = vega_data.birdstrikes().sample(n=500, random_state=1)

    # Normalize column names: collapse '__' to '_', lowercase, then swap '$' for 'a'.
    df.columns = [
        name.replace('__', '_').lower().replace('$', 'a') for name in df.columns
    ]

    # Turn the dataframe schema into facts and add the root view and its mark.
    schema_facts = drc.dict_to_facts(drc.schema_from_dataframe(df))
    view_and_mark = [
        "entity(view,root,v0).",
        "entity(mark,v0,m0).",
    ]
    input_spec_base = schema_facts + view_and_mark

    return rec_from_generated_spec(
        marks=['bar', 'point', 'circle', 'line', 'tick'],
        fields=fields,
        encoding_channels=["x", "y", "color", "shape", "size"],
        draco=engine,
        input_spec_base=input_spec_base,
        data=df,
        config=config,
    )






In [49]:
def get_draco_recommendations(attributes,datasetname='birdstrikes',config=None):
    """Normalize attribute names and return the recommendations from ``start_draco``.

    Each name has '__' collapsed to '_', is lowercased, and then has '$'
    replaced by 'a' before being passed on as ``fields``.
    """
    normalized_fields = [
        name.replace('__', '_').lower().replace('$', 'a') for name in attributes
    ]
    return start_draco(fields=normalized_fields, datasetname=datasetname, config=config)


if __name__ == '__main__':
    # Demo: recommend charts for two birdstrikes fields and print each
    # chart's mark and encoding.
    fields_birdstrikes = ['cost_other','wildlife_size']
    recommendations=get_draco_recommendations(fields_birdstrikes, datasetname='birdstrikes')
    for chart_key, chart in recommendations.items():
        print(f"Recommendation for {chart_key}:")
        # Each value is a chart JSON string; decode it once and read both parts.
        vega_spec = json.loads(chart)
        mark, encoding = vega_spec['mark'], vega_spec['encoding']
        print(mark)
        print(encoding)


Recommendation for CHART 1 (('cost_other', 'wildlife_size') | any-mark)14:
{'type': 'tick'}
{'x': {'field': 'cost_other', 'scale': {'type': 'linear', 'zero': True}, 'type': 'quantitative'}, 'y': {'field': 'wildlife_size', 'type': 'ordinal'}}
Recommendation for CHART 2 (('cost_other', 'wildlife_size') | any-mark)14:
{'type': 'tick'}
{'x': {'field': 'cost_other', 'scale': {'type': 'linear', 'zero': True}, 'type': 'quantitative'}, 'y': {'field': 'wildlife_size', 'type': 'ordinal'}}
Recommendation for CHART 1 (('wildlife_size', 'cost_other') | any-mark)14:
{'type': 'tick'}
{'x': {'field': 'cost_other', 'scale': {'type': 'linear', 'zero': True}, 'type': 'quantitative'}, 'y': {'field': 'wildlife_size', 'type': 'ordinal'}}
Recommendation for CHART 2 (('wildlife_size', 'cost_other') | any-mark)14:
{'type': 'tick'}
{'x': {'field': 'cost_other', 'scale': {'type': 'linear', 'zero': True}, 'type': 'quantitative'}, 'y': {'field': 'wildlife_size', 'type': 'ordinal'}}
Recommendation for CHART 3 (('co

# ## Draco Recommendations Findings
1) The order in which the attributes are passed does not matter.
2) Looking at the costs, it makes sense for point to be popular, since Draco assigns it the lowest cost.

In [22]:
import numpy as np

# Names of the attributes to copy out of the loaded model object
variables = [
    'response_history',
    'state_history',
    'momentum_attributes_history',
    'greedy_attributes_history',
    'random_attributes_history',
    'rl_attributes_history',
    'last_users_attributes_history',
    'actor_critic_action_history',
    'rl_accuracies',
    'random_accuracies',
    'momentum_accuracies',
    'greedy_accuracies',
    'master_current_user_attributes',
    'current_user_attributes',
    'interaction_map',
    'dataset',
    'fieldnames',
    'all_algorithms_distribution_map',
    'user_distribution_map',
    'response_algorithm_predictions',
    'response_accuracy',
]

# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where this file exists — consider a configurable data directory.
model_path = '/Users/aryal/Desktop/Personal/RLVisRec/interface/ShiftScopeLogs/20240523193204/online_learning_models.pkl'
# NOTE(review): allow_pickle=True lets numpy unpickle arbitrary objects, which
# can execute code. Only load files from a trusted source.
model = np.load(model_path, allow_pickle=True)

# Collect the requested attributes, warning about any the model lacks.
loaded_model = {}
for key in variables:
    if not hasattr(model, key):
        print(f"Warning: The model does not have attribute '{key}'")
        continue
    loaded_model[key] = getattr(model, key)

# loaded_model now maps each attribute found on the model to its value

# Now, loaded_model contains the desired attributes from the model


In [23]:
# Show the collected attributes as the cell's output.
# NOTE(review): this dumps the whole dictionary; the captured output is very long.
loaded_model

{'response_history': [['cost_total_a', 'airport_name', 'none'],
  ['origin_state', 'aircraft_make_model', 'none'],
  ['cost_other', 'when_time_of_day', 'aircraft_make_model'],
  ['origin_state', 'aircraft_make_model', 'none'],
  ['cost_total_a', 'airport_name', 'none'],
  ['origin_state', 'aircraft_make_model', 'none'],
  ['cost_other', 'when_time_of_day', 'aircraft_make_model'],
  ['cost_repair', 'wildlife_species', 'origin_state'],
  ['cost_total_a', 'wildlife_size', 'none'],
  ['cost_repair', 'when_phase_of_flight', 'when_time_of_day'],
  ['cost_repair', 'wildlife_species', 'origin_state'],
  ['when_time_of_day', 'airport_name', 'wildlife_species'],
  ['cost_repair', 'effect_amount_of_damage', 'cost_total_a'],
  ['cost_other', 'wildlife_species', 'effect_amount_of_damage'],
  ['cost_repair', 'effect_amount_of_damage', 'cost_total_a'],
  ['when_time_of_day', 'airport_name', 'wildlife_species'],
  ['cost_repair', 'effect_amount_of_damage', 'cost_total_a'],
  ['cost_other', 'wildlife_s