In [None]:
import daft
from daft import DataType as dt
from daft import col
import plotly.graph_objects as go
import math
import numpy as np

In [None]:

# Render one grouped bar chart per TPC-H scale factor comparing the per-question
# runtimes (in minutes) of each benchmarked framework, and save it as HTML.

scale_factors = [100, 1000]

# Loop-invariant: the ten benchmark questions and their column expressions.
question_names = [f"Q{i}" for i in range(1, 11)]
question_list = [col(q) for q in question_names]

# One entry per plotted framework:
#   (value in the CSV "Framework" column, legend name, bar colour,
#    whether to attach "<ratio>x Slower" hover text relative to Daft)
FRAMEWORKS = [
    ("Daft", "Daft", "rgba(108, 11, 169, 1)", False),
    ("EMR Spark SQL", "Spark", "rgba(226,90,28, 0.75)", True),
    ("Dask", "Dask", "rgba(255,193,30, 0.75)", True),
    ("Modin", "Modin", "rgba(0,173,233, 0.6)", True),
]


def _framework_minutes(pdf, framework, question_names):
    """Return the per-question values of one framework as a flat array.

    Selects the rows of ``pdf`` whose "Framework" column equals ``framework``
    and stacks the ``question_names`` columns, question by question. The
    result is empty when the framework has no rows.
    """
    rows = pdf[pdf["Framework"] == framework]
    return np.array([rows[qname] for qname in question_names]).flatten()


def _total_label(name, answers):
    """Build the annotation text for a framework's summed runtime.

    A NaN sum (at least one question has no value) is reported as "DNF".
    """
    total = sum(answers)
    if math.isnan(total):
        return f"{name} Total: DNF"
    return f"{name} Total: {total:.1f} min"


def _slowdown_hovertext(answers, baseline):
    """Return per-question "<ratio>x Slower" strings, or None if not computable.

    The ratio is ``answers / baseline``. When the baseline is empty or its
    shape differs from ``answers`` the element-wise division is undefined
    (NumPy would raise a broadcasting error), so no hover text is produced.
    """
    if baseline.size == 0 or baseline.shape != answers.shape:
        return None
    return [f"{val:.1f}x Slower" for val in (answers / baseline)]


for sf in scale_factors:
    df = daft.read_csv(f"../results/tpch-{sf}-sf.csv")
    # Minimum runtime per (cluster, framework), divided by 60 (the axis is
    # labelled in minutes, so the CSV values are presumably seconds).
    df = (
        df.groupby("Cluster Configuration", "Framework")
        .min(*[q.cast(dt.float64()) / 60. for q in question_list])
    )
    df = df.with_column("Total", sum(question_list))
    pdf = df.to_pandas()

    # Daft is the baseline that the other frameworks' hover text is relative to.
    daft_answers = _framework_minutes(pdf, "Daft", question_names)

    fig = go.Figure()
    annotations = []
    offset = 1  # x position of the next "Total" annotation
    for framework, name, color, compare_to_daft in FRAMEWORKS:
        answers = _framework_minutes(pdf, framework, question_names)
        if not len(answers):
            # Framework absent from this results file: no bar, no annotation.
            continue

        bar_kwargs = dict(
            x=question_names,
            y=answers,
            name=name,
            marker_color=color,
        )
        if compare_to_daft:
            hovertext = _slowdown_hovertext(answers, daft_answers)
            if hovertext is not None:
                bar_kwargs["hovertext"] = hovertext
        fig.add_trace(go.Bar(**bar_kwargs))

        annotations.append(
            go.layout.Annotation(
                showarrow=False,
                text=_total_label(name, answers),
                xanchor='left',
                x=offset,
                yanchor='top',
                y=0)
        )
        offset += 2

    fig.update_layout(
        title_text=f'TPCH {sf} Scale Factor - 4 Nodes (lower is better)',
        yaxis=dict(title='Time (minutes)'),
        xaxis=dict(title='TPCH Question'),
        annotations=annotations
    )

    fig.update_traces(textposition='inside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()
    fig.write_html(f"tpch-{sf}sf.html")