In [None]:
!pip install pandas plotly ipywidgets

In [4]:
import pandas as pd
import plotly.express as px
import re
from plotly.subplots import make_subplots

In [10]:
# Read and extract data
def load_data(filepath):
    """Parse a text file of ``('<http://dbpedia.org/resource/NAME>', SCORE)`` lines.

    Each line is matched against that tuple-like pattern; lines that do not
    match are skipped. The resource prefix and angle brackets are stripped so
    only NAME is kept.

    Args:
        filepath: Path of the text file to read (decoded as UTF-8).

    Returns:
        pandas.DataFrame with columns "Link" (str) and "PageRank" (float),
        one row per matching line, in file order.

    Raises:
        ValueError: If a matching line carries a score that ``float`` cannot
            parse.
    """
    # The opening quote is captured and back-referenced so that both
    # '<...>' and "<...>" are accepted. Assumption (confirm against the
    # producer of the file): lines are Python tuple reprs, in which a name
    # containing an apostrophe is written with double quotes. Matching only
    # single quotes would silently drop such rows.
    line_pattern = re.compile(
        r"\((['\"])<http://dbpedia.org/resource/(.*?)>\1, (.*?)\)"
    )

    data = []
    # Explicit encoding: resource names may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            match = line_pattern.match(line.strip())
            if match:
                link, score = match.group(2), match.group(3)
                data.append((link, float(score)))
    return pd.DataFrame(data, columns=["Link", "PageRank"])

def load_process_time(filepath):
    """Parse a text file of ``('PROCESS', TIME)`` lines into a DataFrame.

    Lines that do not match the pattern (including lines whose TIME is not a
    well-formed non-negative number) are skipped.

    Args:
        filepath: Path of the text file to read (decoded as UTF-8).

    Returns:
        pandas.DataFrame with columns "Process" (str) and "ExecutionTime"
        (float), one row per matching line, in file order.
    """
    # TIME accepts plain decimals ("12", "12.5", ".5") and exponent notation
    # ("5e-05", "1e+16"). A bare [\d.]+ class cannot match exponent notation,
    # so such rows would be silently dropped, and it would accept strings
    # like "1.2.3" that float() then rejects.
    line_pattern = re.compile(
        r"\('([^']+)', ((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\)"
    )

    data = []
    # Explicit encoding so the result does not depend on the platform default.
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            match = line_pattern.match(line.strip())
            if match:
                process, execution_time = match.groups()
                data.append((process, float(execution_time)))

    return pd.DataFrame(data, columns=["Process", "ExecutionTime"])

def plot_bar_chart(df, title):
    """Show a bar chart of execution time per process.

    Process names are displayed without a leading "8-" or "4-" prefix, and
    bars are ordered by the full process name (ascending), then by execution
    time (descending).

    Args:
        df: DataFrame with "Process" and "ExecutionTime" columns. It is left
            unmodified.
        title: Title shown above the chart.

    Returns:
        None. The figure is rendered via ``fig.show()``.
    """
    # Derive the display label on a new frame. Assigning the column into `df`
    # would leak a "ProcessGroup" column into the caller's DataFrame and can
    # raise pandas' SettingWithCopyWarning when `df` is a filtered slice.
    plot_df = df.assign(
        ProcessGroup=df["Process"].str.replace(r"^8-|^4-", "", regex=True)
    ).sort_values(by=["Process", "ExecutionTime"], ascending=[True, False])

    # Pin the x-axis category order to the sorted order computed above.
    fig = px.bar(
        plot_df,
        x="ProcessGroup",
        y="ExecutionTime",
        title=title,
        category_orders={"ProcessGroup": plot_df["ProcessGroup"].unique().tolist()},
    )

    fig.update_layout(
        xaxis_tickangle=45,
        yaxis_title="Execution Time",
        bargap=0.05,
        showlegend=False,
        width=900,
        height=600,
        margin=dict(l=50, r=50, t=50, b=50),
        title_x=0.5,
        title_y=0.96,
    )

    fig.show()


def plot_bar_charts(df1, df2, title, title_df1, title_df2):
    """Show two execution-time bar charts side by side in one figure.

    The left panel plots `df1` with a leading "8-" stripped from process
    names; the right panel plots `df2` with a leading "4-" stripped. Within
    each panel, bars are ordered by full process name (ascending), then by
    execution time (descending).

    Args:
        df1: DataFrame with "Process" and "ExecutionTime" columns for the
            left panel. It is left unmodified.
        df2: DataFrame with the same columns for the right panel. It is left
            unmodified.
        title: Title of the whole figure.
        title_df1: Subplot title of the left panel.
        title_df2: Subplot title of the right panel.

    Returns:
        None. The figure is rendered via ``fig.show()``.
    """
    # Create subplot structure with 2 columns for separate group plots
    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=(title_df1, title_df2),
    )

    # (source frame, prefix to strip) for subplot columns 1 and 2.
    panels = ((df1, r"^8-"), (df2, r"^4-"))
    for col, (frame, prefix_pattern) in enumerate(panels, start=1):
        # Work on a derived frame so the caller's DataFrame does not gain a
        # "ProcessGroup" column as a side effect.
        panel_df = frame.assign(
            ProcessGroup=frame["Process"].str.replace(prefix_pattern, "", regex=True)
        ).sort_values(by=["Process", "ExecutionTime"], ascending=[True, False])

        # Category list is de-duplicated, matching plot_bar_chart.
        panel_fig = px.bar(
            panel_df,
            x="ProcessGroup",
            y="ExecutionTime",
            title=title,
            category_orders={"ProcessGroup": panel_df["ProcessGroup"].unique().tolist()},
        )
        # Only the traces are copied into the subplot grid.
        for trace in panel_fig.data:
            fig.add_trace(trace, row=1, col=col)

    fig.update_layout(
        title=title,
        title_x=0.5,
        yaxis_title="Execution Time",
        bargap=0.05,
        showlegend=False,
        width=900,
        height=600,
        margin=dict(l=50, r=50, t=50, b=50),
    )

    # Adjust subplot spacing between groups
    fig.update_layout(
        xaxis=dict(domain=[0, 0.45]),
        xaxis2=dict(domain=[0.55, 1]),
    )

    fig.show()


In [12]:
# Read the recorded (process, execution time) pairs.
df = load_process_time("../execution-time.txt")

# Subsets selected by the first (and, for one subset, last) character of the
# process name; each gets a fresh 0-based index.
df_start_8 = df.loc[df["Process"].str.startswith("8")].reset_index(drop=True)
df_start_4 = df.loc[df["Process"].str.startswith("4")].reset_index(drop=True)
df_start_8_end_2 = df.loc[
    df["Process"].str.startswith("8") & df["Process"].str.endswith("2")
].reset_index(drop=True)

# One chart for all "8"-prefixed processes, then a side-by-side comparison.
plot_bar_chart(df_start_8, "Process using n1-highmem-8")
plot_bar_charts(
    df_start_8_end_2,
    df_start_4,
    "Process using different machine type",
    "n1-highmem-8",
    "n1-standard-4",
)