In [110]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from Python.pricingmdprunner.utils import load_yaml

# Directory the exported figures are written to (absolute, machine-specific path).
FIGROOT = Path(r"C:\Users\mrkos\scth\projects\MDP_pricing_paper\fig\new_experiments")

In [138]:
# Root directory that is searched recursively for experiment result files.
# Alternative experiment set (disabled):
# experiments_path = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources")
experiments_path = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources_higher_demand")

In [139]:
# Collect one table per result CSV, annotated with the problem and solver
# settings that produced it.
res_dfs = []
for result_path in experiments_path.rglob("**/config*_result.csv"):
    # The solver config sits next to the result file: "<stem without '_result'>.yaml".
    config_stem = result_path.stem[:-len("_result")]
    solver_config = load_yaml(result_path.parent / f"{config_stem}.yaml")
    # The pricing-problem config lives three directory levels above the result file.
    problem_config = load_yaml(result_path.parents[2] / "pp_config.yaml")
    result = pd.read_csv(result_path)

    # One-row frames: problem parameters as-is, solver config flattened into
    # dotted column names by json_normalize.
    pp_row = pd.DataFrame(problem_config["pp_params"], index=[0])
    solver_cfg_row = pd.json_normalize(solver_config)

    # Broadcast every setting onto all rows of this result table
    # (problem parameters first, then solver settings).
    for settings_row in (pp_row, solver_cfg_row):
        for col in settings_row.columns:
            result[col] = settings_row.loc[0, col]

    res_dfs.append(result)

In [140]:
# Stack all per-file result tables into one frame. ignore_index is not set, so each
# file's own row labels are kept and index labels repeat across files.
df = pd.concat(res_dfs)

In [141]:
# Map the raw solver identifiers to the display names used in the figures.
runner_labels = {"mcts": "MCTS", "vi": "VI", "hindsight": "Oracle", "flatrate": "Flatrate"}
df.replace({"runner": runner_labels}, inplace=True)

# An ordered categorical fixes the order in which runners are sorted and grouped.
runner_order = ["Oracle", "Flatrate", "MCTS", "VI"]
df["runner"] = pd.Categorical(df["runner"], categories=runner_order, ordered=True)

# NOTE(review): the source column names look like UTF-8 text decoded as Latin-1
# (presumably `res_budget_μ` and `nᵣ` from the problem config) — confirm how
# load_yaml decodes the file before "fixing" these keys.
column_renames = {"res_budget_Î¼": "res_budget_mean", "náµ£": "n_r"}
df.rename(columns=column_renames, inplace=True)

## Column meanings
- `r` - revenue
- `nᵣ` - number of requests
- `n_r` - number of resources (timeslots)
- `nₛ` - number of "sold" requests
- `u` - utilization in number of resources (timeslots)
- `res_budget_mean` - mean resource budget
- `time` - runtime of the experiment in seconds

In [164]:
# Mean revenue per (number of resources, runner).
# `runner` is categorical: observed=False is passed explicitly so that unobserved
# (n_r, runner) pairs stay in the result as NaN regardless of the pandas default.
df.groupby(['n_r', 'runner'], observed=False)["r"].mean()





n_r  runner  
2    Oracle      73.540063
     Flatrate    43.499467
     MCTS        46.601040
     VI          48.179785
3    Oracle      65.489924
                   ...    
84   VI                NaN
96   Oracle      50.034192
     Flatrate    35.653552
     MCTS              NaN
     VI                NaN
Name: r, Length: 72, dtype: float64

In [165]:
# Horizontal jitter in [0, 0.3) so that runs sharing the same n_r do not overplot.
# A seeded generator keeps the figure identical across re-runs of the notebook.
jitter_rng = np.random.default_rng(0)
df["n_res_noisy"] = df["n_r"] + 0.3*jitter_rng.random(len(df))
px.scatter(df, x="n_res_noisy", y = "r", color="runner", opacity=0.3)

In [166]:
# Mean revenue per (n_r, runner), one marker colour per runner.
# observed=False is explicit because `runner` is categorical and the implicit
# default for unobserved categories differs between pandas versions.
mean_revenue = df.groupby(['n_r', 'runner'], observed=False)["r"].mean().reset_index()
px.scatter(mean_revenue, x="n_r", y = "r", color="runner")





## Violin plot - revenue distribution for each method

In [147]:
# Revenue distribution per method for the instances with expected_res == 12,
# ordered by the categorical order of `runner`.
df_box = df[df.expected_res == 12].sort_values("runner")
fig = px.violin(df_box, x="runner", y="r", box=True, points="all", color="runner")

# Styling shared by both axes: thin black axis line with short outside ticks.
axis_style = dict(
    showline=True,
    linewidth=1,
    linecolor='black',
    ticks='outside',
    ticklen=2,
    tickwidth=1,
    tickcolor='black',
)
# Thin black frame around the whole plotting area (paper coordinates).
frame = dict(
    type='rect',
    x0=0, y0=0, x1=1, y1=1,
    xref='paper', yref='paper',
    line=dict(color='black', width=1),
)

fig.update_layout(
    xaxis_title=None,
    width=576,   # 6 inches * 96 pixels/inch
    height=384,  # 4 inches * 96 pixels/inch
    showlegend=False,
    plot_bgcolor='rgba(0, 0, 0, 0)',   # transparent inside the plot
    paper_bgcolor='rgba(0, 0, 0, 0)',  # transparent outside the plot
    margin=dict(l=10, r=10, t=40, b=10),
    xaxis=dict(axis_style),
    yaxis=dict(axis_style, title="Revenue [1]"),
    shapes=[frame],
)
fig.write_image(FIGROOT/"violin_revenue.pdf")
fig.show()

## Scatter plot - normalized revenue vs. number of resources

In [159]:
# Mean revenue per (n_r, runner) and a copy that will hold the normalized values.
# observed=False is explicit: `runner` is categorical and every (n_r, runner)
# pair must be present (NaN where a runner has no runs) for the normalization
# that follows, independent of the pandas default.
df_mean = df.groupby(['n_r', 'runner'], observed=False)["r"].mean().reset_index()
df_mean_normalized = df_mean.copy()





In [160]:
# Express every runner's mean revenue as a fraction of the Oracle's mean revenue
# for the same n_r. The Oracle baseline is looked up by n_r value rather than by
# row position, so a runner with missing n_r levels can never be divided by the
# Oracle entry of a different n_r.
oracle_by_n_r = df_mean.loc[df_mean.runner=="Oracle"].set_index("n_r")["r"]
for runner in df_mean.runner.unique():
    print(runner)
    is_runner = df_mean.runner==runner
    baseline = df_mean.loc[is_runner, "n_r"].map(oracle_by_n_r)
    normalized = df_mean.loc[is_runner, "r"]/baseline
    df_mean_normalized.loc[df_mean_normalized.runner==runner, "r"] = normalized.values

Oracle
Flatrate
MCTS
VI


In [161]:
# Inspect the normalized table (mean revenue of each runner relative to the Oracle).
df_mean_normalized

Unnamed: 0,n_r,runner,r
0,2,Oracle,1.000000
1,2,Flatrate,0.591507
2,2,MCTS,0.633682
3,2,VI,0.655150
4,3,Oracle,1.000000
...,...,...,...
67,84,VI,
68,96,Oracle,1.000000
69,96,Flatrate,0.712584
70,96,MCTS,


In [162]:
# Normalized mean revenue against the number of resources, one dotted
# marker+line trace per runner.
fig = px.scatter(df_mean_normalized, x="n_r", y = "r", color="runner")
fig.update_traces(mode='markers+lines', line=dict(dash="dot"))

# Styling shared by both axes: thin black axis line with short outside ticks.
axis_style = dict(
    showline=True,
    linewidth=1,
    linecolor='black',
    ticks='outside',
    ticklen=2,
    tickwidth=1,
    tickcolor='black',
)
# Horizontal legend without a title, centred just above the plotting area.
legend_style = dict(
    yanchor="bottom",
    y=1,
    xanchor="center",
    x=0.5,
    orientation="h",
    title=None,
)
# Thin black frame around the whole plotting area (paper coordinates).
frame = dict(
    type='rect',
    x0=0, y0=0, x1=1, y1=1,
    xref='paper', yref='paper',
    line=dict(color='black', width=1),
)

fig.update_layout(
    width=576,   # 6 inches * 96 pixels/inch
    height=384,  # 4 inches * 96 pixels/inch
    legend=legend_style,
    plot_bgcolor='rgba(0, 0, 0, 0)',   # transparent inside the plot
    paper_bgcolor='rgba(0, 0, 0, 0)',  # transparent outside the plot
    margin=dict(l=10, r=10, t=40, b=10),
    xaxis=dict(axis_style, title="# resources [1]"),
    yaxis=dict(axis_style, title="Normalized revenue [1]"),
    shapes=[frame],
)
# Export the figure as a static image, then display it.
fig.write_image(FIGROOT/"normalized_revenue_vs_n_res.pdf")
fig.show()

In [123]:
# Inspect the combined results table (result rows with the problem and solver settings attached as columns).
df

Unnamed: 0,r,u,nₛ,nᵣ,time,bytes,gctime,gcstats,name,sequence,...,seed,solver_params.n_iterations,solver_params.rng,solver_params.reuse_tree,solver_params.exploration_constant,solver_params.depth,solver_params.max_iterations,solver_params.belres,solver_params.verbose,expected_res_noisy
0,25.431203,21,10,17,0.171245,4450616,0.000000,"Base.GC_Diff(4450616, 1, 0, 60044, 123, 0, 0, 0, 0)",flatrate,4893081056443852934,...,1234,,,,,,,,,16.256002
1,23.009183,19,13,22,0.000212,95328,0.000000,"Base.GC_Diff(95328, 1, 0, 273, 0, 0, 0, 0, 0)",flatrate,9270573426987251111,...,1234,,,,,,,,,16.056214
2,27.853222,23,13,25,0.000176,95328,0.000000,"Base.GC_Diff(95328, 1, 0, 273, 0, 0, 0, 0, 0)",flatrate,11290458143304139495,...,1234,,,,,,,,,16.106414
3,29.064232,24,15,32,0.000180,95328,0.000000,"Base.GC_Diff(95328, 1, 0, 273, 0, 0, 0, 0, 0)",flatrate,10160685370670558515,...,1234,,,,,,,,,16.165854
4,33.908270,28,18,25,0.000189,95328,0.000000,"Base.GC_Diff(95328, 1, 0, 273, 0, 0, 0, 0, 0)",flatrate,15071498346189605494,...,1234,,,,,,,,,16.125715
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,28.639695,17,12,21,1.862984,165454224,0.000000,"Base.GC_Diff(165454224, 232, 0, 69412, 704, 0, 0, 0, 0)",mcts,12030875508165328565,...,1234,10000.0,MersenneTwister,False,3.0,3.0,,,,12.252301
96,30.525618,19,11,17,2.049637,164008224,0.204545,"Base.GC_Diff(164008224, 226, 0, 67453, 701, 460, 204545103, 2, 1)",mcts,2171462996008479148,...,1234,10000.0,MersenneTwister,False,3.0,3.0,,,,12.034058
97,18.680031,12,9,18,1.895858,169330064,0.000000,"Base.GC_Diff(169330064, 252, 0, 76520, 724, 0, 0, 0, 0)",mcts,10717929039174679858,...,1234,10000.0,MersenneTwister,False,3.0,3.0,,,,12.173631
98,24.756781,15,10,20,2.086849,166977888,0.208692,"Base.GC_Diff(166977888, 231, 0, 75211, 704, 482, 208691700, 1, 0)",mcts,3987525176682621524,...,1234,10000.0,MersenneTwister,False,3.0,3.0,,,,12.146748


In [124]:
# Mean number of requests, sold requests and utilization per (runner, n_r).
# observed=False is explicit because `runner` is categorical and the implicit
# default for unobserved categories differs between pandas versions.
df_requests = (
    df.groupby(['runner', 'n_r'], observed=False)[["nᵣ", "nₛ", "u"]]
    .mean()
    .reset_index()
)
px.line(df_requests, x="nᵣ", y="nₛ" ,facet_col="runner")





In [123]:
# Mean utilization divided by the number of resources, against the mean number of requests; one facet per runner.
px.line(df_requests, x="nᵣ", y=df_requests["u"]/df_requests["n_r"] ,facet_col="runner")

In [116]:
# Mean utilization against the number of resources; one facet per runner.
px.line(df_requests, x="n_r", y="u" ,facet_col="runner")

In [211]:
# Raw Flatrate result rows for the instances with expected_res == 96.
df.loc[(df.runner=="Flatrate") & (df.expected_res==96), :]

Unnamed: 0,r,u,nₛ,nᵣ,time,bytes,gctime,gcstats,name,sequence,...,seed,solver_params.n_iterations,solver_params.rng,solver_params.reuse_tree,solver_params.exploration_constant,solver_params.depth,solver_params.max_iterations,solver_params.belres,solver_params.verbose,expected_res_noisy
0,2.050970,5,5,63,0.215604,8170440,0.0,"Base.GC_Diff(8170440, 3, 0, 86960, 136, 0, 0, 0, 0)",flatrate,17768007165827142303,...,1234,,,,,,,,,96.139656
1,0.410194,1,1,40,0.001649,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,10968958881520043481,...,1234,,,,,,,,,96.268360
2,1.640776,4,4,43,0.001453,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,16150991115764544715,...,1234,,,,,,,,,96.272056
3,0.820388,2,2,41,0.001393,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,6658965284178139946,...,1234,,,,,,,,,96.132149
4,0.820388,2,2,55,0.001313,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,11987002654447814693,...,1234,,,,,,,,,96.222507
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.820388,2,2,59,0.001368,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,15598959996626656092,...,1234,,,,,,,,,96.254929
96,1.640776,4,4,41,0.001377,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,10021905783586725185,...,1234,,,,,,,,,96.293589
97,1.230582,3,3,48,0.001326,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,15098473763412081672,...,1234,,,,,,,,,96.290899
98,0.410194,1,1,48,0.001290,2561248,0.0,"Base.GC_Diff(2561248, 1, 0, 1810, 2, 0, 0, 0, 0)",flatrate,5538312275439920179,...,1234,,,,,,,,,96.200159


## Load traces

In [367]:
# filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=80_c=3_expected_res=10_nᵣ=10_res_budget_μ=2.4\traces\traces_N=100_seed=888.csv")
# filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=1536_c=3_expected_res=192_nᵣ=192_res_budget_μ=0.125\traces\traces_N=100_seed=888.csv")



In [125]:
def vec_from_str(s):
    """Convert a string of digit characters into a NumPy array of integers.

    Each character of ``s`` is parsed with ``int`` and becomes one entry of the
    result, so ``"0110"`` yields ``array([0, 1, 1, 0])``.
    """
    return np.array([int(ch) for ch in s])

# traces["prod_vec"] = traces["product"].apply(vec_from_str)

In [126]:
# tr = traces[traces.trace_id==1]
def traces2allocation(trace):
    """Pack the requests of one trace into rows using a first-fit rule.

    ``trace`` is a DataFrame with a ``prod_vec`` column (one equal-length numeric
    vector per request) and a ``budget`` column. Requests are processed in row
    order; each one is added to the first row in which no entry of the summed
    vector exceeds 1.

    Returns a pair ``(allocation, allocation_with_budget)`` of 2-D float arrays
    with the row order reversed (row 0 of the packing comes last). The second
    array accumulates ``prod_vec * budget`` instead of ``prod_vec``. Trailing
    unused rows are dropped from both.
    """
    n_rows = len(trace)
    n_cols = len(trace.prod_vec.iloc[0])
    occupancy = np.zeros((n_rows, n_cols), dtype=float)
    occupancy_with_budget = np.zeros((n_rows, n_cols), dtype=float)

    for _, request in trace.iterrows():
        req = request.prod_vec
        budget = request.budget
        row = 0
        # Advance to the first row where adding the request keeps every entry <= 1.
        while not (np.max(occupancy[row, :] + req) <= 1):
            row += 1
        occupancy[row, :] += req
        occupancy_with_budget[row, :] += req * budget

    # If the last row was never used, cut both arrays at the first unused row.
    if np.sum(occupancy[-1, :]) == 0:
        unused_rows = np.where(~occupancy.any(axis=1))[0]
        first_empty_row = unused_rows[0]
        occupancy = occupancy[:first_empty_row, :]
        occupancy_with_budget = occupancy_with_budget[:first_empty_row, :]

    return occupancy[::-1], occupancy_with_budget[::-1]


In [127]:
# Alternative trace file (disabled):
# traces_filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=192_c=3_expected_res=24_nᵣ=24_res_budget_μ=1.0\traces\traces_N=100_seed=888.csv")
traces_filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=80_c=3_expected_res=10_nᵣ=10_res_budget_μ=2.4\traces\traces_N=100_seed=888.csv")
# `product` is read as text so that it can be split character by character.
traces = pd.read_csv(traces_filepath, dtype={"product":str})
traces["prod_vec"] = traces["product"].apply(vec_from_str)
tr = traces[traces.trace_id==1]

# First cell of the flatrate price file stored two levels above the trace file.
flatrate_per_timeslot = pd.read_csv(traces_filepath.parents[1]/"results"/"flatrate"/"config_flatrate_result_price.csv").iloc[0,0]

cs_allocation, cs_allocation_with_budget = traces2allocation(tr)

# Plain allocation picture. It is shown explicitly: only the last expression of a
# cell is rendered automatically, so without show() this figure would be built
# and then silently replaced by the next one.
fig = px.imshow(cs_allocation)
fig.update(layout_coloraxis_showscale=False)
fig.show()

# Budget-weighted allocation picture; rendered as the cell's last expression.
fig = px.imshow(cs_allocation_with_budget, aspect="auto", width=800, height=500)
fig.update(layout_coloraxis_showscale=False)

In [135]:
# Alternative trace files (disabled):
# traces_filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=192_c=3_expected_res=24_nᵣ=24_res_budget_μ=1.0\traces\traces_N=100_seed=888.csv")
# traces_filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=80_c=3_expected_res=10_nᵣ=10_res_budget_μ=2.4\traces\traces_N=100_seed=888.csv")
# traces_filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\tmp_investigating_flatrate\traces\traces_N=100_seed=888.csv")
traces_filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=768_c=3_expected_res=96_nᵣ=96_res_budget_μ=0.25\traces\traces_N=100_seed=888.csv")

# Parse the traces: each product string becomes an integer vector, and the sum
# of its entries is stored as n_timeslots.
traces = pd.read_csv(traces_filepath, dtype={"product": str})
traces["prod_vec"] = traces["product"].apply(vec_from_str)
traces["n_timeslots"] = traces["prod_vec"].apply(sum)

# First cell of the flatrate price file stored two levels above the trace file.
price_file = traces_filepath.parents[1]/"results"/"flatrate"/"config_flatrate_result_price.csv"
flatrate_per_timeslot = pd.read_csv(price_file).iloc[0,0]
print(flatrate_per_timeslot)

# A request counts as accepted when its budget covers the flatrate cost of all its timeslots.
flatrate_cost = traces["n_timeslots"]*flatrate_per_timeslot
traces["accepted_by_flatrate"] = traces["budget"] >= flatrate_cost
traces["revenue_if_sold_with_flatrate"] = flatrate_cost

# Split the requests of trace 6 by the acceptance flag.
tr = traces[traces.trace_id == 6]
accepted_mask = tr["accepted_by_flatrate"]
requests_accepted_by_flatrate = tr[accepted_mask]
requests_rejected_by_flatrate = tr[~accepted_mask]

alloc_acc, _ = traces2allocation(requests_accepted_by_flatrate)
alloc_rej, _ = traces2allocation(requests_rejected_by_flatrate)
# Empty cells of the rejected part get 0.5 so they differ in colour from the accepted part's empty cells.
alloc_rej[alloc_rej == 0.0] = 0.5

# Rejected rows on top, accepted rows below.
alloc = np.concatenate([alloc_rej, alloc_acc], axis=0)
fig = px.imshow(alloc, aspect="auto", width=800, height=500)
fig.show()

0.4101939456930751


In [81]:
# Per-trace sum of the flatrate revenue over the requests flagged as accepted.
accepted_requests = traces[traces["accepted_by_flatrate"]]
approximated_flatrate_revenue = (
    accepted_requests.groupby("trace_id")["revenue_if_sold_with_flatrate"].sum()
)
approximated_flatrate_revenue.reset_index()["revenue_if_sold_with_flatrate"]

0      5.963301
1     15.504583
2     16.697243
3     15.504583
4     15.504583
        ...    
95    13.119263
96    10.733942
97    16.697243
98     4.770641
99    17.889903
Name: revenue_if_sold_with_flatrate, Length: 100, dtype: float64

In [84]:
# Compare the recorded Flatrate revenue with the revenue approximated from the traces.
is_flatrate_24 = (df.runner == "Flatrate") & (df.expected_res == 24)
df_flatrate = df[is_flatrate_24]
approx_revenue = approximated_flatrate_revenue.reset_index().revenue_if_sold_with_flatrate
# NOTE(review): the subtraction aligns on index labels (result row label vs. position
# of the trace in the grouped output) — confirm both refer to the same trace, and
# that the traces currently loaded belong to the expected_res == 24 experiment.
diff = df_flatrate.r - approx_revenue
diff.value_counts()

 0.000000e+00    46
-1.776357e-15    25
-3.552714e-15    15
 1.776357e-15     6
 3.552714e-15     3
-1.192660e+00     3
-1.192660e+00     1
 4.440892e-16     1
Name: count, dtype: int64

In [52]:
traces_filepath = Path(r"C:\Users\mrkos\scth\projects\MDPPricing\data\ev_variable_resources\single_day_cs_pp_T=768_c=3_expected_res=96_nᵣ=96_res_budget_μ=0.25\traces\traces_N=100_seed=888.csv")

# Parse the traces: each product string becomes an integer vector, and the sum
# of its entries is stored as n_timeslots.
traces = pd.read_csv(traces_filepath, dtype={"product": str})
traces["prod_vec"] = traces["product"].apply(vec_from_str)
traces["n_timeslots"] = traces["prod_vec"].apply(sum)

# First cell of the flatrate price file stored two levels above the trace file.
price_file = traces_filepath.parents[1]/"results"/"flatrate"/"config_flatrate_result_price.csv"
flatrate_per_timeslot = pd.read_csv(price_file).iloc[0,0]

# Requests of trace 1, split by whether the budget covers the flatrate cost.
tr = traces[traces.trace_id == 1]
accept = tr["budget"] >= tr["n_timeslots"]*flatrate_per_timeslot
requests_accepted_by_flatrate = tr[accept]
requests_rejected_by_flatrate = tr[~accept]

alloc_acc, _ = traces2allocation(requests_accepted_by_flatrate)
alloc_rej, _ = traces2allocation(requests_rejected_by_flatrate)
# Empty cells of the rejected part get 0.5 so they differ in colour from the accepted part's empty cells.
alloc_rej[alloc_rej == 0.0] = 0.5

# Rejected rows on top, accepted rows below.
alloc = np.concatenate([alloc_rej, alloc_acc], axis=0)
fig = px.imshow(alloc, aspect="auto", width=800, height=500)
fig.show()