Plotting the results of the scalability experiments. The subfolder "images/" should exist. Use the path of the resulting result.csv in Cell 2.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

In [None]:
# Location of the result.csv written by the scalability experiment.
# Replace the placeholder with the real path before running the cells below.
pathToScalabilityResult = "<pathToScalabilityResult>"

In [None]:
import pandas as pd

# Load the semicolon-separated experiment results.
df = pd.read_csv(pathToScalabilityResult, sep=";")

# 'Number' holds the first run of digits found in the dataset name, as an int.
# The pattern is a raw string: '\d' inside a plain literal is an invalid
# escape sequence and makes newer Python versions emit a warning.
# expand=False returns a Series instead of a one-column DataFrame.
df['Number'] = df['Dataset'].str.extract(r'(\d+)', expand=False).astype(int)

In [None]:
# Keep only the rows whose 'Local' flag is True.
# The explicit .copy() makes the result an independent frame, so the column
# assignments done on `df` in later cells do not raise pandas'
# SettingWithCopyWarning.
df = df[df['Local'] == True].copy()

In [None]:
# Extract the dataset size (first run of digits) from the 'Dataset' column.
# Raw string: '\d' inside a plain literal is an invalid escape sequence.
df['Dataset size'] = df['Dataset'].str.extract(r'(\d+)', expand=False).astype(int)

# Pivot so that each row is a dataset size, each column a thread count and
# each cell the corresponding 'Time' value.
# NOTE: DataFrame.pivot raises if a (dataset size, threads) pair occurs twice.
df_pivot = df.pivot(index='Dataset size', columns='Threads', values='Time')

# Thread counts to keep; adjust this single list to include any other thread
# counts you have.
thread_counts = [1, 2, 12, 64]
df_transformed = df_pivot[thread_counts].reset_index()

# Flat string column labels ('1', '2', ...), which is how the plotting cells
# address the per-thread columns.
df_transformed.columns = ['Dataset size'] + [str(count) for count in thread_counts]

In [None]:
# Extract the dataset size (first run of digits) from the 'Dataset' column.
# Raw string: '\d' inside a plain literal is an invalid escape sequence.
df['Dataset size'] = df['Dataset'].str.extract(r'(\d+)', expand=False).astype(int)

# Pivot so that each row is a dataset size, each column a thread count and
# each cell the corresponding 'Granularity' value.
# NOTE: DataFrame.pivot raises if a (dataset size, threads) pair occurs twice.
df_pivot = df.pivot(index='Dataset size', columns='Threads', values='Granularity')

# Thread counts to keep; adjust this single list to include any other thread
# counts you have.
thread_counts = [1, 2, 12, 64]
df_transformed_granularity = df_pivot[thread_counts].reset_index()

# Flat string column labels ('1', '2', ...), which is how the plotting cells
# address the per-thread columns.
df_transformed_granularity.columns = ['Dataset size'] + [str(count) for count in thread_counts]

In [None]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Figure: utility against dataset size, one solid line per thread count.
# The figure is written to images/scalability.png and then displayed.

# Ensure the title size matches the other text.
plt.rcParams.update({
    'font.size': 15,
    'axes.titlesize': 15,
})

# Tick positions are given in records; the labels show them in millions.
x_ticks = [5000000, 10000000, 15000000, 20000000, 25000000, 35000000, 45000000, 55000000, 65000000, 75000000, 85000000]
x_tick_labels = [x // 1000000 for x in x_ticks]

granularity_df = df_transformed_granularity
time_df = df_transformed

fig, ax1 = plt.subplots(figsize=(10, 6))
fig.patch.set_facecolor('white')
ax1.set_facecolor('white')

colors = ['b', 'g', 'r', 'cyan']
threads = ['1', '2', '12', '64']

# Plot the utility curve of every thread count.
for thread, color in zip(threads, colors):
    ax1.plot(granularity_df['Dataset size'], granularity_df[thread], '-o', label=f'Utility {thread}', color=color)

ax1.set_ylabel('Utility')
ax1.set_ylim([0.7, 1])
ax1.set_xticks(x_ticks)
ax1.set_yscale('linear')

# Draw the x-axis tick marks at the bottom, pointing outwards.
ax1.tick_params(axis='x', which='both', direction='out', length=6, bottom=True)
ax1.set_xticklabels(x_tick_labels)
ax1.set_xlabel('Dataset size (in millions of records)')

ax1.margins(x=0)  # remove the space between the curves and the y-axis

# NOTE(review): per the matplotlib documentation, grid(None) without keyword
# arguments toggles the current grid visibility instead of forcing it off.
# If the intent is "never show a grid", grid(False) is the explicit form.
ax1.grid(None)

# Build the legend from the plotted lines, with readable labels.
handles, labels = ax1.get_legend_handles_labels()

# Grey proxy artists describing the two line styles (solid and dashed).
# They are not passed to the legend created below.
utility_handle = Line2D([], [], color='grey', marker='o', linestyle='-', label='Utility')
time_handle = Line2D([], [], color='grey', marker='o', linestyle='--', label='Time')
invisible_handle = Line2D([0], [0], alpha=0, color='none', label='Invisible Item')

# The legend is anchored by its top centre at the bottom edge of the figure.
fig.legend(handles, ['1 Thread', '2 Threads', '12 Threads', '64 Threads'], loc='upper center', bbox_to_anchor=(0.5, 0.01), fancybox=False, shadow=False, ncol=4, facecolor='white', edgecolor='black')
print(handles)  # debug output

for side in ('bottom', 'top', 'right', 'left'):
    ax1.spines[side].set_color('black')

plt.title("Scalability experiment")
fig.tight_layout()

# Create the output folder when it is missing; savefig overwrites an existing
# file, so no prior removal is needed.
os.makedirs("images", exist_ok=True)
fig.savefig("images/scalability.png", format='png', bbox_inches='tight', dpi=300)
plt.show()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Figure: runtime against dataset size, one dashed line per thread count.
# The figure is written to images/scalability-time.png and then displayed.

# Ensure the title size matches the other text.
plt.rcParams.update({
    'font.size': 15,
    'axes.titlesize': 15,
})

# Tick positions are given in records; the labels show them in millions.
x_ticks = [5000000, 10000000, 15000000, 20000000, 25000000, 35000000, 45000000, 55000000, 65000000, 75000000, 85000000]
x_tick_labels = [x // 1000000 for x in x_ticks]

granularity_df = df_transformed_granularity
time_df = df_transformed

fig, ax1 = plt.subplots(figsize=(10, 6))
fig.patch.set_facecolor('white')
ax1.set_facecolor('white')

colors = ['b', 'g', 'r', 'cyan']
threads = ['1', '2', '12', '64']

# Plot the runtime curve of every thread count.
# The values are divided by 1000 and the axis is labelled in seconds, so
# 'Time' is presumably recorded in milliseconds -- TODO confirm.
for thread, color in zip(threads, colors):
    ax1.plot(time_df['Dataset size'], time_df[thread]/1000, '--o', label=f'Time {thread}', color=color)

ax1.set_ylim(bottom=0)  # start the time axis at zero
ax1.set_xticks(x_ticks)
ax1.set_yscale('linear')

print(x_tick_labels)  # debug output
ax1.set_xticklabels(x_tick_labels)

ax1.set_ylabel('Time [s]')
ax1.set_xlabel('Dataset size (in millions of records)')

ax1.margins(x=0.005)  # keep only a thin gap between the curves and the y-axis

# NOTE(review): per the matplotlib documentation, grid(None) without keyword
# arguments toggles the current grid visibility instead of forcing it off.
# If the intent is "never show a grid", grid(False) is the explicit form.
ax1.grid(None)

# Build the legend from the plotted lines, with readable labels.
handles, labels = ax1.get_legend_handles_labels()

# Grey proxy artists describing the two line styles (solid and dashed).
# They are not passed to the legend created below.
utility_handle = Line2D([], [], color='grey', marker='o', linestyle='-', label='Utility')
time_handle = Line2D([], [], color='grey', marker='o', linestyle='--', label='Time')
invisible_handle = Line2D([0], [0], alpha=0, color='none', label='Invisible Item')

# The legend is anchored by its top centre at the bottom edge of the figure.
fig.legend(handles, ['1 Thread', '2 Threads', '12 Threads', '64 Threads'], loc='upper center', bbox_to_anchor=(0.5, 0.01), fancybox=False, shadow=False, ncol=4, facecolor='white', edgecolor='black')
print(handles)  # debug output

for side in ('bottom', 'top', 'right', 'left'):
    ax1.spines[side].set_color('black')

plt.title("Scalability experiment")
fig.tight_layout()

# Create the output folder when it is missing; savefig overwrites an existing
# file, so no prior removal is needed.
os.makedirs("images", exist_ok=True)
fig.savefig("images/scalability-time.png", format='png', bbox_inches='tight', dpi=300)
plt.show()

