In [2]:
from pathlib import Path
import pandas as pd
import ast

In [12]:
def analyze_speed(csv_dir: str, base_file_name: str) -> dict:
    graph_csv_files = Path(csv_dir).rglob(f'{base_file_name}*.csv')
    results = {}

    for csv_file in sorted(graph_csv_files):
        try:
            df = pd.read_csv(csv_file)
            if df.empty:
                results[csv_file.name] = 'CSV file is empty.'
                continue

            df.set_index('Environment', inplace=True)
            mean_values = df.mean(axis=1)
            sorted_means = mean_values.sort_values(ascending=True)

            notes = []
            for i in range(len(sorted_means) - 1):
                faster_env = sorted_means.index[i]
                slower_env = sorted_means.index[i + 1]
                if sorted_means.iloc[i + 1] != 0:
                    speedup = f'{sorted_means.iloc[i + 1] / sorted_means.iloc[i]:.2f}'
                    notes.append(f"{faster_env} is {speedup} times faster than {slower_env}")
                elif sorted_means.iloc[i] == 0:
                    notes.append(f"{faster_env} and {slower_env} have the same speed")
                else:
                    notes.append(f"{slower_env} is infinitely slower than {faster_env}")

            results[csv_file.name] = ' and '.join(notes) if notes else 'No significant difference in speed.'
        except Exception as e:
            results[csv_file.name] = f'Error processing file: {str(e)}'

    return results

In [13]:
# Get all double_recursion csv files from output/CSVs directory
csv_files = Path('output/CSVs')

# Analyze speed of double_recursion
double_recursion_speed_results = analyze_speed(csv_files, 'double_recursion')
double_recursion_speed_results



  speedup = f'{sorted_means.iloc[i + 1] / sorted_means.iloc[i]:.2f}'


{'double_recursion_binary_tree.csv': 'xsb is 4.24 times faster than clingo and clingo is 2.66 times faster than souffle',
 'double_recursion_complete.csv': 'xsb is 3.45 times faster than clingo and clingo is 1.77 times faster than souffle',
 'double_recursion_cycle.csv': 'xsb is 1.89 times faster than clingo and clingo is 2.86 times faster than souffle',
 'double_recursion_cycle_with_shortcuts.csv': 'xsb is 1.80 times faster than clingo and clingo is 2.86 times faster than souffle',
 'double_recursion_max_acyclic.csv': 'xsb is 2.40 times faster than clingo and clingo is 2.82 times faster than souffle',
 'double_recursion_multi_path.csv': 'xsb is 3.33 times faster than clingo and clingo is 3.05 times faster than souffle',
 'double_recursion_path.csv': 'xsb is 2.20 times faster than clingo and clingo is 3.01 times faster than souffle',
 'double_recursion_reverse_binary_tree.csv': 'xsb is 5.65 times faster than clingo and clingo is 2.71 times faster than souffle',
 'double_recursion_star.

In [14]:
left_recursion_speed_results = analyze_speed(csv_files, 'left_recursion')
left_recursion_speed_results

  speedup = f'{sorted_means.iloc[i + 1] / sorted_means.iloc[i]:.2f}'


{'left_recursion_binary_tree.csv': 'xsb is 5.65 times faster than clingo and clingo is 1.62 times faster than souffle',
 'left_recursion_complete.csv': 'xsb is 2.85 times faster than clingo and clingo is 1.62 times faster than souffle',
 'left_recursion_cycle.csv': 'xsb is 7.44 times faster than clingo and clingo is 1.63 times faster than souffle',
 'left_recursion_cycle_with_shortcuts.csv': 'xsb is 6.87 times faster than clingo and clingo is 1.49 times faster than souffle',
 'left_recursion_max_acyclic.csv': 'xsb is 2.04 times faster than clingo and clingo is 1.69 times faster than souffle',
 'left_recursion_multi_path.csv': 'xsb is 8.67 times faster than clingo and clingo is 1.36 times faster than souffle',
 'left_recursion_path.csv': 'xsb is 7.51 times faster than clingo and clingo is 1.66 times faster than souffle',
 'left_recursion_reverse_binary_tree.csv': 'xsb is 7.02 times faster than clingo and clingo is 1.65 times faster than souffle',
 'left_recursion_star.csv': 'clingo is i

In [15]:
right_recursion_speed_results = analyze_speed(csv_files, 'right_recursion')
right_recursion_speed_results

  speedup = f'{sorted_means.iloc[i + 1] / sorted_means.iloc[i]:.2f}'


{'right_recursion_binary_tree.csv': 'xsb is 5.23 times faster than clingo and clingo is 1.71 times faster than souffle',
 'right_recursion_complete.csv': 'xsb is 3.07 times faster than souffle and souffle is 1.15 times faster than clingo',
 'right_recursion_cycle.csv': 'xsb is 8.29 times faster than clingo and clingo is 1.65 times faster than souffle',
 'right_recursion_cycle_with_shortcuts.csv': 'xsb is 5.97 times faster than clingo and clingo is 1.32 times faster than souffle',
 'right_recursion_max_acyclic.csv': 'xsb is 2.44 times faster than clingo and clingo is 1.40 times faster than souffle',
 'right_recursion_multi_path.csv': 'xsb is 8.52 times faster than clingo and clingo is 1.60 times faster than souffle',
 'right_recursion_path.csv': 'xsb is 9.41 times faster than clingo and clingo is 1.92 times faster than souffle',
 'right_recursion_reverse_binary_tree.csv': 'xsb is 7.60 times faster than clingo and clingo is 1.81 times faster than souffle',
 'right_recursion_star.csv': 'c

In [6]:
# Load the data from the provided file
file_path = 'data.txt'  # Adjust this path to your local file location
with open(file_path, 'r') as file:
    data = ast.literal_eval(file.read())

# Extract the relevant data for analysis
records = []

for (environment, graph_type, recursion_variant), entries in data.items():
    for entry in entries:
        size, metrics = entry
        for metric_name, (real_time, cpu_time) in metrics.items():
            if 'Query' in metric_name:
                records.append({
                    'environment': environment,
                    'graph_type': graph_type,
                    'recursion_variant': recursion_variant,
                    'size': size,
                    'metric_name': metric_name,
                    'real_time': real_time,
                    'cpu_time': cpu_time
                })

# Create a DataFrame for easier analysis
df = pd.DataFrame(records)

# Ensure the environments are unique in the sorting process
unique_records = []

for (environment, graph_type, recursion_variant), entries in data.items():
    for entry in entries:
        size, metrics = entry
        for metric_name, (real_time, cpu_time) in metrics.items():
            if 'Query' in metric_name:
                unique_records.append({
                    'environment': environment,
                    'graph_type': graph_type,
                    'recursion_variant': recursion_variant,
                    'size': size,
                    'metric_name': metric_name,
                    'real_time': real_time,
                    'cpu_time': cpu_time
                })

# Create a DataFrame for unique environments
unique_df = pd.DataFrame(unique_records).drop_duplicates(subset=['environment', 'graph_type', 'recursion_variant', 'metric_name'])

# Group by 'graph_type' and 'recursion_variant', and then sort by 'real_time' and 'cpu_time'
unique_result = {}
grouped_unique = unique_df.groupby(['graph_type', 'recursion_variant'])

for name, group in grouped_unique:
    graph_type, recursion_variant = name
    group_sorted_by_real_time = group.sort_values(by='real_time')
    group_sorted_by_cpu_time = group.sort_values(by='cpu_time')
    unique_result[name] = {
        'sorted_by_real_time': group_sorted_by_real_time[['environment', 'real_time', 'size']].reset_index(drop=True),
        'sorted_by_cpu_time': group_sorted_by_cpu_time[['environment', 'cpu_time', 'size']].reset_index(drop=True)
    }

# Correct calculation of factors and handling lengths correctly
final_tables = {}

# Generate the required tables for real_time and cpu_time for left and right recursion
for key in unique_result:
    graph_type, recursion_variant = key
    sorted_by_real_time = unique_result[key]['sorted_by_real_time']
    sorted_by_cpu_time = unique_result[key]['sorted_by_cpu_time']
    
    factors_real_time = [None] + (sorted_by_real_time['real_time'].iloc[:-1].values / sorted_by_real_time['real_time'].iloc[1:].values).tolist()
    factors_cpu_time = [None] + (sorted_by_cpu_time['cpu_time'].iloc[:-1].values / sorted_by_cpu_time['cpu_time'].iloc[1:].values).tolist()
    
    real_time_table = sorted_by_real_time.copy()
    real_time_table['position'] = range(1, len(real_time_table) + 1)
    real_time_table['factor'] = factors_real_time
    
    cpu_time_table = sorted_by_cpu_time.copy()
    cpu_time_table['position'] = range(1, len(cpu_time_table) + 1)
    cpu_time_table['factor'] = factors_cpu_time
    
    if (graph_type, 'real_time') not in final_tables:
        final_tables[(graph_type, 'real_time')] = {}
    if (graph_type, 'cpu_time') not in final_tables:
        final_tables[(graph_type, 'cpu_time')] = {}
    
    final_tables[(graph_type, 'real_time')][recursion_variant] = real_time_table
    final_tables[(graph_type, 'cpu_time')][recursion_variant] = cpu_time_table

# Export the tables to CSV files with the specified folder structure
for key, tables in final_tables.items():
    graph_type, time_type = key
    
    for recursion_variant, table in tables.items():
        variant_dir = Path(f"analysis/{graph_type}/{recursion_variant}")
        variant_dir.mkdir(parents=True, exist_ok=True)
        
        file_path = variant_dir / f"{time_type}_times.csv"
        table.to_csv(file_path, index=False, mode='a', header=not file_path.exists())