In [3]:
import pandas as pd
import os

# Base relative path from current notebook
base_path = "../../results/pipeline results/5884paras_598qna/llama3.1_res/"
base_path2 = "../../results/pipeline results/5884paras_598qna/llama3.1_gemma_res/"

# File paths
simple_rag_path = os.path.join(base_path, "simple_rag_qna_results_GPU_version.csv")
lqr_path = os.path.join(base_path2, "LQR_processed_results_en.csv")
mod_lqr_path = os.path.join(base_path2, "modLQR_processed_results_en.csv")

# Read CSVs
simple_rag_df = pd.read_csv(simple_rag_path)
lqr_df = pd.read_csv(lqr_path)
mod_lqr_df = pd.read_csv(mod_lqr_path)

# Rename columns for consistency
simple_rag_df = simple_rag_df.rename(columns={
    "retriever_time": "retrieval_time",
    "generator_time": "generation_time"
})

# Calculate average times
simple_rag_avg = simple_rag_df[["retrieval_time", "generation_time", "total_time"]].mean()

lqr_avg = lqr_df[[
    "classification_time",
    "decomposition_time",
    "retrieval_time",
    "generation_time",
    "total_time",
    "total_time_without_classification"
]].mean()

mod_lqr_avg = mod_lqr_df[[
    "classification_time",
    "decomposition_time",
    "retrieval_time",
    "generation_time",
    "total_time",
    "total_time_without_classification"
]].mean()

# Combine into a summary table
summary_df = pd.DataFrame({
    "Simple RAG": simple_rag_avg,
    "LQR RAG": lqr_avg,
    "Mod LQR RAG": mod_lqr_avg
})

# Display
summary_df = summary_df.rename_axis("Metric").reset_index()
print(summary_df)


                              Metric  Simple RAG   LQR RAG  Mod LQR RAG
0                classification_time         NaN  3.718835     3.796855
1                 decomposition_time         NaN  0.775065     0.777422
2                    generation_time    0.402560  3.152548     3.526767
3                     retrieval_time    0.010021  1.013097     1.888375
4                         total_time    0.412803  8.659544     9.989418
5  total_time_without_classification         NaN  4.940710     6.192563


In [4]:

base_save_path = "../../results/scores_results/times/llama3.1_gemma_res/"

output_path = os.path.join(base_save_path, "average_times_summary.csv")
summary_df.to_csv(output_path, index=False)

print(f"Summary saved to: {output_path}")

Summary saved to: ../../results/scores_results/times/llama3.1_gemma_res/average_times_summary.csv


In [5]:
import pandas as pd
import os

# Base relative paths
base_save_path = "../../results/scores_results/times/llama3.1_gemma_res/"

# File paths
simple_rag_path = os.path.join(base_path, "simple_rag_qna_results_GPU_version.csv")
lqr_path = os.path.join(base_path2, "LQR_processed_results_en.csv")
mod_lqr_path = os.path.join(base_path2, "modLQR_processed_results_en.csv")

# Read CSVs
simple_rag_df = pd.read_csv(simple_rag_path)
lqr_df = pd.read_csv(lqr_path)
mod_lqr_df = pd.read_csv(mod_lqr_path)

# Rename for consistency
simple_rag_df = simple_rag_df.rename(columns={
    "retriever_time": "retrieval_time",
    "generator_time": "generation_time"
})

# Slice data after index 98
simple_rag_df = simple_rag_df.iloc[99:]
lqr_df = lqr_df.iloc[99:]
mod_lqr_df = mod_lqr_df.iloc[99:]

# Compute averages
simple_rag_avg = simple_rag_df[["retrieval_time", "generation_time", "total_time"]].mean()

lqr_avg = lqr_df[[
    "classification_time",
    "decomposition_time",
    "retrieval_time",
    "generation_time",
    "total_time",
    "total_time_without_classification"
]].mean()

mod_lqr_avg = mod_lqr_df[[
    "classification_time",
    "decomposition_time",
    "retrieval_time",
    "generation_time",
    "total_time",
    "total_time_without_classification"
]].mean()

# Create summary DataFrame
summary_df = pd.DataFrame({
    "Simple RAG": simple_rag_avg,
    "LQR RAG": lqr_avg,
    "Mod LQR RAG": mod_lqr_avg
})

summary_df = summary_df.rename_axis("Metric").reset_index()

# Save to CSV
output_path = os.path.join(base_save_path, "average_times_summary_after_98.csv")
summary_df.to_csv(output_path, index=False)

print(f"Summary for records after index 98 saved to: {output_path}")


Summary for records after index 98 saved to: ../../results/scores_results/times/llama3.1_gemma_res/average_times_summary_after_98.csv


In [None]:
summary_df

Unnamed: 0,Metric,Simple RAG,LQR RAG,Mod LQR RAG
0,classification_time,,3.706144,3.786253
1,decomposition_time,,0.927388,0.930179
2,generation_time,0.398716,3.17168,3.631574
3,retrieval_time,0.009884,1.210126,2.257627
4,total_time,0.408821,9.015338,10.605633
5,total_time_without_classification,,5.309194,6.81938


Times for 100 QnAs and 984 paras

In [1]:
import pandas as pd

# Load CSV using relative path
csv_path = "../../results/scores_results/times/100_qna_984_paras/updated_results_with_all_data.csv"
df = pd.read_csv(csv_path)

# Ignore the first 20 rows
df = df.iloc[20:].reset_index(drop=True)

# Define metric mapping
metrics = {
    'classification_time': ['query_classification_total_time'],
    'decomposition_time': ['query_decomposition_total_time'],
    'generation_time': ['generation_time', 'LQR_generator_total_time', 'modLQR_generator_total_time'],
    'retrieval_time': ['retrieval_time', 'LQR_total_retrival_time', 'modLQR_total_retrival_time'],
    'total_time': ['total_time', 'total_LQR_time', 'total_modLQR_time'],
    'total_time_without_classification': [None, 'total_LQR_time_without_classification', 'total_modLQR_time_without_classification']
}

# Build summary row by row
summary_data = []

for metric_name, columns in metrics.items():
    simple_rag_col, lr_rag_col, dq_rag_col = (columns + [None] * (3 - len(columns)))  # handle missing

    summary_data.append({
        "Metric": metric_name,
        "Simple RAG": df[simple_rag_col].mean() if simple_rag_col else None,
        "LR-RAG": df[lr_rag_col].mean() if lr_rag_col else None,
        "DQ-RAG": df[dq_rag_col].mean() if dq_rag_col else None
    })

# Create summary DataFrame
summary_df = pd.DataFrame(summary_data)

# Round values for readability
summary_df[['Simple RAG', 'LR-RAG', 'DQ-RAG']] = summary_df[['Simple RAG', 'LR-RAG', 'DQ-RAG']].round(3)

# Display result
print("\n⏱️ Average Time Summary (First 20 Skipped):")
display(summary_df)



⏱️ Average Time Summary (First 20 Skipped):


Unnamed: 0,Metric,Simple RAG,LR-RAG,DQ-RAG
0,classification_time,7.496,,
1,decomposition_time,13.026,,
2,generation_time,1.673,5.951,6.426
3,retrieval_time,0.019,25.073,41.319
4,total_time,1.692,51.547,68.268
5,total_time_without_classification,,44.051,60.771


In [2]:
metrics = {
    'classification_time': [None, 'query_classification_total_time', 'query_classification_total_time'],
    'decomposition_time': [None, 'query_decomposition_total_time', 'query_decomposition_total_time'],
    'generation_time': ['generation_time', 'LQR_generator_total_time', 'modLQR_generator_total_time'],
    'retrieval_time': ['retrieval_time', 'LQR_total_retrival_time', 'modLQR_total_retrival_time'],
    'total_time': ['total_time', 'total_LQR_time', 'total_modLQR_time'],
    'total_time_without_classification': [None, 'total_LQR_time_without_classification', 'total_modLQR_time_without_classification']
}


In [3]:
summary_data = []

for metric_name, columns in metrics.items():
    simple_rag_col, lr_rag_col, dq_rag_col = columns

    summary_data.append({
        "Metric": metric_name,
        "Simple RAG": df[simple_rag_col].mean() if simple_rag_col else None,
        "LR-RAG": df[lr_rag_col].mean() if lr_rag_col else None,
        "DQ-RAG": df[dq_rag_col].mean() if dq_rag_col else None
    })

summary_df = pd.DataFrame(summary_data)
summary_df[['Simple RAG', 'LR-RAG', 'DQ-RAG']] = summary_df[['Simple RAG', 'LR-RAG', 'DQ-RAG']].round(3)
display(summary_df)


Unnamed: 0,Metric,Simple RAG,LR-RAG,DQ-RAG
0,classification_time,,7.496,7.496
1,decomposition_time,,13.026,13.026
2,generation_time,1.673,5.951,6.426
3,retrieval_time,0.019,25.073,41.319
4,total_time,1.692,51.547,68.268
5,total_time_without_classification,,44.051,60.771
