In [1]:
import csv
import os
import re
from collections import defaultdict

# The rest of the notebook opens its input files relative to the working
# directory, so move one level up from where the kernel started.
# The starting directory is remembered on the first execution so that
# re-running this cell is idempotent instead of climbing one more level
# on every run.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, ".."))
def analyze_output_file(output_file):
    """Print result statistics for a results CSV and for the per-query logs.

    Two independent reports are written to stdout:

    1. Counts and percentages of the ``True`` / ``Similar`` / ``False`` /
       ``Error`` labels found in the third column of ``output_file``.
    2. A scan of every ``all_outputs<N>.txt`` file in the current working
       directory for a fixed list of error messages and for the
       ``Result: True`` / ``Result: Similar`` markers.

    Args:
        output_file: Path to a CSV file whose first row is a header and whose
            third column holds the test result label.

    Returns:
        None. Everything is printed.

    Raises:
        OSError: If ``output_file`` or a matching log file cannot be opened.
    """
    # ---- Part 1: label statistics from the results CSV ----
    label_counts = {"True": 0, "Similar": 0, "False": 0, "Error": 0}
    total_rows = 0

    with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they are not data rows.
                continue
            total_rows += 1
            if len(row) < 3:
                # Row has no result column: counted in the total but, like any
                # unrecognised label, not attributed to a category.
                continue
            test_result = row[2].strip()  # Test result is in the 3rd column
            if test_result in label_counts:
                label_counts[test_result] += 1

    def percentage(count):
        # Guard against division by zero when the CSV has no data rows.
        return (count / total_rows) * 100 if total_rows > 0 else 0

    print(f"Total Rows: {total_rows}")
    for label in ("True", "Similar", "False", "Error"):
        count = label_counts[label]
        print(f"{label} Results: {count} ({percentage(count):.2f}%)")
    print("=="*20)

    # ---- Part 2: scan the per-query log files ----
    error_strings = [
        "Error in process_schema of Value Retrieval",
        "Error in process_question_classification of Classification",
        "Error in process_question_sql of SQL Generation",
        "Error in refine_query of Self-Correction",
        "Error Executing LLM-Generated SQL",
        "Error Executing Ground Truth SQL"
    ]

    # Per error message: total number of occurrences and the files it was seen in.
    error_summary = {err: {"count": 0, "files": []} for err in error_strings}

    # Per result marker: number of files containing "Result: <label>" and
    # their names.  "Result: False" is not tracked here because this report
    # never printed it; the CSV section above already gives the False count.
    marker_summary = {label: {"count": 0, "files": []} for label in ("True", "Similar")}

    # Matches file names like "all_outputs12.txt".
    file_pattern = re.compile(r"all_outputs\d+\.txt")

    # Sort the listing so the printed file lists are deterministic, and use
    # fullmatch so names such as "all_outputs12.txt.bak" are not picked up.
    for filename in sorted(os.listdir(".")):
        if not file_pattern.fullmatch(filename):
            continue

        filepath = os.path.join(".", filename)
        with open(filepath, 'r', encoding='utf-8') as file:
            content = file.read()

        for error in error_strings:
            occurrences = content.count(error)
            if occurrences:
                error_summary[error]["count"] += occurrences
                error_summary[error]["files"].append(filename)

        for label, summary in marker_summary.items():
            # A file contributes at most one to a marker's count.
            if f"Result: {label}" in content:
                summary["count"] += 1
                summary["files"].append(filename)

    # ---- Print the summary ----
    print("STATISTIC SUMMARIES:")
    print("Error Summary:")
    for error, data in error_summary.items():
        if data["count"] > 0:
            print(f"{error}:")
            print(f"  Total Occurrences: {data['count']}")
            print(f"  Files: {', '.join(data['files']) if data['files'] else 'None'}")
            print()

    for label, summary in marker_summary.items():
        print(f"{label} Summary:")
        print(f"  Total Occurrences: {summary['count']}")
        print(f"  Files: {', '.join(summary['files']) if summary['files'] else 'None'}")
        print()


In [2]:
import time
# Results CSV analysed by analyze_output_file() in the next cell.
output_file = "din_accuracy_120.csv"
i=0
# Optional polling loop (disabled): re-run the analysis once a minute while
# results are still being written.
#for i in range(120): 
#    time.sleep(60)
#    analyze_output_file(output_file)

# Count the data rows of query_summary.csv.  newline='' is what the csv module
# requires for correct parsing, and the encoding matches the one used when
# reading the results CSV.
with open("query_summary.csv", 'r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)
    # Subtract the header row; clamp so an empty file reports 0 rather than -1.
    print("row_count: ", max(sum(1 for _ in reader) - 1, 0))



row_count:  120


In [3]:
# Print the label statistics for the results CSV, followed by the summary of
# the all_outputs<N>.txt logs found in the current working directory.
analyze_output_file(output_file)

Total Rows: 120
True Results: 35 (29.17%)
Similar Results: 48 (40.00%)
False Results: 35 (29.17%)
Error Results: 2 (1.67%)
STATISTIC SUMMARIES:
Error Summary:
Error Executing LLM-Generated SQL:
  Total Occurrences: 2
  Files: all_outputs106.txt, all_outputs59.txt

True Summary:
  Total Occurrences: 35
  Files: all_outputs0.txt, all_outputs1.txt, all_outputs10.txt, all_outputs100.txt, all_outputs101.txt, all_outputs11.txt, all_outputs12.txt, all_outputs14.txt, all_outputs15.txt, all_outputs16.txt, all_outputs19.txt, all_outputs21.txt, all_outputs22.txt, all_outputs25.txt, all_outputs30.txt, all_outputs31.txt, all_outputs33.txt, all_outputs34.txt, all_outputs36.txt, all_outputs38.txt, all_outputs39.txt, all_outputs48.txt, all_outputs6.txt, all_outputs7.txt, all_outputs72.txt, all_outputs75.txt, all_outputs76.txt, all_outputs77.txt, all_outputs8.txt, all_outputs80.txt, all_outputs82.txt, all_outputs83.txt, all_outputs87.txt, all_outputs94.txt, all_outputs96.txt

Similar Summary:
  Total O