# In [20]:  (Jupyter notebook cell prompt, kept as a comment so the file parses as Python)
import os
import json
import ast

# Task 1: Reading config1.txt and extracting lines with "cot-like"
def extract_cot_like_lines(input_file, output_file):
    """
    Copy every line of ``input_file`` that contains 'cot-like' to ``output_file``.

    Matching lines are appended, so content already present in ``output_file``
    is kept and repeated calls accumulate duplicate lines. Missing parent
    directories of ``output_file`` are created.

    Args:
        input_file: Path of the text file to scan.
        output_file: Path of the file to append to. A bare file name (no
            directory component) is written to the current working directory.

    Raises:
        OSError: If ``input_file`` cannot be read or ``output_file`` cannot
            be written.
    """
    # Iterate the file directly so only the matching lines are held in memory.
    with open(input_file, 'r') as f:
        cot_like_lines = [line for line in f if 'cot-like' in line]

    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Mode 'a' creates the file when it is missing, so no existence check is needed.
    with open(output_file, 'a') as f:
        f.writelines(cot_like_lines)

# Task 2: Extract lookup tables based on timestamp and shift
def read_lookup_table_from_file(file_path):
    """
    Load the JSON lookup table stored at ``file_path``.

    Returns the parsed JSON value. Returns None, after printing a message,
    when the file is zero bytes long, does not contain valid JSON, or any
    other exception is raised while inspecting or reading it.
    """
    parsed = None
    try:
        size_in_bytes = os.path.getsize(file_path)
        if size_in_bytes == 0:
            # A zero-byte file would only produce a JSON decode error; report it plainly.
            print(f"Warning: The file {file_path} is empty, skipping.")
        else:
            with open(file_path, 'r') as handle:
                parsed = json.load(handle)  # Adjust according to the file format
    except json.JSONDecodeError as err:
        print(f"Error reading lookup table from {file_path}: {err}")
    except Exception as err:
        # Best-effort reader: anything else (e.g. a missing file) is reported, not raised.
        print(f"Unexpected error when reading {file_path}: {err}")
    return parsed

def extract_lookup_tables_from_files(input_file, output_file, base_data_dir, shifts):
    """
    Build a JSON file of lookup tables for the entries listed in ``input_file``.

    Each non-blank line is split on '_': the first two fields joined by '_'
    form the timestamp and the fourth field is the integer shift. For every
    line whose shift is in ``shifts``, the first file (in sorted name order)
    in ``base_data_dir`` whose name starts with the timestamp is loaded with
    ``read_lookup_table_from_file`` and stored under the timestamp as
    ``{"shift": <shift>, "lookup_table": <loaded data>}``. If several lines
    share a timestamp, the last one wins.

    Lines that are blank, malformed, use an unsupported shift, have no
    matching data file, or whose data file yields nothing (None or an empty
    table) are skipped; all but blank lines and unreadable tables print a
    warning here.

    Args:
        input_file: Text file with one entry per line.
        output_file: Path of the JSON file to (over)write. Missing parent
            directories are created; a bare file name is also accepted.
        base_data_dir: Directory searched for the per-timestamp data files.
        shifts: Container of supported integer shift values.

    Raises:
        OSError: If ``input_file`` cannot be read, ``base_data_dir`` cannot be
            listed, or ``output_file`` cannot be written.
    """
    constructed_lookup_table = {}

    # The directory is listed lazily (only once a line actually needs it) and
    # sorted so that the "first match" does not depend on os.listdir() order.
    available_files = None

    with open(input_file, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue  # e.g. a trailing blank line

            # Extract timestamp and shift; a short or non-numeric line must
            # not abort the whole run.
            parts = line.split('_')
            try:
                timestamp = f"{parts[0]}_{parts[1]}"
                shift = int(parts[3])
            except (IndexError, ValueError):
                print(f"Warning: Skipping malformed line: {line}")
                continue

            if shift not in shifts:
                print(f"Warning: Skipping unsupported shift value {shift} in line: {line}")
                continue

            if available_files is None:
                available_files = sorted(os.listdir(base_data_dir))

            matching_file = next(
                (name for name in available_files if name.startswith(timestamp)),
                None,
            )
            if matching_file is None:
                print(f"Warning: No matching file found for timestamp: {timestamp}")
                continue

            file_path = os.path.join(base_data_dir, matching_file)

            # The reader returns None on failure; empty tables are dropped as well.
            lookup_table_data = read_lookup_table_from_file(file_path)
            if lookup_table_data:
                constructed_lookup_table[timestamp] = {
                    "shift": shift,
                    "lookup_table": lookup_table_data
                }

    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump(constructed_lookup_table, f, indent=4)

# Task 3: Compare lookup tables
def read_lookup_table(file_path):
    """Return the parsed JSON content of ``file_path``; read and parse errors propagate."""
    with open(file_path, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def compare_lookup_tables(constructed_lookup_table_path, golden_lookup_table_path, output_json_path):
    """
    Score constructed lookup tables against the golden tables and save a JSON report.

    The constructed file maps an entry key to a dict with a "shift" and a
    "lookup_table" (an iterable of mapping records). Each record that is a
    dict may carry "plain_text" and its own "lookup_table", given either as a
    dict or as the string form of a Python dict literal. The golden file maps
    "shift_<shift>" to the reference dict for that shift.

    A key/value pair of a record's lookup table counts as correct when the
    golden table for the entry's shift holds the same value under the same
    key. Keys present only in the golden table are not counted.

    The report contains:
        "file_evaluation": entry key -> accuracy as "NN.NN%" ("0%" when the
            entry produced no scorable pairs),
        "total_mappings" / "correct_mappings": totals over all entries,
        "details": one item per scored record. "matched" is true only when
            the record has at least one pair and all of them are correct, so
            an empty table is not reported as a match (consistent with the
            "0%" file score).

    Args:
        constructed_lookup_table_path: JSON file with the constructed tables.
        golden_lookup_table_path: JSON file with the golden tables.
        output_json_path: Where the report is written. Missing parent
            directories are created.

    Returns:
        The report dictionary that was written to ``output_json_path``.
    """
    # Read the constructed and golden lookup tables
    constructed_table = read_lookup_table(constructed_lookup_table_path)
    golden_table = read_lookup_table(golden_lookup_table_path)

    results = {
        "file_evaluation": {},
        "total_mappings": 0,
        "correct_mappings": 0,
        "details": []
    }
    # Raw per-entry counters; converted to percentage strings after scoring.
    file_counts = {}

    for entry_key, entry in constructed_table.items():
        if "lookup_table" not in entry:
            continue

        shift = entry["shift"]
        counts = file_counts[entry_key] = {"correct": 0, "total": 0}

        # Get the corresponding golden lookup table based on the shift
        golden_lookup_table = golden_table.get(f"shift_{shift}", {})

        for mapping in entry["lookup_table"]:
            if not isinstance(mapping, dict):
                continue

            plain_text = mapping.get("plain_text", "")
            constructed_lookup = mapping.get("lookup_table", {})

            # A table stored as a string is parsed as a Python literal.
            # literal_eval raises TypeError for e.g. unhashable dict keys,
            # so that is treated as a parse failure too.
            if isinstance(constructed_lookup, str):
                try:
                    constructed_lookup = ast.literal_eval(constructed_lookup)
                except (SyntaxError, ValueError, TypeError):
                    print(f"Warning: Failed to parse constructed lookup table for entry {plain_text}. Skipping comparison.")
                    continue

            # Ensure both constructed and golden lookup tables are dicts
            if not isinstance(constructed_lookup, dict) or not isinstance(golden_lookup_table, dict):
                print(f"Warning: Invalid lookup table format for {plain_text}. Skipping comparison.")
                continue

            total_mappings = len(constructed_lookup)
            correct_mappings = sum(
                1
                for key, value in constructed_lookup.items()
                if key in golden_lookup_table and golden_lookup_table[key] == value
            )

            results["total_mappings"] += total_mappings
            results["correct_mappings"] += correct_mappings
            counts["total"] += total_mappings
            counts["correct"] += correct_mappings

            results["details"].append({
                "plain_text": plain_text,
                "shift": shift,
                # Zero pairs is not a match: 0 == 0 would otherwise report success.
                "matched": total_mappings > 0 and correct_mappings == total_mappings,
                "total_mappings": total_mappings,
                "correct_mappings": correct_mappings,
                "constructed_lookup": constructed_lookup,
                "gold_lookup": golden_lookup_table
            })

    # Calculate percentage for each file
    for entry_key, counts in file_counts.items():
        if counts["total"] > 0:
            percentage = (counts["correct"] / counts["total"]) * 100
            results["file_evaluation"][entry_key] = f"{percentage:.2f}%"
        else:
            results["file_evaluation"][entry_key] = "0%"

    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    output_dir = os.path.dirname(output_json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save results to a JSON file
    with open(output_json_path, 'w') as output_file:
        json.dump(results, output_file, indent=4)

    return results



# Paths
# All paths are relative, so they resolve against the current working
# directory at the time this script (or notebook cell) runs.
config1_path = "./config1.txt"  # Task 1 input
cot_like_output_path = "./eval/lookup_table/lookup_table_files.txt"  # Task 1 output, Task 2 input
base_data_dir = "./data/encoded/experiments/"  # directory Task 2 searches for per-timestamp files
constructed_lookup_table_path = './eval/lookup_table/constructed_lookup_table.json'  # Task 2 output, Task 3 input
golden_lookup_table_path = './eval/lookup_table/golden_lookup_table.json'  # only read here, never written
output_json_path = './eval/lookup_table/evaluation_result_lookup_table.json'  # Task 3 report

# Task 1: Extract cot-like lines from config1.txt
# The lines are appended to cot_like_output_path, so re-running this step
# adds the same lines again.
extract_cot_like_lines(config1_path, cot_like_output_path)

# Task 2: Extract lookup tables based on timestamp and shift from cot-like files
# Lines whose shift is not in this list are skipped with a warning.
shifts = [3, 6, 9, 12]  # Define supported shifts
extract_lookup_tables_from_files(cot_like_output_path, constructed_lookup_table_path, base_data_dir, shifts)

# Task 3: Compare lookup tables and output accuracy
compare_lookup_tables(constructed_lookup_table_path, golden_lookup_table_path, output_json_path)
# Printed whenever no exception propagated; skipped lines and unreadable
# lookup files are only reported as printed warnings by the steps above.
print("All tasks completed successfully.")


# Output: All tasks completed successfully.
