In [None]:
import os
import subprocess
import csv
import re

def extract_id(filename):
    """Return the five-digit ID embedded at the end of an ``.amr`` file name.

    The name must end with an underscore, exactly five digits and the
    ``.amr`` extension (for example ``"sentence_00042.amr"`` gives
    ``"00042"``). The ID is returned as a string, leading zeros included.
    ``None`` is returned when the name does not follow that pattern.
    """
    found = re.search(r"_(\d{5})\.amr$", filename)
    if found is None:
        return None
    return found.group(1)

def compute_similarity_scores(folder1, folder2, output_csv):
    """Run smatch on every pair of same-ID ``.amr`` files and write a CSV.

    Both folders are scanned for files ending in ``.amr``. Files are paired
    by the ID that ``extract_id`` returns for their names; files whose name
    carries no ID are skipped. For each ID present in both folders,
    ``python smatch/smatch.py -f <file1> <file2>`` is executed and the last
    line of its standard output is stored as the score.

    Args:
        folder1: Directory holding the first set of ``.amr`` files.
        folder2: Directory holding the second set of ``.amr`` files.
        output_csv: Path of the CSV file to write. It gets a header row
            ``ID, F Score`` followed by one row per common ID, ordered by
            the numeric value of the ID.

    Returns:
        None. Results are written to ``output_csv``; progress is printed.

    Notes:
        When smatch exits with a non-zero status, the literal string
        ``"Error"`` is written as the score for that ID and the exit code
        and captured stderr are printed.
    """

    def index_by_id(folder):
        # Map ID -> full path. Files without a recognizable ID are left out:
        # they would all collide on the key None, and None cannot be
        # converted with int() when the IDs are sorted below.
        indexed = {}
        for name in os.listdir(folder):
            if not name.endswith('.amr'):
                continue
            file_id = extract_id(name)
            if file_id is None:
                continue
            indexed[file_id] = os.path.join(folder, name)
        return indexed

    files1 = index_by_id(folder1)
    files2 = index_by_id(folder2)

    common_ids = set(files1).intersection(files2)
    print(common_ids)
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(["ID", "F Score"])

        for file_id in sorted(common_ids, key=int):
            file1 = files1[file_id]
            file2 = files2[file_id]
            print(f"Computing similarity score for files {file1} and {file2}...")
            command = ["python", "smatch/smatch.py", "-f", file1, file2]
            try:
                result = subprocess.run(command, capture_output=True, text=True, check=True)
                # The score is taken from the final line smatch prints.
                score = result.stdout.strip().split("\n")[-1]
                print(score)
            except subprocess.CalledProcessError as e:
                score = "Error"
                # Surface the failure reason instead of dropping it silently.
                details = (e.stderr or "").strip()
                print(f"smatch failed for ID {file_id} (exit code {e.returncode}): {details}")

            csv_writer.writerow([file_id, score])


# Input folders holding the .amr files to compare, and the CSV to produce.
folder1 = "amr_humans"
folder2 = "amr_llm"
output_csv = "similarity_scores.csv"

# Score every pair of same-ID files and write the results to the CSV.
compute_similarity_scores(
    folder1=folder1,
    folder2=folder2,
    output_csv=output_csv,
)


{'00150', '00000', '00244', '00107', '00015', '00169', '00243', '00013', '00235', '00245', '00125', '00242', '00100', '00156', '00112', '00052', '00021', '00200', '00051', '00201', '00233', '00160', '00238', '00011', '00121', '00205', '00022', '00030', '00227', '00040', '00042', '00138', '00116', '00045', '00062', '00102', '00224', '00105', '00261', '00058', '00103', '00264', '00066', '00127', '00119', '00265', '00020', '00004', '00120', '00204', '00001', '00136', '00144', '00048', '00041', '00266', '00135', '00109', '00008', '00110', '00024', '00157', '00270', '00111', '00267', '00219', '00255', '00053', '00060', '00149', '00143', '00009', '00228', '00230', '00128', '00067', '00215', '00115', '00145', '00209', '00057', '00025', '00248', '00151', '00164', '00104', '00130', '00239', '00237', '00241', '00211', '00019', '00142', '00044', '00036', '00263', '00050', '00146', '00118', '00262', '00231', '00018', '00222', '00114', '00168', '00101', '00256', '00147', '00063', '00218', '00223', 