# In [None]:
import os
import subprocess
import re
import csv

# Directory containing the pdb files
input_directory = "./pdbs"
# Reference pdb file
reference_file = "ref.pdb"
# Output CSV file
output_csv = "usalign_results.csv"

# Define the regex patterns for RMSD and TM-score
rmsd_pattern = re.compile(r'RMSD=\s+([\d.]+)')
tm_score_pattern = re.compile(r'TM-score=\s+([\d.]+)')


def parse_usalign_output(output):
    """Extract the RMSD and TM-score from USalign's standard output.

    Args:
        output: Text captured from the USalign process' stdout.

    Returns:
        A ``(rmsd, tm_score)`` tuple of the matched strings (not converted
        to float, so the CSV keeps the tool's own formatting), or ``None``
        when either value is missing from the text.
    """
    rmsd_match = rmsd_pattern.search(output)
    # search() keeps only the FIRST "TM-score=" occurrence.
    # NOTE(review): USalign appears to print one TM-score line per structure;
    # the first is presumably the one normalized by the first argument (the
    # file taken from the directory) -- confirm this is the intended value.
    tm_score_match = tm_score_pattern.search(output)
    if rmsd_match is None or tm_score_match is None:
        return None
    return rmsd_match.group(1), tm_score_match.group(1)


def collect_results(directory, reference):
    """Run USalign for every regular file in *directory* against *reference*.

    Files whose USalign run exits with a non-zero status, or whose output
    cannot be parsed, are reported on stdout and skipped (best effort).

    Args:
        directory: Path of the directory whose files are aligned.
        reference: Path of the reference structure passed as the second
            USalign argument.

    Returns:
        A list of ``[filename, rmsd, tm_score]`` rows, ordered by file name.

    Raises:
        OSError: If *directory* cannot be listed or the ``USalign``
            executable cannot be launched (e.g. it is not on PATH).
    """
    rows = []
    # os.listdir() returns entries in arbitrary order; sort so the CSV rows
    # come out in the same order on every run and every platform.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)

        # Skip sub-directories and anything else that is not a regular file.
        if not os.path.isfile(file_path):
            continue

        # Keep the try body down to the one call that raises the handled error.
        try:
            completed = subprocess.run(
                ["USalign", file_path, reference],
                capture_output=True, text=True, check=True
            )
        except subprocess.CalledProcessError as e:
            print(f"Error processing file {filename}: {e}")
            # The exception text only carries the exit status; the captured
            # stderr holds whatever explanation the tool itself gave.
            if e.stderr and e.stderr.strip():
                print(e.stderr.strip())
            continue

        parsed = parse_usalign_output(completed.stdout)
        if parsed is None:
            print(f"Warning: Could not parse output for file {filename}")
            continue

        rmsd, tm_score = parsed
        rows.append([filename, rmsd, tm_score])
    return rows


def write_results(rows, csv_path):
    """Write the header line followed by *rows* to the CSV file *csv_path*.

    Args:
        rows: Iterable of ``[filename, rmsd, tm_score]`` rows.
        csv_path: Destination path; an existing file is overwritten.
    """
    # newline='' lets the csv module control line endings itself.
    with open(csv_path, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["File Name", "RMSD", "TM-Score"])
        writer.writerows(rows)


# Guarded so the helpers can be imported without launching USalign; when run
# as a script or as a notebook cell, __name__ is "__main__" and this executes.
if __name__ == "__main__":
    results = collect_results(input_directory, reference_file)
    write_results(results, output_csv)

    print(f"Results written to {output_csv}")
