# In [3]:
import os
import pandas as pd

def convert_tsv_to_excel(source_directory, target_directory):
    """
    Convert every TSV file in source_directory into an Excel workbook.

    Each regular file whose name ends with '.tsv' is read with pandas as a
    tab-separated table and written to target_directory under the same name
    with the trailing '.tsv' swapped for '.xlsx'. Entries that are not regular
    files (for example a sub-directory named 'old.tsv') and files with any
    other extension are skipped.

    Args:
    - source_directory (str): The path to the directory containing TSV files.
    - target_directory (str): The path to the directory where Excel files will be saved.
      It is created (including missing parents) when it does not exist.

    Raises:
    - FileNotFoundError: If source_directory does not exist (raised by os.listdir).
    """
    tsv_suffix = '.tsv'

    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(target_directory, exist_ok=True)

    # Sorted so files are processed in a reproducible order
    for filename in sorted(os.listdir(source_directory)):
        if not filename.endswith(tsv_suffix):
            continue

        source_file_path = os.path.join(source_directory, filename)
        # A directory can also be called '*.tsv'; read_csv would fail on it
        if not os.path.isfile(source_file_path):
            continue

        # Swap only the trailing extension: str.replace would also rewrite a
        # '.tsv' that appears earlier in the name (e.g. 'run.tsv.backup.tsv')
        excel_name = filename[:-len(tsv_suffix)] + '.xlsx'
        target_file_path = os.path.join(target_directory, excel_name)

        # Load the TSV file and save it as an Excel sheet without the index column
        df = pd.read_csv(source_file_path, sep='\t')
        df.to_excel(target_file_path, index=False)

# Example run: the folder holding the TSV inputs and the folder that
# receives the generated Excel workbooks
source_directory = '/Users/neginmanshour/Desktop/haddock/Haddock/haddock_em_TB'
target_directory = '/Users/neginmanshour/Desktop/haddock/Haddock/haddock_em_TB_ex'
convert_tsv_to_excel(
    source_directory=source_directory,
    target_directory=target_directory,
)



# In [None]:
import os
import shutil
import pandas as pd
from openpyxl import load_workbook

def extract_pdb_id_from_dockq(filename):
    """Return the PDB ID of a DockQ filename: the text before the first underscore."""
    # With no underscore present the whole filename is returned unchanged
    pdb_id, _, _ = filename.partition('_')
    return pdb_id

def extract_pdb_id_from_haddock(filename):
    """Return the PDB ID of a haddock filename: its third underscore-separated field."""
    # Names are expected to look like 'haddock_scoring_<PDB ID>_...'; a name
    # with fewer than three fields raises IndexError
    fields = filename.split('_', 3)
    return fields[2]

def copy_haddock_sheets_to_dockq(dockq_dir, haddock_dir, output_dir):
    """
    Add a 'Hadd_md' sheet with haddock data to copies of the DockQ workbooks.

    For every file in dockq_dir whose name ends with '_TB.xlsx' and contains
    'DockQ_data', the haddock file with the same (case-insensitive) PDB ID is
    looked up among the files in haddock_dir whose names contain
    'mdscoring_total'. The DockQ workbook is copied to output_dir unless a
    file of that name is already there, and the haddock file's 'Sheet1' is
    written into the copy as sheet 'Hadd_md', replacing any existing sheet of
    that name. Progress is reported with print().

    Args:
    - dockq_dir (str): Directory containing the DockQ Excel workbooks.
    - haddock_dir (str): Directory containing the haddock Excel workbooks.
    - output_dir (str): Directory receiving the updated copies; created
      (including missing parents) when it does not exist.

    Raises:
    - FileNotFoundError: If dockq_dir or haddock_dir does not exist (raised by os.listdir).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Map upper-cased PDB IDs to haddock file names. Listings are sorted so
    # that, when two files share a PDB ID, the winner (the last one in sorted
    # order) is the same on every run.
    haddock_map = {}
    for haddock_file in sorted(os.listdir(haddock_dir)):
        if 'mdscoring_total' not in haddock_file:
            continue
        try:
            haddock_id = extract_pdb_id_from_haddock(haddock_file).upper()
        except IndexError:
            # The name has fewer than three underscore-separated fields; skip
            # it instead of aborting the whole run
            print(f"Skipping {haddock_file}: cannot extract a PDB ID from its name")
            continue
        haddock_map[haddock_id] = haddock_file

    for dockq_file in sorted(os.listdir(dockq_dir)):
        if not (dockq_file.endswith('_TB.xlsx') and 'DockQ_data' in dockq_file):
            continue

        pdb_id = extract_pdb_id_from_dockq(dockq_file).upper()
        haddock_file = haddock_map.get(pdb_id)
        if haddock_file is None:
            print(f"No matching Hadd_md file found for {dockq_file}")
            continue

        dockq_path = os.path.join(dockq_dir, dockq_file)
        haddock_path = os.path.join(haddock_dir, haddock_file)
        output_path = os.path.join(output_dir, dockq_file)

        # Copy the DockQ file to the output directory if it's not already there
        if not os.path.exists(output_path):
            shutil.copyfile(dockq_path, output_path)

        print(f"Processing {dockq_file} and {haddock_file} for PDB ID {pdb_id}")

        haddock_df = pd.read_excel(haddock_path, sheet_name='Sheet1')

        # In append mode the writer loads the existing workbook itself.
        # Assigning writer.book is rejected by pandas 2.x (read-only
        # property), so let pandas replace an existing 'Hadd_md' sheet
        # via if_sheet_exists (available since pandas 1.3).
        with pd.ExcelWriter(
            output_path,
            engine='openpyxl',
            mode='a',
            if_sheet_exists='replace',
        ) as writer:
            haddock_df.to_excel(writer, sheet_name='Hadd_md', index=False)

        # Reported only after the writer has closed, i.e. once the file is saved
        print(f'Updated {dockq_file} with Hadd_md data for PDB ID {pdb_id}')

# Point these three variables at your own directories before running:
# the DockQ workbooks, the haddock workbooks, and the output folder
dockq_dir = '/Users/neginmanshour/Desktop/haddock/D_P_F_H_TB'
haddock_dir = '/Users/neginmanshour/Desktop/haddock/haddock_md_TB_ex'
output_dir = '/Users/neginmanshour/Desktop/haddock/D_P_FF_HH_TB'

copy_haddock_sheets_to_dockq(
    dockq_dir=dockq_dir,
    haddock_dir=haddock_dir,
    output_dir=output_dir,
)

