### 1. Keep only the files whose IDs are listed (copy them to a new folder)

In [15]:
def check_files_with_id(folder_path, checkid_file, output_folder):
    """
    Copy the files in the given folder whose filenames contain one of the listed IDs to the output folder.

    IDs are read from ``checkid_file``: every line that, after stripping
    surrounding whitespace, starts with ``>`` contributes the remainder of
    that line as one ID. A file is copied when any ID occurs as a substring
    of its filename.

    Args:
        folder_path: Folder whose direct entries are examined.
        checkid_file: Text file holding the ``>``-prefixed ID lines.
        output_folder: Destination folder; created (with parents) if missing.

    Returns:
        None. Matching files are copied under their original names.
    """
    id_set = set()

    with open(checkid_file, 'r') as f:
        for line in f:
            line = line.strip()
            # A bare '>' would yield the empty ID, which is a substring of
            # every filename and would therefore copy the whole folder.
            if line.startswith('>') and len(line) > 1:
                id_set.add(line[1:])

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(folder_path):
        source = os.path.join(folder_path, filename)
        # shutil.copy only handles regular files; skip sub-directories
        # instead of aborting the whole run on the first one that matches.
        if not os.path.isfile(source):
            continue
        if any(id_str in filename for id_str in id_set):
            shutil.copy(source, os.path.join(output_folder, filename))

In [16]:
# Folder whose entries are scanned for ID-matching filenames.
folder_path = '/homes/zihao/DATAS/TEST_for_annot/'
# Text file supplying the IDs (taken from its lines that start with '>').
checkid_file = '/nfs/research/goldman/demaio/fastLK/MapleRealErrorsVariation_errorEstimation_estimatedErrors.txt'
# Destination for the matching files; reused further down as the input
# folder of RatioProcessor.
middle_output_folder = '/homes/zihao/DATAS/AAA_FOR_Annot/'

# Copy the ID-matching files from folder_path into middle_output_folder.
check_files_with_id(folder_path, checkid_file, middle_output_folder)

### 2. Compute the row-wise maximum ratios across the selected files

In [17]:
import glob
import os
import warnings

import numpy as np
import pandas as pd


class RatioProcessor:
    """Combine a ratio column from the files of one folder into row-wise maxima.

    For each ratio type handled by :meth:`run` (``'SB'`` and ``'AF'``), the
    tab-separated files found in ``folder_path`` are reduced to a single
    sequence holding, for every row position, the largest value seen, and
    that sequence is written to ``<ratio_type>_RATIO.txt`` in
    ``output_folder``.
    """

    def __init__(self, folder_path, output_folder):
        """Store both folders and create ``output_folder`` if it is missing."""
        self.folder_path = folder_path
        self.output_folder = output_folder
        self._ensure_output_folder_exists()

    def _ensure_output_folder_exists(self):
        """Create the output folder (and parents); no-op when it already exists."""
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.output_folder, exist_ok=True)

    def get_files(self):
        """Return a list of all entries in the folder, in the order glob yields them."""
        return glob.glob(os.path.join(self.folder_path, '*'))

    def read_ratios(self, file_path, ratio_type):
        """Read column ``ratio_type`` of the tab-separated file as a list.

        Raises:
            KeyError: If the file has no column named ``ratio_type``.
        """
        df = pd.read_csv(file_path, sep='\t')
        return df[ratio_type].tolist()

    def write_ratios(self, output_file, header, ratios):
        """Write the header line followed by one ratio value per line."""
        with open(output_file, 'w') as f_out:
            f_out.write(header + '\n')
            f_out.write('\n'.join(map(str, ratios)) + '\n')

    def process_files(self, files, ratio_type):
        """Compute the max ratio values for the given ratio_type across all files.

        The first file seeds the result from its ``ratio_type`` column. Every
        later file contributes its ``f'{ratio_type}_RATIO'`` column, and the
        running result keeps the larger value at each row position.

        A later file that cannot be read, lacks the column, or has a different
        number of rows is skipped with a ``RuntimeWarning`` naming the file.
        Problems with the first file propagate to the caller.

        Args:
            files: Paths of the input files; the first one is the seed.
            ratio_type: Ratio name, e.g. ``'SB'``.

        Returns:
            The row-wise maxima: a list when only the first file contributed,
            otherwise a NumPy array.

        Raises:
            ValueError: If ``files`` is empty.
        """
        if not files:
            raise ValueError('process_files() needs at least one input file')

        # NOTE(review): the first file is read from column `ratio_type` while
        # all later files are read from `<ratio_type>_RATIO`. Kept as-is
        # because the intended schema is not visible here -- confirm which
        # column name the input files really carry.
        ratios = self.read_ratios(files[0], ratio_type)

        for file_path in files[1:]:
            try:
                ratios_new = self.read_ratios(file_path, f'{ratio_type}_RATIO')

                # Guard explicitly: a ragged [ratios, ratios_new] does not
                # give a row-wise maximum (older NumPy compares the two lists
                # as whole objects, newer NumPy raises).
                if len(ratios_new) != len(ratios):
                    raise ValueError(
                        f'expected {len(ratios)} rows, found {len(ratios_new)}'
                    )

                ratios = np.max([ratios, ratios_new], axis=0)
            except Exception as exc:
                # Best-effort by design: one bad file must not abort the run,
                # but the skip is reported instead of being silently dropped.
                # `Exception` (not a bare except) lets Ctrl-C still interrupt.
                warnings.warn(
                    f'Skipping {file_path!r} for {ratio_type}: {exc!r}',
                    RuntimeWarning,
                    stacklevel=2,
                )

        return ratios

    def run(self):
        """Process every ratio type and write one result file per type.

        Raises:
            ValueError: If ``folder_path`` contains no entries.
        """
        files = self.get_files()
        if not files:
            raise ValueError(f'no input files found in {self.folder_path!r}')

        ratio_types = ['SB', 'AF']

        for ratio_type in ratio_types:
            # Process files and compute the max ratio values
            ratios = self.process_files(files, ratio_type)

            # Write the results to the output file
            output_file = os.path.join(self.output_folder, f'{ratio_type}_RATIO.txt')
            self.write_ratios(output_file, 'ERROR_all', ratios)

In [18]:
# Entry point: runs only when this code executes as the main module.
if __name__ == '__main__':
    # Relative path, so the results land under the current working directory.
    output_folder = 'RATIO_for_all_Annot/'
    # The input is the folder that check_files_with_id filled earlier.
    processor = RatioProcessor(middle_output_folder, output_folder)
    # Writes SB_RATIO.txt and AF_RATIO.txt into output_folder.
    processor.run()

#### old version
```python
import glob
import os
import pandas as pd
import numpy as np

folder_path = '/homes/zihao/DATAS/AAA/'

# Get all entries in the folder
files = glob.glob(os.path.join(folder_path, '*'))

# Set the ratio types
ratio_types = ['SB', 'AF']

# Iterate over the ratio types
for ratio_type in ratio_types:
    # Read the first file; its column is named by the bare ratio type
    df = pd.read_csv(files[0], sep='\t')
    ratios = df[ratio_type].tolist()

    # Set the output file name
    output_file = f'{ratio_type}_1_RATIO.txt'

    # Write the first file's values to a txt file
    with open(output_file, 'w') as f:
        # Write the header
        f.write('ERROR_all\n')

        # Write the values of the first file, one per line
        f.write('\n'.join(map(str, ratios)))
        f.write('\n')

        # NOTE(review): `ratios` is unbound from here on, yet the loop below
        # reads it when computing the maximum. That lookup raises NameError,
        # which the bare `except` swallows -- so, as written, every remaining
        # file is skipped and the output keeps only the first file's values.
        del ratios

        # Loop through the remaining files
        for file in files[1:]:
            try:
                # Read the file using pandas
                df_new = pd.read_csv(file, sep='\t')

                # Take the `<ratio_type>_RATIO` column and keep the row-wise
                # maximum (not an average) against the running values
                ratios_new = df_new[f'{ratio_type}_RATIO'].tolist()
                ratios = np.max([ratios, ratios_new], axis=0)

                # Rewrite the txt file with the current maxima (overwriting
                # previous content). NOTE(review): this reopens output_file
                # while the outer handle `f` on the same path is still open.
                with open(output_file, 'w') as f_out:
                    # Write the header
                    f_out.write('ERROR_all\n')

                    # Write the current maxima, one per line
                    f_out.write('\n'.join(map(str, ratios)))
                    f_out.write('\n')

                # Delete the ratios variable from memory
                del ratios

            except:
                # If there is an error, skip the current file (the bare
                # except hides every failure, including the NameError above)
                pass
```