In [None]:
import os
import pandas as pd

def check_Carbapenemase_gene(ids_file_path, base_path, output_file_path):
    """
    Report which IDs lack a "Carbapenemase" column in their summary_matches.txt file.

    Args:
        ids_file_path (str): Path to a text file containing one ID per line.
            Surrounding whitespace is stripped and blank lines are ignored.
        base_path (str): Directory containing one sub-directory per ID; the file
            checked for each ID is <base_path>/<ID>/summary_matches.txt.
        output_file_path (str): Path of the file that receives, one per line, the
            IDs for which no "Carbapenemase" column was found.

    Description:
        For each ID the tab-separated summary_matches.txt file is loaded with
        pandas and its column names are searched (case-insensitively) for the
        word "Carbapenemase". For the first matching column, the column name and
        its non-null values (genes) are printed. An ID is written to
        output_file_path when:
            - its summary_matches.txt file does not exist,
            - the file cannot be read/parsed, or
            - no column name contains "Carbapenemase".

    Prints:
        - Number of IDs loaded from ids_file_path.
        - The path of each summary_matches.txt file being checked.
        - Presence or absence of a "Carbapenemase" column for each ID.
        - Genes found in the first "Carbapenemase" column, if any.
        - An error message when a summary_matches.txt file is missing or unreadable.
        - Confirmation message upon exporting the list of IDs.

    Returns:
        None. The result is written to output_file_path.

    Raises:
        OSError: If ids_file_path cannot be read or output_file_path cannot be
            written. Problems with individual summary_matches.txt files are
            reported with a printed message instead of an exception.

    Example usage:
        ids_file_path = '/home/mdu/qc/2024/M2024-01087/verification_temp/abritamr/ids_cpos.txt'
        base_path = '/home/mdu/qc/2024/M2024-01087/abritamr'
        output_file_path = '/home/mdu/qc/2024/M2024-01087/verification_temp/abritamr/ids_without_Carbapenemase.txt'
        check_Carbapenemase_gene(ids_file_path, base_path, output_file_path)
    """
    keyword = 'carbapenemase'

    # Read the IDs; strip whitespace and drop blank lines so that an empty ID is
    # never resolved to <base_path>/summary_matches.txt or written to the output.
    with open(ids_file_path, 'r') as file:
        ids_list = [line.strip() for line in file.read().splitlines() if line.strip()]

    print(f"Loaded {len(ids_list)} IDs from {ids_file_path}")

    ids_without_Carbapenemase = []

    for ids in ids_list:
        # Construct the path to the summary_matches.txt file
        summary_file_path = os.path.join(base_path, ids, 'summary_matches.txt')

        if not os.path.exists(summary_file_path):
            print(f"File not found: {summary_file_path}")
            ids_without_Carbapenemase.append(ids)
            continue

        print(f"Checking file: {summary_file_path}")

        # Read the summary_matches.txt file. Any read/parse failure is reported
        # and the ID is flagged, so one bad file does not stop the whole run.
        try:
            df = pd.read_csv(summary_file_path, sep='\t')
        except Exception as e:
            print(f"Error reading {summary_file_path}: {e}")
            ids_without_Carbapenemase.append(ids)
            continue

        # Only the first column whose name contains the keyword is reported.
        matching_column = next(
            (column for column in df.columns if keyword in str(column).lower()),
            None,
        )

        if matching_column is None:
            print(f"Carbapenemase column is not present for {ids}")
            ids_without_Carbapenemase.append(ids)
            continue

        # Values are converted to str because pandas may parse the column as
        # numeric, and str.join() only accepts strings.
        gene_values = df[matching_column].dropna().tolist()
        genes = ', '.join(str(value) for value in gene_values)
        print(f"Carbapenemase column '{matching_column}' is present for {ids}. Genes: {genes}")

    # Export the list of IDs without Carbapenemase column to a file
    with open(output_file_path, 'w') as output_file:
        output_file.writelines(f"{ids}\n" for ids in ids_without_Carbapenemase)

    print(f"Exported list of IDs without Carbapenemase column to {output_file_path}")

# Run the check on the IDs listed in ids_cpos.txt; IDs without a
# "Carbapenemase" column are written to ids_without_Carbapenemase.txt.
base_path = '/home/mdu/qc/2024/M2024-01087/abritamr'
ids_file_path = '/home/mdu/qc/2024/M2024-01087/verification_temp/abritamr/ids_cpos.txt'
output_file_path = '/home/mdu/qc/2024/M2024-01087/verification_temp/abritamr/ids_without_Carbapenemase.txt'
check_Carbapenemase_gene(
    ids_file_path=ids_file_path,
    base_path=base_path,
    output_file_path=output_file_path,
)

In [11]:
"""
Builds contigs.tab, a tab-separated table that maps each ID to the path of its spades.fa file.

Steps:
1. Reads the IDs (one per line) from ids_file, stripping whitespace and ignoring blank lines.
2. Sets the base directory path using run_id: /home/mdu/data/<run_id>/
3. Constructs the path <base_dir>/<ID>/spades/spades.fa for every ID.
4. Creates a DataFrame with the columns 'ID' and 'Path'.
5. Exports the DataFrame to a tab-separated file named contigs.tab in the
   current working directory.

Variables:
    ids_file (str): Path to the file containing the list of IDs.
    run_id (str): The run ID used to construct the base directory path.

Note:
    The paths are only constructed; this cell does not check that the
    spades.fa files actually exist.
"""


import os
import pandas as pd

# Variables
ids_file = 'ids_without_Carbapenemase.txt'
run_id = 'M2024-01087'

# Read the IDs, one per line. Blank lines are dropped because an empty ID would
# otherwise produce a row pointing at <base_dir>/spades/spades.fa.
with open(ids_file, 'r') as file:
    ids = [line.strip() for line in file.read().splitlines() if line.strip()]

# Base directory path
base_dir = f'/home/mdu/data/{run_id}/'

# One [ID, path-to-spades.fa] row per ID. The loop variable is deliberately not
# called `id`, which would shadow the builtin for the rest of the session.
data = [
    [sample_id, os.path.join(base_dir, sample_id, 'spades', 'spades.fa')]
    for sample_id in ids
]

# Create DataFrame
df = pd.DataFrame(data, columns=['ID', 'Path'])

# Export the DataFrame as a tab-separated file
df.to_csv('contigs.tab', sep='\t', index=False)

In [None]:
# Second pass: re-check the IDs listed in ids_without_Carbapenemase.txt against
# the summary_matches.txt files under verification_temp/abritamr.
base_path = '/home/mdu/qc/2024/M2024-01087/verification_temp/abritamr'
ids_file_path = 'ids_without_Carbapenemase.txt'
output_file_path = '/home/mdu/qc/2024/M2024-01087/verification_temp/abritamr/ids_without_Carbapenemase_2.txt'
check_Carbapenemase_gene(
    ids_file_path=ids_file_path,
    base_path=base_path,
    output_file_path=output_file_path,
)