In [2]:
"""
Reads a list of IDs from a file, constructs paths to corresponding spades.fa files, and exports the data to a tab-separated file.

Steps:
1. Reads the IDs from the specified ids_sistr.txt file.
2. Initializes a list to store the data.
3. Sets the base directory path using the provided run_id.
4. Iterates over each ID to construct the path to the spades.fa file.
5. Appends each ID and its corresponding path to the data list.
6. Creates a DataFrame from the data list.
7. Exports the DataFrame to a tab-separated file named contigs.tab.

Configuration variables (set near the top of the script, not function arguments):
    ids_file (str): Path to the file containing the list of IDs.
    run_id (str): The run ID used to construct the base directory path.

Example usage:
    ids_file = '/path/to/ids_sistr.txt'
    run_id = 'RUN_ID'
    # Read ids_sistr.txt file
    with open(ids_file, 'r') as file:
        ids = file.read().splitlines()

    # Initialize list to store data
    data = []

    # Base directory path
    base_dir = f'/home/mdu/data/{run_id}/'

    # Iterate over each ID
    for id in ids:
        # Construct path to the spades.fa file
        spades_path = os.path.join(base_dir, id, 'spades', 'spades.fa')
        
        # Append the ID and path to the data list
        data.append([id, spades_path])

    # Create DataFrame
    df = pd.DataFrame(data, columns=['ID', 'Path'])

    # Export the DataFrame as a tab-separated file
    df.to_csv('contigs.tab', sep='\t', index=False)
"""

import os
import pandas as pd

# Variables
ids_file = 'ids_sistr.txt'
run_id = 'M2024-01087'

# Read the IDs, one per line. Surrounding whitespace is stripped and blank
# lines are dropped: an empty ID would otherwise produce a bogus row whose
# path collapses to '<base_dir>/spades/spades.fa'.
with open(ids_file, 'r') as file:
    ids = [line.strip() for line in file.read().splitlines() if line.strip()]

# Base directory path for this run
base_dir = f'/home/mdu/data/{run_id}/'

# One [ID, path] row per sample; the path is built as
# <base_dir>/<ID>/spades/spades.fa. The file is not checked for existence.
# (The loop variable is deliberately not called `id`, which would shadow the
# builtin of the same name.)
data = [
    [sample_id, os.path.join(base_dir, sample_id, 'spades', 'spades.fa')]
    for sample_id in ids
]

# Create DataFrame
df = pd.DataFrame(data, columns=['ID', 'Path'])

# Export the DataFrame as a tab-separated file (header row, no index column)
df.to_csv('contigs.tab', sep='\t', index=False)

In [None]:
"""
Generates shell commands based on IDs and hash values from input files and writes them to a script file.

Steps:
1. Defines variables for file paths and other parameters.
2. Reads the list of IDs from the specified ids_sistr.txt file.
3. Reads the trace.txt.1 file into a DataFrame.
4. Defines a function to find the hash value for a given ID in the trace DataFrame.
5. Iterates over each ID to generate shell commands based on the hash values.
6. Writes the generated commands to a shell script file named run_me.sh.

Configuration variables (set near the top of the script, not function arguments):
    ids_file (str): Path to the file containing the list of IDs.
    run_id (str): The run ID used in the commands.
    trace_file (str): Path to the trace file containing hash values.
    stype (str): A string used to filter the names in the trace DataFrame.

Functions:
    find_hash(trace_df, id):
        Returns the hash of the first row in the trace DataFrame whose name contains both the given ID and the `stype` string (substring match).

        Args:
            trace_df (DataFrame): The DataFrame containing trace data.
            id (str): The ID to search for in the trace DataFrame.

        Returns:
            str: The hash value if found, otherwise None.
"""

import os
import pandas as pd

# Variables
ids_file = 'ids_sistr.txt'
run_id = 'M2024-01087'
# Derived from run_id so that changing the run in one place cannot leave the
# trace file pointing at a different run than the generated commands use.
trace_file = f'/home/mdu/qc/2024/{run_id}/trace.txt.1'
stype = 'STYPE'

# Read the IDs, one per line. Surrounding whitespace is stripped and blank
# lines are dropped: an empty string is a substring of every name, so an empty
# ID would match the first row that contains `stype` and generate commands
# for a sample that does not exist.
with open(ids_file, 'r') as file:
    ids = [line.strip() for line in file.read().splitlines() if line.strip()]

# Read the tab-separated trace file as a DataFrame; the code below reads its
# 'name' and 'hash' columns.
trace_df = pd.read_csv(trace_file, sep='\t')

# Function to find hash value in trace DataFrame
def find_hash(trace_df, id, stype_filter=None):
    """Return the hash of the first trace row whose name matches an ID.

    A row matches when its ``name`` contains both ``id`` and the step filter
    as plain substrings. Rows are checked in DataFrame order and the first
    match wins.

    NOTE(review): because this is a substring test, an ID that is a prefix of
    another ID (e.g. 'S1' vs 'S10') can match the wrong row -- confirm that
    the IDs in use cannot collide this way.

    Args:
        trace_df (DataFrame): Trace table with ``name`` and ``hash`` columns.
        id (str): The ID to search for. The name shadows the builtin, but it
            is kept so existing keyword callers continue to work.
        stype_filter (str, optional): Substring that must also appear in the
            row name. Defaults to the module-level ``stype`` variable.

    Returns:
        The ``hash`` value of the first matching row, or None when no row
        matches.
    """
    if stype_filter is None:
        stype_filter = stype  # fall back to the script-wide setting

    # Walk the two columns in lockstep instead of building a Series per row
    # with iterrows().
    for name, hash_value in zip(trace_df['name'], trace_df['hash']):
        # A missing name is read by pandas as NaN (a float), and `in` on a
        # float raises TypeError, so such rows are skipped.
        if not isinstance(name, str):
            continue
        if id in name and stype_filter in name:
            print(f"Found hash for {id}: {hash_value}")  # Debugging statement
            return hash_value
    print(f"No hash found for {id}")  # Debugging statement
    return None

# Generate commands
#
# One shell snippet is emitted per ID that has a matching trace row; IDs with
# no match are skipped. Each snippet runs inside a subshell `( ... )` so its
# relative `cd work/...` does not leak into the next snippet. Without that,
# the second and later `cd` commands would be resolved from inside the
# previous work directory, fail, and let the `cp` commands run in the wrong
# place. `|| exit 1` leaves only the subshell, so a failed `cd` skips that one
# sample instead of copying files into whatever directory is current.
#
# NOTE(review): both final `cp` commands write to typer_stype.csv, so the
# second overwrites the result of the first -- confirm which source is meant.
# NOTE(review): if the trace `hash` column holds an abbreviated directory name
# rather than the full one, `cd work/<hash>` may need a trailing glob -- confirm.
commands = []
for sample_id in ids:
    hash_value = find_hash(trace_df, sample_id)
    if not hash_value:
        continue  # no matching trace row: nothing to generate for this ID
    cmd = f"""
# For {sample_id} in run {run_id}
(
cd work/{hash_value} || exit 1
ls -a
cp typer_stype.csv typer_stype_shovill.csv

cp /home/mdu/qc/2024/{run_id}/verification_temp/{sample_id}/sistr_filtered.csv typer_stype.csv
cp {sample_id}/sistr_filtered.csv typer_stype.csv
)
"""
    print(f"Generated command for {sample_id}:\n{cmd}")  # Debugging statement
    commands.append(cmd.strip())

# Write the generated commands to run_me.sh, separated by blank lines
with open('run_me.sh', 'w') as file:
    file.write('\n\n'.join(commands))

print("Commands written to run_me.sh")  # Debugging statement