In [1]:
# !mdu contigs --idfile ids.txt -a shovill > contigs.tab
# !mdu contigs --idfile ids.txt -a spades > contigs_spades.tab


In [2]:
# ! cat contigs.tab

In [3]:
# ! cat contigs_spades.tab

In [4]:
# !head -n 1 <path_to_amrfinder.out> > <amr_gene>_abritamr.tab
# !cat <path_to_amrfinder.out> | grep -i <amr_gene> >> <amr_gene>_abritamr.tab
#seqkit grep -r -p "contig00021" <path_to_contigs.fa> > <output_file>.fa
#mob_typer --infile <output_file>.fa --out_file <output_file>_mobtyper.txt

In [None]:
# Prepare the input files for the nextflow pipeline:
#   1. <gene>_abritamr_contig_info.tab - contig id, sample id and contigs path per hit
#   2. seqkit_grep.sh                  - extracts each hit contig into <sample>-<gene>.fa
#   3. mob_typer.sh                    - runs mob_typer on each extracted FASTA
import shlex

import pandas as pd

# Gene label shared by every input/output file name below; change it in one place.
AMR_GENE = 'NDM-1'

# Read the <gene>_abritamr.tab file (tab-separated, first row is the header)
abritamr_df = pd.read_csv(f'{AMR_GENE}_abritamr.tab', sep='\t')

# Read the contigs.tab file (no header): column 0 is used as the sample id,
# column 1 as the path to that sample's contigs file
contigs_df = pd.read_csv('contigs.tab', sep='\t', header=None)

# The two tables are paired purely by row position (row i of contigs.tab is
# assumed to describe row i of the abritamr table), so a differing row count
# means some rows cannot be paired. Say so instead of padding silently.
# NOTE(review): positional pairing relies on both files being in the same
# order - confirm against how the two files were generated.
if len(abritamr_df) != len(contigs_df):
    print(
        f"Warning: {AMR_GENE}_abritamr.tab has {len(abritamr_df)} rows but "
        f"contigs.tab has {len(contigs_df)}; unmatched rows are left empty or ignored."
    )

# reindex() pads with NaN when contigs.tab is shorter than the abritamr table
# and drops the surplus rows when it is longer.
row_positions = range(len(abritamr_df))
contig_info_df = pd.DataFrame({
    'contig_id': abritamr_df.iloc[:, 1].to_numpy(),  # second column = contig id
    'sample_id': contigs_df.iloc[:, 0].reindex(row_positions).to_numpy(),
    'contigs_path': contigs_df.iloc[:, 1].reindex(row_positions).to_numpy(),
})

# Write the combined table to a new file
contig_info_df.to_csv(f'{AMR_GENE}_abritamr_contig_info.tab', sep='\t', index=False)

# Only rows with a contig id, a sample id and a contigs path can yield a command.
complete_rows = contig_info_df.dropna()

# Create seqkit_grep.sh file
# NOTE(review): each command redirects with '>', so a sample id appearing on
# several rows would keep only the last contig extracted - confirm ids are unique.
with open('seqkit_grep.sh', 'w') as f:
    f.write('#!/bin/bash\n\n')
    for row in complete_rows.itertuples(index=False):
        # shlex.quote keeps paths/ids containing spaces or shell
        # metacharacters from breaking (or hijacking) the generated command.
        pattern = shlex.quote(str(row.contig_id))
        contigs_path = shlex.quote(str(row.contigs_path))
        fasta_out = shlex.quote(f'{row.sample_id}-{AMR_GENE}.fa')
        f.write(f'seqkit grep -r -p {pattern} {contigs_path} > {fasta_out}\n')

# Create mob_typer.sh file
# Built from the same complete rows as seqkit_grep.sh, so mob_typer is never
# pointed at a FASTA that seqkit_grep.sh does not produce.
with open('mob_typer.sh', 'w') as f:
    f.write('#!/bin/bash\n\n')
    for sample_id in complete_rows['sample_id'].unique():
        fasta_in = shlex.quote(f'{sample_id}-{AMR_GENE}.fa')
        report_out = shlex.quote(f'{sample_id}-{AMR_GENE}_mobtyper.txt')
        f.write(f'mob_typer --infile {fasta_in} --out_file {report_out}\n')

print("Files created successfully:")
print(f"1. {AMR_GENE}_abritamr_contig_info.tab")
print("2. seqkit_grep.sh")
print("3. mob_typer.sh")

Files created successfully:
1. NDM-1_abritamr_contig_info.tab
2. seqkit_grep.sh
3. mob_typer.sh
