In [1]:
# Import necessary packages
import glob
import os
import shlex

import pandas as pd

# Set the directory paths.
# All four folders live under a single data root. The root defaults to the
# original location but can be overridden with the HOSPITAL_MICROBIOME_DATA_DIR
# environment variable, so the notebook can run on another machine unedited.
DATA_ROOT = (
    os.environ.get('HOSPITAL_MICROBIOME_DATA_DIR')
    or '/mnt/Local_Disk_1/Hospital_Microbiome/Data'
).rstrip('/') + '/'

# Every path keeps its trailing slash because later cells build file names
# by plain string concatenation (e.g. `input_path + 'gram_staining.csv'`).
figure_path = DATA_ROOT + 'Figures/'
input_path = DATA_ROOT + 'Input_data/'
model_path = DATA_ROOT + 'Modeling/Models/'
output_path = DATA_ROOT + 'Output_data/'

In [8]:
# Gram-staining lookup table, indexed by organism name
staining_info = pd.read_csv(
    input_path + 'gram_staining.csv',
    sep=',',
    index_col="organism_name",
)

# Genome details, indexed by organism name; only the accession column is kept
genome_info = pd.read_csv(
    output_path + 'genome_details.csv',
    sep=',',
    index_col="organism_name",
).loc[:, ["assembly_accession"]]

# Shorten every index label to its first two whitespace-separated words so it
# can be matched against the staining table's index
# (presumably "Genus species" with any strain suffix dropped -- TODO confirm)
genome_info.index = genome_info.index.map(
    lambda name: ' '.join(name.split()[:2])
)

# Index-on-index merge with the default (inner) join: organisms that are
# missing from either table do not appear in the result
genome_info = pd.merge(
    genome_info,
    staining_info,
    left_index=True,
    right_index=True,
)

genome_info

Unnamed: 0_level_0,assembly_accession,gram_staining
organism_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Abiotrophia defectiva,GCF_000160075.2,positive
Achromobacter xylosoxidans,GCF_000165835.1,negative
Acinetobacter baumannii,GCF_000069245.1,negative
Acinetobacter johnsonii,GCF_000302335.1,negative
Acinetobacter junii,GCF_000162075.1,negative
...,...,...
Stutzerimonas stutzeri,GCF_000013785.1,negative
Veillonella atypica,GCF_000179755.1,negative
Veillonella parvula,GCF_000024945.1,negative
Xanthomonas citri,GCF_000175155.1,negative


In [4]:
# Build a bash script with one CarveMe `carve` command per organism in
# `genome_info`. The script is only written here; it is executed in a later cell.
carve_file_path = model_path + 'generate_models.sh'

# Universe template passed to `-u` for each recognised gram-staining label.
universe_by_staining = {'positive': 'grampos', 'negative': 'gramneg'}

# Assemble every command before opening the file, so an error while building
# the commands cannot leave a truncated script on disk.
carve_commands = []
unrecognised_staining = []
for idx, row in genome_info.iterrows():
    # Normalise case/whitespace so e.g. 'Positive' is not silently treated as
    # gram-negative by an exact string comparison.
    staining = str(row['gram_staining']).strip().lower()
    if staining not in universe_by_staining:
        unrecognised_staining.append((idx, row['gram_staining']))
    # Unrecognised labels keep the previous fallback (gramneg), but are reported below.
    universe = universe_by_staining.get(staining, 'gramneg')

    # Quote everything interpolated into the shell command; for plain names and
    # accessions shlex.quote returns the text unchanged.
    accession = shlex.quote(str(row['assembly_accession']))
    output_file = shlex.quote(idx.replace(' ', '_') + '.xml')
    carve_commands.append(
        f"carve --refseq {accession} -o {output_file} --solver cplex -u {universe}\n"
    )

with open(carve_file_path, "w") as f:
    f.write("#!/bin/bash\n")
    f.writelines(carve_commands)

# Surface labels that fell back to the gram-negative universe
for organism, label in unrecognised_staining:
    print(f"Warning: unrecognised gram_staining {label!r} for {organism}; using gramneg")

# Print the completion message
print(f"Generated bash commands written to {carve_file_path}")

Generated bash commands written to /mnt/Local_Disk_1/Hospital_Microbiome/Data/Modeling/Models/generate_models.sh


In [8]:
# Run the bash script
# Switch the kernel's working directory to model_path first: the generated
# commands pass a bare file name to `-o`, so output files are written
# relative to the current directory. The change persists for later cells.
os.chdir(model_path)

# IPython shell escapes; {carve_file_path} is interpolated from the Python
# variable. Mark the script as executable, then execute it.
# NOTE(review): the script contains only a shebang plus the carve commands
# (no `set -e`), so a failing command presumably does not stop the
# remaining ones -- confirm against the run log.
!chmod +x {carve_file_path}
!{carve_file_path}

Invalid accession code
Failed to download genome from NCBI.
Invalid accession code
Failed to download genome from NCBI.
Invalid accession code
Failed to download genome from NCBI.
Invalid accession code
Failed to download genome from NCBI.


In [15]:
# Check all generated models: file stem (no directory, no extension) of every
# .xml file found in model_path
model_stems = [
    os.path.splitext(os.path.basename(path))[0]
    for path in glob.glob(model_path + '*.xml')
]

# Same stems with underscores turned back into spaces, kept for inspection
models = [stem.replace('_', ' ') for stem in model_stems]

# Check non-generated models.
# Apply the same space -> underscore rule that was used when the output file
# names were written, instead of reversing it (the reverse is lossy for names
# that already contain an underscore). Iterating over the index rather than
# taking a set difference keeps the result in genome_info order, so the table
# below is identical from run to run.
generated_stems = set(model_stems)
model_ng = [
    name
    for name in genome_info.index.unique()
    if name.replace(' ', '_') not in generated_stems
]

# Print accession for the genomes without a model
missing_genome_info = genome_info.loc[model_ng]
missing_genome_info

Unnamed: 0_level_0,assembly_accession,gram_staining
organism_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Sphingomonas echinoides,GCF_029623395.1,negative
Kocuria atrinae,GCF_000286355.1,positive
Deinococcus geothermalis,GCF_002384255.1,positive
Enhydrobacter aerosaccus,GCF_001188545.1,negative
