In [None]:
import os
import argparse
import subprocess
import pandas as pd
from itertools import repeat
from multiprocessing import Pool, freeze_support

```shell
prodigal -i /mnt/d/Lab/Bile-Acid/rawdata/GCF_000154445.1_ASM15444v1_genomic.fna -o /mnt/d/Lab/Bile-Acid/result/prodigal/GCF_000154445.1_ASM15444v1_genomic.gff -a /mnt/d/Lab/Bile-Acid/result/prodigal/GCF_000154445.1_ASM15444v1_genomic.faa -f gff
```

In [None]:
def RunProdigal(genomeFile, gffOut, faaOut):
    """Run Prodigal on a single genome file.

    Executes ``prodigal -i genomeFile -o gffOut -a faaOut -f gff``.

    Args:
        genomeFile: Path of the input genome FASTA file (``-i``).
        gffOut: Path of the GFF output file (``-o``).
        faaOut: Path of the protein translation output file (``-a``).

    Returns:
        The exit status of the ``prodigal`` process.
    """
    # The output paths are used exactly as given. Prefixing them with a
    # module-level ``outputDir`` relied on a global this function never
    # received and doubled the directory prefix for relative paths that the
    # caller had already joined with the output directory.
    # Passing an argument list (no shell) keeps paths containing spaces or
    # shell metacharacters intact; note that no shell expansion takes place.
    cmd = [
        "prodigal",
        "-i", genomeFile,
        "-o", gffOut,
        "-a", faaOut,
        "-f", "gff",
    ]
    return subprocess.call(cmd)

In [None]:
def RunProdigalParallel(genomeFileList, gffOutList, faaOutList, njobs):
    """Run ``RunProdigal`` over many genomes using a pool of worker processes.

    The three lists are consumed in lockstep: the i-th genome is written to
    the i-th GFF path and the i-th protein FASTA path.

    Args:
        genomeFileList: Input genome paths.
        gffOutList: GFF output paths, one per genome.
        faaOutList: Protein output paths, one per genome.
        njobs: Number of worker processes in the pool.

    Returns:
        A list with the return value of each ``RunProdigal`` call, in input
        order.
    """
    # The context manager tears the pool down even when a task raises, so
    # worker processes are not leaked. ``starmap`` blocks until every task
    # has finished, so nothing is still running when the block exits.
    with Pool(processes=njobs) as pool:
        return pool.starmap(RunProdigal, zip(genomeFileList, gffOutList, faaOutList))

```shell
hmmsearch --cpu 4 --tblout /mnt/d/Lab/Bile-Acid/result/hmmer/GCF_000154445.1_ASM15444v1_genomic.tbl /mnt/d/Lab/Bile-Acid/phmm/CBAH.hmm /mnt/d/Lab/Bile-Acid/result/prodigal/GCF_000154445.1_ASM15444v1_genomic.faa > /dev/null
```

In [None]:
def RunHMMer(fastaFile, hmmFile, hmmOut, ncpus=4):
    """Search one protein FASTA file against a profile HMM with hmmsearch.

    Executes ``hmmsearch --cpu ncpus --tblout hmmOut hmmFile fastaFile`` and
    discards the report hmmsearch prints to standard output, so only the
    tabular file is kept.

    Args:
        fastaFile: Path of the sequence file to search.
        hmmFile: Path of the profile HMM file to search with.
        hmmOut: Path of the ``--tblout`` table to write.
        ncpus: Value passed to ``--cpu``.

    Returns:
        The exit status of the ``hmmsearch`` process.
    """
    # hmmsearch expects "hmmsearch [options] <hmmfile> <seqdb>": every option
    # comes first, then the profile, then the sequence file. The previous
    # command swapped the two positional arguments and appended --cpu last.
    # All paths are used exactly as given; the profile in particular does not
    # live in the output directory, so it must not be prefixed with it.
    cmd = [
        "hmmsearch",
        "--cpu", str(ncpus),
        "--tblout", hmmOut,
        hmmFile,
        fastaFile,
    ]
    # An argument list (no shell) keeps paths with spaces intact.
    return subprocess.call(cmd, stdout=subprocess.DEVNULL)

In [None]:
def RunHMMerParallel(fastaFileList, hmmFile, hmmOutList, ncpus, njobs):
    """Run ``RunHMMer`` over many FASTA files using a pool of worker processes.

    Every task uses the same ``hmmFile`` and ``ncpus``; the i-th FASTA file is
    paired with the i-th output path.

    Args:
        fastaFileList: Sequence files to search.
        hmmFile: Profile HMM file shared by all tasks.
        hmmOutList: Output table paths, one per FASTA file.
        ncpus: CPU count handed to each individual ``RunHMMer`` call.
        njobs: Number of worker processes in the pool.

    Returns:
        A list with the return value of each ``RunHMMer`` call, in input
        order.
    """
    tasks = zip(fastaFileList, repeat(hmmFile), hmmOutList, repeat(ncpus))
    # The context manager tears the pool down even when a task raises, so
    # worker processes are not leaked. ``starmap`` blocks until every task
    # has finished, so nothing is still running when the block exits.
    with Pool(processes=njobs) as pool:
        return pool.starmap(RunHMMer, tasks)

In [None]:
# Run Prodigal and then HMMer on every genome found in a directory
def RunSpadesDirectory(inputDir, outputDir, hmmFile=None, ncpus=None, njobs=None):
    """Run Prodigal and then HMMer on every ``.fna`` file under ``inputDir``.

    Despite its name, this function does not call SPAdes. It walks
    ``inputDir`` recursively, and for each file ending in ``.fna`` derives
    three output paths directly inside ``outputDir`` by replacing the
    extension with ``.gff``, ``.faa`` and ``.tbl``. All genomes are then
    passed to ``RunProdigalParallel``, and the resulting ``.faa`` paths to
    ``RunHMMerParallel``.

    Files with the same name in different sub-directories map to the same
    output paths.

    Args:
        inputDir: Directory searched recursively for ``.fna`` files.
        outputDir: Directory that receives all output files; created if it
            does not exist.
        hmmFile: Profile HMM file handed to ``RunHMMerParallel``. When
            omitted, the module-level ``hmmFile`` variable is used.
        ncpus: CPU count handed to ``RunHMMerParallel``. When omitted, the
            module-level ``ncpus`` variable is used.
        njobs: Worker-process count handed to both parallel runners. When
            omitted, the module-level ``njobs`` variable is used.

    Raises:
        KeyError: If a setting is neither passed in nor defined at module
            level.
    """
    # Explicit arguments win; otherwise fall back to the module-level
    # settings that this function previously read implicitly.
    moduleSettings = globals()
    if hmmFile is None:
        hmmFile = moduleSettings["hmmFile"]
    if ncpus is None:
        ncpus = moduleSettings["ncpus"]
    if njobs is None:
        njobs = moduleSettings["njobs"]

    # The output files are written straight into outputDir, so it must exist.
    os.makedirs(outputDir, exist_ok=True)

    genomeFileList = []
    gffOutList = []
    faaOutList = []
    hmmOutList = []
    for subdir, _dirs, files in os.walk(inputDir):
        for file in files:
            if not file.endswith(".fna"):
                continue
            genomeFileList.append(os.path.join(subdir, file))
            # Swap the extension on the file name. Passing the extension as a
            # separate os.path.join component would instead produce
            # "<outputDir>/<name>.fna/.gff", a hidden file inside a directory
            # that does not exist.
            stem = os.path.splitext(file)[0]
            gffOutList.append(os.path.join(outputDir, stem + ".gff"))
            faaOutList.append(os.path.join(outputDir, stem + ".faa"))
            hmmOutList.append(os.path.join(outputDir, stem + ".tbl"))

    RunProdigalParallel(genomeFileList, gffOutList, faaOutList, njobs)
    RunHMMerParallel(faaOutList, hmmFile, hmmOutList, ncpus, njobs)

In [None]:
python /mnt/d/Lab/WGS-Pipeline/BileAcid.py -i /mnt/d/Lab/Bile-Acid/rawdata -o /mnt/d/Lab/Bile-Acid/result/test-2020-10-20

In [None]:
def RunDiamondParallel(fastaList, db, jobs, threads, outFileList):
    """Run ``RunDiamond`` over many FASTA files using a pool of worker processes.

    Every task uses the same ``db`` and ``threads``; the i-th FASTA file is
    paired with the i-th output path.

    Args:
        fastaList: Query FASTA files.
        db: DIAMOND database shared by all tasks.
        jobs: Number of worker processes in the pool.
        threads: Thread count handed to each individual ``RunDiamond`` call.
        outFileList: Output paths, one per FASTA file.

    Returns:
        A list with the return value of each ``RunDiamond`` call, in input
        order.
    """
    tasks = zip(fastaList, repeat(db), repeat(threads), outFileList)
    # The context manager tears the pool down even when a task raises, so
    # worker processes are not leaked. ``starmap`` blocks until every task
    # has finished, so nothing is still running when the block exits.
    with Pool(processes=jobs) as pool:
        return pool.starmap(RunDiamond, tasks)

def RunDiamond(fasta, db, threads, OutFile):
    """Run ``diamond blastx`` for one query file.

    Executes ``diamond blastx -q fasta -o OutFile --evalue 1.0
    --max-target-seqs 1 --outfmt 6 --db db -p threads``.

    Args:
        fasta: Path of the query FASTA file (``-q``).
        db: Path of the DIAMOND database (``--db``).
        threads: Value passed to ``-p``.
        OutFile: Path of the output file (``-o``).

    Returns:
        The exit status of the ``diamond`` process.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; note that no shell expansion takes place.
    cmd = [
        "diamond", "blastx",
        "-q", fasta,
        "-o", OutFile,
        "--evalue", "1.0",
        "--max-target-seqs", "1",
        "--outfmt", "6",
        "--db", db,
        "-p", str(threads),
    ]
    return subprocess.call(cmd)

### Bio.SearchIO.HmmerIO package

https://biopython.org/docs/1.75/api/Bio.SearchIO.HmmerIO.html