In addition to the original script, 2_generate_TP, this script runs a BLAST search of selected_tp_genes against temp_neg_genomes and filters out all genomes that have a hit. In addition to 2.1_generate_TP, this script also runs an HMM search, using an alignment of selected_tp_genes as the query against the negative genomes. It combines the results of the BLAST and HMM searches and removes all samples with hits from the pool of neg_genomes to choose from. One further parameter is required to specify an HMM e-value cutoff for samples to remove. Ideally, this would be 0, but I decided to keep a cutoff, as the original negative genomes from the Sugimoto paper also have positive hits for an oxyN HMM.


This script generates the 2 remaining files required for input:
- True positive genes (nucleotide fasta)
- protein alignment (amino acid fasta)
IMPORTANTLY, these 2 files are created from non-overlapping sequences!


And also moves the corresponding files to new directories so that coverage tables can be generated from them.

In the first cell specify:
- BGC type (This name must stay constant throughout the scripts)
- select_neg_genomes, i.e. the number of negative genomes to be transferred to the neg_genomes directory
- select_pos_genomes, i.e. the number of positive genomes to be transferred to the pos_genomes directory and used to generate the tp_genes file (the surplus genomes are used to generate the protein alignment)
- pos_isolation_source_filter, if these terms are found in the isolation_source column of the positive samples in the summary file, they will be scored higher in a scoring column, i.e. samples from a known and desired isolation source will be used preferentially.
- neg_isolation_source_filter, accordingly
- avoid_list. These terms are scored with a 0, end up at the bottom of the table, and will be picked last. This is useful when an uncommon gene is searched for and more isolation sources, and/or more tenuous ones, have been allowed during download. These are generally words that contain one of the search terms, e.g. 'sea' in 'diseased'.

Modify in such a way that TP genes are used as query against all individual negative genomes. Negative genomes are only moved from temp directory to neg_genomes directory if the blast search comes back negative

In [1]:
# Name of the gene family; it is also the name of the project directory and
# must stay constant throughout the scripts.
BGC_type = 'RTX_toxin_acyltransferase'

# Number of negative / positive genomes to select.
select_neg_genomes = 140
select_pos_genomes = 10

# E-value cutoff for HMM hits when deciding which samples to remove.
hmm_evalue_cutoff = 3

# Isolation sources that receive a score of 0 and are therefore picked last.
avoid_list = [
    '',
    'isolation_source not annotated',
    'diseased',
    'mice',
    'spice',
    'septicemic',
    'research',
    'crevice',
]

# Preferred isolation-source terms; every term found in a sample's
# isolation_source raises its score by one.
# These are identical to the first script, but don't have to be.
pos_isolation_source_filter = [
    'marine',
    'sea',
    'sponge',
    'ocean',
    'porifera',
    'seafloor',
    'sediment',
    'water',
    'tidal',
    'coral',
    'reef',
    'coast',
    'ship',
    'fish',
    'aquaculture',
    'atlantic',
    'pacific',
    'mediterranean',
    'baltic',
    'pond',
    'river',
    'ice',
    'carribean',
    'lake',
    'fjord',
    'marina',
    'hydro',
    'algal',
    'algae',
    'clam',
    'shell',
    'mussel',
]
neg_isolation_source_filter = [
    'marine',
    'sea',
    'sponge',
    'ocean',
    'porifera',
    'seafloor',
    'sediment',
    'water',
    'tidal',
    'coral',
    'reef',
    'coast',
    'ship',
    'fish',
    'aquaculture',
    'atlantic',
    'pacific',
    'mediterranean',
    'baltic',
    'pond',
    'river',
    'ice',
    'carribean',
    'lake',
    'fjord',
    'marina',
    'hydro',
    'algal',
    'algae',
    'clam',
    'shell',
    'mussel',
]

In [2]:
import os
from os import listdir, mkdir
from os.path import isfile, join
from pathlib import Path
import pandas as pd
from pandas.errors import EmptyDataError
import warnings

In [3]:
def makedir(dirpath):
    """Create the directory *dirpath* unless it already exists.

    Prints whether the directory was found or is being created. Missing
    parent directories are created as well, so the call no longer depends
    on the parents having been made first.

    Raises:
        FileExistsError: if *dirpath* exists but is not a directory.
    """
    if os.path.isdir(dirpath):
        print(dirpath,'exists already')
        return
    print('Making', dirpath)
    # makedirs also builds missing parents; exist_ok avoids a crash if the
    # directory appears between the check above and this call.
    os.makedirs(dirpath, exist_ok=True)

        
# Defining paths for required directory structure for input and output files relative to parent directory
#parent_dir='/media/manu/RiPP_Prioritiser/'
#will make directories relative to the path the notebook was opened in
parent_dir= !echo $(pwd)
BGC_path=os.path.join(parent_dir[0], BGC_type)
neg_genomes_path=os.path.join(BGC_path, 'base_genomes/temp_neg_genomes')
pos_genomes_path=os.path.join(BGC_path, 'base_genomes/temp_pos_genomes')
output_neg_path=os.path.join(BGC_path, 'base_genomes/neg_genomes')
output_pos_path=os.path.join(BGC_path, 'base_genomes/pos_genomes')
#qc-paths.
quality_control_path=os.path.join(BGC_path, 'base_genomes/quality_control')
neg_blast_path=os.path.join(quality_control_path, 'neg_blast')
neg_blast_db_path=os.path.join(neg_blast_path, 'databases')
neg_blast_results_path=os.path.join(neg_blast_path, 'outfiles')
pos_blast_path=os.path.join(quality_control_path, 'pos_blast')
pos_blast_db_path=os.path.join(pos_blast_path, 'databases')
pos_blast_results_path=os.path.join(pos_blast_path, 'results')
neg_hmm_path=os.path.join(quality_control_path, 'neg_hmm')
neg_hmm_db_path=os.path.join(neg_hmm_path, 'queries')
neg_hmm_results_path=os.path.join(neg_hmm_path, 'outfiles')
pos_hmm_path=os.path.join(quality_control_path, 'pos_hmm')
pos_hmm_db_path=os.path.join(pos_hmm_path, 'databases')
pos_hmm_results_path=os.path.join(pos_hmm_path, 'results')

# made directories manually. remove or comment after running.
#validation_pos_genomes_path=os.path.join(BGC_path, 'base_genomes/validation_pos_genomes')
#validation_coverage_table_path=os.path.join(BGC_path, 'validation_coverage_tables')

# Calling function to make directories if they don't exist yet
makedir(output_neg_path)
makedir(output_pos_path)
makedir(quality_control_path)
makedir(neg_blast_path)
makedir(neg_blast_db_path)
makedir(neg_blast_results_path)
makedir(pos_blast_path)
makedir(pos_blast_db_path)
makedir(pos_blast_results_path)
makedir(neg_hmm_path)
makedir(neg_hmm_db_path)
makedir(neg_hmm_results_path)
makedir(pos_hmm_path)
makedir(pos_hmm_db_path)
makedir(pos_hmm_results_path)

#makedir(validation_pos_genomes_path)

os.chdir(BGC_path)

Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/neg_genomes
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/pos_genomes
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/outfiles
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/pos_blast
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/pos_blast/databases
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/pos_blast/results
Making /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/n

In [4]:
# Start the report for this script ('w' replaces any report left over from an
# earlier run) and record the parameters chosen in the first cell.
report_header = [
    'Output directory is: ' + BGC_path + '\n',
    '\nBGC_type = ' + BGC_type,
    '\nselect_neg_genomes = ' + str(select_neg_genomes),
    '\nselect_pos_genomes = ' + str(select_pos_genomes),
    '\nhmm_evalue_cutoff = ' + str(hmm_evalue_cutoff),
    '\navoid_list = ' + str(avoid_list),
    '\nneg_isolation_source_filter = ' + str(neg_isolation_source_filter),
    '\npos_isolation_source_filter = ' + str(pos_isolation_source_filter) + '\n',
]
with open(BGC_path + '/' + 'report_2_generate_tp.txt', 'w') as f:
    f.writelines(report_header)

In [5]:
# Load the tab-separated summary table (output of script 1) into a data frame.
# The relative path resolves against the current working directory.
summary_file = pd.read_table('summary.tsv')

Change order of tables to prioritize samples that have an isolation source

In [6]:
# Silence all warnings from here on (presumably to hide pandas' warnings about
# modifying filtered frames - confirm before removing).
warnings.filterwarnings('ignore')

# Positive samples are the rows flagged '+' in the 'dir' column. A protein_id
# occurring more than once is dropped entirely (keep=False), which removes
# protein sequences shared between different organisms.
pos_mask = summary_file['dir'].eq('+')
pos_df = summary_file.loc[pos_mask].drop_duplicates(subset='protein_id', keep=False)

# Negative samples are the rows flagged '-'.
neg_mask = summary_file['dir'].eq('-')
neg_df = summary_file.loc[neg_mask]
#scoring words in isolation source so as to preferentially pick samples with chosen isolation sources

def custom_sorting(source,isolation_source_filter):
    """Score an isolation source so that preferred samples sort to the top.

    Args:
        source: value of the sample's ``isolation_source`` field. Anything
            that is not a string (e.g. the NaN pandas uses for an empty cell)
            is treated as an empty annotation.
        isolation_source_filter: ``'pos'`` to score against
            ``pos_isolation_source_filter``, ``'neg'`` to score against
            ``neg_isolation_source_filter``.

    Returns:
        0 if ``source`` is exactly equal to an entry of ``avoid_list``;
        otherwise 1 plus the number of filter terms contained in ``source``.
        An unrecognised ``isolation_source_filter`` yields the base score 1.
    """
    # Empty cells arrive as float NaN; `word in source` would raise TypeError.
    if not isinstance(source, str):
        source = ''

    if isolation_source_filter == 'pos':
        preferred_terms = pos_isolation_source_filter
    elif isolation_source_filter == 'neg':
        preferred_terms = neg_isolation_source_filter
    else:
        return 1

    # Whole-string comparison: only a source identical to an avoid term is
    # pushed to the bottom; substring hits do not count here.
    if source in avoid_list:
        return 0
    return 1 + sum(term in source for term in preferred_terms)


# Score every sample by its isolation source, then sort each table so that
# the highest-scoring samples come first.
for frame, label in ((pos_df, 'pos'), (neg_df, 'neg')):
    frame['scoring_column'] = frame.apply(
        lambda row, label=label: custom_sorting(row['isolation_source'], label),
        axis=1,
    )
    frame.sort_values(by=['scoring_column'], axis=0, ascending=False, inplace=True)

In [7]:
# Split the positive genomes into two bins: the first goes towards the tp genes
# and provides the pos-genomes used for synthesising metagenomes; the rest is
# a source of protein sequences for the alignment input file
# (the latter functionality was removed; pfam alignments of the protein are used instead).

# pos_df is already sorted by score, so the genomes at the top of the table
# are taken first. Column 1 is read as the genome identifier.
unique_pos_df = pos_df.drop_duplicates(subset='assembly')
selected_tp_genomes = list(unique_pos_df.iloc[:select_pos_genomes, 1])
remaining_pos_genomes = list(unique_pos_df.iloc[select_pos_genomes:, 1])

with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\nselected_tp_genomes are:\n' + str(selected_tp_genomes) + '\n')

# Move the files of every selected tp genome from the temporary positive pool
# to the final pos_genomes directory.
for genome in selected_tp_genomes:
    print('moving positive', genome, 'to', output_pos_path)
    # IPython shell escape; the trailing * matches every file in the temp
    # directory whose name starts with the genome identifier.
    !mv "{pos_genomes_path}"/"{genome}"* "{output_pos_path}"
    
# Split the positive table into the rows of the selected tp genomes (with all
# tp genes they contain) and the rows of every other positive genome.
in_selection = pos_df['assembly'].isin(selected_tp_genomes)
filtered_pos_df = pos_df[in_selection]
remaining_pos_df = pos_df[~in_selection]

# Headers to look up in the tp_genes file, built per row as
# '>' + column 1 + '_' + column 3 + '_' + column 5.
full_header_list = [
    '>' + filtered_pos_df.iloc[row, 1] + '_' + filtered_pos_df.iloc[row, 3] + '_' + filtered_pos_df.iloc[row, 5]
    for row in range(len(filtered_pos_df))
]

# Generate the fasta file with the selected tp genes found in the selected genomes.
# Every line of <BGC_type>_tp_genes.fasta that contains one of the headers is
# copied together with the line that follows it (this assumes two-line records:
# a header followed by the sequence on a single line).
print('generating selected_tp_genes.fasta')
with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\nselected_tp_genes in positive genomes are:\n')
    tp_gene_counter = 0
    with open(BGC_path+'/'+BGC_type+'_tp_genes.fasta') as fh:
        lines = fh.readlines()
    # Opened with 'w' rather than 'a': re-running this cell must replace the
    # file, otherwise every record is duplicated on each run.
    with open(BGC_path+'/'+BGC_type+'_selected_tp_genes.fasta', 'w') as outfile:
        for i, line in enumerate(lines):
            for header in full_header_list:
                if header in line:
                    tp_gene_counter += 1
                    # report the header without the leading '>' and the newline
                    f.write(line[1:-1]+'\n')
                    outfile.write(line+lines[i+1])
    f.write('\n'+str(len(selected_tp_genomes))+' unique genomes with '+ str(tp_gene_counter)+' unique tp genes.\n\n')
                    
                    
# Transfer all amino acid sequences that are not part of the tp-genomes to a fasta file.
print('generating selected_tp_aa.fasta')
tp_aa_counter = 0
with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\nsremaining samples are:\n')
    # Opened with 'w' rather than 'a': re-running this cell must replace the
    # file, otherwise every sequence is duplicated on each run.
    with open(BGC_path+'/'+BGC_type+'_selected_tp_aa.fasta', 'w') as outfile:
        for i in range(len(remaining_pos_df)):
            tp_aa_counter += 1
            fasta_header = '>'+remaining_pos_df.iloc[i,1]+'_'+remaining_pos_df.iloc[i,3]+'_'+remaining_pos_df.iloc[i,5]+'\n'
            # Column 6 holds the sequence; two characters are cut from each end
            # (presumably the "['" and "']" of a stringified list - confirm).
            sequence = remaining_pos_df.iloc[i,6][2:-2]+'\n'
            # report the header without the leading '>' and the newline
            f.write(fasta_header[1:-1]+'\n')
            outfile.write(fasta_header)
            outfile.write(sequence)
    f.write('\n'+str(len(remaining_pos_genomes))+' unique genomes with '+ str(tp_aa_counter)+' unique aa sequences.\n\n')


print('Done')

moving positive GCF_013357885.1 to /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/pos_genomes
moving positive GCF_002119525.1 to /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/pos_genomes
moving positive GCF_000307145.1 to /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/pos_genomes
moving positive GCF_013357825.1 to /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/pos_genomes
moving positive GCF_008369605.1 to /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/pos_genomes
generating selected_tp_genes.fasta
generating selected_tp_aa.fasta
Done


In [8]:
# move 10 more pos_genomes to validation_pos_genomes
#unique_pos_df = pos_df.drop_duplicates(subset='assembly', inplace=False)
#selected_tp_genomes = list(unique_pos_df.iloc[:,1])[0:select_pos_genomes]
#validation_tp_genomes=list(unique_pos_df.iloc[:,1])[select_pos_genomes:select_pos_genomes+10]

#select genomes and isolate GCF number from them, move selected tp genomes to final pos_genomes directory
#for genome in validation_tp_genomes:
#    print('moving positive', genome, 'to', validation_pos_genomes_path)
#    !mv "{pos_genomes_path}"/"{genome}"* "{validation_pos_genomes_path}"

In [9]:
# To confirm, run blast on all samples selected as validation pos genomes

# makes blast databases of all individual neg genomes (easier to keep track of accession numbers than when combining)
#for genomes in validation_tp_genomes:
#    !makeblastdb -in "{validation_tp_genomes}"/"{genomes}"* -dbtype nucl -out "{pos_blast_db_path}"/"{genomes}"_db

#for genomes in selected_tp_genomes:
#    !blastn -db "{pos_blast_db_path}"/"{genomes}"_db -query "{BGC_path}"/"{BGC_type}"_selected_tp_genes.fasta -out "{pos_blast_results_path}"/"{genomes}".blastout -outfmt "6 qseqid sseqid pident evalue length"
    
# use pandas to concatenate all blast output tables
#df_list = []
#for outfile in os.listdir(pos_blast_results_path):
#    try:
#        blast_df = pd.read_csv(pos_blast_results_path+'/'+outfile, sep='\t', names=['qseqid', 'sseqid', 'pident', 'evalue', 'length'], index_col=None)
#        blast_df['sample'] = '.'.join(outfile.split('.')[0:2])
#        df_list.append(blast_df)
#    except EmptyDataError:
#        continue
        
# Generate a list of contaminated negative genomes        
#pos_blast_hits_df = pd.concat(df_list)
#pos_blast_hits_df.to_csv(pos_blast_path+'/blast_results_summary.csv', index=False)

In [10]:
# Use blast to find tp_gene contamination in negative samples.
# One row per negative assembly; column 1 is read as the genome identifier.
# The list holds every unique negative genome, not only select_neg_genomes of them.
unique_neg_df = neg_df.drop_duplicates(subset='assembly')
neg_genomes_list = unique_neg_df.iloc[:, 1].tolist()

with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\nUsing blastn to check negative genomes for contamination\n')

# Build one nucleotide blast database per negative genome (separate databases
# make it easier to keep track of accession numbers than one combined database).
for genomes in neg_genomes_list:
    !makeblastdb -in "{neg_genomes_path}"/"{genomes}"* -dbtype nucl -out "{neg_blast_db_path}"/"{genomes}"_db

# Run a blastn search of all TP genes against each of those databases; one
# tabular result file (qseqid, sseqid, pident, evalue, length) per genome.
# NOTE: the query is <BGC_type>_selected_tp_genes_altered.fasta. Even if no
# alteration is made to the original tp_genes file, rename it to fit the path
# in the command below.
for genomes in neg_genomes_list:
    !blastn -db "{neg_blast_db_path}"/"{genomes}"_db -query "{BGC_path}"/"{BGC_type}"_selected_tp_genes_altered.fasta -out "{neg_blast_results_path}"/"{genomes}".blastout -outfmt "6 qseqid sseqid pident evalue length"

# Use pandas to concatenate all blast output tables into one data frame.
blast_columns = ['qseqid', 'sseqid', 'pident', 'evalue', 'length']
df_list = []
for outfile in os.listdir(neg_blast_results_path):
    try:
        blast_df = pd.read_csv(neg_blast_results_path+'/'+outfile, sep='\t', names=blast_columns, index_col=None)
    except EmptyDataError:
        # an empty result file means blastn found no hit in this genome
        continue
    # the first two dot-separated parts of the file name identify the sample,
    # e.g. 'GCF_003075095.1.blastout' -> 'GCF_003075095.1'
    blast_df['sample'] = '.'.join(outfile.split('.')[0:2])
    df_list.append(blast_df)

# Generate a list of contaminated negative genomes.
# pd.concat raises ValueError on an empty list, which is exactly the case in
# which no negative genome has a hit, so fall back to an empty table.
if df_list:
    remove_df = pd.concat(df_list)
else:
    remove_df = pd.DataFrame(columns=blast_columns + ['sample'])
remove_df.to_csv(quality_control_path+'/blast_fn_samples.csv', index=False)
unique_remove_df = remove_df.drop_duplicates(subset='sample', inplace=False)
# Select the sample names by column label: position 4 is 'length', not 'sample'.
blast_remove_list = list(unique_remove_df['sample'])

with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\n'+'Contamination with tp_seqs:\n'+str(list(remove_df.iloc[:,0]))+'\nsequences identified in negative samples:\n'+str(blast_remove_list)+'\n\n')

print('Done')



Building a new DB, current time: 11/09/2021 11:19:36
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003075095.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003075095.1_ASM307509v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 87 sequences in 0.327964 seconds.


Building a new DB, current time: 11/09/2021 11:19:36
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_013282925.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_013282925.1_ASM1328292v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 21 sequences in 0.161026 seconds.


Building a new DB, current time: 11/09/2021 11:19:3

Adding sequences from FASTA; added 922 sequences in 0.177863 seconds.


Building a new DB, current time: 11/09/2021 11:19:42
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003797885.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003797885.1_ASM379788v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 45 sequences in 0.138212 seconds.


Building a new DB, current time: 11/09/2021 11:19:42
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003975335.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003975335.1_ASM397533v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 42 sequences in 0.

Adding sequences from FASTA; added 22 sequences in 0.067265 seconds.


Building a new DB, current time: 11/09/2021 11:19:48
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_900537165.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_900537165.1_ASM90053716v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 34 sequences in 0.171132 seconds.


Building a new DB, current time: 11/09/2021 11:19:49
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002786285.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002786285.1_ASM278628v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 23 sequences in 0

Adding sequences from FASTA; added 4 sequences in 0.129736 seconds.


Building a new DB, current time: 11/09/2021 11:19:54
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002737765.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002737765.1_ASM273776v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 406 sequences in 0.179132 seconds.


Building a new DB, current time: 11/09/2021 11:19:55
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_004196115.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_004196115.1_ASM419611v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 147 sequences in 0.

Adding sequences from FASTA; added 149 sequences in 0.164984 seconds.


Building a new DB, current time: 11/09/2021 11:20:00
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_000497995.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_000497995.1_TB25_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 458 sequences in 0.113337 seconds.


Building a new DB, current time: 11/09/2021 11:20:00
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002876165.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002876165.1_ASM287616v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 169 sequences in 0.09989

Adding sequences from FASTA; added 2 sequences in 0.0924439 seconds.


Building a new DB, current time: 11/09/2021 11:20:05
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002237735.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002237735.1_ASM223773v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 31 sequences in 0.144369 seconds.


Building a new DB, current time: 11/09/2021 11:20:05
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002813915.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002813915.1_ASM281391v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 1 sequences in 0.10

Adding sequences from FASTA; added 33 sequences in 0.125262 seconds.


Building a new DB, current time: 11/09/2021 11:20:11
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001402435.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001402435.1_ASM140243v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 57 sequences in 0.13123 seconds.


Building a new DB, current time: 11/09/2021 11:20:12
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001402375.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001402375.1_ASM140237v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 63 sequences in 0.11

Adding sequences from FASTA; added 328 sequences in 0.13577 seconds.


Building a new DB, current time: 11/09/2021 11:20:19
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003545765.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003545765.1_ASM354576v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 71 sequences in 0.116294 seconds.


Building a new DB, current time: 11/09/2021 11:20:19
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_014969745.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_014969745.1_ASM1496974v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 4 sequences in 0.1

Adding sequences from FASTA; added 209 sequences in 0.177287 seconds.


Building a new DB, current time: 11/09/2021 11:20:24
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001469735.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001469735.1_ASM146973v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 5 sequences in 0.165362 seconds.


Building a new DB, current time: 11/09/2021 11:20:25
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_014163875.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_014163875.1_ASM1416387v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 53 sequences in 0.

Adding sequences from FASTA; added 20 sequences in 0.135915 seconds.


Building a new DB, current time: 11/09/2021 11:20:30
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003050485.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003050485.1_ASM305048v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 40 sequences in 0.143842 seconds.


Building a new DB, current time: 11/09/2021 11:20:31
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001885655.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001885655.1_ASM188565v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 413 sequences in 0.

Adding sequences from FASTA; added 474 sequences in 0.287367 seconds.


Building a new DB, current time: 11/09/2021 11:20:37
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001296025.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001296025.1_ASM129602v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 30 sequences in 0.147646 seconds.


Building a new DB, current time: 11/09/2021 11:20:37
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_004194535.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_004194535.1_ASM419453v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 4 sequences in 0.1

Adding sequences from FASTA; added 22 sequences in 0.0467958 seconds.


Building a new DB, current time: 11/09/2021 11:20:42
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_014764305.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_014764305.1_ASM1476430v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 1 sequences in 0.0818701 seconds.


Building a new DB, current time: 11/09/2021 11:20:42
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001457025.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001457025.1_ASM145702v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 18 sequences in 0

Adding sequences from FASTA; added 3 sequences in 0.352059 seconds.


Building a new DB, current time: 11/09/2021 11:20:49
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_007989895.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_007989895.1_ASM798989v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 313 sequences in 1.46879 seconds.


Building a new DB, current time: 11/09/2021 11:20:51
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_000256165.2_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_000256165.2_ASM25616v2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 237 sequences in 2.04

Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 82 sequences in 0.132762 seconds.


Building a new DB, current time: 11/09/2021 11:21:12
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001707825.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001707825.1_ASM170782v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 163 sequences in 0.191579 seconds.


Building a new DB, current time: 11/09/2021 11:21:13
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001880035.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001880035.1_ASM188003v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding s

Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 1 sequences in 0.120066 seconds.


Building a new DB, current time: 11/09/2021 11:21:19
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_013114055.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_013114055.1_ASM1311405v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 114 sequences in 0.143121 seconds.


Building a new DB, current time: 11/09/2021 11:21:19
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002993365.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002993365.1_ASM299336v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding s

Adding sequences from FASTA; added 151 sequences in 0.120158 seconds.


Building a new DB, current time: 11/09/2021 11:21:24
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_015277005.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_015277005.1_ASM1527700v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 37 sequences in 0.0725892 seconds.


Building a new DB, current time: 11/09/2021 11:21:25
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003959325.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003959325.1_ASM395932v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 484 sequences in

Adding sequences from FASTA; added 175 sequences in 1.25391 seconds.


Building a new DB, current time: 11/09/2021 11:21:34
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_004378685.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_004378685.1_ASM437868v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 21 sequences in 2.92157 seconds.


Building a new DB, current time: 11/09/2021 11:21:37
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_011043775.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_011043775.1_ASM1104377v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 22 sequences in 1.9

Adding sequences from FASTA; added 204 sequences in 0.179357 seconds.


Building a new DB, current time: 11/09/2021 11:22:06
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_020177265.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_020177265.1_ASM2017726v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 28 sequences in 0.112273 seconds.


Building a new DB, current time: 11/09/2021 11:22:07
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002554135.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002554135.1_ASM255413v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 91 sequences in 0

Adding sequences from FASTA; added 1 sequences in 0.0662861 seconds.


Building a new DB, current time: 11/09/2021 11:22:16
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_000447395.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_000447395.1_GA8_Ver1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 173 sequences in 0.157791 seconds.


Building a new DB, current time: 11/09/2021 11:22:16
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_007990995.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_007990995.1_ASM799099v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 59 sequences in 0.104

Adding sequences from FASTA; added 26 sequences in 2.8299 seconds.


Building a new DB, current time: 11/09/2021 11:22:27
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_006381405.2_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_006381405.2_ASM638140v2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 89 sequences in 2.55348 seconds.


Building a new DB, current time: 11/09/2021 11:22:30
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_014103735.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_014103735.1_ASM1410373v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 21 sequences in 1.858

Adding sequences from FASTA; added 149 sequences in 0.13734 seconds.


Building a new DB, current time: 11/09/2021 11:22:38
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002102475.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002102475.1_ASM210247v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 55 sequences in 0.0770619 seconds.


Building a new DB, current time: 11/09/2021 11:22:38
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001467405.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001467405.1_ASM146740v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 34 sequences in 0.

Adding sequences from FASTA; added 123 sequences in 0.148883 seconds.


Building a new DB, current time: 11/09/2021 11:22:51
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_019285615.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_019285615.1_ASM1928561v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 4 sequences in 0.141855 seconds.


Building a new DB, current time: 11/09/2021 11:22:51
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_000328125.2_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_000328125.2_ASM32812v2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 40 sequences in 0.1

Adding sequences from FASTA; added 73 sequences in 0.116087 seconds.


Building a new DB, current time: 11/09/2021 11:23:08
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003057015.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003057015.1_ASM305701v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 121 sequences in 0.128761 seconds.


Building a new DB, current time: 11/09/2021 11:23:08
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_900185655.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_900185655.1_NDB2Meth2_Assembly_1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 39 sequen

Adding sequences from FASTA; added 190 sequences in 0.655272 seconds.


Building a new DB, current time: 11/09/2021 11:23:15
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003025475.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003025475.1_ASM302547v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 73 sequences in 2.19606 seconds.


Building a new DB, current time: 11/09/2021 11:23:17
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_011212705.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_011212705.1_ASM1121270v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 3 sequences in 2.4

Adding sequences from FASTA; added 22 sequences in 2.52047 seconds.


Building a new DB, current time: 11/09/2021 11:23:33
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_013724765.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_013724765.1_ASM1372476v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 3 sequences in 2.05491 seconds.


Building a new DB, current time: 11/09/2021 11:23:35
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002507835.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002507835.1_ASM250783v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 34 sequences in 1.904

Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 144 sequences in 0.336849 seconds.


Building a new DB, current time: 11/09/2021 11:23:56
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002100145.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002100145.1_ASM210014v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 37 sequences in 0.46442 seconds.


Building a new DB, current time: 11/09/2021 11:23:57
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_014142595.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_014142595.1_ASM1414259v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding s

Adding sequences from FASTA; added 20 sequences in 0.639127 seconds.


Building a new DB, current time: 11/09/2021 11:24:12
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_006371495.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_006371495.1_ASM637149v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 138 sequences in 3.38414 seconds.


Building a new DB, current time: 11/09/2021 11:24:16
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_006380715.2_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_006380715.2_ASM638071v2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 84 sequences in 0.9

Adding sequences from FASTA; added 143 sequences in 0.065769 seconds.


Building a new DB, current time: 11/09/2021 11:24:40
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_014169635.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_014169635.1_ASM1416963v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 10 sequences in 0.14642 seconds.


Building a new DB, current time: 11/09/2021 11:24:40
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_004921245.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_004921245.1_ASM492124v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 23 sequences in 0.

Adding sequences from FASTA; added 2 sequences in 0.110914 seconds.


Building a new DB, current time: 11/09/2021 11:24:50
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001752425.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001752425.1_ASM175242v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 52 sequences in 0.086493 seconds.


Building a new DB, current time: 11/09/2021 11:24:50
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002140155.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002140155.1_ASM214015v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 41 sequences in 0.19

Adding sequences from FASTA; added 62 sequences in 1.45111 seconds.


Building a new DB, current time: 11/09/2021 11:25:11
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_004922175.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_004922175.1_ASM492217v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 21 sequences in 0.101555 seconds.


Building a new DB, current time: 11/09/2021 11:25:11
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003068265.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003068265.1_ASM306826v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 51 sequences in 0.12

Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 150 sequences in 0.22091 seconds.


Building a new DB, current time: 11/09/2021 11:25:30
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_017599305.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_017599305.1_ASM1759930v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 66 sequences in 1.21794 seconds.


Building a new DB, current time: 11/09/2021 11:25:31
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002177865.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002177865.1_ASM217786v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding se

Adding sequences from FASTA; added 148 sequences in 0.183792 seconds.


Building a new DB, current time: 11/09/2021 11:25:40
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_019823915.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_019823915.1_ASM1982391v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 5 sequences in 0.189201 seconds.


Building a new DB, current time: 11/09/2021 11:25:40
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001296425.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001296425.1_ASM129642v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 89 sequences in 0.

Adding sequences from FASTA; added 98 sequences in 0.0979438 seconds.


Building a new DB, current time: 11/09/2021 11:25:59
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_006377635.2_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_006377635.2_ASM637763v2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 262 sequences in 0.157199 seconds.


Building a new DB, current time: 11/09/2021 11:25:59
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_000332195.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_000332195.1_ASM33219v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 10 sequences in 0.

Adding sequences from FASTA; added 7 sequences in 0.143444 seconds.


Building a new DB, current time: 11/09/2021 11:26:05
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_004922535.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_004922535.1_ASM492253v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 37 sequences in 0.09093 seconds.


Building a new DB, current time: 11/09/2021 11:26:05
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003731635.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003731635.1_ASM373163v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 90 sequences in 0.182

Adding sequences from FASTA; added 41 sequences in 3.11432 seconds.


Building a new DB, current time: 11/09/2021 11:26:29
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_016906005.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_016906005.1_ASM1690600v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 90 sequences in 1.67157 seconds.


Building a new DB, current time: 11/09/2021 11:26:31
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_018688335.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_018688335.1_ASM1868833v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 1 sequences in 0.71

Adding sequences from FASTA; added 15 sequences in 0.115544 seconds.


Building a new DB, current time: 11/09/2021 11:26:47
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_000948175.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_000948175.1_BFA_2_A_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 197 sequences in 0.195796 seconds.


Building a new DB, current time: 11/09/2021 11:26:48
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003958205.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003958205.1_ASM395820v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 251 sequences in 0.123

Adding sequences from FASTA; added 4 sequences in 0.152097 seconds.


Building a new DB, current time: 11/09/2021 11:26:53
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_000213255.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_000213255.1_ASM21325v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 1 sequences in 0.0825641 seconds.


Building a new DB, current time: 11/09/2021 11:26:53
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002204815.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002204815.1_ASM220481v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 376 sequences in 0.08

Adding sequences from FASTA; added 289 sequences in 0.172909 seconds.


Building a new DB, current time: 11/09/2021 11:27:04
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_015712165.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_015712165.1_ASM1571216v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 5 sequences in 0.236393 seconds.


Building a new DB, current time: 11/09/2021 11:27:04
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_016745425.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_016745425.1_ASM1674542v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 64 sequences in 0

Adding sequences from FASTA; added 65 sequences in 0.153839 seconds.


Building a new DB, current time: 11/09/2021 11:27:10
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_015277465.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_015277465.1_ASM1527746v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 16 sequences in 0.244932 seconds.


Building a new DB, current time: 11/09/2021 11:27:11
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003583675.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003583675.1_ASM358367v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 39 sequences in 0.

Adding sequences from FASTA; added 2 sequences in 0.956959 seconds.


Building a new DB, current time: 11/09/2021 11:27:49
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_900258045.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_900258045.1_ASM90025804v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 4 sequences in 0.616275 seconds.


Building a new DB, current time: 11/09/2021 11:27:50
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_015911295.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_015911295.1_ASM1591129v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 148 sequences in 1

Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 131 sequences in 0.1493 seconds.


Building a new DB, current time: 11/09/2021 11:28:03
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_013392905.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_013392905.1_ASM1339290v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 25 sequences in 0.0526619 seconds.


Building a new DB, current time: 11/09/2021 11:28:03
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_009671285.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_009671285.1_ASM967128v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding s

Adding sequences from FASTA; added 156 sequences in 0.20593 seconds.


Building a new DB, current time: 11/09/2021 11:28:12
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_014269795.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_014269795.1_ASM1426979v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 20 sequences in 0.088125 seconds.


Building a new DB, current time: 11/09/2021 11:28:12
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003105135.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003105135.1_ASM310513v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 218 sequences in 0

Adding sequences from FASTA; added 152 sequences in 0.153905 seconds.


Building a new DB, current time: 11/09/2021 11:28:18
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_900380005.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_900380005.1_JVD-0046_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 297 sequences in 0.1051 seconds.


Building a new DB, current time: 11/09/2021 11:28:18
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_018458485.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_018458485.1_ASM1845848v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 117 sequences in 0.16

Adding sequences from FASTA; added 28 sequences in 1.54279 seconds.


Building a new DB, current time: 11/09/2021 11:28:39
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001996005.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001996005.1_ASM199600v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 61 sequences in 1.29547 seconds.


Building a new DB, current time: 11/09/2021 11:28:41
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_009790365.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_009790365.1_ASM979036v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 273 sequences in 1.27

Adding sequences from FASTA; added 1 sequences in 0.103173 seconds.


Building a new DB, current time: 11/09/2021 11:28:59
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_013114385.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_013114385.1_ASM1311438v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 51 sequences in 0.149946 seconds.


Building a new DB, current time: 11/09/2021 11:28:59
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002212765.2_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002212765.2_ASM221276v2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 462 sequences in 1.

Adding sequences from FASTA; added 4 sequences in 0.225887 seconds.


Building a new DB, current time: 11/09/2021 11:29:10
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_016919385.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_016919385.1_ASM1691938v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 1 sequences in 0.120456 seconds.


Building a new DB, current time: 11/09/2021 11:29:10
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_017696175.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_017696175.1_ASM1769617v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 3 sequences in 0.04

Adding sequences from FASTA; added 105 sequences in 0.195758 seconds.


Building a new DB, current time: 11/09/2021 11:29:16
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001644095.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001644095.1_ASM164409v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 147 sequences in 0.122949 seconds.


Building a new DB, current time: 11/09/2021 11:29:16
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_007741475.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_007741475.1_ASM774147v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 1 sequences in 0.

Adding sequences from FASTA; added 6 sequences in 2.1083 seconds.


Building a new DB, current time: 11/09/2021 11:29:38
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_019336185.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_019336185.1_ASM1933618v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 64 sequences in 2.63267 seconds.


Building a new DB, current time: 11/09/2021 11:29:41
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_006372545.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_006372545.1_ASM637254v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 203 sequences in 2.152

Adding sequences from FASTA; added 100 sequences in 0.185658 seconds.


Building a new DB, current time: 11/09/2021 11:29:58
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_006379415.2_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_006379415.2_ASM637941v2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 48 sequences in 0.115958 seconds.


Building a new DB, current time: 11/09/2021 11:29:58
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_013618635.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_013618635.1_ASM1361863v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 48 sequences in 0

Adding sequences from FASTA; added 276 sequences in 0.0859201 seconds.


Building a new DB, current time: 11/09/2021 11:30:08
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_001721285.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_001721285.1_PanISTKB1.0_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 115 sequences in 0.188472 seconds.


Building a new DB, current time: 11/09/2021 11:30:09
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_002330425.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_002330425.1_ASM233042v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 99 sequences in 

Adding sequences from FASTA; added 52 sequences in 0.154754 seconds.


Building a new DB, current time: 11/09/2021 11:30:26
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_008502035.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_008502035.1_CC2_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 31 sequences in 0.133653 seconds.


Building a new DB, current time: 11/09/2021 11:30:26
New DB name:   /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/quality_control/neg_blast/databases/GCF_003449125.1_db
New DB title:  /media/manu/RiPP_Prioritiser/RTX_toxin_acyltransferase/base_genomes/temp_neg_genomes/GCF_003449125.1_ASM344912v1_genomic.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 228 sequences in 0.101821 s

UnboundLocalError: local variable 'child' referenced before assignment

In [None]:
# Use hmmer to find tp_gene contamination in negative samples.
# Steps performed by this cell:
#   1. align the selected tp genes with muscle,
#   2. build a nucleotide profile hmm from that alignment with hmmbuild,
#   3. search every genome in neg_genomes_list with nhmmer, writing one --tblout table per genome.
# This is to some extent dependent on which tp genes are chosen. If fewer neg_genomes result from the decontamination
#     process than are specified by select_neg_genomes, it might be worth trying a different set of tp_genes

# Append a section header to the run report
with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\n'+'Using hmmer to check negative genomes for contamination'+'\n\n')

# Build alignment of tp_genes. The muscle log is appended (-loga) to the same run report,
# which is why the report is written in separate steps before and after this command.
!muscle -in "{BGC_path}"/"{BGC_type}"_selected_tp_genes_altered.fasta -out "{neg_hmm_db_path}"/"{BGC_type}"_selected_tp_gene_alignment.fasta -loga "{BGC_path}"/report_2_generate_tp.txt

# Note the next two steps in the run report
with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\n'+'Building nucleotide hmm of selected true positive genes'+'\n\n'+'Searching neg genomes with nucleotide hmm.\n\n')

# Build nucleotide based hmm of tp_genes alignment (--dna declares the alphabet, -n names the profile)
!hmmbuild -n tp_nucl_aln --dna "{neg_hmm_db_path}"/"{BGC_type}"_tp_nucl.hmm "{neg_hmm_db_path}"/"{BGC_type}"_selected_tp_gene_alignment.fasta

# Search neg genomes with hmm; the trailing * lets the shell match the genome file by its name prefix
for genomes in neg_genomes_list:
    # metaBGC uses these filter cutoffs (--F1/--F2/--F3) for all the protein hmms.
    # NOTE(review): the original author states this is more stringent/produces fewer results than the
    #     plain call below - confirm against the nhmmer default filter thresholds.
    !nhmmer --F1 0.02 --F2 0.02 --F3 0.02 --tblout "{neg_hmm_results_path}"/"{genomes}"_hmm_result.tbl "{neg_hmm_db_path}"/"{BGC_type}"_tp_nucl.hmm "{neg_genomes_path}"/"{genomes}"*
    # Alternative call without the explicit filter cutoffs, kept for reference should there be a lack of
    #     negative genomes (according to the original author it produces more results):
#    !nhmmer --tblout "{neg_hmm_results_path}"/"{genomes}"_hmm_result.tbl "{neg_hmm_db_path}"/"{BGC_type}"_tp_nucl.hmm "{neg_genomes_path}"/"{genomes}"*

print('Done')

In [None]:
# Parse hmm output
#https://stackoverflow.com/questions/62012615/convert-a-hmmer-tblout-output-to-a-pandas-dataframe

sample_list = []
id_list=[]
len_list=[]
evalue_list=[]


#Can't parse mhmmer output with biopython at the moment, so did parsing manually.
for filename in os.listdir(neg_hmm_results_path):
    samplename = '_'.join(filename.split('_')[0:2])
    with open(neg_hmm_results_path+'/'+filename) as handle:
        content = handle.readlines()
        for line in content:
            if not line.startswith('#'):
                sample_list.append(samplename)
                id_list.append(line.split()[0])
                len_list.append(int(float(line.split()[5])-float(line.split()[12])))
                evalue_list.append(float(line.split()[12]))

                
       
# Generate hit_df based on found tp_hits in neg genomes
hmm_dict = {'sample': sample_list, 'target_name': id_list, 'length': len_list,  'evalue': evalue_list}             
hit_df = pd.DataFrame.from_dict(hmm_dict)
hit_df.to_csv(neg_hmm_path+'/all_nhmmer_fn_samples.csv', index=False)

#filter hit_df by evalue cutoff
evalue_filter = hit_df['evalue'] <= hmm_evalue_cutoff
cutoff_hit_df = hit_df[evalue_filter]

#deduplicate samples
unique_hit_df = cutoff_hit_df.drop_duplicates(subset='sample', inplace=False)
unique_hit_df.to_csv(quality_control_path+'/nhmmer_elt'+str(hmm_evalue_cutoff)+'_fn_samples.csv', index=False)

# Generate a list of contaminated negative genomes with combined results from blast and hmm hits
hmm_remove_list = list(unique_hit_df.iloc[:,0])
remove_list = blast_remove_list
remove_list.extend(x for x in hmm_remove_list if x not in remove_list)

with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\n'+'Contamination with tp_seqs found in samples:\n'+str(hmm_remove_list)+'\n\n'+'Found '+str(len(remove_list))+' total samples with tp contamination in neg genomes.')

# Remove these contaminated samples from the possible pool of negative sequences
cleaned = ~unique_neg_df['assembly'].isin(remove_list)
cleaned_unique_neg_df = unique_neg_df[cleaned]

# Select a preset number of negative genomes (this could lead to fewer genomes available than selected. Handle)
selected_neg_genomes = list(cleaned_unique_neg_df.iloc[:,1])[0:select_neg_genomes]

# report block
with open(BGC_path+'/'+'report_2_generate_tp.txt', 'a') as f:
    f.write('\n\n decontaminated selected_neg_genomes are:\n')
    f.write(str(selected_neg_genomes))

for genome in selected_neg_genomes:
    print('moving negative', genome, 'to', output_neg_path)
    !mv "{neg_genomes_path}"/"{genome}"* "{output_neg_path}"
    
print('Done')

In [None]:
# To confirm, run blast on all samples selected as pos genomes

# makes blast databases of all individual neg genomes (easier to keep track of accession numbers than when combining)
for genomes in selected_tp_genomes:
    !makeblastdb -in "{output_pos_path}"/"{genomes}"* -dbtype nucl -out "{pos_blast_db_path}"/"{genomes}"_db

for genomes in selected_tp_genomes:
    !blastn -db "{pos_blast_db_path}"/"{genomes}"_db -query "{BGC_path}"/"{BGC_type}"_selected_tp_genes_altered.fasta -out "{pos_blast_results_path}"/"{genomes}".blastout -outfmt "6 qseqid sseqid pident evalue length"
    
# use pandas to concatenate all blast output tables
df_list = []
for outfile in os.listdir(pos_blast_results_path):
    try:
        blast_df = pd.read_csv(pos_blast_results_path+'/'+outfile, sep='\t', names=['qseqid', 'sseqid', 'pident', 'evalue', 'length'], index_col=None)
        blast_df['sample'] = '.'.join(outfile.split('.')[0:2])
        df_list.append(blast_df)
    except EmptyDataError:
        continue
        
# Generate a list of contaminated negative genomes        
pos_blast_hits_df = pd.concat(df_list)
pos_blast_hits_df.to_csv(pos_blast_path+'/blast_results_summary.csv', index=False)

In [None]:
# To confirm, run nhmmer on all samples selected as pos genomes


# Search neg genomes with hmm
for genomes in selected_tp_genomes:
    !nhmmer --F1 0.02 --F2 0.02 --F3 0.02 --tblout "{pos_hmm_results_path}"/"{genomes}"_hmm_result.tbl "{neg_hmm_db_path}"/"{BGC_type}"_tp_nucl.hmm "{output_pos_path}"/"{genomes}"*

sample_list = []
id_list=[]
evalue_list=[]
len_list = []

#Can't parse mhmmer output with biopython at the moment, so did parsing manually.
for filename in os.listdir(pos_hmm_results_path):
    samplename = '_'.join(filename.split('_')[0:2])
    with open(pos_hmm_results_path+'/'+filename) as handle:
        content = handle.readlines()
        for line in content:
            if not line.startswith('#'):
                sample_list.append(samplename)
                id_list.append(line.split()[0])
                len_list.append(int(float(line.split()[5])-float(line.split()[12])))
                evalue_list.append(float(line.split()[12]))

                
       
# Generate hit_df based on found tp_hits in neg genomes
hmm_dict = {'sample': sample_list, 'target_name': id_list, 'length': len_list, 'evalue': evalue_list}             
hit_df = pd.DataFrame.from_dict(hmm_dict)
hit_df.to_csv(pos_hmm_path+'/nhmmer_all_pos_samples.csv', index=False)

#deduplicate samples
unique_hit_df = hit_df.drop_duplicates(subset='sample', inplace=False)
unique_hit_df.to_csv(pos_hmm_path+'/nhmmer_unique_pos_samples.csv', index=False)

print('Done')