## Combining results from mlplasmids and ABRicate - resistome location

This notebook combines the results of mlplasmids (prediction of each contig as plasmid or chromosome) with the results of ABRicate (presence/absence of genes of interest). Here the ABRicate results are based on the ResFinder database, i.e. antibiotic resistance genes.

In [1]:
import pandas as pd
import os
import json

# Notebook display settings: show every column and every row, and never truncate
# the text inside a cell.
# `display.max_colwidth` takes None for "unlimited"; the legacy value -1 was
# deprecated in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

In [2]:
# Input tables (CSV), given as paths relative to the working directory.
# ABRicate hits against the ResFinder database:
abricate = 'resfinder_results_cov80id90.csv'
# mlplasmids per-contig predictions:
mlplasmid = 'mlplasmids_all_curated.csv'

# If a table only exists as an Excel workbook, convert it first; this is the same
# as opening it in Excel and choosing "Save as" CSV UTF-8 (comma delimited):
#   read_file = pd.read_excel('Resfinder_Klebsiella_cov70id90.xlsx')
#   read_file.to_csv('Resfinder_Klebsiella_cov70id90.csv', index=None, header=True)

In [3]:
# Load both tables. They are read as comma-delimited (the pandas default separator);
# open the files in a text editor to confirm the delimiter before running.
dfab = pd.read_csv(abricate)
dfml = pd.read_csv(mlplasmid)

In [4]:
# Peek at the first five rows of the mlplasmids table as loaded.
dfml.head(n=5)

Unnamed: 0.1,Unnamed: 0,Prob_Chromosome,Prob_Plasmid,Prediction,Isolate_contig,Contig_length
0,1,0.998197,0.001803,Chromosome,544651_contig_1,116416
1,2,0.997998,0.002002,Chromosome,544651_contig_2,461922
2,3,0.995752,0.004248,Chromosome,544651_contig_3,100222
3,4,0.995657,0.004343,Chromosome,544651_contig_4,34295
4,5,0.999379,0.000621,Chromosome,544651_contig_5,162471


In [5]:
# Keep only the contig identifier (the merge key), both class probabilities and
# the final call. For a slimmer table, use just ['Isolate_contig', 'Prediction'].
ml_columns = ['Isolate_contig', 'Prob_Chromosome', 'Prob_Plasmid', 'Prediction']
dfml = dfml.loc[:, ml_columns]

In [6]:
# Check the column selection: first five rows of the trimmed mlplasmids table.
dfml.head(n=5)

Unnamed: 0,Isolate_contig,Prob_Chromosome,Prob_Plasmid,Prediction
0,544651_contig_1,0.998197,0.001803,Chromosome
1,544651_contig_2,0.997998,0.002002,Chromosome
2,544651_contig_3,0.995752,0.004248,Chromosome
3,544651_contig_4,0.995657,0.004343,Chromosome
4,544651_contig_5,0.999379,0.000621,Chromosome


In [7]:
# Peek at the first five rows of the abricate table.
dfab.head(n=5)

Unnamed: 0,Isolate_contig,START,END,STRAND,GENE,COVERAGE,COVERAGE_MAP,GAPS,%COVERAGE,%IDENTITY,DATABASE,ACCESSION,PRODUCT,RESISTANCE
0,544651_contig_2,37029,38204,+,oqxA_1,1-1176/1176,===============,0/0,100.0,98.89,resfinder,EU370913,oqxA,Nalidixic_acid;Ciprofloxacin
1,544651_contig_2,38228,41380,+,oqxB_1,1-3153/3153,===============,0/0,100.0,98.57,resfinder,EU370913,oqxB,Nalidixic_acid;Ciprofloxacin
2,544651_contig_20,22760,23179,+,fosA6_1,1-420/433,===============,0/0,97.0,99.76,resfinder,KU254579,fosA6,Fosfomycin
3,544651_contig_41,4350,5210,-,blaSHV-1_1,1-861/861,===============,0/0,100.0,100.0,resfinder,AF148850,blaSHV-1,Amoxicillin;Ampicillin;Cephalothin;Piperacillin;Ticarcillin
4,544651_contig_59,54,851,+,blaOXA-48_1,1-798/798,===============,0/0,100.0,100.0,resfinder,AY236073,blaOXA-48,Amoxicillin;Amoxicillin+Clavulanic_acid;Ampicillin;Ampicillin+Clavulanic_acid;Imipenem;Meropenem;Piperacillin;Piperacillin+Tazobactam


In [8]:
# Annotate every abricate gene hit with the mlplasmids call for the contig it
# sits on. The left join keeps all abricate rows; hits on contigs that are absent
# from the mlplasmids table get NaN in the prediction columns.
# validate='many_to_one' makes pandas raise a MergeError if a contig is listed
# more than once in dfml, which would otherwise silently duplicate gene rows.
df = dfab.merge(dfml, how='left', on='Isolate_contig', validate='many_to_one')

In [9]:
# Save the merged table without the pandas index. This runs before the `sample`
# and `contig` helper columns are added, so the file does not contain them.
df.to_csv(path_or_buf='mlplasmids_resistance_intersect.csv', index=False)

In [10]:
# Shape of the abricate table, as a reference for the row count of the merged table.
dfab.shape

(1633, 14)

In [11]:
# Shape of the merged table. The row count should equal dfab's: a left merge only
# adds rows when a contig is listed more than once in dfml.
df.shape

(1633, 17)

In [12]:
# First five rows of the merged table (abricate columns plus mlplasmids columns).
df.head(n=5)

Unnamed: 0,Isolate_contig,START,END,STRAND,GENE,COVERAGE,COVERAGE_MAP,GAPS,%COVERAGE,%IDENTITY,DATABASE,ACCESSION,PRODUCT,RESISTANCE,Prob_Chromosome,Prob_Plasmid,Prediction
0,544651_contig_2,37029,38204,+,oqxA_1,1-1176/1176,===============,0/0,100.0,98.89,resfinder,EU370913,oqxA,Nalidixic_acid;Ciprofloxacin,0.997998,0.002002,Chromosome
1,544651_contig_2,38228,41380,+,oqxB_1,1-3153/3153,===============,0/0,100.0,98.57,resfinder,EU370913,oqxB,Nalidixic_acid;Ciprofloxacin,0.997998,0.002002,Chromosome
2,544651_contig_20,22760,23179,+,fosA6_1,1-420/433,===============,0/0,97.0,99.76,resfinder,KU254579,fosA6,Fosfomycin,0.999436,0.000564,Chromosome
3,544651_contig_41,4350,5210,-,blaSHV-1_1,1-861/861,===============,0/0,100.0,100.0,resfinder,AF148850,blaSHV-1,Amoxicillin;Ampicillin;Cephalothin;Piperacillin;Ticarcillin,0.898761,0.101239,Chromosome
4,544651_contig_59,54,851,+,blaOXA-48_1,1-798/798,===============,0/0,100.0,100.0,resfinder,AY236073,blaOXA-48,Amoxicillin;Amoxicillin+Clavulanic_acid;Ampicillin;Ampicillin+Clavulanic_acid;Imipenem;Meropenem;Piperacillin;Piperacillin+Tazobactam,0.099711,0.900289,Plasmid


In [13]:
# Isolate name = everything before the trailing "_contig_<n>" suffix.
# Splitting from the right (at most two splits) keeps isolate names that
# themselves contain underscores intact, e.g. "KP_12_contig_3" -> "KP_12".
df['sample'] = df['Isolate_contig'].str.rsplit('_', n=2).str[0]

In [14]:
# Add the contig label, e.g. "544651_contig_2" -> "contig2".
# Split once from the right (at most two splits) so that isolate names containing
# underscores do not shift the fields, and reuse that split for both pieces.
contig_parts = df['Isolate_contig'].str.rsplit('_', n=2)
df['contig'] = contig_parts.str[1] + contig_parts.str[2]

In [15]:
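#After the left merge, a gene hit on a contig that is missing from the mlplasmids table has an empty (NaN) Prediction.
#If both input tables come from the same assemblies there should be none; otherwise uncomment the next line to label them:
#df['Prediction'] = df['Prediction'].fillna('Undetermined')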

In [16]:
# First five rows again, now including the `sample` and `contig` columns.
df.head(n=5)

Unnamed: 0,Isolate_contig,START,END,STRAND,GENE,COVERAGE,COVERAGE_MAP,GAPS,%COVERAGE,%IDENTITY,DATABASE,ACCESSION,PRODUCT,RESISTANCE,Prob_Chromosome,Prob_Plasmid,Prediction,sample,contig
0,544651_contig_2,37029,38204,+,oqxA_1,1-1176/1176,===============,0/0,100.0,98.89,resfinder,EU370913,oqxA,Nalidixic_acid;Ciprofloxacin,0.997998,0.002002,Chromosome,544651,contig2
1,544651_contig_2,38228,41380,+,oqxB_1,1-3153/3153,===============,0/0,100.0,98.57,resfinder,EU370913,oqxB,Nalidixic_acid;Ciprofloxacin,0.997998,0.002002,Chromosome,544651,contig2
2,544651_contig_20,22760,23179,+,fosA6_1,1-420/433,===============,0/0,97.0,99.76,resfinder,KU254579,fosA6,Fosfomycin,0.999436,0.000564,Chromosome,544651,contig20
3,544651_contig_41,4350,5210,-,blaSHV-1_1,1-861/861,===============,0/0,100.0,100.0,resfinder,AF148850,blaSHV-1,Amoxicillin;Ampicillin;Cephalothin;Piperacillin;Ticarcillin,0.898761,0.101239,Chromosome,544651,contig41
4,544651_contig_59,54,851,+,blaOXA-48_1,1-798/798,===============,0/0,100.0,100.0,resfinder,AY236073,blaOXA-48,Amoxicillin;Amoxicillin+Clavulanic_acid;Ampicillin;Ampicillin+Clavulanic_acid;Imipenem;Meropenem;Piperacillin;Piperacillin+Tazobactam,0.099711,0.900289,Plasmid,544651,contig59


In [17]:
# Summarise per contig: collect every gene found on a contig into a list, next to
# the isolate, the contig's plasmid probability and its predicted class.
# dropna=False keeps contigs that have no mlplasmids prediction (NaN after the
# left merge); the groupby default would silently drop them together with their genes.
contig_keys = ['sample', 'Prob_Plasmid', 'Prediction', 'contig']
df_summary3 = (
    df.groupby(contig_keys, dropna=False)['GENE']
    .apply(list)
    .reset_index(name='genes')
)

In [18]:
# Save the per-contig gene summary without the pandas index.
df_summary3.to_csv(path_or_buf='summaryByContigProb_ml_resistance.csv', index=False)

In [19]:
# Show the per-contig summary. There is no .head() here, so with display.max_rows
# set to None the whole table is rendered.
df_summary3

Unnamed: 0,sample,Prob_Plasmid,Prediction,contig,genes
0,544651,0.000564,Chromosome,contig20,[fosA6_1]
1,544651,0.002002,Chromosome,contig2,"[oqxA_1, oqxB_1]"
2,544651,0.101239,Chromosome,contig41,[blaSHV-1_1]
3,544651,0.900289,Plasmid,contig59,[blaOXA-48_1]
4,544663,0.001393,Chromosome,contig10,[blaSHV-106_1]
5,544663,0.00926,Chromosome,contig61,[fosA_6]
6,544663,0.61575,Plasmid,contig64,[blaCTX-M-15_1]
7,544663,0.825777,Plasmid,contig60,"[blaOXA-1_1, aac(6')-Ib-cr_1]"
8,544663,0.844377,Plasmid,contig7,[dfrA14_5]
9,544663,0.908219,Plasmid,contig83,[blaOXA-48_1]
