Skip to content

Commit

Permalink
updated VCFCompare.py
Browse files Browse the repository at this point in the history
  • Loading branch information
LindoNkambule committed Nov 27, 2019
1 parent 89bf9e3 commit 9308c0d
Showing 1 changed file with 67 additions and 55 deletions.
122 changes: 67 additions & 55 deletions src/python/VCFCompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,60 +4,69 @@
import csv
import fun

def SNVs(truth, query):
    """Build the SNV concordance row for a truth/query pair.

    Args:
        truth: Truth records, passed unchanged to ``fun.snvList``.
        query: Query records, passed unchanged to ``fun.snvList``.

    Returns:
        A list laid out as ``['SNV', truth total, TP, FP, FN, query total,
        recall, precision]``, or an empty list when recall or precision is
        undefined because its denominator is zero.
    """
    # Truth and Query
    tsnv = fun.snvList(truth)
    qsnv = fun.snvList(query)

    # NOTE(review): variantCalls is unpacked as three sized collections,
    # presumably (true positives, false positives, false negatives) --
    # confirm against fun.variantCalls.
    stp, sfp, sfn = fun.variantCalls(tsnv, qsnv)
    tp, fp, fn = len(stp), len(sfp), len(sfn)

    # Recall divides by TP+FN and precision by TP+FP. Check the denominators
    # explicitly instead of swallowing every exception; the empty-list result
    # for the undefined case is kept so callers see no change.
    if tp + fn == 0 or tp + fp == 0:
        return []

    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    return ['SNV', len(tsnv), tp, fp, fn, len(qsnv), recall, precision]

def INDELs(truth, query):
    """Build the INDEL concordance row for a truth/query pair.

    Args:
        truth: Truth records, passed unchanged to ``fun.indelList``.
        query: Query records, passed unchanged to ``fun.indelList``.

    Returns:
        A list laid out as ``['INDEL', truth total, TP, FP, FN, query total,
        recall, precision]``, or an empty list when recall or precision is
        undefined because its denominator is zero.
    """
    # Truth and Query
    tindel = fun.indelList(truth)
    qindel = fun.indelList(query)

    # NOTE(review): variantCalls is unpacked as three sized collections,
    # presumably (true positives, false positives, false negatives) --
    # confirm against fun.variantCalls.
    itp, ifp, ifn = fun.variantCalls(tindel, qindel)
    tp, fp, fn = len(itp), len(ifp), len(ifn)

    # Recall divides by TP+FN and precision by TP+FP. Check the denominators
    # explicitly instead of swallowing every exception; the empty-list result
    # for the undefined case is kept so callers see no change.
    if tp + fn == 0 or tp + fp == 0:
        return []

    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    # The FP column must carry the false-positive count; the previous code
    # repeated the false-negative count in both the FP and FN positions.
    return ['INDEL', len(tindel), tp, fp, fn, len(qindel), recall, precision]

class variantSeparation:
    """Compute per-variant-type concordance rows for a truth/query pair.

    Each public method returns one row laid out as
    ``[type, truth total, TP, FP, FN, query total, recall, precision]``.
    """

    def __init__(self, truth, query):
        """Store the truth and query records used by ``SNVs`` and ``INDELs``."""
        self.truth = truth
        self.query = query

    @staticmethod
    def _concordance_row(label, truth_calls, query_calls):
        """Return the metrics row for one variant type, or ``[]`` if undefined."""
        # NOTE(review): variantCalls is unpacked as three sized collections,
        # presumably (true positives, false positives, false negatives) --
        # confirm against fun.variantCalls.
        true_pos, false_pos, false_neg = fun.variantCalls(truth_calls, query_calls)
        tp, fp, fn = len(true_pos), len(false_pos), len(false_neg)

        # Recall divides by TP+FN and precision by TP+FP. Check the
        # denominators explicitly instead of swallowing every exception; the
        # empty-list result for the undefined case is kept for callers.
        if tp + fn == 0 or tp + fp == 0:
            return []

        recall = tp / (tp + fn)
        precision = tp / (tp + fp)
        return [label, len(truth_calls), tp, fp, fn, len(query_calls),
                recall, precision]

    def SNVs(self):
        """Return the SNV row, or an empty list when the metrics are undefined."""
        return self._concordance_row(
            'SNV', fun.snvList(self.truth), fun.snvList(self.query))

    def INDELs(self):
        """Return the INDEL row, or an empty list when the metrics are undefined.

        The FP column carries the false-positive count; the previous code
        repeated the false-negative count in both the FP and FN positions.
        """
        return self._concordance_row(
            'INDEL', fun.indelList(self.truth), fun.indelList(self.query))


def main():
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(description='VCFCompare V1.0')
parser.add_argument("truth", help="Truth VCF") #required positional arg
parser.add_argument("query", help="Query VCF") #required positional arg
parser.add_argument("-o", "--out", help="Output CSV file prefix") #optional arg
parser.add_argument("-t", "--type", help="The type of variant concordance: SNV or INDEL") #optional arg
parser.add_argument("truth", help="Truth VCF") # required positional arg
parser.add_argument("query", help="Query VCF") # required positional arg
parser.add_argument("-o", "--out", help="Output CSV file prefix") # optional arg
parser.add_argument("-t", "--type", help="The type of variant concordance: SNV or INDEL") # optional arg
args = parser.parse_args()

#Error handling
# Error handling
if not args.out:
raise Exception("Please specify an output prefix using -o or --output")

Expand All @@ -71,25 +80,27 @@ def main():
truth_list = fun.vcfDFtoList(truth)
query_list = fun.vcfDFtoList(query)

#Totals
# Totals
Truth_Total = len(truth_list)
print ("Total Truth VCF records: {}".format(Truth_Total))
Query_Total = len(query_list)
print ("Total Query VCF records: {}".format(Query_Total))

#Output file
# Output file
header = ['Type', 'TRUTH.TOTAL', 'TP', 'FP', 'FN', 'QUERY.TOTAL', 'Recall', 'Precision']

variants = variantSeparation(truth_list, query_list)

if args.type == "SNV":
snv = SNVs(truth_list, query_list)
snv = variants.SNVs()
csvfile = args.out + ".SNV.csv"
elif args.type == "INDEL":
indel = INDELs(truth_list, query_list)
indel = variants.INDELs()
csvfile = args.out + ".INDEL.csv"
else:
#NB!!! PARALLELIZE THE FOLLOWING TWO TO SAVE TIME
snv = SNVs(truth_list, query_list)
indel = INDELs(truth_list, query_list)
# NB!!! PARALLELIZE THE FOLLOWING TWO TO SAVE TIME
snv = variants.SNVs()
indel = variants.INDELs()
csvfile = args.out + ".csv"

with open(csvfile, "w", newline='') as f:
Expand All @@ -104,5 +115,6 @@ def main():
writer.writerow(indel)
f.close()


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

0 comments on commit 9308c0d

Please sign in to comment.