From 2e9baa9290402cb0e6d76d944ecc0871c2e8be54 Mon Sep 17 00:00:00 2001
From: LindoNkambule
Date: Sat, 30 Nov 2019 20:45:11 +0200
Subject: [PATCH] grouped functions into methods under classes

---
 src/python/fun.py | 116 +++++++++++++++++++++++++---------------------
 1 file changed, 63 insertions(+), 53 deletions(-)

diff --git a/src/python/fun.py b/src/python/fun.py
index d156487..34185fa 100644
--- a/src/python/fun.py
+++ b/src/python/fun.py
@@ -1,55 +1,65 @@
 #!/usr/bin/env python
 
-def vcfExtract(vcf):
-    import allel
-    vcfInfo = allel.vcf_to_dataframe(vcf, ['variants/CHROM', 'variants/POS', 'variants/REF', 'variants/ALT'], alt_number=1)
-    return vcfInfo
-    #extract information from vcf to a df
-
-def vcfDFtoList(vcfdf):
-    vcfList = vcfdf.values.tolist()
-    return vcfList
-    #convert df to a list
-
-#for SNVs only
-def snvList(variantsList):
-    snvList = []
-    for variant in variantsList:
-        ref = len(str(variant[2]))
-        alt = len(str(variant[3]))
-        if(ref == 1 and alt == 1):
-            snvList.append(variant)
-    return snvList
-
-#for INDELs only
-def indelList(variantsList):
-    indelList = []
-    for variant in variantsList:
-        ref = len(str(variant[2]))
-        alt = len(str(variant[3]))
-        if (ref > 1 or alt > 1):
-            indelList.append(variant)
-    return indelList
-
-#for both SNVs and INDELs
-def snvINDELlists(variantsList):
-    snvList = []
-    indelList = []
-    for variant in variantsList:
-        ref = len(str(variant[2]))
-        alt = len(str(variant[3]))
-        if (ref > 1 or alt > 1):
-            indelList.append(variant)
-        else:
-            snvList.append(variant)
-    return snvList, indelList
-    #separate SNVs and INDELs into separate lists
-    #this function takes (1) two empty SNV and INDEL lists and (2) a list with variants, and separates the variants according to size (SNVs and INDELs)
-
-def variantCalls(truth, query):
-    Truth_set = set(map(tuple, truth)) #convert nested lists to sets for speed
-    Query_Set = set(map(tuple, query))
-    TPs = Truth_set.intersection(Query_Set)
-    FPs = Query_Set.difference(Truth_set)
-    FNs = Truth_set.difference(Query_Set)
-    return TPs, FPs, FNs
+
+class infoExtract:
+    def __init__(self, vcf):
+        self.vcf = vcf
+
+
+    def alleles(self):
+        import allel
+        vcfInfo = allel.vcf_to_dataframe(self.vcf, ['variants/CHROM', 'variants/POS', 'variants/REF', 'variants/ALT'], alt_number=1)
+        vcfList = vcfInfo.values.tolist()
+        return vcfList
+
+
+class createLists():
+    def __init__(self, variantsList):
+        self.variantsList = variantsList
+
+
+    def snvList(self):
+        snvList = []
+        for variant in self.variantsList:
+            ref = len(str(variant[2]))
+            alt = len(str(variant[3]))
+            if(ref == 1 and alt == 1):
+                snvList.append(variant)
+        return snvList
+
+
+    def indelList(self):
+        indelList = []
+        for variant in self.variantsList:
+            ref = len(str(variant[2]))
+            alt = len(str(variant[3]))
+            if (ref > 1 or alt > 1):
+                indelList.append(variant)
+        return indelList
+
+
+    def snvINDELlists(self):
+        snvList = []
+        indelList = []
+        for variant in self.variantsList:
+            ref = len(str(variant[2]))
+            alt = len(str(variant[3]))
+            if (ref > 1 or alt > 1):
+                indelList.append(variant)
+            else:
+                snvList.append(variant)
+        return snvList, indelList
+
+class concordance():
+    def __init__(self, truth, query):
+        self.truth = truth
+        self.query = query
+
+
+    def variantCalls(self):
+        Truth_set = set(map(tuple, self.truth))
+        Query_Set = set(map(tuple, self.query))
+        TPs = Truth_set.intersection(Query_Set)
+        FPs = Query_Set.difference(Truth_set)
+        FNs = Truth_set.difference(Query_Set)
+        return TPs, FPs, FNs