# Concatenating all tools and strains at threshold 0, 10, 100, 1000, and 10K

In [16]:
import os
import allel
import pandas as pd


strain=['A_J','AKR_J','BALB_CJ','CBA_J','C3H_HeJ','DBA_2J','LP_J']

tools=['BioGraph*',
'breakdancer',
'clever',
'delly',
'gasv',
'gridss',
'indelminer',
'lumpexpress',
'mistrvar',
'pindel',
'platypus',
'popdel',
'rdxplorer',
'smoove',
'sniffles']


## 0 bp Threshold

In [17]:
#0 threshold deletions

th=0
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_0 = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])
for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.vcf'
        
        callset = allel.read_vcf(file,fields='*')
        if callset!=None:
            df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
            df_0 = pd.concat([df_current, df_0],ignore_index=True)   
        
        else:
            print("Empty File: " + file)
            
df_0.head()

Empty File: ../raw_data/mouse/custom_vcf_fulldata/0t/deletions/nf_0t.pindel.BALB_CJ.chr19.100p_sorted.modified.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/0t/deletions/nf_0t.platypus.BALB_CJ.chr19.100p_sorted.modified.vcf


Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,1725,FP,6055203,0
1,sniffles,LP_J,78,FP,17072687,0
2,sniffles,LP_J,34582728,FP,17098523,0
3,sniffles,LP_J,48,FP,17201451,0
4,sniffles,LP_J,114,FP,17231867,0


In [18]:
#0 threshold non-deletions
import os

th=0
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_0_nondel = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])
for s in strain:
    for t in tools:
        
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/non-deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.nondel.vcf'
        
        callset = allel.read_vcf(file,fields='*')
        if callset!=None:
            print ("----")
            df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
            df_0_nondel = pd.concat([df_current, df_0_nondel],ignore_index=True)   
            
        #else:
            #print("Empty File: " + file)
        
df_0_nondel.head()

Unnamed: 0,tool,strain,length,flag,position,threshold


## 10 bp Threshold

In [19]:
#10 threshold deletions
#missing files: pindel.BALB_CJ, and platypus.BALB_CJ
#tools were not able to run on mouse data; ignore?

th=10     
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_10 = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.vcf'
        
        if os.path.exists(file):
            callset = allel.read_vcf(file,fields='*')
            if callset!=None:
                df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
                df_10 = pd.concat([df_current, df_10],ignore_index=True)

            else:
                print("Empty File: " + file)
        else:
            print("Nonexistant File: " + file)
            
df_10.head()

Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/10t/deletions/nf_10t.pindel.BALB_CJ.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/10t/deletions/nf_10t.platypus.BALB_CJ.chr19.100p_sorted.modified.vcf


Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,1725,FP,6055203,10
1,sniffles,LP_J,34582728,FP,17098523,10
2,sniffles,LP_J,48,FP,17201451,10
3,sniffles,LP_J,114,FP,17231867,10
4,sniffles,LP_J,35,FP,17291187,10


In [20]:
#10 threshold non-deletions
th=10     
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_10_nondel = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/non-deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.nondel.vcf'
        
        callset = allel.read_vcf(file,fields='*')
        if callset!=None:
            df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
            df_10_nondel = pd.concat([df_current, df_10_nondel],ignore_index=True)
            
        else:
            print("Empty File: " + file)
            
df_10_nondel.head()

Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.gasv.A_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.indelminer.A_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.mistrvar.A_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.popdel.A_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.gasv.AKR_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.lumpexpress.AKR_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.mistrvar.AKR_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/10t/non-deletions/nf_10t.rdxplorer.AKR_J.chr19.100p_sorted.modified

Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,28641,TN,22431541,10
1,sniffles,LP_J,19855,TN,26270409,10
2,sniffles,LP_J,40409,TN,28848778,10
3,sniffles,LP_J,167267,TN,32209571,10
4,sniffles,LP_J,40453,TN,50557877,10


## 100 bp Threshold

In [21]:
#100 threshold deletions
#missing files: pindel.BALB_CJ, and platypus.BALB_CJ

th=100
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_100 = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.vcf'
        
        if os.path.exists(file):
            callset = allel.read_vcf(file,fields='*')
            if callset!=None:
                df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
                df_100 = pd.concat([df_current, df_100],ignore_index=True)
        
            else:
                print("Empty File: " + file)
                
        else:
            print("Nonexistant File: " + file)
            
df_100.head()

Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/100t/deletions/nf_100t.pindel.BALB_CJ.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/100t/deletions/nf_100t.platypus.BALB_CJ.chr19.100p_sorted.modified.vcf


Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,1725,FP,6055203,100
1,sniffles,LP_J,34582728,FP,17098523,100
2,sniffles,LP_J,48,FP,17201451,100
3,sniffles,LP_J,35,FP,17291187,100
4,sniffles,LP_J,34,FP,17805736,100


In [22]:
#100 threshold non-deletions

th=100     
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_100_nondel = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/non-deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.nondel.vcf'
        callset = allel.read_vcf(file,fields='*')
        if callset!=None:
            df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
            df_100_nondel = pd.concat([df_current, df_100_nondel],ignore_index=True)
            
        else:
            print("Empty File: " + file)
            
df_100_nondel.head()

Empty File: ../raw_data/mouse/custom_vcf_fulldata/100t/non-deletions/nf_100t.lumpexpress.AKR_J.chr19.100p_sorted.modified.nondel.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/100t/non-deletions/nf_100t.indelminer.LP_J.chr19.100p_sorted.modified.nondel.vcf


Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,24683,TN,21449705,100
1,sniffles,LP_J,21785,TN,22409688,100
2,sniffles,LP_J,28641,TN,22431541,100
3,sniffles,LP_J,261746,TN,22797608,100
4,sniffles,LP_J,149268,TN,23059703,100


## 1000 bp Threshold

In [23]:
#1000 threshold deletions
#missing files: pindel.BALB_CJ, and platypus.BALB_CJ

th=1000    
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_1000 = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.vcf'
        
        if os.path.exists(file):
            callset = allel.read_vcf(file,fields='*')
            if callset!=None:
                df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
                df_1000 = pd.concat([df_current, df_1000],ignore_index=True)
            
        else:
            print("Empty File: " + file)
            
    else:
        print ("Nonexistant File: " + file)
           
df_1000.head()


Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.sniffles.A_J.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.sniffles.AKR_J.chr19.100p_sorted.modified.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.pindel.BALB_CJ.chr19.100p_sorted.modified.vcf
Empty File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.platypus.BALB_CJ.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.sniffles.BALB_CJ.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.sniffles.CBA_J.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.sniffles.C3H_HeJ.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/1000t/deletions/nf_1000t.sniffles.DBA_2J.chr19.100p_sorted.modi

Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,34582728,FP,17098523,1000
1,sniffles,LP_J,48,FP,17201451,1000
2,sniffles,LP_J,35,FP,17291187,1000
3,sniffles,LP_J,34,FP,17805736,1000
4,sniffles,LP_J,37,FP,18086146,1000


In [24]:
#1000 threshold non-deletions

th=1000    
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_1000_nondel = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/non-deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.nondel.vcf'
        
        
        callset = allel.read_vcf(file,fields='*')
        if callset!=None:
            df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
            df_1000_nondel = pd.concat([df_current, df_1000_nondel],ignore_index=True)
            
        else:
            print("Empty File: " + file)
        
df_1000_nondel.head()



Empty File: ../raw_data/mouse/custom_vcf_fulldata/1000t/non-deletions/nf_1000t.lumpexpress.AKR_J.chr19.100p_sorted.modified.nondel.vcf


Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,2164,TN,18924303,1000
1,sniffles,LP_J,868204,TN,18926808,1000
2,sniffles,LP_J,24683,TN,21449705,1000
3,sniffles,LP_J,21785,TN,22409688,1000
4,sniffles,LP_J,28641,TN,22431541,1000


## 10000 bp Threshold

In [25]:
#10000 threshold deletions
#missing files: pindel.BALB_CJ, and platypus.BALB_CJ

th=10000    
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_10000 = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.vcf'
        
        if os.path.exists(file):
            callset = allel.read_vcf(file,fields='*')
            if callset!=None:
                df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
                df_10000 = pd.concat([df_current, df_10000],ignore_index=True)
            
            else:
                print("Empty File: " + file)
                
        else:
            print("Nonexistant File: " + file)
        
df_10000.head()


Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/10000t/deletions/nf_10000t.pindel.BALB_CJ.chr19.100p_sorted.modified.vcf
Nonexistant File: ../raw_data/mouse/custom_vcf_fulldata/10000t/deletions/nf_10000t.platypus.BALB_CJ.chr19.100p_sorted.modified.vcf


Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,34582728,FP,17098523,10000
1,sniffles,LP_J,48,FP,17201451,10000
2,sniffles,LP_J,34,FP,17805736,10000
3,sniffles,LP_J,37,FP,18086146,10000
4,sniffles,LP_J,47,FP,18253183,10000


In [26]:
#10000 threshold non-deletions

th=10000    
df_current = pd.DataFrame(columns=['strain','length','flag','position','threshold'])
df_10000_nondel = pd.DataFrame(columns=['tool','strain','length','flag','position','threshold'])

for s in strain:
    for t in tools:
        file='../raw_data/mouse/custom_vcf_fulldata/'+str(th)+'t/non-deletions/nf_'+str(th)+'t.'+t+'.'+s+'.chr19.100p_sorted.modified.nondel.vcf'
        
        
        callset = allel.read_vcf(file,fields='*')
        if callset!=None:
            df_current = pd.DataFrame({'tool': t, 'strain': s, 'length': callset['variants/SVLEN'],'flag': callset['variants/FLAG'],'position': callset['variants/POS'],'threshold': th})
            df_10000_nondel = pd.concat([df_current, df_10000_nondel],ignore_index=True)
        else:
            print("Empty File: " + file)
           
        
df_10000_nondel.head()


Empty File: ../raw_data/mouse/custom_vcf_fulldata/10000t/non-deletions/nf_10000t.lumpexpress.AKR_J.chr19.100p_sorted.modified.nondel.vcf


Unnamed: 0,tool,strain,length,flag,position,threshold
0,sniffles,LP_J,2164,TN,18924303,10000
1,sniffles,LP_J,868204,TN,18926808,10000
2,sniffles,LP_J,37527,TN,20102302,10000
3,sniffles,LP_J,24683,TN,21449705,10000
4,sniffles,LP_J,69668,TN,22336470,10000


## Concatenate

In [29]:
#List of your dataframes
#removed df_0 and df_0_nondel for graphing purposes
pdList = [df_10,df_100,df_1000,df_10000,df_10_nondel,df_100_nondel,df_1000_nondel,df_10000_nondel] 
df_cat = pd.concat(pdList)

In [30]:
#output counts for TP, FP, TN rows are not the same as Varuni's; due to missing files?
group_data_TP=df_cat[df_cat['flag'] == 'TP'].groupby(['tool','threshold','strain'],as_index=False)['flag'].count()
group_data_TP=group_data_TP.rename(columns={"flag": "nTP"})
group_data_TP.shape

(399, 4)

In [31]:
group_data_FP=df_cat[df_cat['flag'] == 'FP'].groupby(['tool','threshold','strain'],as_index=False)['flag'].count()
group_data_FP=group_data_FP.rename(columns={"flag": "nFP"})
group_data_FP.shape

(412, 4)

In [32]:
group_data_TN=df_cat[df_cat['flag'] == 'TN'].groupby(['tool','threshold','strain'],as_index=False)['flag'].count()
group_data_TN=group_data_TN.rename(columns={"flag": "nTN"})
group_data_TN.shape

(394, 4)

In [33]:
#fill missing combinations with 0s
#removed 0 threshold group since it = 0 for all tools
for s in strain:
    for t in tools:
        for th in [10,100,1000,10000]:
            if not (((group_data_TP['tool'] == t) & (group_data_TP['strain'] == s) & (group_data_TP['threshold'] == th)).any()):
                group_data_TP = group_data_TP.append({'tool' : t , 'strain' : s,'threshold' : th,'nTP' : 0} , ignore_index=True)
group_data_TP.shape

(420, 4)

In [34]:
for s in strain:
    for t in tools:
        for th in [10,100,1000,10000]:
            if not (((group_data_FP['tool'] == t) & (group_data_FP['strain'] == s) & (group_data_FP['threshold'] == th)).any()):
                #group_data_FP = group_data_FP.append({'tool' : t , 'strain' : s,'threshold' : th,'nFP' : 0} , ignore_index=True)
                print (s,t,th)
group_data_FP.shape

BALB_CJ pindel 10
BALB_CJ pindel 100
BALB_CJ pindel 1000
BALB_CJ pindel 10000
BALB_CJ platypus 10
BALB_CJ platypus 100
BALB_CJ platypus 1000
BALB_CJ platypus 10000


(412, 4)

In [35]:
for s in strain:
    for t in tools:
        for th in [10,100,1000,10000]:
            if not (((group_data_TN['tool'] == t) & (group_data_TN['strain'] == s) & (group_data_TN['threshold'] == th)).any()):
                group_data_TN = group_data_TN.append({'tool' : t , 'strain' : s,'threshold' : th,'nTN' : 0} , ignore_index=True)
                #print (s,t,th)
group_data_TN.shape

(420, 4)

In [36]:
#merge TP and FP
df_merge=pd.merge(group_data_TP, group_data_FP,on=['tool','threshold','strain'])
df_merge=pd.merge(df_merge, group_data_TN,on=['tool','threshold','strain'])
df_merge.shape

(412, 6)

In [37]:
df_merge.head(10)

Unnamed: 0,tool,threshold,strain,nTP,nFP,nTN
0,BioGraph*,10,AKR_J,36,389,11
1,BioGraph*,10,A_J,47,421,20
2,BioGraph*,10,BALB_CJ,45,417,13
3,BioGraph*,10,C3H_HeJ,41,416,17
4,BioGraph*,10,CBA_J,44,445,18
5,BioGraph*,10,DBA_2J,45,454,17
6,BioGraph*,10,LP_J,39,336,12
7,BioGraph*,100,AKR_J,249,176,108
8,BioGraph*,100,A_J,279,189,131
9,BioGraph*,100,BALB_CJ,269,193,122


In [38]:
#true SV deletions
df_current = pd.DataFrame(columns=['strain','length'])
df = pd.DataFrame(columns=['strain','length'])


for s in strain:
    file='../gold_standard/mouse_vcf/'+s+'_reference.vcf'
    callset = allel.read_vcf(file,fields='*')
    
    df_current = pd.DataFrame({'strain': s, 'length': callset['variants/SVLEN']})
    df = pd.concat([df_current, df],ignore_index=True)
group_data_true = df.groupby(['strain'],as_index=False).count()
group_data_true=group_data_true.rename(columns={"length": "n_true"})
group_data_true

Unnamed: 0,strain,n_true
0,AKR_J,504
1,A_J,533
2,BALB_CJ,545
3,C3H_HeJ,539
4,CBA_J,586
5,DBA_2J,609
6,LP_J,483


In [39]:
df_merge=pd.merge(df_merge, group_data_true)
df_merge.shape

(412, 7)

## Calculating precision, sensitivity, specificity, f-score

In [40]:
df_merge['nFN']=df_merge['n_true']-df_merge['nTP']

df_merge['sensitivity']=df_merge['nTP']/df_merge['n_true']
df_merge['precision']=df_merge['nTP']/(df_merge['nTP']+df_merge['nFP'])
df_merge['specificity']=df_merge['nTN']/(df_merge['nTN']+df_merge['nFP'])
df_merge['f-score']=2*(df_merge['sensitivity']*df_merge['precision'])/(df_merge['sensitivity']+df_merge['precision']+0.00000001)
df_merge.head()

Unnamed: 0,tool,threshold,strain,nTP,nFP,nTN,n_true,nFN,sensitivity,precision,specificity,f-score
0,BioGraph*,10,AKR_J,36,389,11,504,468,0.071429,0.084706,0.0275,0.077503
1,BioGraph*,100,AKR_J,249,176,108,504,255,0.494048,0.585882,0.380282,0.53606
2,BioGraph*,1000,AKR_J,269,156,122,504,235,0.53373,0.632941,0.438849,0.579117
3,BioGraph*,10000,AKR_J,291,134,173,504,213,0.577381,0.684706,0.563518,0.62648
4,breakdancer,10,AKR_J,9,527,4,504,495,0.017857,0.016791,0.007533,0.017308


In [41]:
#count number of TP/FP/TN/FN for each tool at each threshold
df_sum=df_merge.groupby(['tool','threshold'],as_index=False)['nTP','nFP','nFN','nTN'].sum()
df_sum.head(10)

Unnamed: 0,tool,threshold,nTP,nFP,nFN,nTN
0,BioGraph*,10,297,2878,3502,108
1,BioGraph*,100,1827,1348,1972,782
2,BioGraph*,1000,2004,1171,1795,908
3,BioGraph*,10000,2163,1012,1636,1292
4,breakdancer,10,58,3484,3741,29
5,breakdancer,100,1883,1659,1916,947
6,breakdancer,1000,2420,1122,1379,1405
7,breakdancer,10000,2522,1020,1277,1769
8,clever,10,486,13270,3313,218
9,clever,100,2613,11143,1186,1358


In [42]:
%store df_sum


Stored 'df_sum' (DataFrame)


In [43]:
df_merge.to_csv(r'df_merged.csv')