In [1]:
import sys
import argparse
import pandas as pd
import numpy as np
import io
import os

In [3]:
'''
Defining functions below
'''

def store_vcf_header(path):
    '''
    Collect the meta-information lines of a vcf file.

    input: a string path to the vcf file
    output: a list with every line of the file that starts with '##', in file
        order and exactly as read (line endings are kept)

    '''
    header_lines = []
    with open(path, 'r') as vcf:
        for line in vcf:
            # The '#CHROM ...' column line starts with a single '#', so it is
            # not collected here.
            if line.startswith('##'):
                header_lines.append(line)
    return header_lines

def read_vcf(path):
    '''
    Load the record table of a vcf file into pandas.

    input: a string path to the vcf file
    output: a pandas dataframe of the vcf file; lines starting with '##' are
        skipped, the '#CHROM ...' line supplies the column names, and the
        '#CHROM' column is renamed to 'CHROM'
    read_vcf from https://gist.github.com/dceoy/99d976a2c01e7f0ba1c813778f9db744
    '''
    # POS is parsed as an integer; the other fixed columns (QUAL included) are
    # kept as strings.
    column_types = {'#CHROM': str, 'POS': int, 'ID': str, 'REF': str, 'ALT': str,
                    'QUAL': str, 'FILTER': str, 'INFO': str}

    # Copy everything except the '##' meta lines into an in-memory text buffer.
    table_text = io.StringIO()
    with open(path, 'r') as vcf:
        for line in vcf:
            if not line.startswith('##'):
                table_text.write(line)
    table_text.seek(0)

    table = pd.read_csv(table_text, sep='\t', dtype=column_types)
    return table.rename(columns={'#CHROM': 'CHROM'})

def sample_heterozygous(df):
    '''
    input: dataframe from a vcf file, dataframe should be one sample's fields. can be made by:
        df=vcf_df['sample_name'].str.split(":", expand=True)
    output: boolean pandas Series (same index as df) that is True where field 1
        of the sample, parsed as a float, is not zero

    NOTE(review): in the vcf used in this notebook FORMAT starts with GT:AF, so
    field 1 is the allele fraction and "non-zero" means the variant has some
    support in the sample, not that the genotype call is heterozygous --
    confirm this is the intended meaning.
    '''
    field_value = df[1].astype(float)
    return field_value.ne(0)

def sample_not_alt1(df):
    '''
    input: dataframe from a vcf file, dataframe should be one sample's fields. can be made by:
        df=vcf_df['sample_name'].str.split(":", expand=True)
    output: boolean pandas Series (same index as df) that is True where field 1
        of the sample, parsed as a float, is exactly zero

    NOTE(review): in the vcf used in this notebook FORMAT starts with GT:AF, so
    field 1 is the allele fraction and "zero" means the variant has no support
    in the sample -- confirm this matches the intended "homozygous reference"
    meaning.
    '''
    field_value = df[1].astype(float)
    return field_value.eq(0)

def het_verify(df):
    '''
    input: dataframe from a vcf file, dataframe should be one sample's fields. can be made by:
        df=vcf_df['sample_name'].str.split(":", expand=True)
        Field 1 must hold comma-separated integer read depths ("first,second,...");
        only the first two values are used.
    output: boolean pandas Series that is True where the second depth makes up
        more than 0.35 of (first + second) AND the second depth is strictly
        greater than 8

    NOTE(review): this assumes field 1 is an AD-style "ref,alt" depth pair;
    that is not the case for the GT:AF:... layout shown in this notebook --
    confirm before enabling it.
    '''
    # Keep only the first two comma-separated depths of field 1.
    depths = df[1].str.split(',', expand=True).iloc[:, 0:2]
    second_depth = depths[1].astype(int)
    depth_total = np.sum(depths.astype(int), axis=1)

    # Fraction of the two-depth total contributed by the second depth.
    fraction = second_depth / depth_total

    passes_fraction = fraction > 0.35
    passes_depth = second_depth > 8
    return passes_fraction & passes_depth

def filter(sample, control):
    '''
    input: dataframes of progeny and parental vcf data, dataframes should each be one sample's fields. can be made by:
        df=vcf_df['sample_name'].str.split(":", expand=True)
        control must contain every row label that survives the first step on sample.
    output: the rows of sample that pass sample_heterozygous() and whose
        matching rows (same index labels) in control pass sample_not_alt1()
    '''
    # Step 1: rows of the sample that sample_heterozygous() flags.
    candidates = sample[sample_heterozygous(sample)]
    # The het_verify() check on the candidates is currently disabled:
    #candidates=candidates[het_verify(candidates)]

    # Step 2: look at the control only at those same row labels and keep the
    # candidates for which the control passes sample_not_alt1().
    control_at_candidates = control.loc[candidates.index, :]
    absent_in_control = sample_not_alt1(control_at_candidates)

    return candidates[absent_in_control]

In [5]:
# Read the structural-variant vcf: the records as a dataframe, and the '##'
# meta-information lines separately so they can be written back out later.
SV=read_vcf('NKGOF_GRIDDS_SV.vcf')
header=store_vcf_header('NKGOF_GRIDDS_SV.vcf')

# Filter variants based on quality metrics. A record is kept when
#   * its contig name is shorter than 7 characters (this drops long-named
#     contigs; chr1-chr22, chrX, chrY and chrM all pass this test),
#   * FILTER is PASS, and
#   * QUAL is above 1000.
# The name-length test uses the vectorised .str.len() so that every operand of
# '&' is a boolean Series; combining a plain Python list with a Series via '&'
# is deprecated in recent pandas.
SV=SV[(SV['CHROM'].str.len() < 7) & (SV['FILTER'] == 'PASS') & (SV['QUAL'].astype(float) > 1000)]
SV

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,Donor0958_NT,Donor0958_OR7A10OE,Donor0958_OR7A10Stop
1,chr1,820916,gridss0fb_22o,C,C[chr1:820952[,1131.01,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.200:0:0.00:0:0.00:188.29:0:8:0.00:0:0:22.6...,.:0.087:0:0.00:0:0.00:94.29:0:4:0.00:0:0:46.06...,.:0.111:0:0.00:0:0.00:94.43:0:4:0.00:0:0:44.64...
3,chr1,820952,gridss0fb_22h,T,]chr1:820916]T,1131.01,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.174:0:0.00:0:0.00:188.29:0:8:0.00:0:0:21.8...,.:0.091:0:0.00:0:0.00:94.29:0:4:0.00:0:0:0.00:...,.:0.133:0:0.00:0:0.00:94.43:0:4:0.00:0:0:0.00:...
28,chr1,886224,gridss0fb_115o,T,TGCCCTTTGGCAGAGCAGGTGTGCTGTGCTG[chr1:886225[,5378.66,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.468:0:0.00:0:0.00:890.73:0:49:0.00:0:0:0.0...,.:0.450:0:0.00:0:0.00:693.21:0:44:0.00:0:0:0.0...,.:0.525:0:0.00:0:0.00:759.58:0:50:0.00:0:0:0.0...
30,chr1,886225,gridss0fb_115h,G,]chr1:886224]GCCCTTTGGCAGAGCAGGTGTGCTGTGCTGG,5378.66,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.468:0:0.00:0:0.00:780.98:0:49:0.00:0:0:201...,.:0.450:0:0.00:0:0.00:605.90:0:44:0.00:0:0:200...,.:0.525:0:0.00:0:0.00:827.29:0:50:0.00:0:0:256...
34,chr1,890451,gridss0fb_126o,A,AAAGAAAAAAAAAAG[chr1:890452[,2111.83,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=2;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.267:0:0.00:0:0.00:297.74:0:18:0.00:0:0:22....,.:0.183:0:0.00:0:0.00:207.48:0:14:0.00:0:0:28....,.:0.132:0:0.00:0:0.00:75.77:0:6:0.00:0:0:0.00:...
...,...,...,...,...,...,...,...,...,...,...,...,...
735720,chrM,12996,gridss71ff_3182h,A,A]chr4:29439624],9296.99,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.012:0:0.00:0:0.00:1023.60:33:62:0.00:0:0:1...,.:0.048:0:0.00:0:0.00:1030.98:30:67:0.00:0:0:1...,.:0.036:0:0.00:0:0.00:911.52:39:50:0.00:0:0:11...
735730,chrM,14777,gridss28bb_2034h,A,[chr2:33667412[A,3059.54,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:3.828e-03:0:0.00:0:0.00:198.97:9:16:0.00:0:0...,.:0.026:0:0.00:0:0.00:440.78:22:28:0.00:0:0:23...,.:0.017:0:0.00:0:0.00:178.52:9:17:0.00:0:0:44....
735731,chrM,15022,gridss28ff_3233h,C,C]chr2:33667415],1964.29,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=2...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:2.495e-03:0:0.00:0:0.00:94.86:10:4:0.00:0:0:...,.:0.028:0:0.00:0:0.00:189.73:24:18:0.00:0:0:24...,.:8.840e-03:0:0.00:0:0.00:36.24:6:5:0.00:0:0:0...
735739,chrM,16091,gridss186bb_78h,A,[chr11:49862022[A,7819.36,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:8.530e-03:0:0.00:0:0.00:652.94:30:36:0.00:0:...,.:0.039:0:0.00:0:0.00:979.42:37:47:0.00:0:0:16...,.:0.035:0:0.00:0:0.00:791.22:34:44:0.00:0:0:47...


In [6]:
# Drop the records located on chrM.
SV = SV.loc[SV['CHROM'].ne('chrM')]
SV

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,Donor0958_NT,Donor0958_OR7A10OE,Donor0958_OR7A10Stop
1,chr1,820916,gridss0fb_22o,C,C[chr1:820952[,1131.01,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.200:0:0.00:0:0.00:188.29:0:8:0.00:0:0:22.6...,.:0.087:0:0.00:0:0.00:94.29:0:4:0.00:0:0:46.06...,.:0.111:0:0.00:0:0.00:94.43:0:4:0.00:0:0:44.64...
3,chr1,820952,gridss0fb_22h,T,]chr1:820916]T,1131.01,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.174:0:0.00:0:0.00:188.29:0:8:0.00:0:0:21.8...,.:0.091:0:0.00:0:0.00:94.29:0:4:0.00:0:0:0.00:...,.:0.133:0:0.00:0:0.00:94.43:0:4:0.00:0:0:0.00:...
28,chr1,886224,gridss0fb_115o,T,TGCCCTTTGGCAGAGCAGGTGTGCTGTGCTG[chr1:886225[,5378.66,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.468:0:0.00:0:0.00:890.73:0:49:0.00:0:0:0.0...,.:0.450:0:0.00:0:0.00:693.21:0:44:0.00:0:0:0.0...,.:0.525:0:0.00:0:0.00:759.58:0:50:0.00:0:0:0.0...
30,chr1,886225,gridss0fb_115h,G,]chr1:886224]GCCCTTTGGCAGAGCAGGTGTGCTGTGCTGG,5378.66,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.468:0:0.00:0:0.00:780.98:0:49:0.00:0:0:201...,.:0.450:0:0.00:0:0.00:605.90:0:44:0.00:0:0:200...,.:0.525:0:0.00:0:0.00:827.29:0:50:0.00:0:0:256...
34,chr1,890451,gridss0fb_126o,A,AAAGAAAAAAAAAAG[chr1:890452[,2111.83,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=2;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.267:0:0.00:0:0.00:297.74:0:18:0.00:0:0:22....,.:0.183:0:0.00:0:0.00:207.48:0:14:0.00:0:0:28....,.:0.132:0:0.00:0:0.00:75.77:0:6:0.00:0:0:0.00:...
...,...,...,...,...,...,...,...,...,...,...,...,...
735181,chrX,155638355,gridss303fb_3300h,T,]chrX:155638308]T,2302.17,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.714:0:0.00:0:0.00:392.06:0:13:0.00:0:0:68....,.:0.842:0:0.00:0:0.00:371.32:0:17:0.00:0:0:85....,.:0.917:0:0.00:0:0.00:254.46:0:11:0.00:0:0:96....
735197,chrY,3844366,gridss297fb_340h,T,]chrX:90276290]T,1621.54,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=2...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.786:0:0.00:0:0.00:110.80:14:2:0.00:0:0:0.0...,.:0.750:0:0.00:0:0.00:183.90:21:1:0.00:0:0:0.0...,.:0.667:0:0.00:0:0.00:144.95:18:3:0.00:0:0:0.0...
735212,chrY,4980534,gridss42bb_3043h,T,[chr2:178210339[TTTTTTTTTT,1077.41,PASS,ANRP=3;ANRPQ=54.97;ANSR=0;ANSRQ=0.00;AS=1;ASC=...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:1.00:1:18.47:0:0.00:0.00:6:2:0.00:0:0:0.00:0...,.:1.00:1:18.39:0:0.00:0.00:6:4:0.00:0:0:0.00:0...,.:1.00:1:18.12:0:0.00:0.00:6:8:0.00:0:0:0.00:0...
735420,chrY,10954388,gridss304ff_122o,C,CACTGTTCCATTTTG]KI270736.1:62761],1254,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=4...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:1.00:0:0.00:0:0.00:125.00:0:13:0.00:0:0:0.00...,.:1.00:0:0.00:0:0.00:146.00:0:16:0.00:0:0:0.00...,.:1.00:0:0.00:0:0.00:126.00:0:12:0.00:0:0:0.00...


In [7]:
# Split each sample column on ':' into one dataframe column per field
# (columns are numbered 0, 1, 2, ... in field order).
NT, OE, Stop = (
    SV[sample_name].str.split(":", expand=True)
    for sample_name in ('Donor0958_NT', 'Donor0958_OR7A10OE', 'Donor0958_OR7A10Stop')
)
NT

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
1,.,0.200,0,0.00,0,0.00,188.29,0,8,0.00,...,188.29,564.86,188.29,16,15,0,0.00,0,0.00,4
3,.,0.174,0,0.00,0,0.00,188.29,0,8,0.00,...,188.29,564.86,188.29,19,15,0,0.00,0,0.00,4
28,.,0.468,0,0.00,0,0.00,890.73,0,49,0.00,...,359.57,2031.28,780.98,33,9,0,0.00,0,0.00,29
30,.,0.468,0,0.00,0,0.00,780.98,0,49,0.00,...,359.57,2031.28,890.73,33,9,0,0.00,0,0.00,29
34,.,0.267,0,0.00,0,0.00,297.74,0,18,0.00,...,330.74,937.84,309.35,33,12,0,0.00,0,0.00,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
735181,.,0.714,0,0.00,0,0.00,392.06,0,13,0.00,...,325.30,893.34,71.54,6,8,0,0.00,4,104.46,15
735197,.,0.786,0,0.00,0,0.00,110.80,14,2,0.00,...,0.00,469.95,192.96,3,0,9,166.19,0,0.00,11
735212,.,1.00,1,18.47,0,0.00,0.00,6,2,0.00,...,0.00,279.47,168.68,0,0,6,110.80,0,0.00,7
735420,.,1.00,0,0.00,0,0.00,125.00,0,13,0.00,...,0.00,396.00,146.00,0,0,0,0.00,6,125.00,7


In [10]:
# Keep the variants that filter() selects for each sample against the NT
# control: field 1 is non-zero in the sample (OE or Stop) and zero in NT.
# The surviving row labels are then used to pull the full records from SV.
oe_unique_index = filter(OE, NT).index
stop_unique_index = filter(Stop, NT).index

OE_filtered = SV.loc[oe_unique_index]
Stop_filtered = SV.loc[stop_unique_index]
OE_filtered

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,Donor0958_NT,Donor0958_OR7A10OE,Donor0958_OR7A10Stop
26084,chr1,88589300,gridss8fb_9022o,C,CTTTTGTTTTTGT[chr1:88589301[,2434.71,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:0.00:0:...,.:0.322:0:0.00:0:0.00:458.88:0:33:0.00:0:0:0.0...,.:0.283:0:0.00:0:0.00:398.80:0:22:0.00:0:0:0.0...
26085,chr1,88589301,gridss8fb_9022h,T,]chr1:88589300]TTTTGTTTTTGTT,2434.71,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:0.00:0:...,.:0.322:0:0.00:0:0.00:580.53:0:33:0.00:0:0:168...,.:0.283:0:0.00:0:0.00:309.27:0:22:0.00:0:0:22....
44681,chr1,188887089,gridss18fb_9871o,C,C[chr1:188887110[,1720.2,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=2...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:125.99:...,.:0.250:0:0.00:0:0.00:318.56:0:24:0.00:0:0:102...,.:0.184:0:0.00:0:0.00:162.49:0:12:0.00:0:0:143...
44682,chr1,188887110,gridss18fb_9871h,A,]chr1:188887089]A,1720.2,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=2;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:22.54:1...,.:0.280:0:0.00:0:0.00:428.22:0:24:0.00:0:0:90....,.:0.219:0:0.00:0:0.00:214.06:0:12:0.00:0:0:22....
59361,chr1,244621042,gridss24fb_4428o,A,A[chr1:244621055[,1649.98,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=2;ASC=7...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:0.00:0:...,.:0.500:0:0.00:0:0.00:298.56:0:18:0.00:0:0:0.0...,.:0.353:0:0.00:0:0.00:335.16:0:21:0.00:0:0:0.0...
59362,chr1,244621055,gridss24fb_4428h,T,]chr1:244621042]T,1649.98,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:0.00:0:...,.:0.500:0:0.00:0:0.00:223.10:0:18:0.00:0:0:0.0...,.:0.400:0:0.00:0:0.00:255.03:0:21:0.00:0:0:0.0...
107925,chr2,208176950,gridss45fb_7538o,T,TCTGTATACAG[chr2:208176951[,1246.85,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=3;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:24.12:1...,.:0.084:0:0.00:0:0.00:323.72:0:18:0.00:0:0:56....,.:0.073:0:0.00:0:0.00:246.35:0:12:0.00:0:0:24....
107927,chr2,208176951,gridss45fb_7538h,A,]chr2:208176950]CTGTATACAGA,1246.85,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=1;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:0.00:0:...,.:0.084:0:0.00:0:0.00:219.95:0:18:0.00:0:0:0.0...,.:0.073:0:0.00:0:0.00:130.52:0:12:0.00:0:0:0.0...
154715,chr3,164804622,gridss65fb_6285o,A,ATATATATATACATATATATATACG[chr3:164804623[,1258.51,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=2;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:0.00:0:...,.:0.139:0:0.00:0:0.00:318.94:0:20:0.00:0:0:0.0...,.:0.154:0:0.00:0:0.00:147.99:0:10:0.00:0:0:0.0...
154717,chr3,164804623,gridss65fb_6285h,T,]chr3:164804622]TATATATATACATATATATATACGT,1258.51,PASS,ANRP=0;ANRPQ=0.00;ANSR=0;ANSRQ=0.00;AS=2;ASC=1...,GT:AF:ANRP:ANRPQ:ANSR:ANSRQ:ASQ:ASRP:ASSR:BAQ:...,.:0.00:0:0.00:0:0.00:0.00:0:0:0.00:0:0:0.00:0:...,.:0.139:0:0.00:0:0.00:318.94:0:20:0.00:0:0:0.0...,.:0.154:0:0.00:0:0.00:200.20:0:10:0.00:0:0:21....


In [11]:
def write_vcf(path, header_lines, variants):
    '''
    Write a filtered variant table back out as a vcf file.

    input: path - string path of the file to create (overwritten if present)
           header_lines - list of '##' meta-information lines, each already
               ending in a newline (as returned by store_vcf_header)
           variants - dataframe of records as returned by read_vcf
    output: None; the file at path holds the meta lines followed by the
        tab-separated table
    '''
    # 'with' guarantees the handle is closed even if a write fails.
    with open(path, 'w') as out:
        out.writelines(header_lines)

    # read_vcf renamed '#CHROM' to 'CHROM'; a vcf column line must start with
    # '#CHROM', so restore the name before the table is appended.
    variants.rename(columns={'CHROM': '#CHROM'}).to_csv(
        path, mode='a', sep='\t', index=False
    )


write_vcf('Donor0958_OR7A10OE_Unique_SV1000.vcf', header, OE_filtered)
write_vcf('Donor0958_OR7A10Stop_Unique_SV1000.vcf', header, Stop_filtered)
