# SNP detection 
Split reads into subreads based on the 51bp reference

To detect the SNPs, the reads are spliced into subreads using the 51 bp reference sequences (the SNP plus 25 bp of flanking sequence on each side) and the barcoded sequence data. 
This works just like splicing based on primer sequences. 
The spliced subreads are then mapped against the reference and the SNP profile is determined. 

1) Loading reference and sequencing data

2) Using a fuzzy regex to splice the reads into subreads

3) BWA based mapping, SAMtools sorting and BCFtools profile generation

## Loading reference and sequencing data
TODO: read in the reference sequences instead of the primers.
FASTA or CSV (format still to be decided).

In [67]:
# Load primer sequences (comma separated: snp_name,forward_primer_seq,reverse_primer_seq)

# TODO: a reference of the 52 SNPs still has to be added here.

primerFile = '/home/senne/nanopore/SNP/primers2.csv'
# Also available at /home/senne/nanopore/Multiplex/Reference_sequences/SNP/SNPprimers.csv

# Maps locus name -> {'f': forward primer sequence, 'r': reverse primer sequence}
primerData = {}

with open(primerFile) as f:
    for l in f:
        l = l.strip()

        # Skip blank lines (e.g. a trailing newline at the end of the file), which
        # cannot be unpacked into three fields, and the column header line
        # (should start with a '#')
        if not l or l.startswith('#'):
            continue

        # Exactly three comma-separated fields are expected; anything else raises ValueError
        locus, fPrimer, rPrimer = l.split(',')
        primerData[locus] = {'f': fPrimer, 'r': rPrimer}

print('Loaded primers for {} loci'.format(len(primerData)))

Loaded primers for 52 loci


In [68]:
# Load sequencing data (ligated alleles)
#
# The file is read as strict two-line records: every odd line is taken as the
# sequence name and every even line as its sequence. The name line is stored
# verbatim (only surrounding whitespace is stripped). A name that occurs more
# than once keeps the sequence of its last occurrence.

readFile = '/home/senne/nanopore/Multiplex/results/barcode_NB08.fasta'

# Maps sequence name -> sequence
readData = {}

with open(readFile) as f:
    cnt = 0

    for cnt, l in enumerate(f, start=1):
        if cnt % 2 == 1:
            seqName = l.strip()
        else:
            seqData = l.strip()
            readData[seqName] = seqData

print('Loaded {} sequences'.format(len(readData)))


Loaded 9851 sequences


## Using a fuzzy regex to splice the reads into subreads

In [69]:
# Utility functions
#

def reverseComplement(seq):
    """Return the reverse complement of the nucleotide string *seq*.

    Upper- and lower-case A/C/G/T and the IUPAC ambiguity codes
    Y, R, W, S, K, M, D, V, H and B are complemented with their case
    preserved; any other character is copied through unchanged.
    """
    bases       = 'agctyrwskmdvhbAGCTYRWSKMDVHB'
    complements = 'tcgarywsmkhbdvTCGARYWSMKHBDV'
    complementOf = dict(zip(bases, complements))

    # Walk the sequence back to front, swapping each base for its complement
    return ''.join(complementOf.get(base, base) for base in reversed(seq))

In [70]:
# The number of possible orientations of the reference is limited:
# forward and reverse complement.
#
# TODO (adapt from here on):
#   - the naming has to change: these are references, not primers
#   - the extra reversed variants are no longer needed, only the plain
#     reference and its reverse complement
import regex

maxMisMatch  = 3    # maximum number of errors allowed in a fuzzy primer match
matchPosData = {}
ampliconSeq  = []   # list of (amplicon name, amplicon sequence) tuples
ampLenData   = {}   # locus -> {amplicon length: number of amplicons of that length}

# Accepted amplicon length range (both bounds are exclusive)
minAmpliconSize = 50
maxAmpliconSize = 150

for locus in sorted(primerData):
    # DEBUG
    #if len(ampLenData)==4: break

    # Init
    ampLenData[locus] = {}
    fwdSeq = primerData[locus]['f']
    revSeq = primerData[locus]['r']

    # Fuzzy regexes: each primer, and its reverse complement, with at most
    # maxMisMatch errors
    pFor   = regex.compile('(?e)({}){{e<={}}}'.format(fwdSeq, maxMisMatch))
    pForRc = regex.compile('(?e)({}){{e<={}}}'.format(reverseComplement(fwdSeq), maxMisMatch))
    pRev   = regex.compile('(?e)({}){{e<={}}}'.format(revSeq, maxMisMatch))
    pRevRc = regex.compile('(?e)({}){{e<={}}}'.format(reverseComplement(revSeq), maxMisMatch))

    # (pattern expected at the amplicon start, pattern expected at the amplicon end)
    # for both read orientations, with the matching orientation label
    allowedPairs      = [(pFor, pRevRc), (pRev, pForRc)]
    allowedPairSense  = ('sense', 'anti-sense')
    # NOTE: not used below; kept so the cell defines the same names as before
    allowedPairSeqLen = ((len(fwdSeq), len(revSeq)), (len(revSeq), len(fwdSeq)))
    readCnt           = 0

    # Print locus name to indicate progress
    print(locus)

    for seq in readData:
        readCnt += 1

        # DEBUG
        #if readCnt == 100: break

        for pair, sense in zip(allowedPairs, allowedPairSense):
            # Candidate amplicon starts (start of each first-pattern match) and
            # candidate amplicon ends (end of each second-pattern match);
            # the primers themselves are included in the amplicon
            posList = [
                [match.start() for match in pair[0].finditer(readData[seq])],
                [match.end() for match in pair[1].finditer(readData[seq])],
            ]

            # Both primers have to be present in the read
            if not (posList[0] and posList[1]):
                continue

            # Retain possible pairs based on amplicon size
            for p1 in posList[0]:
                for p2 in posList[1]:
                    if not (minAmpliconSize < (p2 - p1) < maxAmpliconSize):
                        continue

                    # We have a potential amplicon.
                    # The label gets its own variable: overwriting `sense` here
                    # would nest the labels of earlier amplicons of this read
                    # into every following one.
                    senseLabel = '{} - {} ({})'.format(p1, p2, sense)
                    ampSeq     = readData[seq][p1:p2]
                    ampLen     = len(ampSeq)
                    ampName    = '{} {} ({}): {} {}'.format(len(ampliconSeq) + 1, locus, ampLen, senseLabel, seq)
                    ampliconSeq.append((ampName, ampSeq))

                    # Keep amplicon lengths
                    ampLenData[locus][ampLen] = ampLenData[locus].get(ampLen, 0) + 1

                    # DEBUG
                    #print('{}, read:{}, amplen={}, {}'.format(locus, readCnt, ampLen, senseLabel))


# Save potential amplicons
outFile = '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC8-{}mism.fasta'.format(maxMisMatch)
with open(outFile, 'w') as f:
    for amplicon in ampliconSeq:
        f.write('>' + amplicon[0] + '\n')
        f.write(amplicon[1] + '\n')

print('Done')

rs1005533
rs1015250
rs1024116
rs1028528
rs1029047
rs1031825
rs10495407
rs1335873
rs1355366
rs1357617
rs1360288
rs1382387
rs1413212
rs1454361
rs1463729
rs1490413
rs1493232
rs1528460
rs1886510
rs1979255
rs2016276
rs2040411
rs2046361
rs2056277
rs2076848
rs2107612
rs2111980
rs251934
rs2830795
rs2831700
rs354439
rs717302
rs719366
rs722098
rs727811
rs729172
rs733164
rs735155
rs737681
rs740910
rs763869
rs8037429
rs826472
rs873196
rs876724
rs891700
rs901398
rs907100
rs914165
rs917118
rs938283
rs964681
Done


## Looking up via mapping
Isn't this duplicated effort? Running the regex first and only then doing the mapping seems to do the same work twice.

In [95]:
# Init
#
NBarcode = 2

snpFile   = '/home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta'  # REMOVE FIRST LINE IN ORIGINAL FILE (BREAKS FASTA FORMAT)
readFile  = '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC{}-3mism.fasta'.format(NBarcode)
#resultDir = '/home/senne/nanopore/Multiplex/results/SNP'
fastq_file_name= '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC{}-3mism.fasta'.format(NBarcode)
samfile = '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC{}.sam'.format(NBarcode)
bamfile = '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC{}.bam'.format(NBarcode)
bambaifile = '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC{}.bam.bai'.format(NBarcode)
vcffile = '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC{}.bam.vcf'.format(NBarcode)
csvfile = '/home/senne/nanopore/Multiplex/results/SNP/SNP_BC{}.csv'.format(NBarcode)

bwa       = '/opt/tools/bwa-0.7.15'            # v0.7.5
samtools  = '/opt/tools/samtools-1.3.1' # v1.3.1
bcftools  = '/opt/tools/bcftools-1.3.1' # v1.3.1

# Check
!ls {snpFile}
print('Number of SNPs in reference file:')
!grep -c ">" {snpFile}



/home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta
Number of SNPs in reference file:
52


In [87]:
# Read-length statistics of the sequences in fastq_file_name.
#
# Only lines that start with an upper-case A, C, G or T are treated as
# sequence lines; every other line (e.g. a '>' header line) is ignored.
hist_array = []
hist_arrayG = []
hist_arrayA = []
hist_arrayC = []
hist_arrayT = []

# First base of the line -> list collecting the lengths of those lines
lengthsByFirstBase = {
    b'A': hist_arrayA,
    b'C': hist_arrayC,
    b'G': hist_arrayG,
    b'T': hist_arrayT,
}

# A single pass over the file fills all four lists
with open(fastq_file_name, "rb") as infile:
    for line in infile:
        lengths = lengthsByFirstBase.get(line[:1])
        if lengths is None:
            continue
        # Strip only the line terminator, so a final line without a trailing
        # newline does not lose its last base
        read_length = len(line.rstrip(b'\r\n'))
        lengths.append(read_length)

# Combined list, grouped by first base in the order A, C, G, T
hist_array.extend(hist_arrayA)
hist_array.extend(hist_arrayC)
hist_array.extend(hist_arrayG)
hist_array.extend(hist_arrayT)

import numpy as np

# Displayed as the cell result: (count, mean, median, max, standard deviation)
len (hist_array), np.mean(hist_array), np.median(hist_array) , max(hist_array), np.std(hist_array)

(1642, 89.86358099878197, 87.0, 149, 17.053427581930865)

In [88]:
# Map reads to reference sequences
#
# Both steps run the binary configured in `bwa` through IPython's `!` shell
# syntax; the {name} placeholders are filled in from the Python variables.

# Build index of the references
!{bwa} index {snpFile}

# Map reads and redirect the SAM output to {samfile}
# NOTE(review): per the bwa manual, `-a` outputs all found alignments for
# single-end reads and flags the extra ones as secondary -- confirm this is wanted
!{bwa} mem -a {snpFile} {readFile} > {samfile}
print('done')

[bwa_index] Pack FASTA... 0.00 sec
[bwa_index] Construct BWT for the packed sequence...
[bwa_index] 0.00 seconds elapse.
[bwa_index] Update BWT... 0.00 sec
[bwa_index] Pack forward-only FASTA... 0.00 sec
[bwa_index] Construct SA from BWT and Occ... 0.00 sec
[main] Version: 0.7.15-r1140
[main] CMD: /opt/tools/bwa-0.7.15 index /home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta
[main] Real time: 0.016 sec; CPU: 0.016 sec
[M::bwa_idx_load_from_disk] read 0 ALT contigs
[M::process] read 1642 sequences (147556 bp)...
[M::mem_process_seqs] Processed 1642 reads in 0.124 CPU sec, 0.123 real sec
[main] Version: 0.7.15-r1140
[main] CMD: /opt/tools/bwa-0.7.15 mem -a /home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta /home/senne/nanopore/Multiplex/results/SNP/SNP_BC8-3mism.fasta
[main] Real time: 0.137 sec; CPU: 0.131 sec
done


In [89]:
### %%bash

#bwa-0.7.15 index /home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta
#bwa-0.7.15 mem -x ont2d /home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta /home/senne/nanopore/SNP/Nanopore_data/20161122-total-3mism.fasta > /home/senne/nanopore/SNP/results_review/20161122-total-ont2d.sam 



In [90]:
# Make sorted bam and index
#
# Step 1 converts {samfile} to BAM with `samtools view`, pipes it into
# `samtools sort` (the trailing '-' reads from the pipe) and writes {bamfile}.
# Step 2 indexes {bamfile} and writes the index to {bambaifile}.
!{samtools} view -Sbu {samfile} | {samtools} sort -o {bamfile} -
!{samtools} index {bamfile} {bambaifile}
print('Done')


Done


In [91]:
# Display some stats
#
# Runs `samtools flagstat` on the sorted BAM file and prints its summary
# (total and mapped read counts, among others).

!{samtools} flagstat {bamfile}
print('done')

1642 + 0 in total (QC-passed reads + QC-failed reads)
0 + 0 secondary
0 + 0 supplementary
0 + 0 duplicates
913 + 0 mapped (55.60% : N/A)
0 + 0 paired in sequencing
0 + 0 read1
0 + 0 read2
0 + 0 properly paired (N/A : N/A)
0 + 0 with itself and mate mapped
0 + 0 singletons (N/A : N/A)
0 + 0 with mate mapped to a different chr
0 + 0 with mate mapped to a different chr (mapQ>=5)
done


In [92]:
# Generate vcf file from bam file. Needs the reference and its index file 
#
# Note: the commands below are for samtools and bcftools v1.3.1 (will not work on v0.1.19!)
#
# `samtools mpileup` piles up {bamfile} against the reference {snpFile} and pipes
# the result into `bcftools call`, whose output is redirected to {vcffile}.
# `-V indels` makes bcftools skip indel sites.

# Reporting all positions
!{samtools} mpileup -d 100000 -uf {snpFile} {bamfile} | {bcftools} call -V indels -m - > {vcffile}
# Reporting variants only (excludes SNPs homozygous for reference allele)
# NOTE(review): this alternative still uses the older {resultDir}/test_sorted.bam
# naming; switch it to {bamfile} and {vcffile} before enabling it
#!{samtools} mpileup -d 100000 -uf {snpFile} {resultDir}/test_sorted.bam | {bcftools} call -V indels -mv - > {resultDir}/test_sorted.bam.vcf

print('Done')

Note: Neither --ploidy nor --ploidy-file given, assuming all sites are diploid
[mpileup] 1 samples in 1 input files
Done


In [93]:
# Check the vcf file
#
# Prints the first 100 lines of {vcffile} for a visual sanity check.

!head -n 100 {vcffile}

##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##samtoolsVersion=1.3.1+htslib-1.3.1
##samtoolsCommand=samtools mpileup -d 100000 -uf /home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta /home/senne/nanopore/Multiplex/results/SNP/SNP_BC8.bam
##reference=file:///home/senne/nanopore/SNP/known_SNP_sequence/SNP_sequence.fasta
##contig=<ID=rs1490413,length=51>
##contig=<ID=rs876724,length=51>
##contig=<ID=rs1357617,length=51>
##contig=<ID=rs2046361,length=51>
##contig=<ID=rs717302,length=51>
##contig=<ID=rs1029047,length=51>
##contig=<ID=rs917118,length=51>
##contig=<ID=rs763869,length=51>
##contig=<ID=rs1015250,length=51>
##contig=<ID=rs735155,length=51>
##contig=<ID=rs901398,length=51>
##contig=<ID=rs2107612,length=51>
##contig=<ID=rs1886510,length=51>
##contig=<ID=rs1454361,length=51>
##contig=<ID=rs2016276,length=51>
##contig=<ID=rs729172,length=51>
##contig=<ID=rs740910,length=51>
##contig=<ID=rs1493232,length=51>
##contig=<ID=r

In [96]:
# Get SNP profile
#
# Reads the VCF file, keeps the record at the SNP position of every reference
# sequence, and writes one row per SNP to the CSV file (and to the cell output)
# with the coverage, the observed alleles, their percentages and an estimated
# genotype.

snpData = {}

with open(vcffile) as f:
    for l in f:
        # Skip header/meta lines and blank lines
        if l.startswith('#') or not l.strip():
            continue

        # The eight fixed VCF columns, then FORMAT and exactly one sample column
        # (any other column count raises ValueError)
        snp, pos, snpId, ref, alt, qual, filt, info, fmt, sample = l.split()

        # Our SNP of interest is always at position 26 of the reference
        if int(pos) != 26:
            continue

        # INFO is a ';'-separated list of KEY=VALUE entries; flag entries
        # without a value are stored with an empty string
        par = {}
        for p in info.split(';'):
            key, _, value = p.partition('=')
            par[key] = value

        snpData[snp] = {'pos': pos, 'ref': ref, 'alt': alt, 'qual': qual, 'filter': filt, 'info': par}

# DEBUG
print('Got data for {} SNPs:'.format(len(snpData)))

# Save/print results
with open(csvfile, 'w') as f:
    # Table header
    f.write('snp, coverage, ref_allele, ref_percent, alt_allele, alt_percent, genotype\n')

    # Table data
    for s in sorted(snpData):
        record     = snpData[s]
        refAllele  = record['ref']
        altAllele  = record['alt']
        totalDepth = int(record['info']['DP'])
        # DP4 holds four counts: the first two are summed as reference depth,
        # the last two as alternate depth
        depthList  = [int(d) for d in record['info']['DP4'].split(',')]
        refDepth   = sum(depthList[0:2])
        altDepth   = sum(depthList[2:4])

        # Percentages are relative to DP, so they need not add up to 100.
        # A depth of 0 is reported as 0.0 % instead of dividing by zero.
        refPercent = 100*refDepth/totalDepth if totalDepth else 0.0
        altPercent = 100*altDepth/totalDepth if totalDepth else 0.0

        # Estimate the diploid genotype: when the minor allele is more than 10 times weaker than the major allele,
        # we should ignore it for a pure sample?
        if refDepth > altDepth and altDepth/refDepth < 0.1:
            genotype = refAllele + refAllele
        elif altDepth > refDepth and refDepth/altDepth < 0.1:
            genotype = altAllele + altAllele
        else:
            genotype = refAllele + altAllele

        if altAllele == '.':
            # Only 1 allele was observed
            f.write(','.join([s, str(totalDepth), refAllele, '{:.1f}'.format(refPercent), '', '', refAllele+refAllele]) + '\n')
            # DEBUG
            print('  {} ({})  {} ({:.1f} %)'.format(s, totalDepth, refAllele, refPercent))
        else:
            # Two alleles were observed
            f.write(','.join([s, str(totalDepth), refAllele, '{:.1f}'.format(refPercent), altAllele, '{:.1f}'.format(altPercent), genotype]) + '\n')
            # DEBUG
            print('  {} ({})  {} ({:.1f} %)  {} ({:.1f} %)'.format(s, totalDepth, refAllele, refPercent, altAllele, altPercent))


Got data for 48 SNPs:
  rs1005533 (1)  A (100.0 %)
  rs1015250 (1)  C (0.0 %)  G (100.0 %)
  rs1024116 (10)  A (40.0 %)  G (60.0 %)
  rs1029047 (1)  A (100.0 %)
  rs1031825 (4)  A (75.0 %)  C (25.0 %)
  rs10495407 (82)  A (45.1 %)  G (54.9 %)
  rs1335873 (5)  A (40.0 %)  T (60.0 %)
  rs1355366 (12)  A (58.3 %)  G (41.7 %)
  rs1357617 (5)  A (100.0 %)
  rs1360288 (10)  C (70.0 %)  T (30.0 %)
  rs1382387 (4)  G (100.0 %)
  rs1413212 (25)  A (40.0 %)  G (60.0 %)
  rs1454361 (14)  A (57.1 %)  T (42.9 %)
  rs1463729 (3)  A (33.3 %)  G (66.7 %)
  rs1490413 (747)  A (94.2 %)
  rs1493232 (109)  A (98.2 %)
  rs1528460 (665)  C (2.6 %)  T (97.4 %)
  rs1886510 (22)  C (18.2 %)  T (81.8 %)
  rs1979255 (5)  C (40.0 %)  G (60.0 %)
  rs2016276 (2055)  A (95.5 %)
  rs2040411 (9)  A (66.7 %)  G (33.3 %)
  rs2046361 (3)  A (66.7 %)  T (33.3 %)
  rs2056277 (3)  C (100.0 %)
  rs2076848 (30)  A (46.7 %)  T (53.3 %)
  rs2107612 (4)  A (100.0 %)
  rs2111980 (17)  A (64.7 %)  G (35.3 %)
  rs251934 (34)  C (17

## Locus count 

In [None]:
%%bash

# Count, for every SNP locus, the number of lines in the spliced-read FASTA
# file that contain the locus name. One count is printed per locus, in the
# order listed below.

fasta=/home/senne/nanopore/SNP/Nanopore_data/20161122-total-3mism.fasta

loci=(
  rs2056277 rs1413212 rs2107612 rs2111980 rs251934 rs1028528 rs2831700
  rs901398 rs722098 rs2076848 rs1493232 rs735155 rs1528460 rs1005533
  rs733164 rs1029047 rs727811 rs1024116 rs1015250 rs907100 rs737681
  rs717302 rs740910 rs1979255 rs719366 rs2040411 rs2016276 rs1357617
  rs8037429 rs1335873 rs876724 rs354439 rs1031825 rs873196 rs1463729
  rs1886510 rs763869 rs2830795 rs1454361 rs1355366 rs938283 rs1490413
  rs964681 rs826472 rs729172 rs1382387 rs10495407 rs891700 rs917118
  rs914165 rs2046361 rs1360288
)

for locus in "${loci[@]}"; do
  grep -c "$locus" "$fasta"
done
