# GBS data processing
------
#### from reads to markers to genetic maps using LM3

### 1. quality filter (QC) of raw reads   
         fastq-mcf -q 30 -l 50
### 2. align QC reads to genome   
          bowtie2 --very-sensitive --no-unal --rg-id F7_RIL103 --rg SM:blubb --rg LB:blubb --rg PI:blubby --rg PL:ILLUMINA
### 3. prepare fofn's for posterior calls and LM3
         bam_list.txt: space delimited fofn to bam-files
         example: 
         ../F7_plate1.95.b2.peex113.s.bam ../F7_plate1.94.b2.peex113.s.bam ../F7_plate1.1.b2.peex113.s.bam ../F7_plate1.10.b2.peex113.s.bam ../F7_plate1.11.b2.peex113.s.bam 
         
         mapping.txt: space delimited file of sample names in same order as bam_list.txt
         example: 
         axillaris axillaris exserta 1 2 3 4                   

        3.1: create posterior basecalls
            $samtools mpileup -q 10 -Q 10 -s $(cat bam_list.txt) |awk -f LEP-MAP3/scripts/pileupParser2.awk |awk -f LEP-MAP3/scripts/pileup2posterior.awk |head -1000 |gzip > tt.post.gz             

        pedigree.txt: tab-delimited file of relation between individuals
        resemble F7 as F2: 
        
        #family  indID   mother   father   sex(1=female;2=male)  phenotype                       
        F2      exserta 0       0       1       0   
        F2      axillaris       0       0       2       0                                        
        F2      P1      exserta axillaris       1       0                                        
        F2      P2      exserta axillaris       2       0                                        
        F2      1       P1      P2      0       0   
        F2      2       P1      P2      0       0   
        F2      3       P1      P2      0       0   
        F2      4       P1      P2      0       0   


        3.2: transform pedigree.txt
            $LEP-MAP3/scripts/transpose_tab Michel.F4.ped |awk '{print "CHR\tPOS\t"$0}' > Michel.F4.tped 
               
        3.3: create posterior calls with pedigree and posterior calls       
            $zcat tt.post.gz |java -cp LEP-MAP3/bin/ ParentCall2 data=Michel.F2.tped posteriorFile=- removeNonInformative=1  > tt.postcall                  
### 4. filter and transform postcalls to asmap input: 



In [1]:
import pandas as pd
pd.get_option("display.max_rows")
pd.set_option("display.max_rows",10)
pd.set_option("display.max_columns",40)
test_K = pd.read_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/F7_total.PD.postcall", sep = "\t")
test_K.columns

#filter out dummy parents
cols = test_K.columns.str.contains("RIL|axillaris|exserta|POS|CHR") 
cols
cols = test_K.columns[cols]
display(cols)
test_Kril = test_K[cols].copy()

for i in cols[2:]: 
    #print(i)
    test_Kril[i] = test_K[i].apply(lambda x: x.split(' '))
test_Kril.columns

Index(['CHR', 'POS', 'exserta', 'axillaris', 'RIL_196', 'RIL_197', 'RIL_198',
       'RIL_199', 'RIL_200', 'RIL_201',
       ...
       'RIL_187', 'RIL_105', 'RIL_188', 'RIL_189', 'RIL_190', 'RIL_191',
       'RIL_192', 'RIL_194', 'RIL_195', 'RIL_106'],
      dtype='object', length=199)

Index(['CHR', 'POS', 'exserta', 'axillaris', 'RIL_196', 'RIL_197', 'RIL_198',
       'RIL_199', 'RIL_200', 'RIL_201',
       ...
       'RIL_187', 'RIL_105', 'RIL_188', 'RIL_189', 'RIL_190', 'RIL_191',
       'RIL_192', 'RIL_194', 'RIL_195', 'RIL_106'],
      dtype='object', length=199)

In [6]:
test_Kril.loc[1,["axillaris", "exserta" ]]
test_Kril.POS = test_Kril.POS.apply(str)

test_Kril['marker'] = test_Kril[['CHR', 'POS']].apply(lambda x: '_'.join(x), axis=1)
display(test_Kril)

Unnamed: 0,CHR,POS,exserta,axillaris,RIL_196,RIL_197,RIL_198,RIL_199,RIL_200,RIL_201,RIL_202,RIL_203,RIL_16,RIL_18,RIL_19,RIL_1,RIL_20,RIL_21,RIL_22,RIL_23,...,RIL_178,RIL_179,RIL_180,RIL_181,RIL_182,RIL_183,RIL_184,RIL_185,RIL_186,RIL_187,RIL_105,RIL_188,RIL_189,RIL_190,RIL_191,RIL_192,RIL_194,RIL_195,RIL_106,marker
0,Peex113Ctg00004,119009,"[6.59054E-6, 1.98058E-13, 1.0, 1.98058E-13, 0,...","[0, 0, 7.61952E-9, 0, 0, 7.61952E-9, 0, 1.0, 7...","[1.0, 0, 1.226418E-4, 0, 0, 0, 0, 0, 0, 0]","[4.60461E-25, 0, 0.01566154, 0, 0, 0, 0, 1.0, ...","[0.9996661115187527, 0, 1.0, 0, 0, 0, 0, 3.335...","[1.0, 0, 0.125167, 0, 0, 0, 0, 1.23952E-14, 0, 0]","[1.0, 0, 0.0626044, 0, 0, 0, 0, 4.13586E-18, 0...","[1.0, 0, 0.1268572, 0, 0, 0, 0, 5.14221E-13, 0...","[1.0, 0, 0.0626044, 0, 0, 0, 0, 4.13586E-18, 0...","[1.0, 0, 9.80152E-4, 0, 0, 0, 0, 5.7075E-39, 0...","[1.0, 0, 0.001959652, 0, 0, 0, 0, 1.71054E-35,...","[1.0, 0, 0.500334, 0, 0, 0, 0, 1.11334E-7, 0, 0]","[0, 0, 2.35492E-10, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 5.72166E-17, 0, 0, 0, 0, 1.0, 0, 0]","[5.12648E-32, 0, 0.003918, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 0.01566154, 0, 0, 0, 0, 4.60461E-25, ...","[1.0, 0, 0.003918, 0, 0, 0, 0, 5.12648E-32, 0, 0]","[1.0, 0, 6.13414E-5, 0, 0, 0, 0, 0, 0, 0]",...,"[1.0, 0, 0.003918, 0, 0, 0, 0, 5.12648E-32, 0, 0]","[1.0, 0, 0.0313126, 0, 0, 0, 0, 1.38E-21, 0, 0]","[1.0, 0, 0.0626044, 0, 0, 0, 0, 4.13586E-18, 0...","[1.0, 0, 1.920796E-6, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 9.6038E-7, 0, 0, 0, 0, 0, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[0, 0, 4.75808E-10, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 7.67534E-6, 0, 0, 0, 0, 1.0, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[1.0, 0, 1.920118E-6, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 4.9024E-4, 0, 0, 0, 0, 1.9044E-42, 0, 0]","[1.0, 0, 1.550778E-5, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 9.60718E-7, 0, 0, 0, 0, 1.0, 0, 0]","[1.23952E-14, 0, 0.125167, 0, 0, 0, 0, 1.0, 0, 0]","[1.71054E-35, 0, 0.001959652, 0, 0, 0, 0, 1.0,...","[1.0, 0, 0.0626044, 0, 0, 0, 0, 4.13586E-18, 0...","[1.0, 0, 0.01566154, 0, 0, 0, 0, 4.60461E-25, ...","[1.0, 0, 0.125167, 0, 0, 0, 0, 1.23952E-14, 0, 0]",Peex113Ctg00004_119009
1,Peex113Ctg00043,151704,"[0, 0, 1.1566E-13, 0, 0, 0.583969, 0, 1.0, 1.1...","[0, 9.73024E-7, 0, 0, 1.0, 9.73024E-7, 9.73024...","[0, 0, 0, 0, 0, 7.67806E-6, 0, 1.0, 0, 0]","[0, 0, 0, 0, 1.0, 0.25025, 0, 3.71484E-11, 0, 0]","[0, 0, 0, 0, 0.9996661115187527, 1.0, 0, 3.335...","[0, 0, 0, 0, 1.17598E-38, 9.805E-4, 0, 1.0, 0, 0]","[0, 0, 0, 0, 1.0, 0.01566154, 0, 4.60461E-25, ...","[0, 0, 0, 0, 1.0, 0.25025, 0, 3.71484E-11, 0, 0]","[0, 0, 0, 0, 1.0, 0.003918, 0, 5.12648E-32, 0, 0]","[0, 0, 0, 0, 1.38E-21, 0.0313126, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0.0213049, 1.0, 0, 8.8114E-20, 0, 0]","[0, 0, 0, 0, 1.0, 1.534558E-5, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 1.506844E-8, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 3.0681E-5, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 2.45454E-4, 0, 0, 0, 0]","[0, 0, 0, 0, 5.7075E-39, 9.80152E-4, 0, 1.0, 0...","[0, 0, 0, 0, 1.71054E-35, 0.001959652, 0, 1.0,...","[0, 0, 0, 0, 0.5, 1.0, 0, 0.5, 0, 0]",...,"[0, 0, 0, 0, 1.0, 0.01566154, 0, 4.60461E-25, ...","[0, 0, 0, 0, 1.0, 0.125167, 0, 1.23952E-14, 0, 0]","[0, 0, 0, 0, 1.0, 0.0316436, 0, 4.51093E-20, 0...","[0, 0, 0, 0, 1.0, 1.920796E-6, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 4.81216E-7, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0.0313126, 0, 1.38E-21, 0, 0]","[0, 0, 0, 0, 1.0, 0.500334, 0, 1.11334E-7, 0, 0]","[0, 0, 0, 0, 1.0, 9.41338E-10, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0.00791898, 0, 1.03478E-26, ...","[0, 0, 0, 0, 5.02218E-27, 0.00791618, 0, 1.0, ...","[0, 0, 0, 0, 1.0, 0.0313126, 0, 1.38E-21, 0, 0]","[0, 0, 0, 0, 1.0, 7.8441E-6, 0, 0, 0, 0]","[0, 0, 0, 0, 1.23952E-14, 0.125167, 0, 1.0, 0, 0]","[0, 0, 0, 0, 4.13586E-18, 0.0626044, 0, 1.0, 0...","[0, 0, 0, 0, 5.7075E-39, 9.80152E-4, 0, 1.0, 0...","[0, 0, 0, 0, 1.0, 0.01566154, 0, 4.60461E-25, ...","[0, 0, 0, 0, 1.0, 2.45376E-4, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0.01566154, 0, 4.60461E-25, ...","[0, 0, 0, 0, 1.0, 1.534558E-5, 0, 0, 0, 0]",Peex113Ctg00043_151704
2,Peex113Ctg00050,84548,"[0, 0, 1.38841E-20, 0, 0, 1.38841E-20, 0, 1.0,...","[0, 0, 0, 1.3864E-20, 0, 0, 1.3864E-20, 0, 1.3...","[0, 0, 0, 0, 0, 0, 0, 1.0, 2.45288E-4, 0]","[0, 0, 0, 0, 0, 0, 0, 1.38E-21, 0.0313126, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0.0014978931602596106, 1...","[0, 0, 0, 0, 0, 0, 0, 7.90326E-24, 0.01569438,...","[0, 0, 0, 0, 0, 0, 0, 5.64953E-13, 0.127028, 1.0]","[0, 0, 0, 0, 0, 0, 0, 3.3355559243212763E-4, 1...","[0, 0, 0, 0, 0, 0, 0, 0, 2.45202E-4, 1.0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0.00391938, 1.05627...","[0, 0, 0, 0, 0, 0, 0, 1.38E-21, 0.0313126, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 9.60718E-7, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 1.194168E-10, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 1.215682E-7, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 4.8221E-7, 1.0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 2.45992E-4, 7.33714...","[0, 0, 0, 0, 0, 0, 0, 1.0, 0.0039594, 1.67574E...","[0, 0, 0, 0, 0, 0, 0, 0.5, 1.0, 0.5]",...,"[0, 0, 0, 0, 0, 0, 0, 2.56605E-38, 9.81296E-4,...","[0, 0, 0, 0, 0, 0, 0, 0, 2.45202E-4, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 3.0188E-8, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 7.68598E-6, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 9.63484E-7, 1.0]","[0, 0, 0, 0, 0, 0, 0, 1.05627E-31, 0.00391938,...","[0, 0, 0, 0, 0, 0, 0, 0.5, 1.0, 0.5]","[0, 0, 0, 0, 0, 0, 0, 0, 4.7163E-10, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 9.64478E-7, 1.0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0.003918, 5.12648E-32]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0.0313236, 2.84338E...","[0, 0, 0, 0, 0, 0, 0, 4.13586E-18, 0.0626044, ...","[0, 0, 0, 0, 0, 0, 0, 0.9892116576615432, 1.0,...","[0, 0, 0, 0, 0, 0, 0, 1.0, 9.82268E-4, 7.53419...","[0, 0, 0, 0, 0, 0, 0, 1.0, 6.14738E-5, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 1.22863E-4, 1.0]","[0, 0, 0, 0, 0, 0, 0, 3.71484E-11, 0.25025, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 2.39252E-10, 1.0]","[0, 0, 0, 0, 0, 0, 0, 3.71484E-11, 0.25025, 1.0]",Peex113Ctg00050_84548
3,Peex113Ctg00050,84557,"[0, 0, 1.3904E-20, 0, 0, 1.3904E-20, 0, 1.0, 1...","[1.0, 1.20089E-19, 1.20089E-19, 1.20089E-19, 0...","[5.12648E-32, 0, 0.003918, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 0.0626044, 0, 0, 0, 0, 4.13586E-18, 0...","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[1.0, 0, 0.250342, 0, 0, 0, 0, 7.79136E-11, 0, 0]","[1.0, 0, 0.127028, 0, 0, 0, 0, 5.64953E-13, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[1.0, 0, 2.48512E-4, 0, 0, 0, 0, 0, 0, 0]","[4.60461E-25, 0, 0.01566154, 0, 0, 0, 0, 1.0, ...","[1.0, 0, 0.0313126, 0, 0, 0, 0, 1.38E-21, 0, 0]","[1.0, 0, 1.920118E-6, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 2.36086E-10, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 1.20254E-7, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 4.81714E-7, 0, 0, 0, 0, 0, 0, 0]","[1.06723E-40, 0, 4.91646E-4, 0, 0, 0, 0, 1.0, ...","[1.05627E-31, 0, 0.00391938, 0, 0, 0, 0, 1.0, ...","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]",...,"[1.0, 0, 0.001959652, 0, 0, 0, 0, 1.71054E-35,...","[1.0, 0, 2.45288E-4, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 6.04864E-8, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 3.93642E-6, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 1.92694E-6, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 0.00391938, 0, 0, 0, 0, 1.05627E-31, ...","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[1.0, 0, 4.72174E-10, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 1.92947E-6, 0, 0, 0, 0, 0, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[2.84338E-21, 0, 0.0313236, 0, 0, 0, 0, 1.0, 0...","[1.0, 0, 0.0626044, 0, 0, 0, 0, 4.13586E-18, 0...","[6.870193307004782E-4, 0, 1.0, 0, 0, 0, 0, 0.9...","[7.53419E-38, 0, 9.82268E-4, 0, 0, 0, 0, 1.0, ...","[0, 0, 3.06918E-5, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 1.22908E-4, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 0.250338, 0, 0, 0, 0, 7.65411E-11, 0, 0]","[1.0, 0, 2.41356E-10, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 0.25025, 0, 0, 0, 0, 3.71484E-11, 0, 0]",Peex113Ctg00050_84557
4,Peex113Ctg00050,84666,"[0, 4.52844E-16, 0, 0, 1.0, 4.52844E-16, 4.528...","[0, 0, 0, 4.67566E-13, 0, 0, 4.67566E-13, 0, 4...","[0, 0, 0, 0, 1.0, 0, 0.01566154, 0, 0, 4.60461...","[0, 0, 0, 0, 1.23952E-14, 0, 0.125167, 0, 0, 1.0]","[0, 0, 0, 0, 0.5, 0, 1.0, 0, 0, 0.5]","[0, 0, 0, 0, 1.53641E-28, 0, 0.00783338, 0, 0,...","[0, 0, 0, 0, 4.13586E-18, 0, 0.0626044, 0, 0, ...","[0, 0, 0, 0, 0.5, 0, 1.0, 0, 0, 0.5]","[0, 0, 0, 0, 1.71054E-35, 0, 0.001959652, 0, 0...","[0, 0, 0, 0, 1.0, 0, 0.01587302, 0, 0, 1.91025...","[0, 0, 0, 0, 1.71054E-35, 0, 0.001959652, 0, 0...","[0, 0, 0, 0, 1.53641E-28, 0, 0.00783338, 0, 0,...","[0, 0, 0, 0, 0, 0, 4.43058E-19, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 1.50413E-8, 0, 0, 1.0]","[0, 0, 0, 0, 1.23952E-14, 0, 0.125167, 0, 0, 1.0]","[0, 0, 0, 0, 1.0, 0, 0.01566154, 0, 0, 4.60461...","[0, 0, 0, 0, 1.0, 0, 0.0626044, 0, 0, 4.13586E...","[0, 0, 0, 0, 1.23952E-14, 0, 0.125167, 0, 0, 1.0]",...,"[0, 0, 0, 0, 0, 0, 1.226418E-4, 0, 0, 1.0]","[0, 0, 0, 0, 1.38E-21, 0, 0.0313126, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 4.8035E-7, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 3.84032E-6, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 7.67806E-6, 0, 0, 1.0]","[0, 0, 0, 0, 1.23952E-14, 0, 0.125167, 0, 0, 1.0]","[0, 0, 0, 0, 0.5, 0, 1.0, 0, 0, 0.5]","[0, 0, 0, 0, 1.17598E-38, 0, 9.805E-4, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 3.00938E-8, 0, 0, 1.0]","[0, 0, 0, 0, 0.5, 0, 1.0, 0, 0, 0.5]","[0, 0, 0, 0, 1.0, 0, 3.07362E-5, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 6.13414E-5, 0, 0, 1.0]","[0, 0, 0, 0, 1.0, 0, 2.45202E-4, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0.0313126, 0, 0, 1.38E-21]","[0, 0, 0, 0, 1.0, 0, 6.13414E-5, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 6.01252E-8, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 1.226418E-4, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 6.1363E-5, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 2.45202E-4, 0, 0, 1.0]",Peex113Ctg00050_84666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1938,Peex113Ctg04615,80242,"[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 1.418974E-19, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 1.948874E-42, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 2.80622E-22, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 4.27202E-36, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 8.17646E-36, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 8.28884E-33, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 3.54336E-24, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 8.58022E-37, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 1.24572E-25, 0, 0, 0, 0, 1.0, 0, 0]",...,"[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 7.71462E-15, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 4.94866E-13, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 6.46034E-36, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 2.47518E-25, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]",Peex113Ctg04615_80242
1939,Peex113Ctg04638,55082,"[0, 0, 1.73535E-21, 0, 0, 1.73535E-21, 0, 1.0,...","[1.0, 3.09949E-5, 3.09949E-5, 3.09949E-5, 0, 0...","[5.7075E-39, 0, 9.80152E-4, 0, 0, 0, 0, 1.0, 0...","[1.11334E-7, 0, 0.500334, 0, 0, 0, 0, 1.0, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[1.0, 0, 0.001960344, 0, 0, 0, 0, 3.52442E-35,...","[4.60461E-25, 0, 0.01566154, 0, 0, 0, 0, 1.0, ...","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[2.12675E-30, 0, 0.0039709, 0, 0, 0, 0, 1.0, 0...","[0, 0, 2.45288E-4, 0, 0, 0, 0, 1.0, 0, 0]","[5.12648E-32, 0, 0.003918, 0, 0, 0, 0, 1.0, 0, 0]","[3.52442E-35, 0, 0.001960344, 0, 0, 0, 0, 1.0,...","[0, 0, 3.4007E-24, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 1.414922E-17, 0, 0, 0, 0, 0, 0, 0]","[5.12648E-32, 0, 0.003918, 0, 0, 0, 0, 1.0, 0, 0]","[1.0, 0, 0.0313126, 0, 0, 0, 0, 1.38E-21, 0, 0]","[0, 0, 1.920118E-6, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 6.1452E-5, 0, 0, 0, 0, 1.0, 0, 0]",...,"[9.48741E-25, 0, 0.01566708, 0, 0, 0, 0, 1.0, ...","[1.0, 0, 0.00391938, 0, 0, 0, 0, 1.05627E-31, ...","[3.92386E-42, 0, 4.90414E-4, 0, 0, 0, 0, 1.0, ...","[1.0, 0, 6.13812E-8, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 9.41338E-10, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 6.13848E-5, 0, 0, 0, 0, 1.0, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[0, 0, 2.40256E-7, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 2.40256E-7, 0, 0, 0, 0, 1.0, 0, 0]","[0.5, 0, 1.0, 0, 0, 0, 0, 0.5, 0, 0]","[0, 0, 3.0681E-5, 0, 0, 0, 0, 1.0, 0, 0]","[1.38E-21, 0, 0.0313126, 0, 0, 0, 0, 1.0, 0, 0]","[1.38E-21, 0, 0.0313126, 0, 0, 0, 0, 1.0, 0, 0]","[3.3355559243212763E-4, 0, 1.0, 0, 0, 0, 0, 0....","[1.0, 0, 4.9024E-4, 0, 0, 0, 0, 1.9044E-42, 0, 0]","[0, 0, 2.45202E-4, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 1.229064E-4, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 2.88714E-14, 0, 0, 0, 0, 1.0, 0, 0]","[4.60461E-25, 0, 0.01566154, 0, 0, 0, 0, 1.0, ...",Peex113Ctg04638_55082
1940,Peex113Ctg04867,106970,"[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 3.62064E-38, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]",...,"[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 5.5984E-20, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 6.66156E-30, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]",Peex113Ctg04867_106970
1941,Peex113Ctg00483,38002,"[0, 0, 0, 0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 6.43918E-33, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 2.4062E-7, 0, 1.0, 0, 0]","[0, 0, 0, 0, 1.0, 0.25025, 0, 3.71484E-11, 0, 0]","[0, 0, 0, 0, 0, 1.134472E-16, 0, 1.0, 0, 0]","[0, 0, 0, 0, 0, 1.0, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0.001960344, 0, 3.52442E-35,...","[0, 0, 0, 0, 1.0, 1.440224E-17, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 7.32542E-15, 0, 1.0, 0, 0]","[0, 0, 0, 0, 1.0, 7.31476E-15, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 3.50214E-21, 0, 1.0, 0, 0]","[0, 0, 0, 0, 1.0, 2.1935E-22, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 1.407082E-20, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 4.66534E-13, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 9.82268E-4, 0, 7.53419E-38, ...","[0, 0, 0, 0, 1.0, 5.77644E-14, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 3.76684E-9, 0, 0, 0, 0]",...,"[0, 0, 0, 0, 1.0, 2.3132E-13, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 4.525E-16, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 2.67536E-26, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 1.698538E-33, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 1.436562E-17, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 1.183356E-10, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0.125167, 0, 1.23952E-14, 0, 0]","[0, 0, 0, 0, 1.0, 8.45998E-28, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 1.639632E-36, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 1.201676E-7, 0, 1.0, 0, 0]","[0, 0, 0, 0, 1.0, 4.70992E-10, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 8.80602E-22, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 7.23546E-15, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 2.98416E-11, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 8.49334E-28, 0, 1.0, 0, 0]","[0, 0, 0, 0, 1.0, 9.00004E-19, 0, 0, 0, 0]","[0, 0, 0, 0, 1.0, 7.52582E-9, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 2.72236E-26, 0, 1.0, 0, 0]","[0, 0, 0, 0, 9.0003E-40, 1.0, 0, 0, 0, 0]",Peex113Ctg00483_38002


In [21]:
mapping = [ "AA", "AC", "AG","AT", "CC", "CG", "CT", "GG", "GT" ,"TT"]
print(cols)
print(test_Kril.shape)
tt = test_Kril[cols[2:]].copy()
tt1 = test_Kril.copy()
print(tt.shape)
c = 0
for key, value in tt.iteritems(): 
    print(key)
    for t, i in enumerate(value): 
        c += 1
        #print(key, i)
        if i.count('1.0') > 1: 
            g = "NA"
        else: 
            g = mapping[i.index('1.0')]
        tt1[key].iloc[t] = g

Index(['CHR', 'POS', 'exserta', 'axillaris', 'RIL_196', 'RIL_197', 'RIL_198',
       'RIL_199', 'RIL_200', 'RIL_201',
       ...
       'RIL_187', 'RIL_105', 'RIL_188', 'RIL_189', 'RIL_190', 'RIL_191',
       'RIL_192', 'RIL_194', 'RIL_195', 'RIL_106'],
      dtype='object', length=199)
(1943, 200)
(1943, 197)
exserta
axillaris
RIL_196
RIL_197
RIL_198
RIL_199
RIL_200
RIL_201
RIL_202
RIL_203
RIL_16
RIL_18
RIL_19
RIL_1
RIL_20
RIL_21
RIL_22
RIL_23
RIL_24
RIL_25
RIL_26
RIL_27
RIL_28
RIL_29
RIL_2
RIL_30
RIL_31
RIL_32
RIL_33
RIL_34
RIL_35
RIL_36
RIL_37
RIL_38
RIL_39
RIL_3
RIL_41
RIL_42
RIL_43
RIL_44
RIL_100
RIL_45
RIL_46
RIL_47
RIL_48
RIL_4
RIL_50
RIL_51
RIL_52
RIL_53
RIL_54
RIL_10
RIL_55
RIL_56
RIL_57
RIL_58
RIL_59
RIL_61
RIL_62
RIL_63
RIL_64
RIL_65
RIL_11
RIL_66
RIL_67
RIL_68
RIL_69
RIL_6
RIL_70
RIL_71
RIL_72
RIL_73
RIL_74
RIL_12
RIL_75
RIL_76
RIL_77
RIL_78
RIL_79
RIL_7
RIL_80
RIL_81
RIL_82
RIL_83
RIL_13
RIL_84
RIL_85
RIL_86
RIL_87
RIL_88
RIL_8
RIL_90
RIL_91
RIL_92
RIL_93
RIL_14
RIL_94
RIL

In [22]:
display(tt1)

Unnamed: 0,CHR,POS,exserta,axillaris,RIL_196,RIL_197,RIL_198,RIL_199,RIL_200,RIL_201,RIL_202,RIL_203,RIL_16,RIL_18,RIL_19,RIL_1,RIL_20,RIL_21,RIL_22,RIL_23,...,RIL_178,RIL_179,RIL_180,RIL_181,RIL_182,RIL_183,RIL_184,RIL_185,RIL_186,RIL_187,RIL_105,RIL_188,RIL_189,RIL_190,RIL_191,RIL_192,RIL_194,RIL_195,RIL_106,marker
0,Peex113Ctg00004,119009,AG,GG,AA,GG,AG,AA,AA,AA,AA,AA,AA,AA,GG,GG,GG,AA,AA,AA,...,AA,AA,AA,AA,AA,AG,AG,GG,GG,AG,AA,AA,AA,GG,GG,GG,AA,AA,AA,Peex113Ctg00004_119009
1,Peex113Ctg00043,151704,GG,CC,GG,CC,CG,GG,CC,CC,CC,GG,CG,CC,CC,CC,CC,GG,GG,CG,...,CC,CC,CC,CC,CC,CC,CC,CC,CC,GG,CC,CC,GG,GG,GG,CC,CC,CC,CC,Peex113Ctg00043_151704
2,Peex113Ctg00050,84548,GG,TT,GG,TT,GT,TT,TT,GT,TT,GG,TT,TT,TT,TT,TT,GG,GG,GT,...,TT,TT,TT,TT,TT,TT,GT,TT,TT,GG,GG,TT,GT,GG,GG,TT,TT,TT,TT,Peex113Ctg00050_84548
3,Peex113Ctg00050,84557,GG,AA,GG,AA,AG,AA,AA,AG,AA,GG,AA,AA,AA,AA,AA,GG,GG,AG,...,AA,AA,AA,AA,AA,AA,AG,AA,AA,AG,GG,AA,AG,GG,GG,AA,AA,AA,AA,Peex113Ctg00050_84557
4,Peex113Ctg00050,84666,CC,TT,CC,TT,CT,TT,TT,CT,TT,CC,TT,TT,TT,TT,TT,CC,CC,TT,...,TT,TT,TT,TT,TT,TT,CT,TT,TT,CT,CC,TT,CC,CC,CC,TT,TT,TT,TT,Peex113Ctg00050_84666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1938,Peex113Ctg04615,80242,AA,GG,AA,AG,GG,GG,GG,GG,GG,AA,GG,GG,GG,GG,GG,AA,AA,GG,...,GG,AA,GG,AA,GG,GG,GG,GG,GG,AA,AA,GG,AA,AA,AA,GG,GG,AA,GG,Peex113Ctg04615_80242
1939,Peex113Ctg04638,55082,GG,AA,GG,GG,AG,AA,GG,AG,GG,GG,GG,GG,GG,AA,GG,AA,GG,GG,...,GG,AA,GG,AA,GG,GG,AG,GG,GG,AG,GG,GG,GG,AG,AA,GG,GG,GG,GG,Peex113Ctg04638_55082
1940,Peex113Ctg04867,106970,CC,AA,CC,CC,CC,AA,CC,CC,CC,CC,CC,CC,CC,AA,CC,AA,CC,CC,...,CC,AA,CC,AA,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,AA,CC,CC,CC,CC,Peex113Ctg04867_106970
1941,Peex113Ctg00483,38002,CC,GG,GG,GG,CC,GG,CG,CC,CC,GG,CC,GG,CC,CC,CC,CC,CC,CC,...,CC,CC,CC,CC,CC,CC,CC,CC,CC,GG,CC,CC,CC,CC,GG,CC,CC,GG,CG,Peex113Ctg00483_38002


In [140]:
#extensive heterozygous calls -> lots of makers get miscalled to heterzygous
c = 0
q = 0
for k, value in tt1.iterrows():
    exs = value['exserta']
    axs = value['axillaris']
    if c % 100 == 0: 
        pass
        #print(c)
    #print(axsB, exsB)
    
    if len(set(exs)) + len(set(axs)) > 2: 
        q += 1
        #handle NAs in references
        if exs == "NA" or axs == "NA": 
            continue
            #under construction
            #if axs == "NA": 
                #remove exs and take next frequent basecalls
            #    axs = tt1.iloc[k,2:-1].value_counts()
        
        
        #handle complete different heterozygous genotypes : example ax:CC exs:GT
        
        if len(set(exs)) == 2 and len(set(axs)) == 1: 
            #handle complete different heterozygous genotypes : example ax:CC exs:GT
            if list(set(axs))[0] not in exs:
                print("boooooooooooooo", exs, axs)
                continue
                
            c += 1
            #create homozygous of het by removing axillaris-base: 
            ex_hom = exs.replace(list(set(axs))[0], '')
            new_exs = 2* ex_hom
            #print(k, new_exs,  tt1.iloc[k,2:-1].value_counts()[new_exs], axs, tt1.iloc[k,2:-1].value_counts()[axs], tt1.iloc[k,2:-1].value_counts().tolist())    
        
        elif len(set(axs)) == 2 and len(set(exs)) == 1: 
            #handle complete different heterozygous genotypes : example ax:CC exs:GT
            if list(set(exs))[0] not in axs:
                print("boooooooooooooo", exs, axs)
                continue
            
            
            c += 1
            
            #create homozygous of het by removing axillaris-base: 
            ax_hom = axs.replace(list(set(exs))[0], '')
            new_axs = 2* ax_hom
            #print(k, new_axs,  tt1.iloc[k,2:-1].value_counts()[new_axs], exs, tt1.iloc[k,2:-1].value_counts()[exs], tt1.iloc[k,2:-1].value_counts().tolist())    
            continue
print(q, c)

boooooooooooooo CC GT
86 69


In [96]:
#genotype the plant according to parents
out = tt1.copy()
c = 0
print(tt1.shape)
print(tt.shape)
for k, value in tt1.iterrows():
    c += 1
    exs = value['exserta']
    axs = value['axillaris']
    if c % 100 == 0: 
        print(c)
    
    
    if len(set(exs)) + len(set(axs)) > 2: 
        #problematic markers
        
        
        
        
        
        
        
        if tt1.iloc[k,2:-1].value_counts()[exs] + 
        
        
        print(k)
        continue
    
    
    
    else:
        #easy markers
        exsB = list(set(exs))[0]
        axsB = list(set(axs))[0]
        het1 = axsB + exsB
        het2 = exsB + axsB
        #print(het1, het2)
        for i in range(4, tt1.shape[1]): 
            #c += 1
            if value[i] == axs: 
                out.iloc[k, i] = "AX"
            elif value[i] == exs: 
                out.iloc[k, i] = "EX"
            elif value[i] == het1 or value[i] == het2: 
                out.iloc[k, i] = "HET"
            else: 
                out.iloc[k, i] = "NA"
        #print(tt1.columns[k+2], set(exs), set(axs))
    
    #if c > 20: 
    #   break

(1943, 200)
(1943, 197)
0
27
83
100
158
159
160
200
241
260
261
268
294
300
322
323
324
325
326
337
338
342
355
384
400
419
467
484
485
486
487
488
500
509
514
515
516
517
600
637
672
700
744
768
769
770
800
833
893
900
936
937
943
963
965
1000
1062
1068
1100
1104
1141
1156
1167
1168
1169
1188
1200
1227
1300
1328
1370
1371
1372
1375
1400
1400
1410
1450
1472
1473
1475
1500
1511
1512
1513
1514
1546
1600
1644
1645
1700
1703
1716
1727
1756
1757
1785
1800
1815
1820
1846
1866
1881
1900
1906


In [102]:
display(out)
print(out.shape, tt1.shape)
tt1.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/F7-K/F7-K.bases.csv", index = False)
out.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/F7-K/F7-K.geno.csv", index = False)

Unnamed: 0,CHR,POS,exserta,axillaris,RIL_196,RIL_197,RIL_198,RIL_199,RIL_200,RIL_201,RIL_202,RIL_203,RIL_16,RIL_18,RIL_19,RIL_1,RIL_20,RIL_21,RIL_22,RIL_23,...,RIL_178,RIL_179,RIL_180,RIL_181,RIL_182,RIL_183,RIL_184,RIL_185,RIL_186,RIL_187,RIL_105,RIL_188,RIL_189,RIL_190,RIL_191,RIL_192,RIL_194,RIL_195,RIL_106,marker
0,Peex113Ctg00004,119009,AG,GG,AA,GG,AG,AA,AA,AA,AA,AA,AA,AA,GG,GG,GG,AA,AA,AA,...,AA,AA,AA,AA,AA,AG,AG,GG,GG,AG,AA,AA,AA,GG,GG,GG,AA,AA,AA,Peex113Ctg00004_119009
1,Peex113Ctg00043,151704,GG,CC,EX,AX,HET,EX,AX,AX,AX,EX,HET,AX,AX,AX,AX,EX,EX,HET,...,AX,AX,AX,AX,AX,AX,AX,AX,AX,EX,AX,AX,EX,EX,EX,AX,AX,AX,AX,
2,Peex113Ctg00050,84548,GG,TT,EX,AX,HET,AX,AX,HET,AX,EX,AX,AX,AX,AX,AX,EX,EX,HET,...,AX,AX,AX,AX,AX,AX,HET,AX,AX,EX,EX,AX,HET,EX,EX,AX,AX,AX,AX,
3,Peex113Ctg00050,84557,GG,AA,EX,AX,HET,AX,AX,HET,AX,EX,AX,AX,AX,AX,AX,EX,EX,HET,...,AX,AX,AX,AX,AX,AX,HET,AX,AX,HET,EX,AX,HET,EX,EX,AX,AX,AX,AX,
4,Peex113Ctg00050,84666,CC,TT,EX,AX,HET,AX,AX,HET,AX,EX,AX,AX,AX,AX,AX,EX,EX,AX,...,AX,AX,AX,AX,AX,AX,HET,AX,AX,HET,EX,AX,EX,EX,EX,AX,AX,AX,AX,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1938,Peex113Ctg04615,80242,AA,GG,EX,HET,AX,AX,AX,AX,AX,EX,AX,AX,AX,AX,AX,EX,EX,AX,...,AX,EX,AX,EX,AX,AX,AX,AX,AX,EX,EX,AX,EX,EX,EX,AX,AX,EX,AX,
1939,Peex113Ctg04638,55082,GG,AA,EX,EX,HET,AX,EX,HET,EX,EX,EX,EX,EX,AX,EX,AX,EX,EX,...,EX,AX,EX,AX,EX,EX,HET,EX,EX,HET,EX,EX,EX,HET,AX,EX,EX,EX,EX,
1940,Peex113Ctg04867,106970,CC,AA,EX,EX,EX,AX,EX,EX,EX,EX,EX,EX,EX,AX,EX,AX,EX,EX,...,EX,AX,EX,AX,EX,EX,EX,EX,EX,EX,EX,EX,EX,EX,AX,EX,EX,EX,EX,
1941,Peex113Ctg00483,38002,CC,GG,AX,AX,EX,AX,HET,EX,EX,AX,EX,AX,EX,EX,EX,EX,EX,EX,...,EX,EX,EX,EX,EX,EX,EX,EX,EX,AX,EX,EX,EX,EX,AX,EX,EX,AX,HET,


(1943, 200) (1943, 200)


In [99]:
co = 0
out1 = out.copy()

out_list = []
header = ["individual", "AX", "EX", "HET", "tot"]
for i in out.columns[4:191]: 
    #print(i)
    #display(out[i])
    t = out[i].value_counts().sort_index()
    print(t)
    #print([i, t["AX"], t["EX"], t["HET"], t["AX"]+ t["EX"]+ t["HET"]])
    out_list.append([i, t["AX"], t["EX"], t["HET"], t["AX"]+ t["EX"]+ t["HET"]])

df = pd.DataFrame(out_list, columns=header)
display(df)
df.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/LM3_F7_K.markers.csv", index = False)

AA      16
AC       1
AG       2
AT       2
AX     606
      ... 
GG      16
GT       1
HET     22
NA       3
TT      21
Name: RIL_196, Length: 13, dtype: int64
AA       12
AC        1
AG        5
AT        2
AX     1106
       ... 
GG       18
GT        3
HET     212
NA        2
TT       15
Name: RIL_197, Length: 14, dtype: int64
AA       9
AC       4
AG       8
AT       2
AX     456
      ... 
GG      13
GT       4
HET    402
NA       5
TT      16
Name: RIL_198, Length: 14, dtype: int64
AA       14
AC        4
AG        2
AT        1
AX     1346
       ... 
EX      475
GG       18
HET      34
NA        2
TT       18
Name: RIL_199, Length: 12, dtype: int64
AA      12
AC       1
AG       6
AT       1
AX     937
      ... 
GG      13
GT       3
HET    361
NA       2
TT      15
Name: RIL_200, Length: 14, dtype: int64
AA      11
AC       6
AG       5
AT       3
AX     690
      ... 
GG      10
GT       6
HET    432
NA       5
TT      14
Name: RIL_201, Length: 14, dtype: int64
AA      12
A

Unnamed: 0,individual,AX,EX,HET,tot
0,RIL_196,606,1226,22,1854
1,RIL_197,1106,537,212,1855
2,RIL_198,456,994,402,1852
3,RIL_199,1346,475,34,1855
4,RIL_200,937,557,361,1855
...,...,...,...,...,...
182,RIL_184,606,535,711,1852
183,RIL_185,1031,809,12,1852
184,RIL_186,1039,805,8,1852
185,RIL_187,605,790,459,1854
