# GBS data processing
------
#### from reads to markers to genetic maps using parts of LM3 scripts, downloadable at (https://sourceforge.net/projects/lep-map3/)

### 1. quality filter (QC) of raw demultiplexed reads   
         fastq-mcf -q 30 -l 50
         
### 2. align QC reads to genome   
          bowtie2 --very-sensitive --no-unal --rg-id F7_RIL103 --rg SM:blubb --rg LB:blubb --rg PI:blubby --rg PL:ILLUMINA

### 3. prepare fofn's for posterior calls and LM3
         bam_list.txt: space delimited fofn to bam-files
         example: 
         ../F7_plate1.95.b2.peex113.s.bam ../F7_plate1.94.b2.peex113.s.bam ../F7_plate1.1.b2.peex113.s.bam ../F7_plate1.10.b2.peex113.s.bam ../F7_plate1.11.b2.peex113.s.bam 
         
         mapping.txt: space delimited file of sample names in same order as bam_list.txt
         example: 
         axillaris axillaris exserta 1 2 3 4                   

* 3.1: create posterior basecalls
          $samtools mpileup -q 10 -Q 10 -s $(cat bam_list.txt) |awk -f LEP-MAP3/scripts/pileupParser2.awk |awk -f LEP-MAP3/scripts/pileup2posterior.awk |head -1000 |gzip > tt.post.gz             

        pedigree.txt: tab-delimited file of relation between individuals
        resemble F7 as F2: 
        
        #family  indID   mother   father   sex(1=female;2=male)  phenotype                       
        F2      exserta 0       0       1       0   
        F2      axillaris       0       0       2       0                                        
        F2      P1      exserta axillaris       1       0                                        
        F2      P2      exserta axillaris       2       0                                        
        F2      1       P1      P2      0       0   
        F2      2       P1      P2      0       0   
        F2      3       P1      P2      0       0   
        F2      4       P1      P2      0       0   


* 3.2: transform pedigree.txt

        $LEP-MAP3/scripts/transpose_tab Michel.F4.ped |awk '{print "CHR\tPOS\t"$0}' > Michel.F4.tped 
               
* 3.3: create posterior calls with pedigree and posterior calls    

         $zcat tt.post.gz |java -cp LEP-MAP3/bin/ ParentCall2 data=Michel.F2.tped posteriorFile=- removeNonInformative=1  > tt.postcall           
         
         
         
         
## 4. Transfer and filter posterior calls to csv (to use as input in R/ASmap) 

In [64]:
import pandas as pd
pd.get_option("display.max_rows")
pd.set_option("display.max_rows",8)
pd.set_option("display.max_columns",10)


test_W = pd.read_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/F7_W.PD.postcall", sep = "\t")
test_W.columns

#filter out dummy parents
cols = test_W.columns.str.contains("AE|PA|PE|POS|CHR") 
cols
cols = test_W.columns[cols]
test_Wril = test_W[cols].copy()

for i in cols[2:]: 
    #print(i)
    test_Wril[i] = test_W[i].apply(lambda x: x.split(' '))
test_Wril.columns

Index(['CHR', 'POS', 'PA', 'PE', 'AE_100', 'AE_103', 'AE_108', 'AE_10',
       'AE_110', 'AE_114',
       ...
       'AE_88', 'AE_89', 'AE_8', 'AE_90', 'AE_91', 'AE_93', 'AE_95', 'AE_97',
       'AE_98', 'AE_9'],
      dtype='object', length=192)

In [65]:
test_Wril.loc[1,["PA", "PE" ]]
test_Wril.POS = test_Wril.POS.apply(str)
test_Wril['marker'] = test_Wril[['CHR', 'POS']].apply(lambda x: '_'.join(x), axis=1)
display(test_Wril)

Unnamed: 0,CHR,POS,PA,PE,AE_100,...,AE_95,AE_97,AE_98,AE_9,marker
0,Peex113Ctg00021,64522,"[0, 0, 0, 0, 0, 0, 1.0, 0, 0, 0]","[0, 0, 0, 1.24097E-25, 0, 0, 9.39227E-10, 0, 1...","[0, 0, 0, 0, 0, 0, 7.52392E-9, 0, 0, 1.0]",...,"[0, 0, 0, 0, 0, 0, 7.2058E-21, 0, 0, 1.0]","[0, 0, 0, 0, 0, 0, 1.14957E-19, 0, 0, 1.0]","[0, 0, 0, 0, 1.0, 0, 1.141634E-16, 0, 0, 0]","[0, 0, 0, 0, 1.0, 0, 1.882644E-30, 0, 0, 0]",Peex113Ctg00021_64522
1,Peex113Ctg00041,8426,"[1.39187E-40, 1.39187E-40, 4.65626E-14, 2.9892...","[2.59021E-36, 2.59021E-36, 2.59021E-36, 5.6601...","[0, 0, 0, 0, 0, 0, 0, 0.5, 1.0, 0.5]",...,"[0, 0, 0, 0, 0, 0, 0, 2.08214E-9, 0.00990202, ...","[0, 0, 0, 0, 0, 0, 0, 1.0, 1.287198E-5, 5.0751...","[0, 0, 0, 0, 0, 0, 0, 9.0594E-13, 0.0028456, 1.0]","[0, 0, 0, 0, 0, 0, 0, 1.0, 9.71766E-7, 5.0392E...",Peex113Ctg00041_8426
2,Peex113Ctg00041,12417,"[0, 0, 0, 5.64513E-34, 0, 0, 1.0, 0, 3.5157E-3...","[1.64544E-21, 0.0156573, 1.64544E-21, 1.64544E...","[0, 0, 0, 0, 0.5, 0, 1.0, 0, 0, 0.5]",...,"[0, 0, 0, 0, 0.9996661115187527, 0, 1.0, 0, 0,...","[0, 0, 0, 0, 0, 0, 2.04338E-7, 0, 0, 1.0]","[0, 0, 0, 0, 1.0, 0, 0.0314056, 0, 0, 2.36451E...","[0, 0, 0, 0, 3.51347E-34, 0, 1.392618E-4, 0, 0...",Peex113Ctg00041_12417
3,Peex113Ctg00046,475756,"[0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0]","[1.53641E-28, 1.53641E-28, 0.00391669, 1.53641...","[3.3355559243212763E-4, 0, 1.0, 0, 0, 0, 0, 0....",...,"[1.0, 0, 3.00626E-8, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 7.82588E-21, 0, 0, 0, 0, 1.0, 0, 0]","[3.71484E-11, 0, 0.25025, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 8.33094E-28, 0, 0, 0, 0, 1.0, 0, 0]",Peex113Ctg00046_475756
...,...,...,...,...,...,...,...,...,...,...,...
5649,Peex113Ctg00498,75039,"[0, 6.6821E-18, 0, 8.08545E-39, 5.02813E-14, 6...","[1.72258E-14, 0.0625909, 1.72258E-14, 1.72258E...","[0, 0, 0, 0, 0.5, 0, 1.0, 0, 0, 0.5]",...,"[0, 0, 0, 0, 1.0, 0, 0.0156716, 0, 0, 1.5867E-24]","[0, 0, 0, 0, 1.0, 0, 3.83916E-6, 0, 0, 0]","[0, 0, 0, 0, 0.00751075, 0, 0.429954, 0, 0, 1.0]","[0, 0, 0, 0, 3.349E-42, 0, 1.313986E-11, 0, 0,...",Peex113Ctg00498_75039
5650,Peex113Ctg00498,75097,"[7.00543E-34, 1.0, 3.40021E-38, 3.40021E-38, 8...","[1.23952E-14, 0.0625835, 1.23952E-14, 1.23952E...","[0.5, 1.0, 0, 0, 0.5, 0, 0, 0, 0, 0]",...,"[4.60461E-25, 0.01566154, 0, 0, 1.0, 0, 0, 0, ...","[0, 3.9498E-6, 0, 0, 1.0, 0, 0, 0, 0, 0]","[1.0, 0.429846, 0, 0, 0.00749879, 0, 0, 0, 0, 0]","[1.0, 1.302418E-11, 0, 0, 6.25738E-43, 0, 0, 0...",Peex113Ctg00498_75097
5651,Peex113Ctg00498,75101,"[0, 0, 7.20697E-19, 3.57815E-41, 0, 7.20697E-1...","[1.23952E-14, 1.23952E-14, 0.0625835, 1.23952E...","[0, 0, 0, 0, 0, 0, 0, 0.5, 1.0, 0.5]",...,"[0, 0, 0, 0, 0, 0, 0, 1.0, 0.01566756, 9.92719...","[0, 0, 0, 0, 0, 0, 0, 1.0, 3.84194E-6, 0]","[0, 0, 0, 0, 0, 0, 0, 0.00750326, 0.429888, 1.0]","[0, 0, 0, 0, 0, 0, 0, 6.44368E-43, 1.302434E-1...",Peex113Ctg00498_75101
5652,Peex113Ctg00500,7004,"[2.59343E-20, 3.84258E-24, 1.0, 3.84258E-24, 0...","[0, 0, 5.79679E-4, 0, 0, 7.8456E-6, 0, 1.0, 7....","[3.755367981638904E-4, 0, 1.0, 0, 0, 0, 0, 0.9...",...,"[4.18257E-11, 0, 0.25026, 0, 0, 0, 0, 1.0, 0, 0]","[0, 0, 4.87172E-7, 0, 0, 0, 0, 1.0, 0, 0]","[0.9779912841416757, 0, 1.0, 0, 0, 0, 0, 0.022...","[0.03923155, 0, 1.0, 0, 0, 0, 0, 1.123825E-12,...",Peex113Ctg00500_7004


In [66]:
mapping = [ "AA", "AC", "AG","AT", "CC", "CG", "CT", "GG", "GT" ,"TT"]
print(test_Wril.shape)
tt = test_Wril[cols[2:]].copy()
tt1 = test_Wril.copy()
c = 0
cc = 0
for key, value in tt.iteritems(): 
    cc += 1
    if cc % 10 == 0: 
        print("{} individuals processed ...".format(cc))
    for t, i in enumerate(value): 
        c += 1
        if i.count('1.0') > 1: 
            print(t, i)
            g = "NA"
        elif '1.0' not in i: 
            continue
        else: 
            g = mapping[i.index('1.0')]
        tt1[key].iloc[t] = g

(5653, 193)
35 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
93 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
94 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
95 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
105 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
106 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
138 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
185 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
291 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
398 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
429 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
471 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
505 ['1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0', '1.0']
756 ['1.0', '1.0'

In [67]:
display(tt1)
tt1.index = tt1.marker

Unnamed: 0,CHR,POS,PA,PE,AE_100,...,AE_95,AE_97,AE_98,AE_9,marker
0,Peex113Ctg00021,64522,CT,TT,TT,...,TT,TT,CC,CC,Peex113Ctg00021_64522
1,Peex113Ctg00041,8426,GT,TT,GT,...,TT,GG,TT,GG,Peex113Ctg00041_8426
2,Peex113Ctg00041,12417,CT,CC,CT,...,CT,TT,CC,TT,Peex113Ctg00041_12417
3,Peex113Ctg00046,475756,AG,GG,AG,...,AA,GG,GG,GG,Peex113Ctg00046_475756
...,...,...,...,...,...,...,...,...,...,...,...
5649,Peex113Ctg00498,75039,CT,CC,CT,...,CC,CC,TT,TT,Peex113Ctg00498_75039
5650,Peex113Ctg00498,75097,AC,CC,AC,...,CC,CC,AA,AA,Peex113Ctg00498_75097
5651,Peex113Ctg00498,75101,GT,GG,GT,...,GG,GG,TT,TT,Peex113Ctg00498_75101
5652,Peex113Ctg00500,7004,AG,GG,AG,...,GG,GG,AG,AG,Peex113Ctg00500_7004


## transfer basecalls to genotypes using parental columns as reference

#### majority of parental genotypes are heterzygous, adjust marker selection and do phasing

In [132]:


def handle_marker(marker): 
    '''
    phase marker and return parental genotype
    '''
    marker_n = marker.name
    ax = marker["PA"]
    ex = marker["PE"]
    
    axB, exB = "", ""
    
    
    if len(set(ax)) == 2: 
        axB = ax.replace(ex[0], "") 
        exB = ex[0] 
    else: 
        exB = ex.replace(ax[0], "") 
        axB = ax[0] 
    
    #print(marker_n, axB, exB)
    
    #test genotype frequency for set parental genotypes
    #hard coded thresholds: at least 70% data and only weak deviation from 1:1 ratio
    tot_ind = len(marker[2:-1])
    
    mset = marker[2:-1].value_counts()
    
    
    #take the two most abundant genotypes and test if they are in the parental lines: 
    #print(marker[2:-1].value_counts(), mset.index.tolist())
    
    if axB*2 in mset.index.tolist()[:2] and exB*2 in mset.index.tolist()[:2]: 
        #print("good")
        return 1, axB, exB
    else: 
        return 0, "", ""
    
    
    
    try: 
        axG = marker[2:-1].value_counts()[axB*2]
        exG = marker[2:-1].value_counts()[exB*2]
        
    except KeyError: 
        print("key not found")
        axB = ""
        exB = ""
        
    #print(axG, exG, tot_ind)
    
    if axB and axG + exG > tot_ind * 0.7 and 3 > axG / exG > 0.25: 
        return axB, exB
    else: 
        return "", ""
#    print(len(marker[2:-1]))
    #print(marker_n, ax, ex, marker[2:-1].value_counts())
    
    
c = 0
k = 0
for e, row in tt1.iterrows(): 
        c += 1
        if c > 50: 
            break
        kr, t, t1 = handle_marker(row) 
        if kr :    
            k += 1
print(k)


43


In [133]:
%%time

#genotype the individuals according to parents
out = tt1.copy()
c = 0
mis_count = 0
both_count = 0
identical = 0
#print(tt1.shape)

for k, value in tt1.iterrows():
    c += 1
    exs = value['PE']
    axs = value['PA']
    exsB = ""
    axsB = ""
    if c % 500 == 0: 
        print("{} markers processed ...".format(c))
    
    #IF one partental basecall is not homozygous 
    if len(set(exs)) + len(set(axs)) == 3 and "N" not in exs + axs: 
        #problematic markers
        mis_count += 1
        kr, axsB, exsB = handle_marker(value)
        if kr == 0:
            continue
        else: 
            het1 = axsB + exsB
            het2 = exsB + axsB
        #out.iloc[k, :] = fixed_marker
        
    #IF both parental basecalls are not homozygous
    elif len(set(exs)) + len(set(axs)) == 4: 
        both_count += 1
        if set(exs) == set(axs): 
            identical += 1
        #print(k, axs, exs, value[2:-1].value_counts())
        #print(value[2:-1].)
    
    #IF partental basecalls are homozygous
    else:
        if set(exs) == set(axs): 
            identical += 1
            
        exsB = list(set(exs))[0]
        axsB = list(set(axs))[0]
        het1 = axsB + exsB
        het2 = exsB + axsB
        
        
    for individual, i in value[4:tt1.shape[1]].iteritems(): 
            if i == axs: 
                out.at[k, individual] = "AX"
            elif i == exs: 
                out.at[k, individual] = "EX"
            elif i == het1 or i == het2: 
                out.at[k, individual] = "HET"
            else : 
                out.at[k, individual] = "NA"
        

print("{} markers not being homozygous for ONE parents but fixed".format(mis_count))     
print("{} markers not being homozygous for BOTH parents".format(both_count))        
print("{} markers identical between parental species".format(identical))        
out.index = tt1.marker


500 markers processed ...
1000 markers processed ...
1500 markers processed ...
2000 markers processed ...
2500 markers processed ...
3000 markers processed ...
3500 markers processed ...
4000 markers processed ...
4500 markers processed ...
5000 markers processed ...
5500 markers processed ...
3896 markers not being homozygous for ONE parents but fixed
1453 markers not being homozygous for BOTH parents
1498 markers identical between parental species
CPU times: user 17.9 s, sys: 34.7 ms, total: 17.9 s
Wall time: 17.9 s


In [134]:
res = out.iloc[:, 4:-1].stack().value_counts()
print(res)
#res = out.ilo.stack().value_counts()
print(display(out.iloc[:, 4:-1]))

NA    502158
EX    327237
AX    162530
AA     12691
       ...  
AT      2786
AC      1888
GT      1783
CG       728
Length: 14, dtype: int64


Unnamed: 0_level_0,AE_100,AE_103,AE_108,AE_10,AE_110,...,AE_93,AE_95,AE_97,AE_98,AE_9
marker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Peex113Ctg00021_64522,EX,EX,,,,...,,EX,EX,,
Peex113Ctg00041_8426,AX,AX,AX,EX,EX,...,EX,EX,,EX,
Peex113Ctg00041_12417,AX,AX,,AX,AX,...,EX,AX,,EX,
Peex113Ctg00046_475756,AX,AX,EX,EX,EX,...,EX,,EX,EX,EX
...,...,...,...,...,...,...,...,...,...,...,...
Peex113Ctg00498_75039,AX,AX,,,AX,...,,EX,EX,,
Peex113Ctg00498_75097,AX,AX,,,AX,...,,EX,EX,,
Peex113Ctg00498_75101,AX,AX,,,AX,...,,EX,EX,,
Peex113Ctg00500_7004,AX,EX,AX,,,...,,EX,EX,AX,AX


None


In [135]:
display(out)

print(out.shape, tt1.shape)
tt1.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/F7-W/F7-W.bases.csv", index = True)
out.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/F7-W/F7-W.geno.csv", index = True)

Unnamed: 0_level_0,CHR,POS,PA,PE,AE_100,...,AE_95,AE_97,AE_98,AE_9,marker
marker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Peex113Ctg00021_64522,Peex113Ctg00021,64522,CT,TT,EX,...,EX,EX,,,
Peex113Ctg00041_8426,Peex113Ctg00041,8426,GT,TT,AX,...,EX,,EX,,
Peex113Ctg00041_12417,Peex113Ctg00041,12417,CT,CC,AX,...,AX,,EX,,
Peex113Ctg00046_475756,Peex113Ctg00046,475756,AG,GG,AX,...,,EX,EX,EX,
...,...,...,...,...,...,...,...,...,...,...,...
Peex113Ctg00498_75039,Peex113Ctg00498,75039,CT,CC,AX,...,EX,EX,,,
Peex113Ctg00498_75097,Peex113Ctg00498,75097,AC,CC,AX,...,EX,EX,,,
Peex113Ctg00498_75101,Peex113Ctg00498,75101,GT,GG,AX,...,EX,EX,,,
Peex113Ctg00500_7004,Peex113Ctg00500,7004,AG,GG,AX,...,EX,EX,AX,AX,


(5653, 193) (5653, 193)


## Filter HQ markers and count number of genotpyes per marker

In [136]:
co = 0

marker_l = []

#iterate over markers
for index, row in out.iloc[:,2:out.shape[1]-1].iterrows():
    t = row.value_counts().sort_index()
    #print(t)
    
    #filter out heterzygous parents and identical parents
    if "AX" in t.index and row["PE"] != row["PA"]:
        
        #replace heterzygous sites with NA
        if "HET" in t.index: 
            #print(row)
            row1 = row.replace("HET", "NA")
            marker_l.append(row1)
        else: 
            marker_l.append(row)


marker_df = pd.concat(marker_l, axis=1, keys=[s for s in out.index])
            


#iterate over individuals and summarise marker appearance

out_list = []
header = ["individual", "AX", "EX", "NA", "tot"]

for index, individual_s in marker_df.iterrows(): 
    
    t = individual_s.value_counts().sort_index()
    
    if "AX" not in t.index: 
        continue
    #print(column, [val for pair in zip(t.tolist(), t.index.tolist()) for val in pair])
    out_list.append([index, t["AX"], t["EX"], t["NA"], t["AX"]+ t["EX"]+ t["NA"]])


#iterate over markers and summarise marker appearance

out_list2 = []
header2 = ["marker", "AX", "EX", "NA", "tot"]

marker_dfT = marker_df.T

for index, marker_s in marker_dfT.iterrows(): 
    
    t = marker_s.value_counts().sort_index()
    
    if "AX" not in t.index: 
        print(marker_s)
        out_list2.append([index, "-", t["EX"], t["NA"], t["EX"]+ t["NA"]])
        continue
    elif "EX" not in t.index:
        print(marker_s)
        out_list2.append([index, t["AX"], "-", t["NA"], t["AX"]+  t["NA"]])
        continue
    
    #print(column, [val for pair in zip(t.tolist(), t.index.tolist()) for val in pair])
    out_list2.append([index, t["AX"], t["EX"], t["NA"], t["AX"]+ t["EX"]+ t["NA"]])


    
    
    
#harvest list

df1 = pd.DataFrame(out_list, columns=header)
df2 = pd.DataFrame(out_list2, columns=header2)

display(marker_df)


marker_df.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/F7-W/LM3_F7_W.markers.csv", index = True)
df1.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/F7-W/LM3_F7_W.INDIVIDUAL.genoSUMMARY.csv", index = False)
df2.to_csv("/media/mmoser/data1/P.EXSERTA/PEXV1.1.3/genetic_maps/markers/F7-W/LM3_F7_W.MARKER.genoSUMMARY.csv", index = False)

PA        AG
PE        NA
AE_100    AX
AE_103    AX
          ..
AE_95     NA
AE_97     NA
AE_98     NA
AE_9      NA
Name: Peex113Ctg05107_19641, Length: 190, dtype: object
PA        CT
PE        NA
AE_100    AX
AE_103    AX
          ..
AE_95     NA
AE_97     AX
AE_98     NA
AE_9      NA
Name: Peex113Ctg00518_13786, Length: 190, dtype: object
PA        GT
PE        NA
AE_100    AX
AE_103    AX
          ..
AE_95     NA
AE_97     AX
AE_98     NA
AE_9      NA
Name: Peex113Ctg00518_13858, Length: 190, dtype: object
PA        CT
PE        NA
AE_100    AX
AE_103    AX
          ..
AE_95     NA
AE_97     AX
AE_98     NA
AE_9      NA
Name: Peex113Ctg00534_130241, Length: 190, dtype: object
PA        AG
PE        NA
AE_100    AX
AE_103    AX
          ..
AE_95     NA
AE_97     NA
AE_98     AX
AE_9      NA
Name: Peex113Ctg00538_20105, Length: 190, dtype: object
PA        AG
PE        NA
AE_100    AX
AE_103    AX
          ..
AE_95     NA
AE_97     NA
AE_98     AX
AE_9      NA
Name: Peex113Ctg0

Unnamed: 0,Peex113Ctg00021_64522,Peex113Ctg00041_8426,Peex113Ctg00041_12417,Peex113Ctg00046_475756,Peex113Ctg00046_475771,...,Peex113Ctg18265_265822,Peex113Ctg18266_197241,Peex113Ctg18266_393673,Peex113Ctg18267_254799,Peex113Ctg18267_861574
PA,CT,GT,CT,AG,AC,...,AG,CT,AC,GT,AG
PE,TT,TT,CC,GG,AA,...,AA,CC,CC,GG,GG
AE_100,EX,AX,AX,AX,AX,...,EX,AX,AX,AX,AX
AE_103,EX,AX,AX,AX,AX,...,AX,AX,AX,AX,EX
...,...,...,...,...,...,...,...,...,...,...,...
AE_95,EX,EX,AX,,EX,...,EX,EX,EX,EX,EX
AE_97,EX,,,EX,EX,...,,EX,EX,EX,EX
AE_98,,EX,EX,EX,EX,...,EX,,,,AX
AE_9,,,,EX,EX,...,,,,,AX


### further filtering of individuals and markers will follow in R