# Detect best polymorphisms

In this notebook the initially filtered dataset is analyzed and we detect polymorphisms that has the best ability to differentiate between eyes colors.

In [1]:
# Read te data
import pandas as pd
ped = pd.read_csv("data/final/ped_filtered.csv")

In [14]:
ped.head()

Unnamed: 0,rs6599770,rs12440100,rs12906138,rs7179358,kgp6444009,rs4932078,exm1140790,exm1140862,exm1140954,exm1140961,...,exm1194798,exm1194803,rs7182651,rs7180058,rs2203349,exm1194810,exm2274791,rs13329121,rs11247329,color
0,"('A', 'C')","('A', 'G')","('A', 'G')","('A', 'A')","('C', 'A')","('T', 'C')","('A', 'A')","('C', 'C')","('C', 'C')","('G', 'G')",...,"('A', 'A')","('G', 'G')","('G', 'G')","('G', 'G')","('T', 'C')","('C', 'C')","('C', 'C')","('T', 'T')","('C', 'T')",1
1,"('A', 'C')","('G', 'G')","('A', 'G')","('A', 'A')","('C', 'A')","('T', 'C')","('A', 'A')","('C', 'C')","('C', 'C')","('G', 'G')",...,"('A', 'A')","('G', 'G')","('A', 'G')","('G', 'G')","('C', 'C')","('C', 'C')","('C', 'C')","('C', 'T')","('T', 'T')",2
2,"('A', 'C')","('G', 'G')","('G', 'G')","('G', 'A')","('C', 'A')","('T', 'C')","('A', 'A')","('C', 'C')","('C', 'C')","('G', 'G')",...,"('A', 'A')","('G', 'G')","('A', 'G')","('G', 'G')","('T', 'T')","('T', 'C')","('C', 'C')","('T', 'T')","('C', 'T')",2
3,"('C', 'C')","('G', 'G')","('A', 'G')","('G', 'G')","('A', 'A')","('C', 'C')","('G', 'A')","('C', 'C')","('C', 'C')","('G', 'G')",...,"('A', 'A')","('A', 'G')","('A', 'G')","('G', 'G')","('T', 'T')","('C', 'C')","('C', 'C')","('T', 'T')","('C', 'T')",2
4,"('C', 'C')","('G', 'G')","('G', 'G')","('G', 'G')","('A', 'A')","('C', 'C')","('A', 'A')","('C', 'C')","('C', 'C')","('G', 'G')",...,"('A', 'A')","('G', 'G')","('A', 'G')","('G', 'G')","('C', 'C')","('C', 'C')","('C', 'C')","('C', 'T')","('T', 'T')",2


### Analysis for single polymorphisms

Here we investigate all polymorphysms indyvidually.
We calculate the number of occurences for each variant for each class (bright eyes / dark eyes) and the percentage difference between classes. Thus we calculate mean percentage difference which is our mearure of polymorphysms value (the highest the better)

In [15]:
# Simple example for one selected polymorphism. Not essential to further analyses but left for better visualisation.

poly = 'exm-rs12913832'
df_input = ped[['color', poly]]
df_input['sum'] = 0
df_input = df_input[df_input[poly] != "('0', '0')"]
data = df_input.groupby(['color', poly]).count().astype(float)
print(data)
data['sum'][1] /= (data['sum'][1]).sum()
data['sum'][2] /= (data['sum'][2]).sum()
print(data)
data2 = abs(data['sum'][1] - data['sum'][2])
print(data2)
print(data2.sum())
print(data2.count())

                         sum
color exm-rs12913832        
1     ('A', 'A')        13.0
      ('A', 'G')       240.0
      ('G', 'G')      2755.0
2     ('A', 'A')       196.0
      ('A', 'G')      1316.0
      ('G', 'G')       250.0
                           sum
color exm-rs12913832          
1     ('A', 'A')      0.004322
      ('A', 'G')      0.079787
      ('G', 'G')      0.915891
2     ('A', 'A')      0.111237
      ('A', 'G')      0.746879
      ('G', 'G')      0.141884
exm-rs12913832
('A', 'A')    0.106915
('A', 'G')    0.667091
('G', 'G')    0.774007
Name: sum, dtype: float64
1.5480134699446952
3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [16]:
# Calculating differences between classes.
# For a selected column (polymorphism) we calculate percentage of each genotype.
# Then calculate a difference.

# Pairs (polymorphysm, difference) are tstored in 'features' variable.

features = {}
for i, poly in enumerate(list(ped.columns.values)):
    print (i, poly)
    df_input = ped[['color', poly]]
    df_input['sum'] = 0
    df_input = df_input[df_input[poly] != "('0', '0')"]
    data = df_input.groupby(['color', poly]).count().astype(float)
    data['sum'][1] /= (data['sum'][1]).sum()
    data['sum'][2] /= (data['sum'][2]).sum()
    data2 = abs(data['sum'][1] - data['sum'][2])
    features[poly] = data2.sum() / data2.count()

0 rs6599770
1 rs12440100
2 rs12906138
3 rs7179358
4 kgp6444009
5 rs4932078


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


6 exm1140790
7 exm1140862
8 exm1140954
9 exm1140961
10 exm1140965
11 kgp11825057
12 rs4462663
13 kgp1549400
14 rs7402159
15 rs4932679
16 rs6606788
17 rs12900257
18 rs12102024
19 rs11486384
20 exm2264555
21 exm1141104
22 exm1141110
23 rs8029154
24 exm1141135
25 exm1141138
26 exm1141140
27 variant.34220
28 exm1141154
29 exm1141160
30 indel.34227
31 exm1141179
32 exm1141188
33 newrs146368727
34 rs11632888
35 rs3812921
36 rs17137279
37 rs2289815
38 exm1141217
39 rs765763
40 rs8029320
41 rs11853131
42 rs11632180
43 rs7171787
44 rs722410
45 rs17841096
46 rs11634023
47 rs2289823
48 exm1141281
49 rs8029108
50 exm1141287
51 rs4778298
52 exm1141304
53 exm1141320
54 rs4134803
55 exm1825219
56 rs2289819
57 exm1141359
58 rs7174982
59 rs6606813
60 exm1141368
61 exm1141369
62 rs17841095
63 variant.34238
64 exm1141380
65 rs2068655
66 exm1141388
67 exm1141399
68 rs7182576
69 rs10519332
70 rs3693
71 exm1141448
72 exm1141452
73 rs7168000
74 exm1141491
75 rs7170784
76 rs4592619
77 exm2267749
78 rs7179251


583 rs4906913
584 rs12441474
585 rs11637817
586 rs6576628
587 rs6576636
588 rs6576638
589 rs7496866
590 rs7173687
591 rs9286393
592 rs28426996
593 exm1142893
594 rs10519587
595 rs140684
596 exm1142920
597 rs7183980
598 rs12185122
599 rs7403021
600 rs2376481
601 exm1142937
602 exm-rs1432133
603 rs1432132
604 rs7495797
605 rs1368022
606 rs724456
607 rs7167986
608 rs7168179
609 rs7403483
610 rs10519582
611 rs7171954
612 rs6606865
613 rs11263701
614 rs12913876
615 rs11852692
616 rs17648036
617 rs9330237
618 rs4887554
619 rs4887525
620 rs4354903
621 rs8036123
622 rs6606873
623 rs208177
624 rs208176
625 rs969754
626 rs6606877
627 rs2376837
628 rs7495487
629 rs2045153
630 rs7402147
631 exm2272215
632 rs1378101
633 rs8030244
634 rs997140
635 rs12907392
636 rs6422904
637 rs9972391
638 rs897173
639 rs4243105
640 rs1026358
641 rs6606892
642 rs7174776
643 rs1869599
644 rs11263713
645 rs7177867
646 rs8037609
647 rs4600441
648 rs140672
649 rs10083549
650 rs9672543
651 rs9635412
652 rs4238490
653 rs4

1136 exm1144445
1137 exm1144449
1138 variant.34350
1139 exm1144465
1140 rs1056118
1141 rs7176351
1142 rs7169064
1143 exm1144485
1144 exm1144504
1145 exm1144511
1146 exm1144513
1147 exm1144517
1148 exm1144533
1149 exm1144536
1150 exm1144545
1151 rs565
1152 rs2873
1153 exm1144557
1154 exm1144568
1155 exm1144579
1156 exm1144589
1157 exm2252096
1158 rs11293
1159 exm1144595
1160 rs2949569
1161 exm1144611
1162 exm1144619
1163 exm1144622
1164 exm1144628
1165 rs2949576
1166 exm1144630
1167 rs1133642
1168 exm1144642
1169 1KG_15_31283029
1170 exm1144647
1171 exm1144659
1172 exm1144665
1173 exm1144668
1174 exm1144670
1175 exm1144682
1176 exm1144687
1177 exm1144689
1178 exm1144690
1179 exm1144692
1180 exm1144693
1181 exm1144696
1182 exm2274734
1183 exm1826789
1184 variant.34381
1185 exm1144707
1186 rs7182547
1187 rs2113946
1188 rs7161812
1189 exm1144718
1190 exm1144722
1191 exm2274737
1192 rs12913672
1193 exm1144786
1194 exm1144797
1195 rs2338834
1196 exm2252098
1197 rs3743234
1198 exm1144825
1199

1666 rs3850096
1667 rs16958835
1668 rs4587944
1669 rs7181902
1670 rs11635964
1671 rs8040071
1672 rs4780227
1673 rs7169884
1674 exm1146399
1675 rs4780233
1676 variant.34490
1677 exm1146474
1678 rs2290942
1679 exm1146479
1680 exm1146494
1681 exm1146501
1682 rs12591967
1683 rs347849
1684 rs347861
1685 rs347866
1686 exm1146526
1687 rs7175028
1688 rs347830
1689 rs8031147
1690 exm1146534
1691 exm1146566
1692 exm1146568
1693 exm1146578
1694 exm1146599
1695 exm1146602
1696 exm1146609
1697 exm1146611
1698 exm1146612
1699 exm1146619
1700 exm1146624
1701 exm1146625
1702 exm2223172
1703 exm1146639
1704 exm1146640
1705 exm1146669
1706 exm1146671
1707 exm1146677
1708 exm1146681
1709 exm1146689
1710 exm1146693
1711 exm1146694
1712 exm1146699
1713 exm1146708
1714 exm1146719
1715 exm1146722
1716 exm1146734
1717 rs4984222
1718 rs4984215
1719 rs4984217
1720 kgp11313841
1721 rs2879515
1722 rs4924045
1723 rs8041207
1724 rs8036663
1725 rs2059591
1726 rs16954263
1727 rs6495691
1728 rs9806404
1729 rs1836974
1

2200 rs7171789
2201 rs2912382
2202 rs16967312
2203 rs2958295
2204 rs2958285
2205 rs11073355
2206 rs1465511
2207 rs2929657
2208 rs2929660
2209 rs11636712
2210 rs2912387
2211 rs1004866
2212 rs2624257
2213 rs17659166
2214 rs12903682
2215 rs16954437
2216 rs8182091
2217 rs2958305
2218 exm-rs2624265
2219 rs4924307
2220 rs11632328
2221 rs7177787
2222 rs7176726
2223 rs8040465
2224 exm2267760
2225 rs7178589
2226 rs1897737
2227 rs2060369
2228 rs12439653
2229 rs7163517
2230 rs731223
2231 rs8027106
2232 rs9972315
2233 rs11857756
2234 rs768269
2235 rs12905686
2236 rs8039562
2237 rs4924313
2238 rs2164006
2239 rs3910264
2240 rs6492848
2241 rs8034449
2242 rs2035060
2243 rs2164011
2244 rs10152887
2245 rs12442494
2246 rs1460800
2247 rs11853413
2248 rs983482
2249 rs2004175
2250 rs4924327
2251 rs1368814
2252 rs7166614
2253 rs1460787
2254 rs724729
2255 rs16968165
2256 rs1380934
2257 rs275184
2258 exm2267824
2259 rs275178
2260 rs9972581
2261 rs11638941
2262 rs1505274
2263 rs3913762
2264 exm-rs2852078
2265 r

2723 exm1150778
2724 exm1150789
2725 exm1150805
2726 exm1150808
2727 exm1150812
2728 exm1150817
2729 rs3743038
2730 rs7166469
2731 exm1150854
2732 rs1942
2733 rs1757463
2734 exm1150889
2735 exm2252138
2736 exm1150935
2737 exm1150943
2738 exm1150950
2739 rs170296
2740 variant.34793
2741 exm1150958
2742 exm1150974
2743 exm1150975
2744 exm1150977
2745 exm1150985
2746 exm1150993
2747 exm1151000
2748 exm1151002
2749 exm1151026
2750 exm1151040
2751 rs1077809
2752 exm1151064
2753 rs2305030
2754 exm1151085
2755 exm1151131
2756 exm1151132
2757 rs3743031
2758 exm1151141
2759 exm2223253
2760 exm1151152
2761 exm1151158
2762 exm1151168
2763 exm1151171
2764 exm2252139
2765 exm1151183
2766 rs1473781
2767 exm1151205
2768 exm1151208
2769 exm1151212
2770 exm1151215
2771 exm1151217
2772 rs2289741
2773 rs1200349
2774 exm1151227
2775 exm1151228
2776 exm1151249
2777 exm1151250
2778 exm1151259
2779 var_15_41828413
2780 exm1151265
2781 exm1151266
2782 exm1151274
2783 rs1200353
2784 rs7167667
2785 exm1151309
2

3238 exm1154775
3239 exm1154780
3240 exm1154782
3241 exm1154787
3242 exm1154792
3243 1KG_15_43024569
3244 exm1154803
3245 exm1154812
3246 exm1154813
3247 variant.35130
3248 exm1154858
3249 exm1154859
3250 exm1154884
3251 exm1154888
3252 exm1154891
3253 exm1154900
3254 exm1154918
3255 exm1154922
3256 exm1154945
3257 rs7174661
3258 rs9920562
3259 exm1154969
3260 rs16957167
3261 exm1154976
3262 rs13380301
3263 exm1155047
3264 rs8039438
3265 rs11070382
3266 exm1155054
3267 rs7164041
3268 rs3803341
3269 exm1155061
3270 exm1155066
3271 rs2277532
3272 exm1155069
3273 rs3759792
3274 exm1155076
3275 rs8037022
3276 exm1830809
3277 exm1155104
3278 exm1830818
3279 exm1155110
3280 exm1155123
3281 exm1155139
3282 rs3736054
3283 exm1155195
3284 exm1155200
3285 exm1155205
3286 rs16957385
3287 exm1155215
3288 exm1155220
3289 indel.35156
3290 exm1155243
3291 exm1155259
3292 rs2176870
3293 exm1155264
3294 indel.35158
3295 exm1155287
3296 exm1155295
3297 exm1155298
3298 variant.35164
3299 exm1155301
3300 

3753 exm1158882
3754 exm1158883
3755 exm1158891
3756 exm1158893
3757 exm1158913
3758 exm1158918
3759 exm1158929
3760 exm1158992
3761 exm1158999
3762 var_15_45709547
3763 rs2467857
3764 exm1159003
3765 exm1159008
3766 rs1288847
3767 indel.35479
3768 exm1159015
3769 rs1629871
3770 indel.35482
3771 rs1974981
3772 rs6493153
3773 rs2453531
3774 exm1159055
3775 rs8042811
3776 rs8041902
3777 rs2899382
3778 rs607541
3779 rs2218309
3780 rs682805
3781 rs11637984
3782 rs635205
3783 rs674638
3784 rs2670811
3785 exm1159119
3786 rs11630132
3787 exm1159147
3788 exm1159148
3789 rs12914266
3790 rs2289578
3791 exm1159160
3792 rs2289579
3793 exm2252167
3794 exm-rs12594515
3795 rs951295
3796 rs4776235
3797 rs12148892
3798 exm2252168
3799 rs11632150
3800 rs4408481
3801 rs939424
3802 rs4774754
3803 rs17636594
3804 rs3848111
3805 rs2248216
3806 rs2463395
3807 rs2460641
3808 rs11638436
3809 rs16948518
3810 rs7166380
3811 rs1918498
3812 rs12900823
3813 rs4480736
3814 rs11852360
3815 rs8033052
3816 rs10152524
3

4284 exm1160650
4285 rs2899441
4286 exm1160663
4287 exm1160664
4288 rs12148767
4289 exm1160674
4290 exm1160677
4291 rs7173644
4292 rs12916154
4293 exm1160688
4294 exm1160689
4295 exm1160691
4296 exm1160692
4297 exm1160701
4298 rs6493392
4299 exm1160707
4300 rs8039142
4301 rs6493393
4302 exm1160717
4303 exm1160718
4304 exm1160722
4305 rs11632984
4306 rs8041864
4307 rs2899446
4308 rs11854435
4309 rs7165755
4310 rs13380264
4311 rs7163415
4312 rs12903325
4313 rs8031076
4314 exm1160742
4315 exm1160744
4316 rs8040030
4317 rs11631945
4318 rs2414018
4319 rs6493402
4320 rs11853852
4321 rs7176805
4322 rs8038609
4323 rs6493403
4324 rs7177316
4325 rs12901981
4326 rs2414027
4327 rs11070752
4328 rs4775860
4329 rs11070759
4330 exm2264574
4331 rs1549520
4332 rs7176501
4333 exm1160760
4334 rs933857
4335 exm1160795
4336 rs7166052
4337 exm1160840
4338 exm1160855
4339 exm1160882
4340 rs2853766
4341 exm1160896
4342 rs860526
4343 exm1160903
4344 rs12438041
4345 rs3784296
4346 rs7173127
4347 rs12908591
4348 

4814 rs1189333
4815 exm-rs7168365
4816 rs7182198
4817 rs8026294
4818 rs473664
4819 rs11856406
4820 rs17630697
4821 rs10518725
4822 rs563716
4823 rs10220852
4824 rs957284
4825 rs12440844
4826 rs1906429
4827 rs11632741
4828 rs1906433
4829 rs9672398
4830 rs4625672
4831 rs4448885
4832 rs4332691
4833 rs16966276
4834 rs2899517
4835 rs1906402
4836 rs1906405
4837 exm1163191
4838 exm1163197
4839 exm1163199
4840 exm1163207
4841 rs16966350
4842 rs512563
4843 exm-rs10518733
4844 rs10518733
4845 rs17730436
4846 exm-rs491567
4847 rs689631
4848 rs537025
4849 rs11635030
4850 rs527139
4851 rs989790
4852 rs8182039
4853 rs7174820
4854 rs7176417
4855 exm1163241
4856 exm2252202
4857 rs8038719
4858 exm1163278
4859 variant.35832
4860 exm1834604
4861 exm1163299
4862 rs690337
4863 exm1163303
4864 rs1381194
4865 rs956553
4866 rs518263
4867 exm2264558
4868 rs6493645
4869 exm1163319
4870 rs2470076
4871 rs2459367
4872 rs2169344
4873 rs2414255
4874 rs11635018
4875 rs11853051
4876 rs4553571
4877 rs1841575
4878 rs515

5344 rs4774257
5345 rs734489
5346 rs2431027
5347 rs1664476
5348 rs1664465
5349 rs1567615
5350 rs907473
5351 rs1664454
5352 rs1706386
5353 rs4774260
5354 rs2440968
5355 rs1706403
5356 rs1567620
5357 rs12909385
5358 rs11071314
5359 rs1664436
5360 rs7494888
5361 rs11857181
5362 rs16977493
5363 rs11855618
5364 exm1164773
5365 exm1164796
5366 exm1164804
5367 exm1164808
5368 exm1164816
5369 exm1164818
5370 exm1164819
5371 exm1164825
5372 exm1835304
5373 exm1164837
5374 exm1164841
5375 exm1164850
5376 exm1164857
5377 rs8034215
5378 exm1164859
5379 exm1164864
5380 rs11071320
5381 exm1164878
5382 rs1280410
5383 exm-rs2934442
5384 rs12440928
5385 rs7170267
5386 rs1292363
5387 rs4774948
5388 rs16977561
5389 exm1164901
5390 exm1164910
5391 rs7172031
5392 rs1995990
5393 exm1164929
5394 exm1164945
5395 exm1164949
5396 rs2641574
5397 rs2279605
5398 rs16977585
5399 rs8026794
5400 exm1164955
5401 rs1620402
5402 exm1164958
5403 rs1625458
5404 exm1164962
5405 exm1164967
5406 exm1164968
5407 exm1164986
54

5875 exm1166536
5876 exm1166539
5877 exm1166578
5878 rs12900881
5879 rs3743267
5880 rs7180766
5881 exm-rs3743266
5882 exm1166608
5883 rs7183916
5884 rs1866007
5885 rs11635975
5886 rs8033552
5887 rs10431796
5888 exm1166659
5889 exm2260452
5890 rs11632600
5891 rs340005
5892 rs2289163
5893 rs339969
5894 exm1166666
5895 exm-rs340029
5896 rs7172348
5897 rs2113943
5898 exm-rs3784609
5899 rs184638
5900 rs28724570
5901 rs11630262
5902 rs1834336
5903 rs339996
5904 rs974065
5905 rs1657797
5906 rs11629660
5907 rs9920661
5908 rs11629812
5909 rs2433025
5910 rs2414680
5911 rs1863270
5912 rs12899193
5913 exm2252230
5914 rs341413
5915 rs4775292
5916 rs1318929
5917 rs7183955
5918 rs17270362
5919 rs17204426
5920 rs9302215
5921 rs12591650
5922 rs11637671
5923 rs922782
5924 rs2899662
5925 rs12915776
5926 rs10162630
5927 rs17303174
5928 rs341366
5929 rs7171681
5930 rs10152719
5931 rs341382
5932 rs8024133
5933 rs10519080
5934 rs341411
5935 rs2607582
5936 rs8041466
5937 rs10851686
5938 rs1482058
5939 rs19026

6407 exm1168070
6408 exm1168071
6409 exm1168081
6410 exm1168084
6411 exm1168086
6412 rs4412925
6413 rs11630290
6414 exm1168148
6415 exm1168166
6416 var_15_63933761
6417 rs10519223
6418 exm1168216
6419 exm1168218
6420 exm1168231
6421 exm1168267
6422 exm1168269
6423 exm1168271
6424 exm1168299
6425 exm1836672
6426 exm1168320
6427 exm1168325
6428 exm2223631
6429 exm1836687
6430 exm1168356
6431 exm1168359
6432 exm1168360
6433 rs2228510
6434 newrs191410508
6435 exm1168382
6436 exm1168387
6437 exm1168408
6438 exm1168409
6439 exm-rs4411464
6440 exm1168438
6441 rs7176133
6442 exm1168470
6443 exm1168479
6444 exm1168488
6445 exm1168507
6446 exm1168519
6447 exm1168535
6448 exm1168553
6449 rs11632919
6450 rs17188807
6451 rs4777160
6452 rs17189345
6453 rs2197259
6454 rs925248
6455 rs11071770
6456 rs7169989
6457 rs7177662
6458 exm1168582
6459 exm1168588
6460 exm1168606
6461 exm1168614
6462 exm1168624
6463 rs4776266
6464 rs12908891
6465 rs11856257
6466 rs6494457
6467 rs8030264
6468 rs4776273
6469 exm1

6926 rs16953584
6927 rs8024238
6928 rs11636457
6929 rs11636413
6930 exm2272259
6931 rs7173030
6932 rs11858577
6933 exm1171780
6934 exm1171782
6935 rs12593878
6936 rs11636179
6937 rs10775190
6938 rs12438625
6939 rs7496700
6940 rs4381541
6941 rs7183242
6942 rs6494602
6943 rs1987318
6944 rs6494611
6945 rs8035054
6946 rs7495569
6947 rs4776329
6948 rs3079
6949 rs3852629
6950 rs1973147
6951 exm2260461
6952 rs266280
6953 rs898889
6954 rs447287
6955 rs7168889
6956 rs266372
6957 rs7175102
6958 rs7179528
6959 rs266293
6960 rs12908113
6961 rs3803519
6962 rs10518702
6963 rs372334
6964 rs615404
6965 rs2100856
6966 rs12101383
6967 rs4776862
6968 rs266312
6969 rs266274
6970 rs2974282
6971 rs2133241
6972 rs403844
6973 rs12902036
6974 rs4776870
6975 rs2615192
6976 rs904366
6977 rs7173609
6978 rs266339
6979 rs6494623
6980 rs6494625
6981 rs3809572
6982 rs7162912
6983 rs10518707
6984 rs4776881
6985 rs4776888
6986 rs2118612
6987 rs1992215
6988 rs11071938
6989 rs12102171
6990 rs4147358
6991 rs2118610
6992 r

7455 rs8037348
7456 rs7164206
7457 rs4777311
7458 exm-rs1549318
7459 rs1549318
7460 exm2252276
7461 rs11072220
7462 exm1173524
7463 exm1173532
7464 exm1173560
7465 exm1173583
7466 rs3809562
7467 exm1173594
7468 exm1173608
7469 exm1173617
7470 rs1528827
7471 rs2415100
7472 exm1173644
7473 rs11853140
7474 rs11634492
7475 exm1173674
7476 exm1173682
7477 exm1173686
7478 rs1713551
7479 exm1173694
7480 rs1614625
7481 rs2415105
7482 exm2273527
7483 rs6494895
7484 exm1173709
7485 rs598472
7486 rs16955200
7487 rs1348318
7488 rs660620
7489 rs16955258
7490 rs6494899
7491 rs1561887
7492 rs11853577
7493 rs2415112
7494 rs4777346
7495 rs961577
7496 rs2415111
7497 rs7173870
7498 exm-rs7174934
7499 rs7175127
7500 rs9806177
7501 rs983574
7502 rs3743111
7503 rs4777363
7504 rs6494903
7505 rs11853359
7506 rs11858540
7507 rs1473608
7508 rs1441356
7509 rs4777372
7510 exm-rs12899618
7511 rs12899618
7512 rs16955491
7513 rs2028465
7514 rs11632680
7515 rs2165488
7516 rs10518959
7517 rs7165378
7518 rs12442632
751

7979 rs2075593
7980 exm1176498
7981 exm1176499
7982 rs741761
7983 exm1176531
7984 exm1176545
7985 exm1176550
7986 exm1176561
7987 exm1176577
7988 exm1176580
7989 exm1176586
7990 rs11857558
7991 rs11852760
7992 rs28362885
7993 rs1992145
7994 exm1176599
7995 newrs180867141
7996 exm2223796
7997 variant.36740
7998 exm1176625
7999 exm1176640
8000 rs4544202
8001 rs10459601
8002 rs8041357
8003 exm-rs7497036
8004 exm2223803
8005 exm2272315
8006 exm2273529
8007 rs11072494
8008 rs6495117
8009 exm1176750
8010 exm1176759
8011 exm1176774
8012 exm2252294
8013 rs11072496
8014 exm1176800
8015 exm1176810
8016 exm1176820
8017 exm1176838
8018 exm1176843
8019 exm1176844
8020 kgp19913951
8021 exm1176859
8022 exm1176861
8023 rs1048943
8024 exm1176862
8025 exm1176867
8026 exm1176870
8027 rs2856833
8028 exm1176880
8029 exm1176883
8030 SNP76
8031 variant.36760
8032 exm1176891
8033 exm1176898
8034 exm1176910
8035 exm1176911
8036 exm1176921
8037 exm1176924
8038 exm1176926
8039 rs2470893
8040 rs12441817
8041 exm-

8499 rs2241885
8500 rs8030999
8501 exm1179858
8502 exm1179859
8503 exm2223884
8504 exm1179874
8505 exm1179880
8506 exm1179884
8507 rs4886992
8508 rs7173128
8509 exm1179890
8510 exm1179893
8511 rs11631180
8512 rs8039850
8513 exm1179927
8514 exm1179936
8515 exm1179941
8516 rs2867922
8517 exm1179942
8518 exm1179944
8519 exm1179950
8520 rs9806257
8521 exm1179980
8522 exm2276394
8523 exm1179995
8524 rs3784327
8525 exm1180011
8526 rs8036417
8527 exm1180026
8528 exm1180039
8529 exm1180046
8530 exm1180053
8531 rs3816253
8532 exm1180071
8533 exm1180080
8534 rs11638547
8535 exm1180096
8536 exm1180102
8537 exm1180111
8538 rs2304823
8539 exm1180172
8540 exm1180202
8541 exm1180203
8542 rs4887012
8543 exm1180207
8544 rs11629576
8545 rs11072745
8546 rs7179157
8547 rs4886564
8548 exm-rs1533665
8549 rs2013093
8550 rs1010389
8551 rs12439144
8552 rs12916463
8553 rs4887024
8554 rs11639191
8555 rs2139444
8556 newrs142025971
8557 exm1180227
8558 rs3743084
8559 exm1180231
8560 rs17402006
8561 exm1180248
8562

9028 rs8025914
9029 rs11633987
9030 rs7166728
9031 rs1046417
9032 rs10431814
9033 rs2759321
9034 rs941984
9035 rs12437679
9036 exm2264567
9037 rs925111
9038 rs4778856
9039 rs748696
9040 rs2903414
9041 rs1077540
9042 rs2055713
9043 rs12594499
9044 rs17228225
9045 rs906433
9046 rs906435
9047 rs12372944
9048 rs2135881
9049 rs11629743
9050 rs8025378
9051 exm1182146
9052 exm1182159
9053 newrs144446375
9054 rs6495529
9055 exm1182187
9056 exm1182193
9057 exm1182204
9058 exm1182209
9059 exm1182216
9060 exm1182228
9061 rs11072956
9062 exm1182257
9063 exm1182263
9064 rs11072963
9065 exm1182272
9066 exm1182285
9067 exm1182321
9068 exm1182324
9069 exm1182327
9070 exm1182332
9071 exm1182336
9072 exm1182341
9073 rs8039384
9074 rs8039607
9075 exm1182373
9076 exm1182374
9077 rs10519306
9078 rs16972624
9079 exm1182394
9080 rs7163695
9081 rs7178569
9082 rs11629917
9083 rs11630390
9084 rs4778878
9085 rs1509547
9086 rs16972706
9087 rs7180686
9088 rs7162727
9089 rs1879454
9090 rs1509559
9091 exm-rs1317060


9552 exm1184734
9553 exm1184743
9554 exm1184768
9555 exm1184775
9556 exm1184777
9557 exm1184782
9558 exm1184794
9559 rs306202
9560 exm1184796
9561 exm1184806
9562 exm1184807
9563 exm1184812
9564 exm1184843
9565 exm1184846
9566 exm1184855
9567 exm1184860
9568 exm1184874
9569 exm1184882
9570 exm1184888
9571 exm1184890
9572 exm1184895
9573 exm1184897
9574 exm1184899
9575 exm2252345
9576 exm1184911
9577 exm1184942
9578 rs9944229
9579 exm1184957
9580 exm1184987
9581 exm1184993
9582 rs12101610
9583 rs4842994
9584 kgp19816102
9585 rs12442557
9586 exm1185015
9587 kgp28512179
9588 exm1185026
9589 rs8037320
9590 rs1545471
9591 exm1185034
9592 kgp19764014
9593 kgp12128537
9594 kgp11217650
9595 rs896363
9596 exm1185057
9597 rs2290272
9598 exm1185058
9599 rs8040847
9600 rs8033895
9601 exm1185068
9602 kgp8515329
9603 rs2290269
9604 rs4247408
9605 rs4076648
9606 exm1185079
9607 rs896361
9608 exm1185082
9609 kgp20012268
9610 rs4980345
9611 exm1185089
9612 exm1185094
9613 rs4980344
9614 rs12915914
9615

10075 rs1560977
10076 rs16940915
10077 rs7176429
10078 rs1560975
10079 rs2117655
10080 rs1369430
10081 rs10163131
10082 rs11855377
10083 rs1946697
10084 rs8041239
10085 variant.37310
10086 rs8031871
10087 rs11631508
10088 rs13380271
10089 rs2018052
10090 rs11073755
10091 rs16941103
10092 rs8030107
10093 rs10520672
10094 rs12594283
10095 rs4887350
10096 rs3825885
10097 rs3825884
10098 rs16941241
10099 rs3784413
10100 rs11636250
10101 rs9806762
10102 rs2349057
10103 rs1104765
10104 rs8035239
10105 rs4887368
10106 exm1186208
10107 exm1186213
10108 exm2252357
10109 rs16941334
10110 rs12148845
10111 rs11073767
10112 rs6496466
10113 rs6496469
10114 rs4887381
10115 exm2264582
10116 rs8033396
10117 rs11857753
10118 rs11635443
10119 rs4887399
10120 rs4887400
10121 rs744994
10122 rs7164988
10123 rs1530310
10124 rs12593926
10125 rs1530309
10126 rs10852095
10127 rs958700
10128 rs7182916
10129 rs11636752
10130 rs1442291
10131 rs9652509
10132 rs10451018
10133 rs11855600
10134 exm1186258
10135 exm118

10570 rs8040937
10571 exm1188983
10572 exm2252374
10573 exm1189008
10574 exm1189013
10575 rs2970359
10576 exm1189077
10577 exm1189088
10578 exm1189113
10579 exm1189121
10580 exm1189125
10581 rs11630814
10582 rs9672249
10583 rs4553601
10584 rs4436756
10585 rs4609823
10586 rs7178679
10587 rs964021
10588 rs11853144
10589 rs1877782
10590 rs8042171
10591 exm2267804
10592 rs7171150
10593 rs2352209
10594 rs7179795
10595 rs8036117
10596 exm1189167
10597 rs1543115
10598 rs4932305
10599 exm1189181
10600 rs3826001
10601 exm1189207
10602 rs8030039
10603 exm1189221
10604 exm1189236
10605 exm1189238
10606 exm1189248
10607 exm1189249
10608 exm1189256
10609 exm1189259
10610 exm2252375
10611 rs3751656
10612 exm1189283
10613 rs7024
10614 exm1189325
10615 rs7342665
10616 exm1189336
10617 exm1189341
10618 exm1189347
10619 exm1189350
10620 exm1189371
10621 exm1189387
10622 exm1189392
10623 exm1189399
10624 exm1189415
10625 exm1189417
10626 rs12912620
10627 exm1189432
10628 exm1189455
10629 exm1189459
10630

11063 kgp19868604
11064 rs11631374
11065 kgp3881218
11066 kgp1928601
11067 kgp1238391
11068 kgp19953641
11069 rs1053909
11070 rs917356
11071 rs886913
11072 rs758132
11073 rs7163817
11074 rs12910687
11075 rs1479521
11076 rs4778038
11077 rs4778039
11078 rs16946538
11079 rs4777736
11080 rs2079597
11081 rs12902121
11082 rs4627290
11083 rs989989
11084 rs12440538
11085 rs1026759
11086 rs1479520
11087 rs4002407
11088 rs8032711
11089 rs16946596
11090 rs8035845
11091 rs8026156
11092 rs4777824
11093 rs4777675
11094 rs16946659
11095 rs8027648
11096 rs4777930
11097 rs11630088
11098 rs6496922
11099 rs12912184
11100 rs4777959
11101 rs10438441
11102 rs7171020
11103 rs936929
11104 rs1551994
11105 rs899528
11106 rs7175123
11107 rs4777965
11108 rs4564522
11109 rs936923
11110 rs936925
11111 rs2101436
11112 rs8040217
11113 exm-rs3759917
11114 rs3759917
11115 rs2387806
11116 rs11074066
11117 rs11853083
11118 rs3848153
11119 rs3931230
11120 rs2168351
11121 rs1455780
11122 rs1455777
11123 rs2045268
11124 rs4

11568 rs2894895
11569 rs1598909
11570 rs1471169
11571 rs2125311
11572 rs7176581
11573 rs7177791
11574 rs11635926
11575 exm-rs17733968
11576 rs989141
11577 rs920902
11578 rs290673
11579 rs10520771
11580 rs9920011
11581 rs188324
11582 rs8038195
11583 rs12101914
11584 rs12101936
11585 rs7177948
11586 rs7178600
11587 rs290670
11588 rs7168936
11589 rs7171611
11590 rs2218461
11591 rs1596659
11592 rs8040230
11593 rs7176538
11594 rs16949909
11595 rs12912439
11596 rs7167022
11597 rs1444862
11598 exm2264584
11599 rs7174577
11600 rs12324403
11601 rs16975040
11602 rs16975043
11603 rs11853550
11604 rs1483299
11605 rs922910
11606 rs8041060
11607 rs4984575
11608 rs10520779
11609 rs8038720
11610 rs11858528
11611 rs12901265
11612 exm2272283
11613 rs7168315
11614 rs4984577
11615 rs1559571
11616 rs11629797
11617 rs12708555
11618 rs6496105
11619 rs17491052
11620 rs2052652
11621 rs10162686
11622 rs10520785
11623 rs2052650
11624 rs1834204
11625 rs10520798
11626 rs11857715
11627 rs7172578
11628 rs17576986
11

12072 rs7178383
12073 rs12902985
12074 rs11856426
12075 exm1192163
12076 exm-rs2871865
12077 rs2017500
12078 rs4966009
12079 exm2272328
12080 rs1319869
12081 rs11854132
12082 rs1319859
12083 rs7170035
12084 rs7173928
12085 rs1976667
12086 rs4966014
12087 exm1192186
12088 rs11857366
12089 rs907808
12090 exm2252408
12091 rs4966020
12092 rs10794486
12093 rs8037855
12094 rs7175052
12095 rs8033169
12096 rs925585
12097 rs12437851
12098 rs8032111
12099 rs4965436
12100 rs2684761
12101 rs7165366
12102 rs2670506
12103 rs2670504
12104 rs2715442
12105 rs4966035
12106 rs1879613
12107 exm1192202
12108 rs1464433
12109 exm1192219
12110 exm1192227
12111 exm1192232
12112 exm1192238
12113 exm1192264
12114 exm1192274
12115 exm1192278
12116 exm1192299
12117 variant.37740
12118 exm1192317
12119 rs2684800
12120 rs2684799
12121 rs8038056
12122 rs1568501
12123 rs12916884
12124 rs2684792
12125 rs12148482
12126 rs9672965
12127 exm1192356
12128 exm1192358
12129 exm1192359
12130 exm1192362
12131 exm1192364
12132 r

12572 rs12594881
12573 rs7174996
12574 rs12592409
12575 exm1194034
12576 exm1194047
12577 exm1194048
12578 exm1194056
12579 exm1194076
12580 rs10468171
12581 exm1194105
12582 exm1194107
12583 exm2223023
12584 exm1194119
12585 exm1194122
12586 exm1194127
12587 exm1194128
12588 exm1194129
12589 exm1824582
12590 exm1194142
12591 exm1194152
12592 rs3851685
12593 rs10459736
12594 rs11630047
12595 rs4965786
12596 rs897486
12597 rs2412132
12598 rs1902131
12599 rs723988
12600 rs12161956
12601 rs882708
12602 rs4965797
12603 rs4965800
12604 rs2168188
12605 rs4965803
12606 exm1194159
12607 exm1194168
12608 exm1194184
12609 exm1194185
12610 exm1194202
12611 exm1194215
12612 exm1194216
12613 rs4278729
12614 rs13329644
12615 rs11247274
12616 rs2012387
12617 rs2014185
12618 rs8043304
12619 rs8042424
12620 rs12148369
12621 rs752092
12622 rs8032573
12623 rs8031268
12624 rs11854862
12625 exm1194288
12626 variant.37849
12627 variant.37850
12628 exm1194304
12629 newrs150109462
12630 exm2252044
12631 rs716

ValueError: cannot reindex from a duplicate axis

In [17]:
print(len(features))

12766


In [18]:
# sorting features by relevence
features_sorted = sorted(features.items(), key=lambda x: x[1], reverse=True)

In [19]:
print(features_sorted[:100])

[('exm-rs12913832', 0.5160044899815651), ('rs1129038', 0.5124542061581199), ('rs2238289', 0.41869638543356946), ('exm-rs1667394', 0.3664464863402093), ('exm-rs916977', 0.3555417474848011), ('rs8039195', 0.30048828269637684), ('rs3935591', 0.2901727528487347), ('rs7495174', 0.24493963212057368), ('rs4778241', 0.23594634387709537), ('rs201872292', 0.22679662418671648), ('rs2240203', 0.20736268156931031), ('rs8028689', 0.20657268774885137), ('rs61756153', 0.19446930772574683), ('exm-rs4778138', 0.1768015597663876), ('rs11636232', 0.16625839423030997), ('rs7183877', 0.15847219048526864), ('rs61756152', 0.15517719471236976), ('rs7174027', 0.12100460534854895), ('rs4778232', 0.12094966284187376), ('rs7179994', 0.11272904167342307), ('rs1597196', 0.09816260705149597), ('rs1470608', 0.09368608877552274), ('rs11638265', 0.09244711944337126), ('rs7170451', 0.09168112300685206), ('rs1800411', 0.09102295432980068), ('rs12442147', 0.08633523051766012), ('rs1900758', 0.08323401712106031), ('rs717663

In [21]:
# Checking how many features is detected for different thresholds.
# We investigate all thresholds from 0% to 50%
# The number says how many polymorphisms has differentiating ability over the threshold.
# Threshold = 0% meand all our features.

for j in range(50):
    threshold = j / 100
    print(threshold, len([feature for feature in features_sorted if feature[1] > threshold]))

0.0 12766
0.01 3574
0.02 612
0.03 122
0.04 79
0.05 60
0.06 49
0.07 39
0.08 28
0.09 25
0.1 20
0.11 20
0.12 19
0.13 17
0.14 17
0.15 17
0.16 15
0.17 14
0.18 13
0.19 13
0.2 12
0.21 10
0.22 10
0.23 9
0.24 8
0.25 7
0.26 7
0.27 7
0.28 7
0.29 7
0.3 6
0.31 5
0.32 5
0.33 5
0.34 5
0.35 5
0.36 4
0.37 3
0.38 3
0.39 3
0.4 3
0.41 3
0.42 2
0.43 2
0.44 2
0.45 2
0.46 2
0.47 2
0.48 2
0.49 2


In [22]:
# For further analysis we set threshold to 0.03

# Saving two files:
# - features_all: all sorted features
# - features_top: 122 features with difference above 3%

import csv

with open("features_all.csv", 'w') as fw:
    wr = csv.writer(fw, quoting=csv.QUOTE_ALL)
    headers = ("polymorphism", "differentiation")
    wr.writerow(headers)
    for feature in features_sorted:
        wr.writerow(feature)
        
features_top = [feature for feature in features_sorted if feature[1] > 0.03]
with open("features_top.csv", 'w') as fw:
    wr = csv.writer(fw, quoting=csv.QUOTE_ALL)
    headers = ("polymorphism", "differentiation")
    wr.writerow(headers)
    for feature in features_top:
        wr.writerow(feature)


In [23]:
ft = pd.read_csv("features_top.csv")

In [24]:
ft.shape

(122, 2)

In [25]:
# Top 5 single features 
ft.head()

Unnamed: 0,polymorphism,differentiation
0,exm-rs12913832,0.516004
1,rs1129038,0.512454
2,rs2238289,0.418696
3,exm-rs1667394,0.366446
4,exm-rs916977,0.355542


In [27]:
# Ale top single features selectet to further analysis
for i, poly1 in enumerate(features_top):
    print (i, poly1[0])

0 exm-rs12913832
1 rs1129038
2 rs2238289
3 exm-rs1667394
4 exm-rs916977
5 rs8039195
6 rs3935591
7 rs7495174
8 rs4778241
9 rs201872292
10 rs2240203
11 rs8028689
12 rs61756153
13 exm-rs4778138
14 rs11636232
15 rs7183877
16 rs61756152
17 rs7174027
18 rs4778232
19 rs7179994
20 rs1597196
21 rs1470608
22 rs11638265
23 rs7170451
24 rs1800411
25 rs12442147
26 rs1900758
27 rs7176632
28 rs921221
29 rs7179419
30 rs749846
31 rs1037208
32 rs4778218
33 rs10852218
34 rs1800407
35 exm1143095
36 rs7178315
37 rs1800404
38 rs6497233
39 rs12324648
40 rs6497238
41 rs2594934
42 rs16950821
43 rs3947367
44 rs11631195
45 rs17674017
46 rs1018105
47 rs3794604
48 rs1382054
49 rs11637518
50 rs4778192
51 rs1498519
52 rs6497235
53 rs2279728
54 rs2703983
55 rs7162117
56 rs1562592
57 rs1498509
58 rs17565757
59 rs4322616
60 rs3751651
61 rs118112076
62 rs1947747
63 rs1375166
64 rs12592307
65 rs11074304
66 rs768546
67 rs3098583
68 rs989869
69 rs7164752
70 rs2311470
71 rs8035334
72 exm-rs1871017
73 rs1871017
74 rs11852452

### Analysis for pairr of polymorphisms

Here we investigate all pair of polymorphisms composed from our top 122.
We calculate the number of occurences for each variant for each class (bright eyes / dark eyes) and the percentage difference between classes. Thus we calculate mean percentage difference which is our measure of pairs of polymorphysms value (the highest the better).

In [29]:
# Simple example for one pair of selected polymorphisms. 
# Not essential to further analyses but left for better visualisation.

poly1 = 'exm-rs12913832'
poly2 = 'rs1129038'
df_input = ped[['color', poly1, poly2]]
df_input['sum'] = 0
df_input = df_input[df_input[poly1] != "('0', '0')"]
df_input = df_input[df_input[poly2] != "('0', '0')"]
data = df_input.groupby(['color', poly1, poly2]).count().astype(float)
print(data)
data['sum'][1] /= (data['sum'][1]).sum()
data['sum'][2] /= (data['sum'][2]).sum()
print(data)
data2 = abs(data['sum'][1] - data['sum'][2])
print(data2)
print(data2.sum())
print( data2.sum() / data2.count())

                                    sum
color exm-rs12913832 rs1129038         
1     ('A', 'A')     ('G', 'G')    13.0
      ('A', 'G')     ('G', 'A')   240.0
      ('G', 'G')     ('A', 'A')  2739.0
                     ('G', 'A')     9.0
2     ('A', 'A')     ('G', 'A')     1.0
                     ('G', 'G')   195.0
      ('A', 'G')     ('A', 'A')     3.0
                     ('G', 'A')  1311.0
                     ('G', 'G')     1.0
      ('G', 'G')     ('A', 'A')   250.0
                                      sum
color exm-rs12913832 rs1129038           
1     ('A', 'A')     ('G', 'G')  0.004332
      ('A', 'G')     ('G', 'A')  0.079973
      ('G', 'G')     ('A', 'A')  0.912696
                     ('G', 'A')  0.002999
2     ('A', 'A')     ('G', 'A')  0.000568
                     ('G', 'G')  0.110733
      ('A', 'G')     ('A', 'A')  0.001704
                     ('G', 'A')  0.744463
                     ('G', 'G')  0.000568
      ('G', 'G')     ('A', 'A')  0.141965
exm-rs12913832  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [30]:
# Pairs of polymorphysm

pairs_of_features = {}
train_pairs = pd.DataFrame()
for i, polym1 in enumerate(features_top):
    poly1 = polym1[0]
    for j, polym2 in enumerate(features_top[i+1:]):
        poly2 = polym2[0]
        print (i, j, poly1, poly2)
        df_input = ped[['color', poly1, poly2]]
        df_input['sum'] = 0
        df_input = df_input[df_input[poly1] != "('0', '0')"]
        df_input = df_input[df_input[poly2] != "('0', '0')"]
        data = df_input.groupby(['color', poly1, poly2]).count().astype(float)
        data['sum'][1] /= (data['sum'][1]).sum()
        data['sum'][2] /= (data['sum'][2]).sum()
        data2 = abs(data['sum'][1] - data['sum'][2])
        pairs_of_features[(poly1, poly2)] = data2.sum() / data2.count()

0 0 exm-rs12913832 rs1129038
0 1 exm-rs12913832 rs2238289
0 2 exm-rs12913832 exm-rs1667394
0 3 exm-rs12913832 exm-rs916977
0 4 exm-rs12913832 rs8039195


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


0 5 exm-rs12913832 rs3935591
0 6 exm-rs12913832 rs7495174
0 7 exm-rs12913832 rs4778241
0 8 exm-rs12913832 rs201872292
0 9 exm-rs12913832 rs2240203
0 10 exm-rs12913832 rs8028689
0 11 exm-rs12913832 rs61756153
0 12 exm-rs12913832 exm-rs4778138
0 13 exm-rs12913832 rs11636232
0 14 exm-rs12913832 rs7183877
0 15 exm-rs12913832 rs61756152
0 16 exm-rs12913832 rs7174027
0 17 exm-rs12913832 rs4778232
0 18 exm-rs12913832 rs7179994
0 19 exm-rs12913832 rs1597196
0 20 exm-rs12913832 rs1470608
0 21 exm-rs12913832 rs11638265
0 22 exm-rs12913832 rs7170451
0 23 exm-rs12913832 rs1800411
0 24 exm-rs12913832 rs12442147
0 25 exm-rs12913832 rs1900758
0 26 exm-rs12913832 rs7176632
0 27 exm-rs12913832 rs921221
0 28 exm-rs12913832 rs7179419
0 29 exm-rs12913832 rs749846
0 30 exm-rs12913832 rs1037208
0 31 exm-rs12913832 rs4778218
0 32 exm-rs12913832 rs10852218
0 33 exm-rs12913832 rs1800407
0 34 exm-rs12913832 exm1143095
0 35 exm-rs12913832 rs7178315
0 36 exm-rs12913832 rs1800404
0 37 exm-rs12913832 rs6497233
0 38

2 65 rs2238289 rs989869
2 66 rs2238289 rs7164752
2 67 rs2238289 rs2311470
2 68 rs2238289 rs8035334
2 69 rs2238289 exm-rs1871017
2 70 rs2238289 rs1871017
2 71 rs2238289 rs11852452
2 72 rs2238289 exm2272216
2 73 rs2238289 rs11858340
2 74 rs2238289 rs3097531
2 75 rs2238289 rs7176463
2 76 rs2238289 rs11853577
2 77 rs2238289 rs2086042
2 78 rs2238289 rs6497218
2 79 rs2238289 rs1603784
2 80 rs2238289 exm1143132
2 81 rs2238289 rs17648415
2 82 rs2238289 rs1968813
2 83 rs2238289 rs746435
2 84 rs2238289 rs7177529
2 85 rs2238289 rs1528461
2 86 rs2238289 rs591143
2 87 rs2238289 rs4778199
2 88 rs2238289 rs7175827
2 89 rs2238289 rs10775246
2 90 rs2238289 rs1490079
2 91 rs2238289 rs2339262
2 92 rs2238289 rs8040604
2 93 rs2238289 rs8023338
2 94 rs2238289 rs9920714
2 95 rs2238289 rs16950272
2 96 rs2238289 rs11853140
2 97 rs2238289 rs2439421
2 98 rs2238289 rs8036143
2 99 rs2238289 rs9972608
2 100 rs2238289 rs8035562
2 101 rs2238289 rs8042768
2 102 rs2238289 rs11857194
2 103 rs2238289 rs12905508
2 104 rs2

5 2 rs8039195 rs4778241
5 3 rs8039195 rs201872292
5 4 rs8039195 rs2240203
5 5 rs8039195 rs8028689
5 6 rs8039195 rs61756153
5 7 rs8039195 exm-rs4778138
5 8 rs8039195 rs11636232
5 9 rs8039195 rs7183877
5 10 rs8039195 rs61756152
5 11 rs8039195 rs7174027
5 12 rs8039195 rs4778232
5 13 rs8039195 rs7179994
5 14 rs8039195 rs1597196
5 15 rs8039195 rs1470608
5 16 rs8039195 rs11638265
5 17 rs8039195 rs7170451
5 18 rs8039195 rs1800411
5 19 rs8039195 rs12442147
5 20 rs8039195 rs1900758
5 21 rs8039195 rs7176632
5 22 rs8039195 rs921221
5 23 rs8039195 rs7179419
5 24 rs8039195 rs749846
5 25 rs8039195 rs1037208
5 26 rs8039195 rs4778218
5 27 rs8039195 rs10852218
5 28 rs8039195 rs1800407
5 29 rs8039195 exm1143095
5 30 rs8039195 rs7178315
5 31 rs8039195 rs1800404
5 32 rs8039195 rs6497233
5 33 rs8039195 rs12324648
5 34 rs8039195 rs6497238
5 35 rs8039195 rs2594934
5 36 rs8039195 rs16950821
5 37 rs8039195 rs3947367
5 38 rs8039195 rs11631195
5 39 rs8039195 rs17674017
5 40 rs8039195 rs1018105
5 41 rs8039195 rs3

7 97 rs7495174 rs11857194
7 98 rs7495174 rs12905508
7 99 rs7495174 rs405103
7 100 rs7495174 rs723615
7 101 rs7495174 rs2405779
7 102 rs7495174 rs11637291
7 103 rs7495174 rs12916261
7 104 rs7495174 rs12903325
7 105 rs7495174 rs552806
7 106 rs7495174 rs12439063
7 107 rs7495174 rs1874274
7 108 rs7495174 rs725458
7 109 rs7495174 rs8034091
7 110 rs7495174 exm-rs7172432
7 111 rs7495174 rs2946543
7 112 rs7495174 rs7173964
7 113 rs7495174 rs7403483
8 0 rs4778241 rs201872292
8 1 rs4778241 rs2240203
8 2 rs4778241 rs8028689
8 3 rs4778241 rs61756153
8 4 rs4778241 exm-rs4778138
8 5 rs4778241 rs11636232
8 6 rs4778241 rs7183877
8 7 rs4778241 rs61756152
8 8 rs4778241 rs7174027
8 9 rs4778241 rs4778232
8 10 rs4778241 rs7179994
8 11 rs4778241 rs1597196
8 12 rs4778241 rs1470608
8 13 rs4778241 rs11638265
8 14 rs4778241 rs7170451
8 15 rs4778241 rs1800411
8 16 rs4778241 rs12442147
8 17 rs4778241 rs1900758
8 18 rs4778241 rs7176632
8 19 rs4778241 rs921221
8 20 rs4778241 rs7179419
8 21 rs4778241 rs749846
8 22 r

10 74 rs2240203 rs1968813
10 75 rs2240203 rs746435
10 76 rs2240203 rs7177529
10 77 rs2240203 rs1528461
10 78 rs2240203 rs591143
10 79 rs2240203 rs4778199
10 80 rs2240203 rs7175827
10 81 rs2240203 rs10775246
10 82 rs2240203 rs1490079
10 83 rs2240203 rs2339262
10 84 rs2240203 rs8040604
10 85 rs2240203 rs8023338
10 86 rs2240203 rs9920714
10 87 rs2240203 rs16950272
10 88 rs2240203 rs11853140
10 89 rs2240203 rs2439421
10 90 rs2240203 rs8036143
10 91 rs2240203 rs9972608
10 92 rs2240203 rs8035562
10 93 rs2240203 rs8042768
10 94 rs2240203 rs11857194
10 95 rs2240203 rs12905508
10 96 rs2240203 rs405103
10 97 rs2240203 rs723615
10 98 rs2240203 rs2405779
10 99 rs2240203 rs11637291
10 100 rs2240203 rs12916261
10 101 rs2240203 rs12903325
10 102 rs2240203 rs552806
10 103 rs2240203 rs12439063
10 104 rs2240203 rs1874274
10 105 rs2240203 rs725458
10 106 rs2240203 rs8034091
10 107 rs2240203 exm-rs7172432
10 108 rs2240203 rs2946543
10 109 rs2240203 rs7173964
10 110 rs2240203 rs7403483
11 0 rs8028689 rs617

13 45 exm-rs4778138 rs4322616
13 46 exm-rs4778138 rs3751651
13 47 exm-rs4778138 rs118112076
13 48 exm-rs4778138 rs1947747
13 49 exm-rs4778138 rs1375166
13 50 exm-rs4778138 rs12592307
13 51 exm-rs4778138 rs11074304
13 52 exm-rs4778138 rs768546
13 53 exm-rs4778138 rs3098583
13 54 exm-rs4778138 rs989869
13 55 exm-rs4778138 rs7164752
13 56 exm-rs4778138 rs2311470
13 57 exm-rs4778138 rs8035334
13 58 exm-rs4778138 exm-rs1871017
13 59 exm-rs4778138 rs1871017
13 60 exm-rs4778138 rs11852452
13 61 exm-rs4778138 exm2272216
13 62 exm-rs4778138 rs11858340
13 63 exm-rs4778138 rs3097531
13 64 exm-rs4778138 rs7176463
13 65 exm-rs4778138 rs11853577
13 66 exm-rs4778138 rs2086042
13 67 exm-rs4778138 rs6497218
13 68 exm-rs4778138 rs1603784
13 69 exm-rs4778138 exm1143132
13 70 exm-rs4778138 rs17648415
13 71 exm-rs4778138 rs1968813
13 72 exm-rs4778138 rs746435
13 73 exm-rs4778138 rs7177529
13 74 exm-rs4778138 rs1528461
13 75 exm-rs4778138 rs591143
13 76 exm-rs4778138 rs4778199
13 77 exm-rs4778138 rs7175827


16 25 rs61756152 rs16950821
16 26 rs61756152 rs3947367
16 27 rs61756152 rs11631195
16 28 rs61756152 rs17674017
16 29 rs61756152 rs1018105
16 30 rs61756152 rs3794604
16 31 rs61756152 rs1382054
16 32 rs61756152 rs11637518
16 33 rs61756152 rs4778192
16 34 rs61756152 rs1498519
16 35 rs61756152 rs6497235
16 36 rs61756152 rs2279728
16 37 rs61756152 rs2703983
16 38 rs61756152 rs7162117
16 39 rs61756152 rs1562592
16 40 rs61756152 rs1498509
16 41 rs61756152 rs17565757
16 42 rs61756152 rs4322616
16 43 rs61756152 rs3751651
16 44 rs61756152 rs118112076
16 45 rs61756152 rs1947747
16 46 rs61756152 rs1375166
16 47 rs61756152 rs12592307
16 48 rs61756152 rs11074304
16 49 rs61756152 rs768546
16 50 rs61756152 rs3098583
16 51 rs61756152 rs989869
16 52 rs61756152 rs7164752
16 53 rs61756152 rs2311470
16 54 rs61756152 rs8035334
16 55 rs61756152 exm-rs1871017
16 56 rs61756152 rs1871017
16 57 rs61756152 rs11852452
16 58 rs61756152 exm2272216
16 59 rs61756152 rs11858340
16 60 rs61756152 rs3097531
16 61 rs617561

19 23 rs7179994 rs3947367
19 24 rs7179994 rs11631195
19 25 rs7179994 rs17674017
19 26 rs7179994 rs1018105
19 27 rs7179994 rs3794604
19 28 rs7179994 rs1382054
19 29 rs7179994 rs11637518
19 30 rs7179994 rs4778192
19 31 rs7179994 rs1498519
19 32 rs7179994 rs6497235
19 33 rs7179994 rs2279728
19 34 rs7179994 rs2703983
19 35 rs7179994 rs7162117
19 36 rs7179994 rs1562592
19 37 rs7179994 rs1498509
19 38 rs7179994 rs17565757
19 39 rs7179994 rs4322616
19 40 rs7179994 rs3751651
19 41 rs7179994 rs118112076
19 42 rs7179994 rs1947747
19 43 rs7179994 rs1375166
19 44 rs7179994 rs12592307
19 45 rs7179994 rs11074304
19 46 rs7179994 rs768546
19 47 rs7179994 rs3098583
19 48 rs7179994 rs989869
19 49 rs7179994 rs7164752
19 50 rs7179994 rs2311470
19 51 rs7179994 rs8035334
19 52 rs7179994 exm-rs1871017
19 53 rs7179994 rs1871017
19 54 rs7179994 rs11852452
19 55 rs7179994 exm2272216
19 56 rs7179994 rs11858340
19 57 rs7179994 rs3097531
19 58 rs7179994 rs7176463
19 59 rs7179994 rs11853577
19 60 rs7179994 rs208604

22 32 rs11638265 rs7162117
22 33 rs11638265 rs1562592
22 34 rs11638265 rs1498509
22 35 rs11638265 rs17565757
22 36 rs11638265 rs4322616
22 37 rs11638265 rs3751651
22 38 rs11638265 rs118112076
22 39 rs11638265 rs1947747
22 40 rs11638265 rs1375166
22 41 rs11638265 rs12592307
22 42 rs11638265 rs11074304
22 43 rs11638265 rs768546
22 44 rs11638265 rs3098583
22 45 rs11638265 rs989869
22 46 rs11638265 rs7164752
22 47 rs11638265 rs2311470
22 48 rs11638265 rs8035334
22 49 rs11638265 exm-rs1871017
22 50 rs11638265 rs1871017
22 51 rs11638265 rs11852452
22 52 rs11638265 exm2272216
22 53 rs11638265 rs11858340
22 54 rs11638265 rs3097531
22 55 rs11638265 rs7176463
22 56 rs11638265 rs11853577
22 57 rs11638265 rs2086042
22 58 rs11638265 rs6497218
22 59 rs11638265 rs1603784
22 60 rs11638265 exm1143132
22 61 rs11638265 rs17648415
22 62 rs11638265 rs1968813
22 63 rs11638265 rs746435
22 64 rs11638265 rs7177529
22 65 rs11638265 rs1528461
22 66 rs11638265 rs591143
22 67 rs11638265 rs4778199
22 68 rs11638265 

25 51 rs12442147 rs3097531
25 52 rs12442147 rs7176463
25 53 rs12442147 rs11853577
25 54 rs12442147 rs2086042
25 55 rs12442147 rs6497218
25 56 rs12442147 rs1603784
25 57 rs12442147 exm1143132
25 58 rs12442147 rs17648415
25 59 rs12442147 rs1968813
25 60 rs12442147 rs746435
25 61 rs12442147 rs7177529
25 62 rs12442147 rs1528461
25 63 rs12442147 rs591143
25 64 rs12442147 rs4778199
25 65 rs12442147 rs7175827
25 66 rs12442147 rs10775246
25 67 rs12442147 rs1490079
25 68 rs12442147 rs2339262
25 69 rs12442147 rs8040604
25 70 rs12442147 rs8023338
25 71 rs12442147 rs9920714
25 72 rs12442147 rs16950272
25 73 rs12442147 rs11853140
25 74 rs12442147 rs2439421
25 75 rs12442147 rs8036143
25 76 rs12442147 rs9972608
25 77 rs12442147 rs8035562
25 78 rs12442147 rs8042768
25 79 rs12442147 rs11857194
25 80 rs12442147 rs12905508
25 81 rs12442147 rs405103
25 82 rs12442147 rs723615
25 83 rs12442147 rs2405779
25 84 rs12442147 rs11637291
25 85 rs12442147 rs12916261
25 86 rs12442147 rs12903325
25 87 rs12442147 rs55

28 84 rs921221 rs552806
28 85 rs921221 rs12439063
28 86 rs921221 rs1874274
28 87 rs921221 rs725458
28 88 rs921221 rs8034091
28 89 rs921221 exm-rs7172432
28 90 rs921221 rs2946543
28 91 rs921221 rs7173964
28 92 rs921221 rs7403483
29 0 rs7179419 rs749846
29 1 rs7179419 rs1037208
29 2 rs7179419 rs4778218
29 3 rs7179419 rs10852218
29 4 rs7179419 rs1800407
29 5 rs7179419 exm1143095
29 6 rs7179419 rs7178315
29 7 rs7179419 rs1800404
29 8 rs7179419 rs6497233
29 9 rs7179419 rs12324648
29 10 rs7179419 rs6497238
29 11 rs7179419 rs2594934
29 12 rs7179419 rs16950821
29 13 rs7179419 rs3947367
29 14 rs7179419 rs11631195
29 15 rs7179419 rs17674017
29 16 rs7179419 rs1018105
29 17 rs7179419 rs3794604
29 18 rs7179419 rs1382054
29 19 rs7179419 rs11637518
29 20 rs7179419 rs4778192
29 21 rs7179419 rs1498519
29 22 rs7179419 rs6497235
29 23 rs7179419 rs2279728
29 24 rs7179419 rs2703983
29 25 rs7179419 rs7162117
29 26 rs7179419 rs1562592
29 27 rs7179419 rs1498509
29 28 rs7179419 rs17565757
29 29 rs7179419 rs432

32 37 rs4778218 rs2311470
32 38 rs4778218 rs8035334
32 39 rs4778218 exm-rs1871017
32 40 rs4778218 rs1871017
32 41 rs4778218 rs11852452
32 42 rs4778218 exm2272216
32 43 rs4778218 rs11858340
32 44 rs4778218 rs3097531
32 45 rs4778218 rs7176463
32 46 rs4778218 rs11853577
32 47 rs4778218 rs2086042
32 48 rs4778218 rs6497218
32 49 rs4778218 rs1603784
32 50 rs4778218 exm1143132
32 51 rs4778218 rs17648415
32 52 rs4778218 rs1968813
32 53 rs4778218 rs746435
32 54 rs4778218 rs7177529
32 55 rs4778218 rs1528461
32 56 rs4778218 rs591143
32 57 rs4778218 rs4778199
32 58 rs4778218 rs7175827
32 59 rs4778218 rs10775246
32 60 rs4778218 rs1490079
32 61 rs4778218 rs2339262
32 62 rs4778218 rs8040604
32 63 rs4778218 rs8023338
32 64 rs4778218 rs9920714
32 65 rs4778218 rs16950272
32 66 rs4778218 rs11853140
32 67 rs4778218 rs2439421
32 68 rs4778218 rs8036143
32 69 rs4778218 rs9972608
32 70 rs4778218 rs8035562
32 71 rs4778218 rs8042768
32 72 rs4778218 rs11857194
32 73 rs4778218 rs12905508
32 74 rs4778218 rs405103


35 81 exm1143095 rs8034091
35 82 exm1143095 exm-rs7172432
35 83 exm1143095 rs2946543
35 84 exm1143095 rs7173964
35 85 exm1143095 rs7403483
36 0 rs7178315 rs1800404
36 1 rs7178315 rs6497233
36 2 rs7178315 rs12324648
36 3 rs7178315 rs6497238
36 4 rs7178315 rs2594934
36 5 rs7178315 rs16950821
36 6 rs7178315 rs3947367
36 7 rs7178315 rs11631195
36 8 rs7178315 rs17674017
36 9 rs7178315 rs1018105
36 10 rs7178315 rs3794604
36 11 rs7178315 rs1382054
36 12 rs7178315 rs11637518
36 13 rs7178315 rs4778192
36 14 rs7178315 rs1498519
36 15 rs7178315 rs6497235
36 16 rs7178315 rs2279728
36 17 rs7178315 rs2703983
36 18 rs7178315 rs7162117
36 19 rs7178315 rs1562592
36 20 rs7178315 rs1498509
36 21 rs7178315 rs17565757
36 22 rs7178315 rs4322616
36 23 rs7178315 rs3751651
36 24 rs7178315 rs118112076
36 25 rs7178315 rs1947747
36 26 rs7178315 rs1375166
36 27 rs7178315 rs12592307
36 28 rs7178315 rs11074304
36 29 rs7178315 rs768546
36 30 rs7178315 rs3098583
36 31 rs7178315 rs989869
36 32 rs7178315 rs7164752
36 33

39 54 rs12324648 rs2339262
39 55 rs12324648 rs8040604
39 56 rs12324648 rs8023338
39 57 rs12324648 rs9920714
39 58 rs12324648 rs16950272
39 59 rs12324648 rs11853140
39 60 rs12324648 rs2439421
39 61 rs12324648 rs8036143
39 62 rs12324648 rs9972608
39 63 rs12324648 rs8035562
39 64 rs12324648 rs8042768
39 65 rs12324648 rs11857194
39 66 rs12324648 rs12905508
39 67 rs12324648 rs405103
39 68 rs12324648 rs723615
39 69 rs12324648 rs2405779
39 70 rs12324648 rs11637291
39 71 rs12324648 rs12916261
39 72 rs12324648 rs12903325
39 73 rs12324648 rs552806
39 74 rs12324648 rs12439063
39 75 rs12324648 rs1874274
39 76 rs12324648 rs725458
39 77 rs12324648 rs8034091
39 78 rs12324648 exm-rs7172432
39 79 rs12324648 rs2946543
39 80 rs12324648 rs7173964
39 81 rs12324648 rs7403483
40 0 rs6497238 rs2594934
40 1 rs6497238 rs16950821
40 2 rs6497238 rs3947367
40 3 rs6497238 rs11631195
40 4 rs6497238 rs17674017
40 5 rs6497238 rs1018105
40 6 rs6497238 rs3794604
40 7 rs6497238 rs1382054
40 8 rs6497238 rs11637518
40 9 rs

43 45 rs3947367 rs591143
43 46 rs3947367 rs4778199
43 47 rs3947367 rs7175827
43 48 rs3947367 rs10775246
43 49 rs3947367 rs1490079
43 50 rs3947367 rs2339262
43 51 rs3947367 rs8040604
43 52 rs3947367 rs8023338
43 53 rs3947367 rs9920714
43 54 rs3947367 rs16950272
43 55 rs3947367 rs11853140
43 56 rs3947367 rs2439421
43 57 rs3947367 rs8036143
43 58 rs3947367 rs9972608
43 59 rs3947367 rs8035562
43 60 rs3947367 rs8042768
43 61 rs3947367 rs11857194
43 62 rs3947367 rs12905508
43 63 rs3947367 rs405103
43 64 rs3947367 rs723615
43 65 rs3947367 rs2405779
43 66 rs3947367 rs11637291
43 67 rs3947367 rs12916261
43 68 rs3947367 rs12903325
43 69 rs3947367 rs552806
43 70 rs3947367 rs12439063
43 71 rs3947367 rs1874274
43 72 rs3947367 rs725458
43 73 rs3947367 rs8034091
43 74 rs3947367 exm-rs7172432
43 75 rs3947367 rs2946543
43 76 rs3947367 rs7173964
43 77 rs3947367 rs7403483
44 0 rs11631195 rs17674017
44 1 rs11631195 rs1018105
44 2 rs11631195 rs3794604
44 3 rs11631195 rs1382054
44 4 rs11631195 rs11637518
44

47 51 rs3794604 rs11853140
47 52 rs3794604 rs2439421
47 53 rs3794604 rs8036143
47 54 rs3794604 rs9972608
47 55 rs3794604 rs8035562
47 56 rs3794604 rs8042768
47 57 rs3794604 rs11857194
47 58 rs3794604 rs12905508
47 59 rs3794604 rs405103
47 60 rs3794604 rs723615
47 61 rs3794604 rs2405779
47 62 rs3794604 rs11637291
47 63 rs3794604 rs12916261
47 64 rs3794604 rs12903325
47 65 rs3794604 rs552806
47 66 rs3794604 rs12439063
47 67 rs3794604 rs1874274
47 68 rs3794604 rs725458
47 69 rs3794604 rs8034091
47 70 rs3794604 exm-rs7172432
47 71 rs3794604 rs2946543
47 72 rs3794604 rs7173964
47 73 rs3794604 rs7403483
48 0 rs1382054 rs11637518
48 1 rs1382054 rs4778192
48 2 rs1382054 rs1498519
48 3 rs1382054 rs6497235
48 4 rs1382054 rs2279728
48 5 rs1382054 rs2703983
48 6 rs1382054 rs7162117
48 7 rs1382054 rs1562592
48 8 rs1382054 rs1498509
48 9 rs1382054 rs17565757
48 10 rs1382054 rs4322616
48 11 rs1382054 rs3751651
48 12 rs1382054 rs118112076
48 13 rs1382054 rs1947747
48 14 rs1382054 rs1375166
48 15 rs138

52 7 rs6497235 rs3751651
52 8 rs6497235 rs118112076
52 9 rs6497235 rs1947747
52 10 rs6497235 rs1375166
52 11 rs6497235 rs12592307
52 12 rs6497235 rs11074304
52 13 rs6497235 rs768546
52 14 rs6497235 rs3098583
52 15 rs6497235 rs989869
52 16 rs6497235 rs7164752
52 17 rs6497235 rs2311470
52 18 rs6497235 rs8035334
52 19 rs6497235 exm-rs1871017
52 20 rs6497235 rs1871017
52 21 rs6497235 rs11852452
52 22 rs6497235 exm2272216
52 23 rs6497235 rs11858340
52 24 rs6497235 rs3097531
52 25 rs6497235 rs7176463
52 26 rs6497235 rs11853577
52 27 rs6497235 rs2086042
52 28 rs6497235 rs6497218
52 29 rs6497235 rs1603784
52 30 rs6497235 exm1143132
52 31 rs6497235 rs17648415
52 32 rs6497235 rs1968813
52 33 rs6497235 rs746435
52 34 rs6497235 rs7177529
52 35 rs6497235 rs1528461
52 36 rs6497235 rs591143
52 37 rs6497235 rs4778199
52 38 rs6497235 rs7175827
52 39 rs6497235 rs10775246
52 40 rs6497235 rs1490079
52 41 rs6497235 rs2339262
52 42 rs6497235 rs8040604
52 43 rs6497235 rs8023338
52 44 rs6497235 rs9920714
52 4

56 51 rs1562592 rs723615
56 52 rs1562592 rs2405779
56 53 rs1562592 rs11637291
56 54 rs1562592 rs12916261
56 55 rs1562592 rs12903325
56 56 rs1562592 rs552806
56 57 rs1562592 rs12439063
56 58 rs1562592 rs1874274
56 59 rs1562592 rs725458
56 60 rs1562592 rs8034091
56 61 rs1562592 exm-rs7172432
56 62 rs1562592 rs2946543
56 63 rs1562592 rs7173964
56 64 rs1562592 rs7403483
57 0 rs1498509 rs17565757
57 1 rs1498509 rs4322616
57 2 rs1498509 rs3751651
57 3 rs1498509 rs118112076
57 4 rs1498509 rs1947747
57 5 rs1498509 rs1375166
57 6 rs1498509 rs12592307
57 7 rs1498509 rs11074304
57 8 rs1498509 rs768546
57 9 rs1498509 rs3098583
57 10 rs1498509 rs989869
57 11 rs1498509 rs7164752
57 12 rs1498509 rs2311470
57 13 rs1498509 rs8035334
57 14 rs1498509 exm-rs1871017
57 15 rs1498509 rs1871017
57 16 rs1498509 rs11852452
57 17 rs1498509 exm2272216
57 18 rs1498509 rs11858340
57 19 rs1498509 rs3097531
57 20 rs1498509 rs7176463
57 21 rs1498509 rs11853577
57 22 rs1498509 rs2086042
57 23 rs1498509 rs6497218
57 24 

61 47 rs118112076 rs2405779
61 48 rs118112076 rs11637291
61 49 rs118112076 rs12916261
61 50 rs118112076 rs12903325
61 51 rs118112076 rs552806
61 52 rs118112076 rs12439063
61 53 rs118112076 rs1874274
61 54 rs118112076 rs725458
61 55 rs118112076 rs8034091
61 56 rs118112076 exm-rs7172432
61 57 rs118112076 rs2946543
61 58 rs118112076 rs7173964
61 59 rs118112076 rs7403483
62 0 rs1947747 rs1375166
62 1 rs1947747 rs12592307
62 2 rs1947747 rs11074304
62 3 rs1947747 rs768546
62 4 rs1947747 rs3098583
62 5 rs1947747 rs989869
62 6 rs1947747 rs7164752
62 7 rs1947747 rs2311470
62 8 rs1947747 rs8035334
62 9 rs1947747 exm-rs1871017
62 10 rs1947747 rs1871017
62 11 rs1947747 rs11852452
62 12 rs1947747 exm2272216
62 13 rs1947747 rs11858340
62 14 rs1947747 rs3097531
62 15 rs1947747 rs7176463
62 16 rs1947747 rs11853577
62 17 rs1947747 rs2086042
62 18 rs1947747 rs6497218
62 19 rs1947747 rs1603784
62 20 rs1947747 exm1143132
62 21 rs1947747 rs17648415
62 22 rs1947747 rs1968813
62 23 rs1947747 rs746435
62 24 r

67 14 rs3098583 rs1603784
67 15 rs3098583 exm1143132
67 16 rs3098583 rs17648415
67 17 rs3098583 rs1968813
67 18 rs3098583 rs746435
67 19 rs3098583 rs7177529
67 20 rs3098583 rs1528461
67 21 rs3098583 rs591143
67 22 rs3098583 rs4778199
67 23 rs3098583 rs7175827
67 24 rs3098583 rs10775246
67 25 rs3098583 rs1490079
67 26 rs3098583 rs2339262
67 27 rs3098583 rs8040604
67 28 rs3098583 rs8023338
67 29 rs3098583 rs9920714
67 30 rs3098583 rs16950272
67 31 rs3098583 rs11853140
67 32 rs3098583 rs2439421
67 33 rs3098583 rs8036143
67 34 rs3098583 rs9972608
67 35 rs3098583 rs8035562
67 36 rs3098583 rs8042768
67 37 rs3098583 rs11857194
67 38 rs3098583 rs12905508
67 39 rs3098583 rs405103
67 40 rs3098583 rs723615
67 41 rs3098583 rs2405779
67 42 rs3098583 rs11637291
67 43 rs3098583 rs12916261
67 44 rs3098583 rs12903325
67 45 rs3098583 rs552806
67 46 rs3098583 rs12439063
67 47 rs3098583 rs1874274
67 48 rs3098583 rs725458
67 49 rs3098583 rs8034091
67 50 rs3098583 exm-rs7172432
67 51 rs3098583 rs2946543
67 

73 17 rs1871017 rs7175827
73 18 rs1871017 rs10775246
73 19 rs1871017 rs1490079
73 20 rs1871017 rs2339262
73 21 rs1871017 rs8040604
73 22 rs1871017 rs8023338
73 23 rs1871017 rs9920714
73 24 rs1871017 rs16950272
73 25 rs1871017 rs11853140
73 26 rs1871017 rs2439421
73 27 rs1871017 rs8036143
73 28 rs1871017 rs9972608
73 29 rs1871017 rs8035562
73 30 rs1871017 rs8042768
73 31 rs1871017 rs11857194
73 32 rs1871017 rs12905508
73 33 rs1871017 rs405103
73 34 rs1871017 rs723615
73 35 rs1871017 rs2405779
73 36 rs1871017 rs11637291
73 37 rs1871017 rs12916261
73 38 rs1871017 rs12903325
73 39 rs1871017 rs552806
73 40 rs1871017 rs12439063
73 41 rs1871017 rs1874274
73 42 rs1871017 rs725458
73 43 rs1871017 rs8034091
73 44 rs1871017 exm-rs7172432
73 45 rs1871017 rs2946543
73 46 rs1871017 rs7173964
73 47 rs1871017 rs7403483
74 0 rs11852452 exm2272216
74 1 rs11852452 rs11858340
74 2 rs11852452 rs3097531
74 3 rs11852452 rs7176463
74 4 rs11852452 rs11853577
74 5 rs11852452 rs2086042
74 6 rs11852452 rs6497218


80 13 rs2086042 rs2339262
80 14 rs2086042 rs8040604
80 15 rs2086042 rs8023338
80 16 rs2086042 rs9920714
80 17 rs2086042 rs16950272
80 18 rs2086042 rs11853140
80 19 rs2086042 rs2439421
80 20 rs2086042 rs8036143
80 21 rs2086042 rs9972608
80 22 rs2086042 rs8035562
80 23 rs2086042 rs8042768
80 24 rs2086042 rs11857194
80 25 rs2086042 rs12905508
80 26 rs2086042 rs405103
80 27 rs2086042 rs723615
80 28 rs2086042 rs2405779
80 29 rs2086042 rs11637291
80 30 rs2086042 rs12916261
80 31 rs2086042 rs12903325
80 32 rs2086042 rs552806
80 33 rs2086042 rs12439063
80 34 rs2086042 rs1874274
80 35 rs2086042 rs725458
80 36 rs2086042 rs8034091
80 37 rs2086042 exm-rs7172432
80 38 rs2086042 rs2946543
80 39 rs2086042 rs7173964
80 40 rs2086042 rs7403483
81 0 rs6497218 rs1603784
81 1 rs6497218 exm1143132
81 2 rs6497218 rs17648415
81 3 rs6497218 rs1968813
81 4 rs6497218 rs746435
81 5 rs6497218 rs7177529
81 6 rs6497218 rs1528461
81 7 rs6497218 rs591143
81 8 rs6497218 rs4778199
81 9 rs6497218 rs7175827
81 10 rs649721

88 29 rs1528461 exm-rs7172432
88 30 rs1528461 rs2946543
88 31 rs1528461 rs7173964
88 32 rs1528461 rs7403483
89 0 rs591143 rs4778199
89 1 rs591143 rs7175827
89 2 rs591143 rs10775246
89 3 rs591143 rs1490079
89 4 rs591143 rs2339262
89 5 rs591143 rs8040604
89 6 rs591143 rs8023338
89 7 rs591143 rs9920714
89 8 rs591143 rs16950272
89 9 rs591143 rs11853140
89 10 rs591143 rs2439421
89 11 rs591143 rs8036143
89 12 rs591143 rs9972608
89 13 rs591143 rs8035562
89 14 rs591143 rs8042768
89 15 rs591143 rs11857194
89 16 rs591143 rs12905508
89 17 rs591143 rs405103
89 18 rs591143 rs723615
89 19 rs591143 rs2405779
89 20 rs591143 rs11637291
89 21 rs591143 rs12916261
89 22 rs591143 rs12903325
89 23 rs591143 rs552806
89 24 rs591143 rs12439063
89 25 rs591143 rs1874274
89 26 rs591143 rs725458
89 27 rs591143 rs8034091
89 28 rs591143 exm-rs7172432
89 29 rs591143 rs2946543
89 30 rs591143 rs7173964
89 31 rs591143 rs7403483
90 0 rs4778199 rs7175827
90 1 rs4778199 rs10775246
90 2 rs4778199 rs1490079
90 3 rs4778199 rs

100 14 rs2439421 rs1874274
100 15 rs2439421 rs725458
100 16 rs2439421 rs8034091
100 17 rs2439421 exm-rs7172432
100 18 rs2439421 rs2946543
100 19 rs2439421 rs7173964
100 20 rs2439421 rs7403483
101 0 rs8036143 rs9972608
101 1 rs8036143 rs8035562
101 2 rs8036143 rs8042768
101 3 rs8036143 rs11857194
101 4 rs8036143 rs12905508
101 5 rs8036143 rs405103
101 6 rs8036143 rs723615
101 7 rs8036143 rs2405779
101 8 rs8036143 rs11637291
101 9 rs8036143 rs12916261
101 10 rs8036143 rs12903325
101 11 rs8036143 rs552806
101 12 rs8036143 rs12439063
101 13 rs8036143 rs1874274
101 14 rs8036143 rs725458
101 15 rs8036143 rs8034091
101 16 rs8036143 exm-rs7172432
101 17 rs8036143 rs2946543
101 18 rs8036143 rs7173964
101 19 rs8036143 rs7403483
102 0 rs9972608 rs8035562
102 1 rs9972608 rs8042768
102 2 rs9972608 rs11857194
102 3 rs9972608 rs12905508
102 4 rs9972608 rs405103
102 5 rs9972608 rs723615
102 6 rs9972608 rs2405779
102 7 rs9972608 rs11637291
102 8 rs9972608 rs12916261
102 9 rs9972608 rs12903325
102 10 rs

In [31]:
print(len(pairs_of_features))

7381


In [32]:
pairs_of_features_sorted = sorted(pairs_of_features.items(), key=lambda x: x[1], reverse=True)

In [33]:
print(pairs_of_features_sorted[:50])

[(('exm-rs12913832', 'rs1129038'), 0.5138738850568014), (('exm-rs12913832', 'rs118112076'), 0.3095249940830457), (('exm-rs12913832', 'rs201872292'), 0.308551762761853), (('exm-rs12913832', 'rs61756153'), 0.30852593506910087), (('exm-rs12913832', 'rs2240203'), 0.3083345951184059), (('exm-rs12913832', 'rs8028689'), 0.3082406054531842), (('exm-rs12913832', 'rs2238289'), 0.3077813876310403), (('rs1129038', 'rs118112076'), 0.3074608986332802), (('rs1129038', 'rs61756153'), 0.3065888126255851), (('rs1129038', 'rs201872292'), 0.30647623421354764), (('rs1129038', 'rs2240203'), 0.3062014882571385), (('rs1129038', 'rs8028689'), 0.30611120775613115), (('rs1129038', 'rs2238289'), 0.30576090576090575), (('rs2238289', 'rs61756153'), 0.27946234724892993), (('rs2238289', 'rs2240203'), 0.2790386608648689), (('rs2238289', 'rs8028689'), 0.2787426495008498), (('rs2238289', 'rs201872292'), 0.27805116174601396), (('rs2238289', 'exm-rs1667394'), 0.270094684854814), (('exm-rs12913832', 'rs11636232'), 0.258503

In [34]:
for j in range(50):
    threshold = j / 100
    print(threshold, len([feature for feature in pairs_of_features_sorted if feature[1] > threshold]))

0.0 7381
0.01 7381
0.02 5579
0.03 3318
0.04 2382
0.05 2026
0.06 1809
0.07 1374
0.08 1104
0.09 949
0.1 891
0.11 763
0.12 642
0.13 540
0.14 343
0.15 318
0.16 307
0.17 295
0.18 117
0.19 107
0.2 68
0.21 64
0.22 45
0.23 38
0.24 36
0.25 36
0.26 18
0.27 18
0.28 13
0.29 13
0.3 13
0.31 1
0.32 1
0.33 1
0.34 1
0.35 1
0.36 1
0.37 1
0.38 1
0.39 1
0.4 1
0.41 1
0.42 1
0.43 1
0.44 1
0.45 1
0.46 1
0.47 1
0.48 1
0.49 1


In [35]:
# Set threshold to 0.1 and count polymorphisms seen it top 891 pairs.

top_pairs_of_features = [feature for feature in pairs_of_features_sorted if feature[1] > 0.1]
stats = {}
for pair in top_pairs_of_features:
    for feature in pair[0]:
        try:
            stats[feature] += 1
        except:
            stats[feature] = 1
stats_sorted = sorted(stats.items(), key=lambda x: x[1], reverse=True)
print(stats_sorted)

[('exm-rs12913832', 121), ('rs1129038', 121), ('rs2238289', 121), ('exm-rs1667394', 121), ('exm-rs916977', 121), ('rs8039195', 118), ('rs3935591', 108), ('rs201872292', 23), ('rs7495174', 23), ('rs4778241', 21), ('rs7183877', 19), ('rs61756152', 19), ('rs118112076', 18), ('rs61756153', 18), ('rs2240203', 18), ('rs8028689', 18), ('rs11636232', 15), ('exm-rs4778138', 14), ('rs749846', 12), ('exm1143132', 12), ('rs1800407', 12), ('exm1143095', 12), ('rs7174027', 11), ('rs7162117', 10), ('rs7179994', 10), ('rs9972608', 8), ('rs3097531', 8), ('rs4778232', 8), ('rs1490079', 7), ('rs405103', 7), ('rs17648415', 7), ('rs723615', 7), ('rs8023338', 7), ('rs8034091', 7), ('rs7179419', 7), ('rs12903325', 7), ('rs12324648', 7), ('rs3794604', 7), ('rs16950821', 7), ('rs2405779', 7), ('rs2339262', 7), ('rs591143', 7), ('rs1597196', 7), ('rs8040604', 7), ('rs921221', 7), ('rs11631195', 7), ('rs7178315', 7), ('rs7403483', 7), ('exm-rs7172432', 7), ('rs4322616', 7), ('rs2279728', 7), ('rs725458', 7), ('r

In [36]:
# We see that there is a big difference between first 7 and the rest.

Top seven features from file provided by organizers

rs1129038

exm-rs12913832 

exm-rs916977

exm-rs1667394

rs8039195

rs3935591

rs2238289

They are the same as our top 7 (the order is slightly different)

### Corelation analysis

In [37]:
top_seven = [stat[0] for stat in stats_sorted[:7]]
print(top_seven)

['exm-rs12913832', 'rs1129038', 'rs2238289', 'exm-rs1667394', 'exm-rs916977', 'rs8039195', 'rs3935591']


In [38]:
to_corr = ped[['color'] + top_seven]
to_corr.head()

Unnamed: 0,color,exm-rs12913832,rs1129038,rs2238289,exm-rs1667394,exm-rs916977,rs8039195,rs3935591
0,1,"('G', 'G')","('A', 'A')","('T', 'T')","('A', 'A')","('G', 'G')","('T', 'T')","('G', 'G')"
1,2,"('A', 'A')","('G', 'G')","('C', 'T')","('G', 'A')","('A', 'G')","('C', 'T')","('A', 'G')"
2,2,"('G', 'G')","('A', 'A')","('T', 'T')","('A', 'A')","('G', 'G')","('T', 'T')","('G', 'G')"
3,2,"('A', 'G')","('G', 'A')","('T', 'T')","('A', 'A')","('G', 'G')","('T', 'T')","('G', 'G')"
4,2,"('A', 'G')","('G', 'A')","('T', 'T')","('A', 'A')","('G', 'G')","('T', 'T')","('0', '0')"


In [39]:
df_cont = pd.DataFrame()
for poly in to_corr.columns.values:
    if poly != 'color':
        df_input = to_corr[['color', poly]]
        df_input['sum'] = 0
        df_input = df_input[df_input[poly] != "('0', '0')"]
        data = df_input.groupby(['color', poly]).count().astype(float)
        print(poly)
        print(data)
        
        df_cont = df_cont.append(data['sum'],ignore_index=True)
df_cont = df_cont.transpose()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


exm-rs12913832
                         sum
color exm-rs12913832        
1     ('A', 'A')        13.0
      ('A', 'G')       240.0
      ('G', 'G')      2755.0
2     ('A', 'A')       196.0
      ('A', 'G')      1316.0
      ('G', 'G')       250.0
rs1129038
                     sum
color rs1129038         
1     ('A', 'A')  2741.0
      ('G', 'A')   249.0
      ('G', 'G')    13.0
2     ('A', 'A')   254.0
      ('G', 'A')  1313.0
      ('G', 'G')   196.0
rs2238289
                     sum
color rs2238289         
1     ('C', 'T')   158.0
      ('T', 'T')  2853.0
2     ('C', 'T')   809.0
      ('T', 'T')   908.0
exm-rs1667394
                        sum
color exm-rs1667394        
1     ('A', 'A')     2826.0
      ('G', 'A')      178.0
      ('G', 'G')        7.0
2     ('A', 'A')      686.0
      ('G', 'A')      981.0
      ('G', 'G')       97.0
exm-rs916977
                       sum
color exm-rs916977        
1     ('A', 'A')       6.0
      ('A', 'G')     177.0
      ('G', 'G')    2822

In [40]:
df_cont

Unnamed: 0,0,1,2,3,4,5,6
"(1, ('A', 'A'))",13.0,2741.0,,2826.0,6.0,,3.0
"(1, ('A', 'G'))",240.0,,,,177.0,,135.0
"(1, ('G', 'G'))",2755.0,13.0,,7.0,2822.0,,2849.0
"(2, ('A', 'A'))",196.0,254.0,,686.0,87.0,,52.0
"(2, ('A', 'G'))",1316.0,,,,960.0,,753.0
"(2, ('G', 'G'))",250.0,196.0,,97.0,715.0,,867.0
"(1, ('G', 'A'))",,249.0,,178.0,,,
"(2, ('G', 'A'))",,1313.0,,981.0,,,
"(1, ('C', 'T'))",,,158.0,,,157.0,
"(1, ('T', 'T'))",,,2853.0,,,2849.0,


In [41]:
df_cont.shape

(14, 7)

In [42]:
df_cont.corr()

Unnamed: 0,0,1,2,3,4,5,6
0,1.0,-0.473345,,-0.52218,0.967955,,0.936204
1,-0.473345,1.0,,0.971724,-0.525979,,-0.538902
2,,,1.0,,,0.99984,
3,-0.52218,0.971724,,1.0,-0.600009,,-0.620342
4,0.967955,-0.525979,,-0.600009,1.0,,0.99424
5,,,0.99984,,,1.0,
6,0.936204,-0.538902,,-0.620342,0.99424,,1.0


So among our 7 top we have 3 independent groups.

For prediction we choose first polymorphysm from each group.

* ['exm-rs12913832', 'exm-rs916977', 'rs3935591']

* ['rs1129038', 'exm-rs1667394']

* ['rs2238289', 'rs8039195']


## Analysis for set of polymorphisms

In [43]:
#features_top = ['exm-rs12913832', 'rs1129038', 'rs2238289']
#features_names_top = [x[0] for x in features_top]
features_names_top = ['exm-rs12913832', 'rs1129038', 'rs2238289']

In [46]:
from itertools import combinations

# Combinations of multiple polymorphysms
tuple_of_features = {}
train_tuple = pd.DataFrame()
for i, poly_tuple in enumerate(combinations(features_names_top, 3)):
    print(i, poly_tuple)
    df_input = ped[['color'] + [poly for poly in poly_tuple]]
    df_input['sum'] = 0
    for poly in poly_tuple:
        df_input = df_input[df_input[poly] != "('0', '0')"]
    data = df_input.groupby(['color'] + [poly for poly in poly_tuple]).count().astype(float)
    data['sum'][1] /= (data['sum'][1]).sum()
    data['sum'][2] /= (data['sum'][2]).sum()
    data2 = abs(data['sum'][1] - data['sum'][2])
    tuple_of_features[poly_tuple] = data2.sum() / data2.count()

0 ('exm-rs12913832', 'rs1129038', 'rs2238289')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [47]:
# TODO Sorry, no time. Left to show that such calculations can be made in future.
