In [259]:
from collections import Counter

import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

%matplotlib notebook


In [260]:
from bokeh.io import output_notebook
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.transform import cumsum

output_notebook()

In [261]:
meta = pd.read_csv('IDAT_dataset/meta.csv', sep='\t', header=None)
meta

Unnamed: 0,0,1,2
0,8TD32,9238348074,R02C02
1,7TD38,9238388096,R04C01
2,6TD35,9238348109,R03C01
3,8TD36,9238349162,R06C01
4,18TD36,9236445147,R01C02
5,6TD38,9238388096,R03C01
6,1TD38,9236445231,R04C02
7,8TD37,9236445226,R01C01
8,18TD35,9238348135,R06C02
9,15TD37,9236445226,R02C02


In [262]:
metaDict = {(row[1], row[2]): row[0] for i, row in meta.iterrows()}
meta['Region'] = 'Koura'
meta['Group_Label'] = 'Koura'
meta = meta.set_index(0)
meta.index.name = 'Id'
meta = meta[['Region', 'Group_Label']]

In [263]:
meta

Unnamed: 0_level_0,Region,Group_Label
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
8TD32,Koura,Koura
7TD38,Koura,Koura
6TD35,Koura,Koura
8TD36,Koura,Koura
18TD36,Koura,Koura
6TD38,Koura,Koura
1TD38,Koura,Koura
8TD37,Koura,Koura
18TD35,Koura,Koura
15TD37,Koura,Koura


In [264]:
def newID(row):
    return metaDict.get((row['FID'], row['IID']), row.IID)

#pcafile = 'NassimReichHO/pcs.txt'
pcafile = 'Koura2ReichPCA/pcs.txt'
pca = pd.read_csv(pcafile, delimiter='\t')

pca['Id'] = [newID(row) for i, row in pca.iterrows()]
pca = pca[~((pca.Id.str.startswith('R0')) & (pca.Id==pca.IID))] ## ignore some samples (we dont have in the metadata)
pca

Unnamed: 0,FID,IID,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,Id
0,132,AZR-1010,-0.048499,0.034557,0.043301,0.014663,0.020442,0.004713,0.049407,-0.007598,0.003987,-0.043725,AZR-1010
1,133,AZR-1012,-0.041418,0.016744,0.046862,0.018207,0.024628,-0.011168,0.053775,-0.017285,0.019168,-0.042185,AZR-1012
2,134,AZR-1013,-0.037900,0.031346,0.065967,0.020969,0.025136,0.004407,0.048848,-0.015180,0.007726,-0.044067,AZR-1013
3,135,AZR-1017,-0.045493,0.017920,0.038550,0.012423,0.026697,0.001368,0.038749,-0.008607,0.013319,-0.040788,AZR-1017
4,136,AZR-1018,-0.044423,0.024819,0.044767,0.012811,0.006833,0.000962,0.043968,-0.011994,0.014202,-0.032834,AZR-1018
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1038,9238348109,R03C01,-0.011842,0.019434,0.017846,-0.003264,-0.011879,0.013244,0.015995,0.013539,0.001881,0.006624,6TD35
1039,9238348135,R06C02,-0.020369,0.037580,0.019703,-0.006527,-0.000819,-0.008146,0.025697,0.015836,0.000785,-0.001451,18TD35
1040,9238349162,R06C01,0.370251,-0.268811,0.187583,0.487543,0.803633,-0.081847,-0.527633,-0.776412,-0.337711,-0.298868,8TD36
1041,9238388096,R03C01,-0.015948,0.032014,0.026339,0.003207,0.005445,0.009614,0.024386,-0.002006,0.000635,-0.010215,6TD38


In [265]:
reichMeta=pd.read_csv("Reich/v44.3_HO_public.anno", sep='\t')
reichMeta.columns = ['Index', 'Id', 'Id2',
       'Publication', 'contact',
       'Date',
       'Full_Date',
       'Group_Label', 'Locality', 'Region', 'Lat', 'Long',
       'Data_source', 'Cov_autosm',
       'SNPs_autosm', 'Sex',
       'Library_type',
       'ASSESSMENT']


In [266]:
## Cleaning/Filtering
reichMeta = reichMeta[~reichMeta.Group_Label.str.startswith('Ignore_')]
reichMeta = reichMeta[reichMeta.Date==0] ## only contemporary
reichMeta = reichMeta.set_index('Id') # ['Id','Region','Group_Label', ]].
#reichMeta = reichMeta[['Region','Group_Label']]

In [267]:
meta = pd.concat([reichMeta, meta])
meta

Unnamed: 0_level_0,Index,Id2,Publication,contact,Date,Full_Date,Group_Label,Locality,Region,Lat,Long,Data_source,Cov_autosm,SNPs_autosm,Sex,Library_type,ASSESSMENT
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
MAL-005,1798.0,MAL-005,SkoglundCell2017,Garrett Hellenthal / Saioa Lopez / Mark Thomas...,0.0,..,Malawi_Yao,Dedza // Yao,Malawi,-14.166667,34.33333,Fall2015,..,585645.0,M,..,PASS (genotyping)
MAL-009,1799.0,MAL-009,SkoglundCell2017,Garrett Hellenthal / Saioa Lopez / Mark Thomas...,0.0,..,Malawi_Yao,Machinga // Yao,Malawi,-14.862605,35.574122,Fall2015,..,582189.0,M,..,PASS (genotyping)
MAL-011,1800.0,MAL-011,SkoglundCell2017,Garrett Hellenthal / Saioa Lopez / Mark Thomas...,0.0,..,Malawi_Chewa,Mchinga // Chichewa,Malawi,-14.862605,35.574122,Fall2015,..,579844.0,M,..,PASS (genotyping)
MAL-012,1801.0,MAL-012,SkoglundCell2017,Garrett Hellenthal / Saioa Lopez / Mark Thomas...,0.0,..,Malawi_Chewa,Salima // Chichewa,Malawi,-13.75,34.5,Fall2015,..,585204.0,M,..,PASS (genotyping)
MAL-014,1802.0,MAL-014,SkoglundCell2017,Garrett Hellenthal / Saioa Lopez / Mark Thomas...,0.0,..,Malawi_Chewa,Nambuma // Chichewa,Malawi,-13.703473,33.597743,Fall2015,..,584410.0,M,..,PASS (genotyping)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15TD37,,,,,,,Koura,,Koura,,,,,,,,
9TD37,,,,,,,Koura,,Koura,,,,,,,,
26TD37,,,,,,,Koura,,Koura,,,,,,,,
23TD37,,,,,,,Koura,,Koura,,,,,,,,


In [268]:
pca = pca.set_index('Id')
pca

Unnamed: 0_level_0,FID,IID,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AZR-1010,132,AZR-1010,-0.048499,0.034557,0.043301,0.014663,0.020442,0.004713,0.049407,-0.007598,0.003987,-0.043725
AZR-1012,133,AZR-1012,-0.041418,0.016744,0.046862,0.018207,0.024628,-0.011168,0.053775,-0.017285,0.019168,-0.042185
AZR-1013,134,AZR-1013,-0.037900,0.031346,0.065967,0.020969,0.025136,0.004407,0.048848,-0.015180,0.007726,-0.044067
AZR-1017,135,AZR-1017,-0.045493,0.017920,0.038550,0.012423,0.026697,0.001368,0.038749,-0.008607,0.013319,-0.040788
AZR-1018,136,AZR-1018,-0.044423,0.024819,0.044767,0.012811,0.006833,0.000962,0.043968,-0.011994,0.014202,-0.032834
...,...,...,...,...,...,...,...,...,...,...,...,...
6TD35,9238348109,R03C01,-0.011842,0.019434,0.017846,-0.003264,-0.011879,0.013244,0.015995,0.013539,0.001881,0.006624
18TD35,9238348135,R06C02,-0.020369,0.037580,0.019703,-0.006527,-0.000819,-0.008146,0.025697,0.015836,0.000785,-0.001451
8TD36,9238349162,R06C01,0.370251,-0.268811,0.187583,0.487543,0.803633,-0.081847,-0.527633,-0.776412,-0.337711,-0.298868
6TD38,9238388096,R03C01,-0.015948,0.032014,0.026339,0.003207,0.005445,0.009614,0.024386,-0.002006,0.000635,-0.010215


In [269]:
pca = pca.join(meta).dropna(subset=['Region'])

In [270]:
pca

Unnamed: 0_level_0,FID,IID,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,...,Locality,Region,Lat,Long,Data_source,Cov_autosm,SNPs_autosm,Sex,Library_type,ASSESSMENT
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AZR-1010,132,AZR-1010,-0.048499,0.034557,0.043301,0.014663,0.020442,0.004713,0.049407,-0.007598,...,Azerbajan,Azerbaijan,40.06,47.19,Balanovsky,..,584472.0,M,..,PASS (genotyping)
AZR-1012,133,AZR-1012,-0.041418,0.016744,0.046862,0.018207,0.024628,-0.011168,0.053775,-0.017285,...,Azerbajan,Azerbaijan,40.06,47.19,Balanovsky,..,584794.0,M,..,PASS (genotyping)
AZR-1013,134,AZR-1013,-0.037900,0.031346,0.065967,0.020969,0.025136,0.004407,0.048848,-0.015180,...,Azerbajan,Azerbaijan,40.06,47.19,Balanovsky,..,583239.0,M,..,PASS (genotyping)
AZR-1017,135,AZR-1017,-0.045493,0.017920,0.038550,0.012423,0.026697,0.001368,0.038749,-0.008607,...,Azerbajan,Azerbaijan,40.06,47.19,Balanovsky,..,585153.0,M,..,PASS (genotyping)
AZR-1018,136,AZR-1018,-0.044423,0.024819,0.044767,0.012811,0.006833,0.000962,0.043968,-0.011994,...,Azerbajan,Azerbaijan,40.06,47.19,Balanovsky,..,584454.0,M,..,PASS (genotyping)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6TD35,9238348109,R03C01,-0.011842,0.019434,0.017846,-0.003264,-0.011879,0.013244,0.015995,0.013539,...,,Koura,,,,,,,,
18TD35,9238348135,R06C02,-0.020369,0.037580,0.019703,-0.006527,-0.000819,-0.008146,0.025697,0.015836,...,,Koura,,,,,,,,
8TD36,9238349162,R06C01,0.370251,-0.268811,0.187583,0.487543,0.803633,-0.081847,-0.527633,-0.776412,...,,Koura,,,,,,,,
6TD38,9238388096,R03C01,-0.015948,0.032014,0.026339,0.003207,0.005445,0.009614,0.024386,-0.002006,...,,Koura,,,,,,,,


In [271]:
selection2 = ['Jordan', 'Ukraine',  'Albania',  'India',  'Italy', 'Iran', 'France', 'Armenia',  'Canada', 'Lebanon', 'Czechoslovakia', 
             'Hungary', 'Georgia', 'Egypt',  'Morocco', 'Saudi Arabia', 'Yemen',
             'Turkey', 'Abkhazia', 'Israel', 'Malta',  'Romania',  'Greece',   'Cambodia', 'Spain',  'Cyprus',  'Lithuania', 'Belarus', 
              'Poland', 'Bulgaria', 'Syria', 'Iraq', 'Koura']

selection = ['Yemen', 'Jordan',  'Lebanon', 'Turkey',  'Israel', 'Malta',   'Greece',   'Cyprus', 'Koura', 'Spain', 'Algeria']


In [272]:
def fixGroupLabel(label):
    if '.' in label: return '.'.join(label.split('.')[:-1])
    return label

pca["Group_Label"] = [fixGroupLabel(label) for label in pca.Group_Label]

In [273]:
from  bokeh.palettes import Paired,  Category20, Category20b
colors = Paired[12] + Category20[20] + Category20b[20]
colors = colors[:len(selection)]
markers = 'circle asterisk cross diamond dash triangle inverted_traingle square triangle'.split()[:6]

In [274]:
ids = set()
for country in selection:
    ids = ids.union(pca[pca.Region==country].index)
len(ids)    
    

790

In [275]:
%pprint

Pretty printing has been turned ON


In [276]:
ids

{'15TD37',
 '18TD35',
 '18TD36',
 '1TD38',
 '23TD36',
 '23TD37',
 '26TD37',
 '6TD35',
 '6TD38',
 '7TD38',
 '8TD32',
 '8TD36',
 '8TD37',
 '9TD37',
 'ALB126',
 'ALB231',
 'ALB389',
 'ALE005',
 'ALE009',
 'ALE088',
 'ALE339',
 'ALG_LN_200',
 'ALG_LN_203',
 'ALG_LN_204',
 'ARM005',
 'Adana23108',
 'Adana23112',
 'Adana23113',
 'Adana23114',
 'Adana23117',
 'Adana23133',
 'Adana23136',
 'Adana23144',
 'Adana23147',
 'Adana23150',
 'Algerian43A13',
 'Algerian43A21',
 'Algerian43A22',
 'Algerian43A23',
 'Algerian43A24',
 'Algerian43A32',
 'Algerian43A34',
 'Assyrian151',
 'Assyrian152',
 'Assyrian153',
 'Assyrian163',
 'Assyrian165',
 'Aydin18112',
 'Aydin18419',
 'Aydin18483',
 'Aydin18596',
 'Aydin18636',
 'Aydin18784',
 'Aydin18873',
 'BAS22',
 'BAS25',
 'BAS27',
 'BAS28',
 'BAS30',
 'BAS31',
 'BAS32',
 'BAS33',
 'BAS35',
 'BON076',
 'BOS010',
 'BOS011',
 'BOS015',
 'BOS027',
 'BOS029',
 'B_Crete-1.DG',
 'B_Crete-2.DG',
 'Balikesir16653',
 'Balikesir16675',
 'Balikesir16790',
 'Balikesir16

In [280]:
set(pca.Region)

{'Abkhazia',
 'Albania',
 'Algeria',
 'Armenia',
 'Azerbaijan',
 'Bulgaria',
 'Croatia',
 'Cyprus',
 'Egypt',
 'Georgia',
 'Greece',
 'Iran',
 'Iraq',
 'Israel',
 'Italy',
 'Jordan',
 'Koura',
 'Lebanon',
 'Malta',
 'Moldova',
 'Romania',
 'Saudi Arabia',
 'Spain',
 'Syria',
 'Turkey',
 'Ukraine',
 'Yemen'}

In [277]:
from bokeh.plotting import output_file, save
pd.options.mode.chained_assignment = None
#output_file (filename='kouraPCA_PC1_2.html', title="PCA Koura + Human Origins")

p = figure(width=1400, height=1200, tooltips=[("pop", "@Region"), ("ID", "@Id"), ("Ethnic", "@Group_Label"), ("Data Source", "@Data_source"), ("Locality", "@Locality"), ('contact', '@contact'), ('Publication', '@Publication')])
for pop, color in zip(selection, colors):
    data0 = pca[pca.Region==pop][['PC1', 'PC2', 'PC3', 'Region', 'Group_Label', 'contact', 'Publication', 'Data_source', 'Locality', 'Date']]
    counter = 0
    for group, data in data0.groupby(by=['Group_Label']):
        try:
            data['size'] = 12
            data['marker'] = markers[counter % (len(markers))]            
            data['color'] = color
            pass
        except:
            pass
        p.scatter(x='PC1', y='PC3', marker='marker', 
                  color='color',
                  muted_color='grey',
                  muted_alpha =0.1,
                  legend_label = f'{pop}/{group}',
                  size='size',
                  source=data)                                       
        counter += 1

p.legend.location = "top_left"
p.legend.click_policy = 'mute'
#save(p) ## creates html, uncomment output_file above
show(p)

## Admixture

In [278]:
k = 8
## Koura regional context
wdir = 'Admixture/KouraReich/'
admix =  "koura_reich2.%s.Q" % k
q = pd.read_csv(admix, header=None, sep=' ')
q.index = pca.index
pops = ['Pop%02d'%i for i in range(k)]
q.columns = pops

q

FileNotFoundError: [Errno 2] File koura_reich2.8.Q does not exist: 'koura_reich2.8.Q'

In [240]:
pca = pd.concat([pca, q], axis=1)

In [241]:
pca

Unnamed: 0,FID,IID,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,...,Library_type,ASSESSMENT,0,1,2,3,4,5,6,7
0,,,,,,,,,,,...,,,0.064951,0.000010,0.764156,0.000010,0.138833,0.019343,0.012687,0.000010
1,,,,,,,,,,,...,,,0.072606,0.000010,0.754463,0.029192,0.141863,0.001846,0.000010,0.000010
2,,,,,,,,,,,...,,,0.102469,0.000010,0.787860,0.015894,0.082188,0.011558,0.000010,0.000010
3,,,,,,,,,,,...,,,0.089320,0.015183,0.648577,0.008983,0.232169,0.000010,0.005748,0.000010
4,,,,,,,,,,,...,,,0.098489,0.000010,0.724974,0.009987,0.160828,0.000010,0.000010,0.005692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syria464,2090.0,syria464,0.031139,0.038917,0.022840,0.015482,0.020833,0.010038,0.030391,-0.012479,...,..,PASS (genotyping),,,,,,,,
syria485,2111.0,syria485,0.007881,0.014575,0.020821,0.013228,0.016446,0.005379,0.023282,-0.003461,...,..,PASS (genotyping),,,,,,,,
syria520,2122.0,syria520,0.044224,-0.001659,0.022862,0.006042,0.016146,-0.000995,0.000648,0.004480,...,..,PASS (genotyping),,,,,,,,
syria6,2154.0,syria6,0.040782,0.037344,0.026954,0.026886,0.030977,0.011224,0.024424,-0.010938,...,..,PASS (genotyping),,,,,,,,


ValueError: Length mismatch: Expected axis has 1014 elements, new values have 1043 elements

In [95]:
data

Unnamed: 0_level_0,PC1,PC2,Region,Group_Label,contact,Publication,Data_source,Locality,Date,size,marker,color
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
18TD36,0.010863,0.223259,Koura,Koura,,,,,,9,circle,#ff7f00
23TD36,0.011545,0.220536,Koura,Koura,,,,,,9,circle,#ff7f00
8TD37,0.016063,0.219422,Koura,Koura,,,,,,9,circle,#ff7f00
9TD37,0.015519,0.221961,Koura,Koura,,,,,,9,circle,#ff7f00
15TD37,0.010253,0.222526,Koura,Koura,,,,,,9,circle,#ff7f00
26TD37,0.011249,0.223396,Koura,Koura,,,,,,9,circle,#ff7f00
23TD37,0.011428,0.221233,Koura,Koura,,,,,,9,circle,#ff7f00
1TD38,0.009209,0.224193,Koura,Koura,,,,,,9,circle,#ff7f00
8TD32,0.011113,0.222827,Koura,Koura,,,,,,9,circle,#ff7f00
6TD35,0.009351,0.216591,Koura,Koura,,,,,,9,circle,#ff7f00


In [120]:
from bokeh.palettes import Category20c, Paired
from bokeh.io import output_notebook
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.transform import cumsum

output_notebook()

In [124]:
ls -lt *.Q

-rw-rw-r--. 1 ahenschel ahenschel 173394 Nov 10 23:46 koura_reich.19.Q
-rw-rw-r--. 1 ahenschel ahenschel 155142 Nov 10 22:15 koura_reich.17.Q
-rw-rw-r--. 1 ahenschel ahenschel 146016 Nov 10 21:00 koura_reich.16.Q
-rw-rw-r--. 1 ahenschel ahenschel 136890 Nov 10 20:33 koura_reich.15.Q
-rw-rw-r--. 1 ahenschel ahenschel 127764 Nov 10 19:40 koura_reich.14.Q
-rw-rw-r--. 1 ahenschel ahenschel 109512 Nov 10 19:17 koura_reich.12.Q
-rw-rw-r--. 1 ahenschel ahenschel 118638 Nov 10 19:07 koura_reich.13.Q
-rw-rw-r--. 1 ahenschel ahenschel 100386 Nov 10 18:21 koura_reich.11.Q
-rw-rw-r--. 1 ahenschel ahenschel  82134 Nov 10 18:04 koura_reich.9.Q
-rw-rw-r--. 1 ahenschel ahenschel  91260 Nov 10 17:59 koura_reich.10.Q
-rw-rw-r--. 1 ahenschel ahenschel  73008 Nov 10 17:01 koura_reich.8.Q
-rw-rw-r--. 1 ahenschel ahenschel  63882 Nov 10 16:32 koura_reich.7.Q
-rw-rw-r--. 1 ahenschel ahenschel  54756 Nov 10 16:17 koura_reich.6.Q
-rw-rw-r--. 1 ahenschel ahenschel  45630 Nov 10 16:08 koura_reich.5.

In [123]:
pca = pd.read_csv(f'KouraReichPCA/pcs.txt', delimiter='\t')
pca.tail(15)

Unnamed: 0,FID,IID,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
999,10406,HGDP00722.SDG,-0.087907,-0.028809,0.057007,-0.038181,0.141185,-0.04677,-0.257752,0.94439,0.009926,0.193385
1000,9236445147,R01C02,-0.046716,0.029692,0.004181,-0.00632,0.004827,0.007938,-0.01024,-0.004017,0.009091,-0.010659
1001,9236445147,R06C02,-0.042981,0.032856,0.008226,-0.003327,0.008272,0.009658,-0.023911,0.000792,0.002445,-0.010408
1002,9236445226,R01C01,-0.050222,0.022108,0.000315,-0.007833,0.003865,0.015857,-0.021073,0.008849,0.009888,-0.010239
1003,9236445226,R02C01,-0.040502,0.030685,0.005787,-0.005269,0.007715,0.004428,-0.017982,0.001928,0.00295,-0.007893
1004,9236445226,R02C02,-0.042317,0.036873,0.011297,-0.012885,-0.00989,0.020172,-0.024148,0.004988,0.006536,-0.016199
1005,9236445231,R01C02,-0.05028,0.030187,0.008502,-0.008863,0.005468,-0.008327,-0.02068,-0.008773,-0.00016,-0.014296
1006,9236445231,R04C01,-0.043437,0.030907,0.007791,-0.014263,0.003541,0.008528,-0.021924,0.001796,0.009871,-0.015672
1007,9236445231,R04C02,-0.047712,0.031086,0.010243,-0.006166,0.008661,0.009713,-0.01707,-0.004043,0.000375,-0.009015
1008,9238348074,R02C02,-0.044135,0.025365,0.010664,-0.015127,0.005444,0.008486,-0.01773,-0.001129,0.015734,-0.004032


In [None]:
p = figure(plot_height=1200, plot_width=1600, title="PCA + Admixture", x_range=x_range, y_range=y_range,
    tooltips=[("@pop", "@value"), ("Sample", "@sample"), ("Region", "@region")])
# one pie chart per sample:
# taking a row from the master table -> df, adding some convenience columns
for sample in list(ddf.index):
    region = ddf.loc[sample, 'Region']
    #if not region in markers.keys(): continue
    data = ddf.loc[sample, pops].reset_index(name='value').rename(columns={'index':'pop'})
    data['angle'] = data['value']/data['value'].sum() * 2*pi
    data['color'] = Paired[k]
    data['sample'] = sample
    data['line_color'] = colors2[region]
    data['region'] = region
    x = ddf.loc[sample, 'PC1']
    y = ddf.loc[sample, 'PC2']
    radius = 0.002 if region in markers.keys() else 0.001
        
    p.wedge(x=x, y=y, radius=radius,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color='line_color', fill_color='color',  line_width=3, source=data)
   

show(p)