In [38]:
import hail as hl

# hl.init()

In [39]:
# hl.utils.get_1kg('data/1kg/')
# hl.utils.get_hgdp('data/hgdp/')

In [40]:
data_path = 'data/hgdp'

In [41]:
mt = hl.read_matrix_table(f'{data_path}/HGDP.mt')

In [42]:
mt.describe()

----------------------------------------
Global fields:
    None
----------------------------------------
Column fields:
    's': str
----------------------------------------
Row fields:
    'locus': locus<GRCh38>
    'alleles': array<str>
    'rsid': str
    'qual': float64
    'filters': set<str>
    'info': struct {
        QUALapprox: int32, 
        SB: array<int32>, 
        MQ: float64, 
        MQRankSum: float64, 
        VarDP: int32, 
        AS_ReadPosRankSum: float64, 
        AS_pab_max: float64, 
        AS_QD: float64, 
        AS_MQ: float64, 
        QD: float64, 
        AS_MQRankSum: float64, 
        FS: float64, 
        AS_FS: float64, 
        ReadPosRankSum: float64, 
        AS_QUALapprox: int32, 
        AS_SB_TABLE: array<int32>, 
        AS_VarDP: int32, 
        AS_SOR: float64, 
        SOR: float64, 
        transmitted_singleton: bool, 
        omni: bool, 
        mills: bool, 
        monoallelic: bool, 
        AS_VQSLOD: float64, 
        Inbreeding

In [43]:
populations_table = hl.import_table(f'{data_path}/HGDP_annotations.txt', key='s')
populations_table.show()

s,pop,continental_pop,sex_karyotype
str,str,str,str
"""HG00096""","""gbr""","""nfe""","""XY"""
"""HG00097""","""gbr""","""nfe""","""XX"""
"""HG00099""","""gbr""","""nfe""","""XX"""
"""HG00100""","""gbr""","""nfe""","""XX"""
"""HG00101""","""gbr""","""nfe""","""XY"""
"""HG00102""","""gbr""","""nfe""","""XX"""
"""HG00103""","""gbr""","""nfe""","""XY"""
"""HG00105""","""gbr""","""nfe""","""XY"""
"""HG00106""","""gbr""","""nfe""","""XX"""
"""HG00107""","""gbr""","""nfe""","""XY"""


In [44]:
mt = mt.annotate_cols(**populations_table[mt.s])

In [45]:
hl.summarize_variants(mt)

Number of alleles,Count
2,10441

Allele type,Count
SNP,10441

Metric,Value
Transitions,6602.0
Transversions,3839.0
Ratio,1.72

Contig,Count
chr1,881
chr2,799
chr3,728
chr4,659
chr5,618
chr6,572
chr7,576
chr8,525
chr9,476
chr10,516


In [46]:
import random
import pandas as pd

rsid_list = mt.aggregate_rows(hl.agg.collect_as_set(mt.rsid))
selected_snps = random.choices(list(rsid_list), k=1000)
mt_filtered = mt.filter_rows(hl.literal(selected_snps).contains(mt.rsid))

selected_snps

['rs117074207',
 'rs1442694036',
 'rs1020766792',
 'rs74035220',
 'rs80183304',
 'rs13213474',
 'rs28538977',
 'rs193297613',
 'rs62391248',
 'rs11777412',
 'rs16840594',
 'rs11873094',
 'rs1257893829',
 'rs79281451',
 'rs183080591',
 'rs535952773',
 'rs111406298',
 'rs74624810',
 'rs16966676',
 'rs4773513',
 'rs8087272',
 'rs2365679',
 'rs76438242',
 'rs6466077',
 'rs10185400',
 'rs139628852',
 'rs114459468',
 'rs115631685',
 'rs117104508',
 'rs11878679',
 'rs74958464',
 'rs17133260',
 'rs1036044411',
 'rs377586467',
 'rs2239363',
 'rs938212325',
 'rs561527715',
 'rs533317158',
 'rs59867566',
 'rs116385604',
 'rs8062411',
 'rs12558026',
 'rs10169315',
 'rs112018293',
 'rs567735211',
 'rs11893003',
 'rs77502827',
 'rs115915792',
 'rs143624377',
 'rs186971633',
 'rs77705431',
 'rs878897797',
 'rs587695573',
 'rs80157892',
 'rs79854856',
 'rs8058053',
 'rs140014333',
 'rs2812212',
 'rs1260171507',
 'rs555000',
 'rs60420990',
 'rs1261731404',
 'rs79854856',
 'rs7734347',
 'rs544424270',
 

In [47]:
mt_filtered.count_rows()

930

In [48]:
def gt_freq_estimates(mt, group):
    p_ML = ((2 * hl.agg.sum(mt.GT.is_hom_ref())) + hl.agg.sum(mt.GT.is_het())) / (2 * hl.agg.count_where(hl.is_defined(mt.GT)))
    return mt.aggregate_entries(
        hl.agg.group_by(
            mt.rsid,
            hl.agg.group_by(
                group,
                hl.struct(
                    p_ML = p_ML,
                    p_AA = p_ML ** 2,
                    p_AB = 2 * p_ML * (1 - p_ML),
                    p_BB = (1 - p_ML) ** 2,
                )
            )
        )
    )

def create_freqs_dataframe(snp_freqs):
    rows = [
        {
            'rsid': rsid,
            'pop_freqs': pop_freqs
        }
        for rsid, pop_freqs in snp_freqs.items()
    ]
    df = pd.DataFrame(rows)
    return df

snp_freqs = gt_freq_estimates(mt_filtered, mt_filtered.pop)
freqs_ht = hl.Table.from_pandas(create_freqs_dataframe(snp_freqs), key='rsid')
freqs_ht.show()

rsid,pop_freqs
str,"dict<str, struct{p_ML: float64, p_AA: float64, p_AB: float64, p_BB: float64}>"
"""rs10014702""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(9.17e-01,8.40e-01,1.53e-01,6.94e-03),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(9.50e-01,9.03e-01,9.50e-02,2.50e-03),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(9.75e-01,9.51e-01,4.88e-02,6.25e-04),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(9.41e-01,8.86e-01,1.11e-01,3.46e-03),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs1001724848""","{""cdx"":(8.64e-01,7.46e-01,2.36e-01,1.86e-02),""gih"":(8.64e-01,7.46e-01,2.36e-01,1.86e-02),""adygei"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""palestinian"":(9.38e-01,8.79e-01,1.17e-01,3.91e-03),""han"":(8.00e-01,6.40e-01,3.20e-01,4.00e-02),""ibs"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""beb"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""makrani"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tsi"":(9.23e-01,8.52e-01,1.42e-01,5.92e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(8.85e-01,7.83e-01,2.04e-01,1.33e-02),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(9.17e-01,8.40e-01,1.53e-01,6.94e-03),""pur"":(9.25e-01,8.56e-01,1.39e-01,5.62e-03),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(8.57e-01,7.35e-01,2.45e-01,2.04e-02),""she"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""chs"":(7.78e-01,6.05e-01,3.46e-01,4.94e-02),""brahui"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(7.86e-01,6.17e-01,3.37e-01,4.59e-02),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(9.38e-01,8.79e-01,1.17e-01,3.91e-03),""clm"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""karitiana"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""cambodian"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""kalash"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mongola"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(8.89e-01,7.90e-01,1.98e-01,1.23e-02),""chb"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""uygur"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""jpt"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""khv"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""stu"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs10042507""","{""cdx"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""gih"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""adygei"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""msl"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""burusho"":(3.75e-01,1.41e-01,4.69e-01,3.91e-01),""palestinian"":(6.88e-01,4.73e-01,4.30e-01,9.77e-02),""han"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""ibs"":(5.91e-01,3.49e-01,4.83e-01,1.67e-01),""beb"":(5.56e-01,3.09e-01,4.94e-01,1.98e-01),""french"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""basque"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tu"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""itu"":(6.43e-01,4.13e-01,4.59e-01,1.28e-01),""dai"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""ceu"":(5.94e-01,3.53e-01,4.82e-01,1.65e-01),""makrani"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tsi"":(5.38e-01,2.90e-01,4.97e-01,2.13e-01),""fin"":(7.78e-01,6.05e-01,3.46e-01,4.94e-02),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(3.75e-01,1.41e-01,4.69e-01,3.91e-01),""pjl"":(6.92e-01,4.79e-01,4.26e-01,9.47e-02),""gwd"":(5.63e-01,3.16e-01,4.92e-01,1.91e-01),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pel"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""pur"":(4.25e-01,1.81e-01,4.89e-01,3.31e-01),""orcadian"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""acb"":(8.50e-01,7.22e-01,2.55e-01,2.25e-02),""bedouin"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""she"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""chs"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""brahui"":(1.67e-01,2.78e-02,2.78e-01,6.94e-01),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(7.86e-01,6.17e-01,3.37e-01,4.59e-02),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""balochi"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""asw"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""clm"":(7.22e-01,5.22e-01,4.01e-01,7.72e-02),""maya"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yoruba"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""colombian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""karitiana"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""yizu"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pathan"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""hazara"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""cambodian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""kalash"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yakut"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""yri"":(6.75e-01,4.56e-01,4.39e-01,1.06e-01),""hezhen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mongola"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tuscan"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""lwk"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""gbr"":(3.89e-01,1.51e-01,4.75e-01,3.73e-01),""chb"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""uygur"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""esn"":(6.47e-01,4.19e-01,4.57e-01,1.25e-01),""sindhi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""druze"":(1.25e-01,1.56e-02,2.19e-01,7.66e-01),""japanese"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""jpt"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""khv"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""stu"":(7.73e-01,5.97e-01,3.51e-01,5.17e-02),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs1008328363""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs10085724""","{""cdx"":(6.36e-01,4.05e-01,4.63e-01,1.32e-01),""gih"":(5.45e-01,2.98e-01,4.96e-01,2.07e-01),""adygei"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""msl"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""burusho"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""palestinian"":(3.75e-01,1.41e-01,4.69e-01,3.91e-01),""han"":(9.00e-01,8.10e-01,1.80e-01,1.00e-02),""ibs"":(6.36e-01,4.05e-01,4.63e-01,1.32e-01),""beb"":(4.44e-01,1.98e-01,4.94e-01,3.09e-01),""french"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""itu"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""dai"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""ceu"":(5.63e-01,3.16e-01,4.92e-01,1.91e-01),""makrani"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""tsi"":(5.77e-01,3.33e-01,4.88e-01,1.79e-01),""fin"":(7.22e-01,5.22e-01,4.01e-01,7.72e-02),""tujia"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mozabite"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""pjl"":(4.42e-01,1.96e-01,4.93e-01,3.11e-01),""gwd"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pur"":(6.50e-01,4.23e-01,4.55e-01,1.22e-01),""orcadian"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""acb"":(5.50e-01,3.03e-01,4.95e-01,2.02e-01),""bedouin"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""she"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""chs"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""brahui"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(7.14e-01,5.10e-01,4.08e-01,8.16e-02),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""asw"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""clm"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""maya"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yoruba"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""yizu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""pathan"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""hazara"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""cambodian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""kalash"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(4.00e-01,1.60e-01,4.80e-01,3.60e-01),""gbr"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""chb"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""uygur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""esn"":(7.94e-01,6.31e-01,3.27e-01,4.24e-02),""sindhi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""druze"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""japanese"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""jpt"":(1.88e-01,3.52e-02,3.05e-01,6.60e-01),""khv"":(5.45e-01,2.98e-01,4.96e-01,2.07e-01),""stu"":(4.09e-01,1.67e-01,4.83e-01,3.49e-01),""oroqen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01)}"
"""rs10115472""","{""cdx"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""gih"":(3.64e-01,1.32e-01,4.63e-01,4.05e-01),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""burusho"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""palestinian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""han"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""ibs"":(9.09e-01,8.26e-01,1.65e-01,8.26e-03),""beb"":(3.89e-01,1.51e-01,4.75e-01,3.73e-01),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""itu"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""dai"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""ceu"":(7.81e-01,6.10e-01,3.42e-01,4.79e-02),""makrani"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tsi"":(8.46e-01,7.16e-01,2.60e-01,2.37e-02),""fin"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""pjl"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""gwd"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pel"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pur"":(6.50e-01,4.23e-01,4.55e-01,1.22e-01),""orcadian"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""acb"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""bedouin"":(8.57e-01,7.35e-01,2.45e-01,2.04e-02),""she"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""chs"":(4.44e-01,1.98e-01,4.94e-01,3.09e-01),""brahui"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""naxi"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""mxl"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""balochi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""asw"":(4.38e-01,1.91e-01,4.92e-01,3.16e-01),""clm"":(5.56e-01,3.09e-01,4.94e-01,1.98e-01),""maya"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""yoruba"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""colombian"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""karitiana"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""yizu"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pathan"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""hazara"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""cambodian"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""kalash"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yakut"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yri"":(6.75e-01,4.56e-01,4.39e-01,1.06e-01),""hezhen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mongola"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""gbr"":(8.89e-01,7.90e-01,1.98e-01,1.23e-02),""chb"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""uygur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""esn"":(5.59e-01,3.12e-01,4.93e-01,1.95e-01),""sindhi"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""druze"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""japanese"":(3.75e-01,1.41e-01,4.69e-01,3.91e-01),""jpt"":(3.75e-01,1.41e-01,4.69e-01,3.91e-01),""khv"":(5.45e-01,2.98e-01,4.96e-01,2.07e-01),""stu"":(5.91e-01,3.49e-01,4.83e-01,1.67e-01),""oroqen"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00)}"
"""rs1012136451""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs1012598""","{""cdx"":(3.18e-01,1.01e-01,4.34e-01,4.65e-01),""gih"":(6.82e-01,4.65e-01,4.34e-01,1.01e-01),""adygei"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""msl"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""burusho"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""palestinian"":(8.13e-01,6.60e-01,3.05e-01,3.52e-02),""han"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""ibs"":(6.36e-01,4.05e-01,4.63e-01,1.32e-01),""beb"":(5.56e-01,3.09e-01,4.94e-01,1.98e-01),""french"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""basque"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tu"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""itu"":(7.14e-01,5.10e-01,4.08e-01,8.16e-02),""dai"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""ceu"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""makrani"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tsi"":(5.77e-01,3.33e-01,4.88e-01,1.79e-01),""fin"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""tujia"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mozabite"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pjl"":(6.92e-01,4.79e-01,4.26e-01,9.47e-02),""gwd"":(6.56e-01,4.31e-01,4.51e-01,1.18e-01),""mandenka"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""surui"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pel"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""pur"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""orcadian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""acb"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""bedouin"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""brahui"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""naxi"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""mxl"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""daur"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""russian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""balochi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""asw"":(8.13e-01,6.60e-01,3.05e-01,3.52e-02),""clm"":(5.56e-01,3.09e-01,4.94e-01,1.98e-01),""maya"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""yoruba"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""colombian"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""karitiana"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""yizu"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""pathan"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""hazara"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""cambodian"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""kalash"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""yakut"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""yri"":(7.25e-01,5.26e-01,3.99e-01,7.56e-02),""hezhen"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""mongola"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""tuscan"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""lwk"":(9.00e-01,8.10e-01,1.80e-01,1.00e-02),""gbr"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""chb"":(3.00e-01,9.00e-02,4.20e-01,4.90e-01),""uygur"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""esn"":(5.59e-01,3.12e-01,4.93e-01,1.95e-01),""sindhi"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""druze"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""japanese"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""jpt"":(4.38e-01,1.91e-01,4.92e-01,3.16e-01),""khv"":(3.18e-01,1.01e-01,4.34e-01,4.65e-01),""stu"":(8.64e-01,7.46e-01,2.36e-01,1.86e-02),""oroqen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01)}"
"""rs10131660""","{""cdx"":(9.09e-01,8.26e-01,1.65e-01,8.26e-03),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""burusho"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""palestinian"":(9.38e-01,8.79e-01,1.17e-01,3.91e-03),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(9.69e-01,9.38e-01,6.05e-02,9.77e-04),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.23e-01,8.52e-01,1.42e-01,5.92e-03),""gwd"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pel"":(9.17e-01,8.40e-01,1.53e-01,6.94e-03),""pur"":(9.74e-01,9.48e-01,5.12e-02,6.93e-04),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(7.22e-01,5.22e-01,4.01e-01,7.72e-02),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(8.57e-01,7.35e-01,2.45e-01,2.04e-02),""daur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(5.63e-01,3.16e-01,4.92e-01,1.91e-01),""clm"":(8.89e-01,7.90e-01,1.98e-01,1.23e-02),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(NaN,NaN,NaN,NaN),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""yri"":(8.16e-01,6.66e-01,3.01e-01,3.39e-02),""hezhen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(7.35e-01,5.41e-01,3.89e-01,7.01e-02),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""jpt"":(8.13e-01,6.60e-01,3.05e-01,3.52e-02),""khv"":(9.00e-01,8.10e-01,1.80e-01,1.00e-02),""stu"":(9.09e-01,8.26e-01,1.65e-01,8.26e-03),""oroqen"":(NaN,NaN,NaN,NaN)}"
"""rs1013541776""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"


In [70]:
total_subjects = mt.count_cols()
ethnicity_counts = mt.aggregate_cols(hl.agg.counter(mt.pop))
ethnicity_proportions = {k: v / total_subjects for k, v in ethnicity_counts.items()}

ethnicity_proportions, sum(ethnicity_proportions.values())

({'acb': 0.025510204081632654,
  'adygei': 0.00510204081632653,
  'asw': 0.02040816326530612,
  'balochi': 0.002551020408163265,
  'basque': 0.00510204081632653,
  'beb': 0.02295918367346939,
  'bedouin': 0.017857142857142856,
  'brahui': 0.007653061224489796,
  'burusho': 0.01020408163265306,
  'cambodian': 0.007653061224489796,
  'cdx': 0.02806122448979592,
  'ceu': 0.04081632653061224,
  'chb': 0.012755102040816327,
  'chs': 0.02295918367346939,
  'clm': 0.02295918367346939,
  'colombian': 0.002551020408163265,
  'dai': 0.00510204081632653,
  'daur': 0.002551020408163265,
  'druze': 0.01020408163265306,
  'esn': 0.04336734693877551,
  'fin': 0.02295918367346939,
  'french': 0.00510204081632653,
  'gbr': 0.02295918367346939,
  'gih': 0.02806122448979592,
  'gwd': 0.04081632653061224,
  'han': 0.012755102040816327,
  'hazara': 0.007653061224489796,
  'hezhen': 0.002551020408163265,
  'ibs': 0.02806122448979592,
  'itu': 0.017857142857142856,
  'japanese': 0.01020408163265306,
  'jpt':

In [50]:
def annotate_rows_with_freqs(mt, freqs_ht):
    mt = mt.annotate_rows(
        freqs=freqs_ht[mt_filtered.rsid].pop_freqs
    )
    return mt

mt_annotated = annotate_rows_with_freqs(mt_filtered, freqs_ht)

In [51]:
mt_annotated.entries().show(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,Unnamed: 34_level_0,Unnamed: 35_level_0,Unnamed: 36_level_0,Unnamed: 37_level_0,Unnamed: 38_level_0,Unnamed: 39_level_0
locus,alleles,rsid,qual,filters,QUALapprox,SB,MQ,MQRankSum,VarDP,AS_ReadPosRankSum,AS_pab_max,AS_QD,AS_MQ,QD,AS_MQRankSum,FS,AS_FS,ReadPosRankSum,AS_QUALapprox,AS_SB_TABLE,AS_VarDP,AS_SOR,SOR,transmitted_singleton,omni,mills,monoallelic,AS_VQSLOD,InbreedingCoeff,freqs,s,pop,continental_pop,sex_karyotype,GT,DP,GQ,AD,PL
locus<GRCh38>,array<str>,str,float64,set<str>,int32,array<int32>,float64,float64,int32,float64,float64,float64,float64,float64,float64,float64,float64,float64,int32,array<int32>,int32,float64,float64,bool,bool,bool,bool,float64,float64,"dict<str, struct{p_ML: float64, p_AA: float64, p_AB: float64, p_BB: float64}>",str,str,str,str,call,int32,int32,array<int32>,array<int32>
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00107""","""gbr""","""nfe""","""XY""",0/0,22,21,,
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00114""","""gbr""","""nfe""","""XY""",0/0,60,76,,
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00121""","""gbr""","""nfe""","""XX""",0/0,31,52,,


In [67]:
def log_likelihood(genotype, freqs, pop):
    return (
        hl.case()
        .when(genotype.is_hom_ref(), hl.log(freqs[pop].p_AA))
        .when(genotype.is_het(), hl.log(freqs[pop].p_AB))
        .when(genotype.is_hom_var(), hl.log(freqs[pop].p_BB))
        .default(0.0)
    )

def get_log_likelihoods(mt, proportions):
    mt = mt.annotate_entries(
        log_likelihood=hl.dict({
            pop: log_likelihood(mt.GT, mt.freqs, pop)
            for pop in proportions
        })
    )
    mt = mt.annotate_cols(
        log_likelihoods=hl.dict({
            pop: mt.log_likelihood[pop] + hl.log(proportions[pop])
            for pop in proportions
        })
    )
    return mt


mt_likelihood = get_log_likelihoods(mt_annotated, ethnicity_proportions)
mt_likelihood.cols().show()

ExpressionException: scope violation: 'MatrixTable.annotate_rows: field 'log_likelihood'' expects an expression indexed by ['row']
    Found indices ['row', 'column'], with unexpected indices ['column']. Invalid fields:
        'GT' (indices ['row', 'column'])
    'MatrixTable.annotate_rows: field 'log_likelihood'' supports aggregation over axes ['column'], so these fields may appear inside an aggregator function.

In [57]:
mt_likelihood.entries().show(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,Unnamed: 34_level_0,Unnamed: 35_level_0,Unnamed: 36_level_0,Unnamed: 37_level_0,Unnamed: 38_level_0,Unnamed: 39_level_0,Unnamed: 40_level_0,Unnamed: 41_level_0
locus,alleles,rsid,qual,filters,QUALapprox,SB,MQ,MQRankSum,VarDP,AS_ReadPosRankSum,AS_pab_max,AS_QD,AS_MQ,QD,AS_MQRankSum,FS,AS_FS,ReadPosRankSum,AS_QUALapprox,AS_SB_TABLE,AS_VarDP,AS_SOR,SOR,transmitted_singleton,omni,mills,monoallelic,AS_VQSLOD,InbreedingCoeff,freqs,s,pop,continental_pop,sex_karyotype,log_likelihoods,GT,DP,GQ,AD,PL,log_likelihood
locus<GRCh38>,array<str>,str,float64,set<str>,int32,array<int32>,float64,float64,int32,float64,float64,float64,float64,float64,float64,float64,float64,float64,int32,array<int32>,int32,float64,float64,bool,bool,bool,bool,float64,float64,"dict<str, struct{p_ML: float64, p_AA: float64, p_AB: float64, p_BB: float64}>",str,str,str,str,"dict<str, float64>",call,int32,int32,array<int32>,array<int32>,float64
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00107""","""gbr""","""nfe""","""XY""","{""cdx"":-2.76e+02,""gih"":-2.76e+02,""adygei"":-2.78e+02,""msl"":-2.76e+02,""burusho"":-2.77e+02,""palestinian"":-2.76e+02,""han"":-2.77e+02,""ibs"":-2.76e+02,""beb"":-2.76e+02,""french"":-2.78e+02,""basque"":-2.78e+02,""tu"":-2.78e+02,""itu"":-2.76e+02,""dai"":-2.78e+02,""ceu"":-2.76e+02,""makrani"":-2.78e+02,""tsi"":-2.76e+02,""fin"":-2.76e+02,""tujia"":-2.78e+02,""mozabite"":-2.77e+02,""pjl"":-2.75e+02,""gwd"":-2.76e+02,""mandenka"":-2.78e+02,""surui"":-2.78e+02,""pel"":-2.77e+02,""pur"":-2.75e+02,""orcadian"":-2.78e+02,""acb"":-2.76e+02,""bedouin"":-2.76e+02,""she"":-2.78e+02,""chs"":-2.76e+02,""brahui"":-2.77e+02,""naxi"":-2.78e+02,""mxl"":-2.76e+02,""daur"":-2.78e+02,""russian"":-2.78e+02,""balochi"":-2.78e+02,""asw"":-2.76e+02,""clm"":-2.76e+02,""maya"":-2.78e+02,""yoruba"":-2.78e+02,""colombian"":-2.78e+02,""karitiana"":-2.78e+02,""yizu"":-2.78e+02,""pathan"":-2.77e+02,""hazara"":-2.77e+02,""cambodian"":-2.77e+02,""kalash"":-2.77e+02,""yakut"":-2.77e+02,""yri"":-2.75e+02,""hezhen"":-2.78e+02,""mongola"":-2.78e+02,""tuscan"":-2.78e+02,""lwk"":-2.77e+02,""gbr"":-2.76e+02,""chb"":-2.77e+02,""uygur"":-2.78e+02,""esn"":-2.76e+02,""sindhi"":-2.78e+02,""druze"":-2.77e+02,""japanese"":-2.77e+02,""jpt"":-2.76e+02,""khv"":-2.76e+02,""stu"":-2.76e+02,""oroqen"":-2.78e+02}",0/0,22,21,,,-0.114
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00114""","""gbr""","""nfe""","""XY""","{""cdx"":-3.02e+02,""gih"":-3.02e+02,""adygei"":-3.04e+02,""msl"":-3.02e+02,""burusho"":-3.03e+02,""palestinian"":-3.03e+02,""han"":-3.03e+02,""ibs"":-3.02e+02,""beb"":-3.02e+02,""french"":-3.04e+02,""basque"":-3.04e+02,""tu"":-3.05e+02,""itu"":-3.03e+02,""dai"":-3.04e+02,""ceu"":-3.02e+02,""makrani"":-3.04e+02,""tsi"":-3.02e+02,""fin"":-3.02e+02,""tujia"":-3.05e+02,""mozabite"":-3.03e+02,""pjl"":-3.01e+02,""gwd"":-3.02e+02,""mandenka"":-3.05e+02,""surui"":-3.04e+02,""pel"":-3.03e+02,""pur"":-3.02e+02,""orcadian"":-3.04e+02,""acb"":-3.02e+02,""bedouin"":-3.03e+02,""she"":-3.04e+02,""chs"":-3.02e+02,""brahui"":-3.03e+02,""naxi"":-3.05e+02,""mxl"":-3.03e+02,""daur"":-3.05e+02,""russian"":-3.05e+02,""balochi"":-3.05e+02,""asw"":-3.03e+02,""clm"":-3.02e+02,""maya"":-3.04e+02,""yoruba"":-3.04e+02,""colombian"":-3.05e+02,""karitiana"":-3.04e+02,""yizu"":-3.04e+02,""pathan"":-3.03e+02,""hazara"":-3.03e+02,""cambodian"":-3.03e+02,""kalash"":-3.03e+02,""yakut"":-3.03e+02,""yri"":-3.02e+02,""hezhen"":-3.05e+02,""mongola"":-3.04e+02,""tuscan"":-3.05e+02,""lwk"":-3.03e+02,""gbr"":-3.02e+02,""chb"":-3.03e+02,""uygur"":-3.04e+02,""esn"":-3.02e+02,""sindhi"":-3.04e+02,""druze"":-3.03e+02,""japanese"":-3.03e+02,""jpt"":-3.03e+02,""khv"":-3.02e+02,""stu"":-3.02e+02,""oroqen"":-3.05e+02}",0/0,60,76,,,-0.114
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00121""","""gbr""","""nfe""","""XX""","{""cdx"":-3.27e+02,""gih"":-3.27e+02,""adygei"":-3.29e+02,""msl"":-3.27e+02,""burusho"":-3.28e+02,""palestinian"":-3.27e+02,""han"":-3.28e+02,""ibs"":-3.27e+02,""beb"":-3.27e+02,""french"":-3.29e+02,""basque"":-3.29e+02,""tu"":-3.29e+02,""itu"":-3.27e+02,""dai"":-3.29e+02,""ceu"":-3.26e+02,""makrani"":-3.29e+02,""tsi"":-3.27e+02,""fin"":-3.27e+02,""tujia"":-3.29e+02,""mozabite"":-3.28e+02,""pjl"":-3.26e+02,""gwd"":-3.26e+02,""mandenka"":-3.29e+02,""surui"":-3.29e+02,""pel"":-3.27e+02,""pur"":-3.26e+02,""orcadian"":-3.29e+02,""acb"":-3.27e+02,""bedouin"":-3.27e+02,""she"":-3.29e+02,""chs"":-3.27e+02,""brahui"":-3.28e+02,""naxi"":-3.29e+02,""mxl"":-3.27e+02,""daur"":-3.29e+02,""russian"":-3.29e+02,""balochi"":-3.29e+02,""asw"":-3.27e+02,""clm"":-3.27e+02,""maya"":-3.29e+02,""yoruba"":-3.29e+02,""colombian"":-3.29e+02,""karitiana"":-3.29e+02,""yizu"":-3.29e+02,""pathan"":-3.28e+02,""hazara"":-3.28e+02,""cambodian"":-3.28e+02,""kalash"":-3.28e+02,""yakut"":-3.28e+02,""yri"":-3.26e+02,""hezhen"":-3.29e+02,""mongola"":-3.29e+02,""tuscan"":-3.29e+02,""lwk"":-3.28e+02,""gbr"":-3.27e+02,""chb"":-3.28e+02,""uygur"":-3.29e+02,""esn"":-3.26e+02,""sindhi"":-3.29e+02,""druze"":-3.28e+02,""japanese"":-3.28e+02,""jpt"":-3.27e+02,""khv"":-3.27e+02,""stu"":-3.27e+02,""oroqen"":-3.29e+02}",0/0,31,52,,,-0.114


In [53]:
def log_sum_exp(log_values):
    max_log = hl.max(log_values)
    return max_log + hl.log(hl.sum([hl.exp(log_value - max_log) for log_value in log_values]))


def calculate_posteriors(mt, proportions):
    log_likelihoods = [mt.log_likelihoods[pop] for pop in proportions.keys()]

    mt = mt.annotate_cols(log_likelihood_sum=log_sum_exp(log_likelihoods))
    return mt.annotate_cols(
        posteriors=hl.dict({
            pop: mt.log_likelihoods[pop] #- mt.log_likelihood_sum
               for pop in proportions.keys()
        })
    )

mt_probs = calculate_posteriors(mt_likelihood, ethnicity_proportions)
mt_probs.cols().show(5)

s,pop,continental_pop,sex_karyotype,log_likelihoods,log_likelihood_sum,posteriors
str,str,str,str,"dict<str, float64>",float64,"dict<str, float64>"
"""HG00107""","""gbr""","""nfe""","""XY""","{""cdx"":-2.76e+02,""gih"":-2.76e+02,""adygei"":-2.78e+02,""msl"":-2.76e+02,""burusho"":-2.77e+02,""palestinian"":-2.76e+02,""han"":-2.77e+02,""ibs"":-2.76e+02,""beb"":-2.76e+02,""french"":-2.78e+02,""basque"":-2.78e+02,""tu"":-2.78e+02,""itu"":-2.76e+02,""dai"":-2.78e+02,""ceu"":-2.76e+02,""makrani"":-2.78e+02,""tsi"":-2.76e+02,""fin"":-2.76e+02,""tujia"":-2.78e+02,""mozabite"":-2.77e+02,""pjl"":-2.75e+02,""gwd"":-2.76e+02,""mandenka"":-2.78e+02,""surui"":-2.78e+02,""pel"":-2.77e+02,""pur"":-2.75e+02,""orcadian"":-2.78e+02,""acb"":-2.76e+02,""bedouin"":-2.76e+02,""she"":-2.78e+02,""chs"":-2.76e+02,""brahui"":-2.77e+02,""naxi"":-2.78e+02,""mxl"":-2.76e+02,""daur"":-2.78e+02,""russian"":-2.78e+02,""balochi"":-2.78e+02,""asw"":-2.76e+02,""clm"":-2.76e+02,""maya"":-2.78e+02,""yoruba"":-2.78e+02,""colombian"":-2.78e+02,""karitiana"":-2.78e+02,""yizu"":-2.78e+02,""pathan"":-2.77e+02,""hazara"":-2.77e+02,""cambodian"":-2.77e+02,""kalash"":-2.77e+02,""yakut"":-2.77e+02,""yri"":-2.75e+02,""hezhen"":-2.78e+02,""mongola"":-2.78e+02,""tuscan"":-2.78e+02,""lwk"":-2.77e+02,""gbr"":-2.76e+02,""chb"":-2.77e+02,""uygur"":-2.78e+02,""esn"":-2.76e+02,""sindhi"":-2.78e+02,""druze"":-2.77e+02,""japanese"":-2.77e+02,""jpt"":-2.76e+02,""khv"":-2.76e+02,""stu"":-2.76e+02,""oroqen"":-2.78e+02}",-272.0,"{""cdx"":-2.76e+02,""gih"":-2.76e+02,""adygei"":-2.78e+02,""msl"":-2.76e+02,""burusho"":-2.77e+02,""palestinian"":-2.76e+02,""han"":-2.77e+02,""ibs"":-2.76e+02,""beb"":-2.76e+02,""french"":-2.78e+02,""basque"":-2.78e+02,""tu"":-2.78e+02,""itu"":-2.76e+02,""dai"":-2.78e+02,""ceu"":-2.76e+02,""makrani"":-2.78e+02,""tsi"":-2.76e+02,""fin"":-2.76e+02,""tujia"":-2.78e+02,""mozabite"":-2.77e+02,""pjl"":-2.75e+02,""gwd"":-2.76e+02,""mandenka"":-2.78e+02,""surui"":-2.78e+02,""pel"":-2.77e+02,""pur"":-2.75e+02,""orcadian"":-2.78e+02,""acb"":-2.76e+02,""bedouin"":-2.76e+02,""she"":-2.78e+02,""chs"":-2.76e+02,""brahui"":-2.77e+02,""naxi"":-2.78e+02,""mxl"":-2.76e+02,""daur"":-2.78e+02,""russian"":-2.78e+02,""balochi"":-2.78e+02,""asw"":-2.76e+02,""clm"":-2.76e+02,""maya"":-2.78e+02,""yoruba"":-2.78e+02,""colombian"":-2.78e+02,""karitiana"":-2.78e+02,""yizu"":-2.78e+02,""pathan"":-2.77e+02,""hazara"":-2.77e+02,""cambodian"":-2.77e+02,""kalash"":-2.77e+02,""yakut"":-2.77e+02,""yri"":-2.75e+02,""hezhen"":-2.78e+02,""mongola"":-2.78e+02,""tuscan"":-2.78e+02,""lwk"":-2.77e+02,""gbr"":-2.76e+02,""chb"":-2.77e+02,""uygur"":-2.78e+02,""esn"":-2.76e+02,""sindhi"":-2.78e+02,""druze"":-2.77e+02,""japanese"":-2.77e+02,""jpt"":-2.76e+02,""khv"":-2.76e+02,""stu"":-2.76e+02,""oroqen"":-2.78e+02}"
"""HG00114""","""gbr""","""nfe""","""XY""","{""cdx"":-3.02e+02,""gih"":-3.02e+02,""adygei"":-3.04e+02,""msl"":-3.02e+02,""burusho"":-3.03e+02,""palestinian"":-3.03e+02,""han"":-3.03e+02,""ibs"":-3.02e+02,""beb"":-3.02e+02,""french"":-3.04e+02,""basque"":-3.04e+02,""tu"":-3.05e+02,""itu"":-3.03e+02,""dai"":-3.04e+02,""ceu"":-3.02e+02,""makrani"":-3.04e+02,""tsi"":-3.02e+02,""fin"":-3.02e+02,""tujia"":-3.05e+02,""mozabite"":-3.03e+02,""pjl"":-3.01e+02,""gwd"":-3.02e+02,""mandenka"":-3.05e+02,""surui"":-3.04e+02,""pel"":-3.03e+02,""pur"":-3.02e+02,""orcadian"":-3.04e+02,""acb"":-3.02e+02,""bedouin"":-3.03e+02,""she"":-3.04e+02,""chs"":-3.02e+02,""brahui"":-3.03e+02,""naxi"":-3.05e+02,""mxl"":-3.03e+02,""daur"":-3.05e+02,""russian"":-3.05e+02,""balochi"":-3.05e+02,""asw"":-3.03e+02,""clm"":-3.02e+02,""maya"":-3.04e+02,""yoruba"":-3.04e+02,""colombian"":-3.05e+02,""karitiana"":-3.04e+02,""yizu"":-3.04e+02,""pathan"":-3.03e+02,""hazara"":-3.03e+02,""cambodian"":-3.03e+02,""kalash"":-3.03e+02,""yakut"":-3.03e+02,""yri"":-3.02e+02,""hezhen"":-3.05e+02,""mongola"":-3.04e+02,""tuscan"":-3.05e+02,""lwk"":-3.03e+02,""gbr"":-3.02e+02,""chb"":-3.03e+02,""uygur"":-3.04e+02,""esn"":-3.02e+02,""sindhi"":-3.04e+02,""druze"":-3.03e+02,""japanese"":-3.03e+02,""jpt"":-3.03e+02,""khv"":-3.02e+02,""stu"":-3.02e+02,""oroqen"":-3.05e+02}",-299.0,"{""cdx"":-3.02e+02,""gih"":-3.02e+02,""adygei"":-3.04e+02,""msl"":-3.02e+02,""burusho"":-3.03e+02,""palestinian"":-3.03e+02,""han"":-3.03e+02,""ibs"":-3.02e+02,""beb"":-3.02e+02,""french"":-3.04e+02,""basque"":-3.04e+02,""tu"":-3.05e+02,""itu"":-3.03e+02,""dai"":-3.04e+02,""ceu"":-3.02e+02,""makrani"":-3.04e+02,""tsi"":-3.02e+02,""fin"":-3.02e+02,""tujia"":-3.05e+02,""mozabite"":-3.03e+02,""pjl"":-3.01e+02,""gwd"":-3.02e+02,""mandenka"":-3.05e+02,""surui"":-3.04e+02,""pel"":-3.03e+02,""pur"":-3.02e+02,""orcadian"":-3.04e+02,""acb"":-3.02e+02,""bedouin"":-3.03e+02,""she"":-3.04e+02,""chs"":-3.02e+02,""brahui"":-3.03e+02,""naxi"":-3.05e+02,""mxl"":-3.03e+02,""daur"":-3.05e+02,""russian"":-3.05e+02,""balochi"":-3.05e+02,""asw"":-3.03e+02,""clm"":-3.02e+02,""maya"":-3.04e+02,""yoruba"":-3.04e+02,""colombian"":-3.05e+02,""karitiana"":-3.04e+02,""yizu"":-3.04e+02,""pathan"":-3.03e+02,""hazara"":-3.03e+02,""cambodian"":-3.03e+02,""kalash"":-3.03e+02,""yakut"":-3.03e+02,""yri"":-3.02e+02,""hezhen"":-3.05e+02,""mongola"":-3.04e+02,""tuscan"":-3.05e+02,""lwk"":-3.03e+02,""gbr"":-3.02e+02,""chb"":-3.03e+02,""uygur"":-3.04e+02,""esn"":-3.02e+02,""sindhi"":-3.04e+02,""druze"":-3.03e+02,""japanese"":-3.03e+02,""jpt"":-3.03e+02,""khv"":-3.02e+02,""stu"":-3.02e+02,""oroqen"":-3.05e+02}"
"""HG00121""","""gbr""","""nfe""","""XX""","{""cdx"":-3.27e+02,""gih"":-3.27e+02,""adygei"":-3.29e+02,""msl"":-3.27e+02,""burusho"":-3.28e+02,""palestinian"":-3.27e+02,""han"":-3.28e+02,""ibs"":-3.27e+02,""beb"":-3.27e+02,""french"":-3.29e+02,""basque"":-3.29e+02,""tu"":-3.29e+02,""itu"":-3.27e+02,""dai"":-3.29e+02,""ceu"":-3.26e+02,""makrani"":-3.29e+02,""tsi"":-3.27e+02,""fin"":-3.27e+02,""tujia"":-3.29e+02,""mozabite"":-3.28e+02,""pjl"":-3.26e+02,""gwd"":-3.26e+02,""mandenka"":-3.29e+02,""surui"":-3.29e+02,""pel"":-3.27e+02,""pur"":-3.26e+02,""orcadian"":-3.29e+02,""acb"":-3.27e+02,""bedouin"":-3.27e+02,""she"":-3.29e+02,""chs"":-3.27e+02,""brahui"":-3.28e+02,""naxi"":-3.29e+02,""mxl"":-3.27e+02,""daur"":-3.29e+02,""russian"":-3.29e+02,""balochi"":-3.29e+02,""asw"":-3.27e+02,""clm"":-3.27e+02,""maya"":-3.29e+02,""yoruba"":-3.29e+02,""colombian"":-3.29e+02,""karitiana"":-3.29e+02,""yizu"":-3.29e+02,""pathan"":-3.28e+02,""hazara"":-3.28e+02,""cambodian"":-3.28e+02,""kalash"":-3.28e+02,""yakut"":-3.28e+02,""yri"":-3.26e+02,""hezhen"":-3.29e+02,""mongola"":-3.29e+02,""tuscan"":-3.29e+02,""lwk"":-3.28e+02,""gbr"":-3.27e+02,""chb"":-3.28e+02,""uygur"":-3.29e+02,""esn"":-3.26e+02,""sindhi"":-3.29e+02,""druze"":-3.28e+02,""japanese"":-3.28e+02,""jpt"":-3.27e+02,""khv"":-3.27e+02,""stu"":-3.27e+02,""oroqen"":-3.29e+02}",-323.0,"{""cdx"":-3.27e+02,""gih"":-3.27e+02,""adygei"":-3.29e+02,""msl"":-3.27e+02,""burusho"":-3.28e+02,""palestinian"":-3.27e+02,""han"":-3.28e+02,""ibs"":-3.27e+02,""beb"":-3.27e+02,""french"":-3.29e+02,""basque"":-3.29e+02,""tu"":-3.29e+02,""itu"":-3.27e+02,""dai"":-3.29e+02,""ceu"":-3.26e+02,""makrani"":-3.29e+02,""tsi"":-3.27e+02,""fin"":-3.27e+02,""tujia"":-3.29e+02,""mozabite"":-3.28e+02,""pjl"":-3.26e+02,""gwd"":-3.26e+02,""mandenka"":-3.29e+02,""surui"":-3.29e+02,""pel"":-3.27e+02,""pur"":-3.26e+02,""orcadian"":-3.29e+02,""acb"":-3.27e+02,""bedouin"":-3.27e+02,""she"":-3.29e+02,""chs"":-3.27e+02,""brahui"":-3.28e+02,""naxi"":-3.29e+02,""mxl"":-3.27e+02,""daur"":-3.29e+02,""russian"":-3.29e+02,""balochi"":-3.29e+02,""asw"":-3.27e+02,""clm"":-3.27e+02,""maya"":-3.29e+02,""yoruba"":-3.29e+02,""colombian"":-3.29e+02,""karitiana"":-3.29e+02,""yizu"":-3.29e+02,""pathan"":-3.28e+02,""hazara"":-3.28e+02,""cambodian"":-3.28e+02,""kalash"":-3.28e+02,""yakut"":-3.28e+02,""yri"":-3.26e+02,""hezhen"":-3.29e+02,""mongola"":-3.29e+02,""tuscan"":-3.29e+02,""lwk"":-3.28e+02,""gbr"":-3.27e+02,""chb"":-3.28e+02,""uygur"":-3.29e+02,""esn"":-3.26e+02,""sindhi"":-3.29e+02,""druze"":-3.28e+02,""japanese"":-3.28e+02,""jpt"":-3.27e+02,""khv"":-3.27e+02,""stu"":-3.27e+02,""oroqen"":-3.29e+02}"
"""HG00127""","""gbr""","""nfe""","""XX""","{""cdx"":-2.95e+02,""gih"":-2.95e+02,""adygei"":-2.97e+02,""msl"":-2.95e+02,""burusho"":-2.96e+02,""palestinian"":-2.95e+02,""han"":-2.96e+02,""ibs"":-2.95e+02,""beb"":-2.95e+02,""french"":-2.97e+02,""basque"":-2.97e+02,""tu"":-2.97e+02,""itu"":-2.95e+02,""dai"":-2.97e+02,""ceu"":-2.95e+02,""makrani"":-2.97e+02,""tsi"":-2.95e+02,""fin"":-2.95e+02,""tujia"":-2.97e+02,""mozabite"":-2.96e+02,""pjl"":-2.94e+02,""gwd"":-2.95e+02,""mandenka"":-2.97e+02,""surui"":-2.97e+02,""pel"":-2.96e+02,""pur"":-2.94e+02,""orcadian"":-2.97e+02,""acb"":-2.95e+02,""bedouin"":-2.95e+02,""she"":-2.97e+02,""chs"":-2.95e+02,""brahui"":-2.96e+02,""naxi"":-2.97e+02,""mxl"":-2.95e+02,""daur"":-2.97e+02,""russian"":-2.97e+02,""balochi"":-2.97e+02,""asw"":-2.95e+02,""clm"":-2.95e+02,""maya"":-2.97e+02,""yoruba"":-2.97e+02,""colombian"":-2.97e+02,""karitiana"":-2.97e+02,""yizu"":-2.97e+02,""pathan"":-2.96e+02,""hazara"":-2.96e+02,""cambodian"":-2.96e+02,""kalash"":-2.96e+02,""yakut"":-2.96e+02,""yri"":-2.94e+02,""hezhen"":-2.97e+02,""mongola"":-2.97e+02,""tuscan"":-2.97e+02,""lwk"":-2.96e+02,""gbr"":-2.95e+02,""chb"":-2.96e+02,""uygur"":-2.97e+02,""esn"":-2.95e+02,""sindhi"":-2.97e+02,""druze"":-2.96e+02,""japanese"":-2.96e+02,""jpt"":-2.95e+02,""khv"":-2.95e+02,""stu"":-2.95e+02,""oroqen"":-2.97e+02}",-291.0,"{""cdx"":-2.95e+02,""gih"":-2.95e+02,""adygei"":-2.97e+02,""msl"":-2.95e+02,""burusho"":-2.96e+02,""palestinian"":-2.95e+02,""han"":-2.96e+02,""ibs"":-2.95e+02,""beb"":-2.95e+02,""french"":-2.97e+02,""basque"":-2.97e+02,""tu"":-2.97e+02,""itu"":-2.95e+02,""dai"":-2.97e+02,""ceu"":-2.95e+02,""makrani"":-2.97e+02,""tsi"":-2.95e+02,""fin"":-2.95e+02,""tujia"":-2.97e+02,""mozabite"":-2.96e+02,""pjl"":-2.94e+02,""gwd"":-2.95e+02,""mandenka"":-2.97e+02,""surui"":-2.97e+02,""pel"":-2.96e+02,""pur"":-2.94e+02,""orcadian"":-2.97e+02,""acb"":-2.95e+02,""bedouin"":-2.95e+02,""she"":-2.97e+02,""chs"":-2.95e+02,""brahui"":-2.96e+02,""naxi"":-2.97e+02,""mxl"":-2.95e+02,""daur"":-2.97e+02,""russian"":-2.97e+02,""balochi"":-2.97e+02,""asw"":-2.95e+02,""clm"":-2.95e+02,""maya"":-2.97e+02,""yoruba"":-2.97e+02,""colombian"":-2.97e+02,""karitiana"":-2.97e+02,""yizu"":-2.97e+02,""pathan"":-2.96e+02,""hazara"":-2.96e+02,""cambodian"":-2.96e+02,""kalash"":-2.96e+02,""yakut"":-2.96e+02,""yri"":-2.94e+02,""hezhen"":-2.97e+02,""mongola"":-2.97e+02,""tuscan"":-2.97e+02,""lwk"":-2.96e+02,""gbr"":-2.95e+02,""chb"":-2.96e+02,""uygur"":-2.97e+02,""esn"":-2.95e+02,""sindhi"":-2.97e+02,""druze"":-2.96e+02,""japanese"":-2.96e+02,""jpt"":-2.95e+02,""khv"":-2.95e+02,""stu"":-2.95e+02,""oroqen"":-2.97e+02}"
"""HG00132""","""gbr""","""nfe""","""XX""","{""cdx"":-2.98e+02,""gih"":-2.98e+02,""adygei"":-3.00e+02,""msl"":-2.98e+02,""burusho"":-2.99e+02,""palestinian"":-2.98e+02,""han"":-2.99e+02,""ibs"":-2.98e+02,""beb"":-2.98e+02,""french"":-3.00e+02,""basque"":-3.00e+02,""tu"":-3.01e+02,""itu"":-2.99e+02,""dai"":-3.00e+02,""ceu"":-2.98e+02,""makrani"":-3.00e+02,""tsi"":-2.98e+02,""fin"":-2.98e+02,""tujia"":-3.01e+02,""mozabite"":-2.99e+02,""pjl"":-2.97e+02,""gwd"":-2.98e+02,""mandenka"":-3.01e+02,""surui"":-3.00e+02,""pel"":-2.99e+02,""pur"":-2.98e+02,""orcadian"":-3.00e+02,""acb"":-2.98e+02,""bedouin"":-2.99e+02,""she"":-3.00e+02,""chs"":-2.98e+02,""brahui"":-2.99e+02,""naxi"":-3.01e+02,""mxl"":-2.99e+02,""daur"":-3.01e+02,""russian"":-3.01e+02,""balochi"":-3.01e+02,""asw"":-2.98e+02,""clm"":-2.98e+02,""maya"":-3.00e+02,""yoruba"":-3.00e+02,""colombian"":-3.01e+02,""karitiana"":-3.00e+02,""yizu"":-3.00e+02,""pathan"":-2.99e+02,""hazara"":-2.99e+02,""cambodian"":-2.99e+02,""kalash"":-2.99e+02,""yakut"":-2.99e+02,""yri"":-2.98e+02,""hezhen"":-3.01e+02,""mongola"":-3.00e+02,""tuscan"":-3.01e+02,""lwk"":-2.99e+02,""gbr"":-2.98e+02,""chb"":-2.99e+02,""uygur"":-3.00e+02,""esn"":-2.98e+02,""sindhi"":-3.00e+02,""druze"":-2.99e+02,""japanese"":-2.99e+02,""jpt"":-2.98e+02,""khv"":-2.98e+02,""stu"":-2.98e+02,""oroqen"":-3.01e+02}",-295.0,"{""cdx"":-2.98e+02,""gih"":-2.98e+02,""adygei"":-3.00e+02,""msl"":-2.98e+02,""burusho"":-2.99e+02,""palestinian"":-2.98e+02,""han"":-2.99e+02,""ibs"":-2.98e+02,""beb"":-2.98e+02,""french"":-3.00e+02,""basque"":-3.00e+02,""tu"":-3.01e+02,""itu"":-2.99e+02,""dai"":-3.00e+02,""ceu"":-2.98e+02,""makrani"":-3.00e+02,""tsi"":-2.98e+02,""fin"":-2.98e+02,""tujia"":-3.01e+02,""mozabite"":-2.99e+02,""pjl"":-2.97e+02,""gwd"":-2.98e+02,""mandenka"":-3.01e+02,""surui"":-3.00e+02,""pel"":-2.99e+02,""pur"":-2.98e+02,""orcadian"":-3.00e+02,""acb"":-2.98e+02,""bedouin"":-2.99e+02,""she"":-3.00e+02,""chs"":-2.98e+02,""brahui"":-2.99e+02,""naxi"":-3.01e+02,""mxl"":-2.99e+02,""daur"":-3.01e+02,""russian"":-3.01e+02,""balochi"":-3.01e+02,""asw"":-2.98e+02,""clm"":-2.98e+02,""maya"":-3.00e+02,""yoruba"":-3.00e+02,""colombian"":-3.01e+02,""karitiana"":-3.00e+02,""yizu"":-3.00e+02,""pathan"":-2.99e+02,""hazara"":-2.99e+02,""cambodian"":-2.99e+02,""kalash"":-2.99e+02,""yakut"":-2.99e+02,""yri"":-2.98e+02,""hezhen"":-3.01e+02,""mongola"":-3.00e+02,""tuscan"":-3.01e+02,""lwk"":-2.99e+02,""gbr"":-2.98e+02,""chb"":-2.99e+02,""uygur"":-3.00e+02,""esn"":-2.98e+02,""sindhi"":-3.00e+02,""druze"":-2.99e+02,""japanese"":-2.99e+02,""jpt"":-2.98e+02,""khv"":-2.98e+02,""stu"":-2.98e+02,""oroqen"":-3.01e+02}"


In [54]:
def predict_ancestry(posteriors):
    return hl.bind(
        lambda x: hl.sorted(x.items(), key=lambda item: item[1], reverse=True)[0][0],
        posteriors
    )

mt_probs = mt_probs.annotate_cols(predicted_ancestry=predict_ancestry(mt_probs.posteriors))
mt_probs.cols().select('predicted_ancestry', 'pop').show(5)

s,predicted_ancestry,pop
str,str,str
"""HG00107""","""pjl""","""gbr"""
"""HG00114""","""pjl""","""gbr"""
"""HG00121""","""pjl""","""gbr"""
"""HG00127""","""pjl""","""gbr"""
"""HG00132""","""pjl""","""gbr"""


In [55]:
tp, total = mt_probs.filter_cols(mt_probs.predicted_ancestry == mt_probs.pop).count_cols(), mt_probs.count_cols()

tp / total, tp, total

(0.0663265306122449, 26, 392)

In [56]:
def calculate_error_rate(mt):
    correct_predictions = mt.filter_cols(mt.predicted_ancestry == mt.pop)
    total_samples = mt.count_cols()
    error_rate = correct_predictions.aggregate_cols(
        hl.agg.sum(1 - hl.exp(correct_predictions.posteriors[correct_predictions.predicted_ancestry]))
    ) / total_samples
    return error_rate


calculate_error_rate(mt_probs)

0.0663265306122449