In [147]:
import hail as hl

# hl.init()

In [148]:
# hl.utils.get_1kg('data/1kg/')
# hl.utils.get_hgdp('data/hgdp/')

In [149]:
data_path = 'data/hgdp'

In [150]:
mt = hl.read_matrix_table(f'{data_path}/HGDP.mt')

In [151]:
mt.describe()

----------------------------------------
Global fields:
    None
----------------------------------------
Column fields:
    's': str
----------------------------------------
Row fields:
    'locus': locus<GRCh38>
    'alleles': array<str>
    'rsid': str
    'qual': float64
    'filters': set<str>
    'info': struct {
        QUALapprox: int32, 
        SB: array<int32>, 
        MQ: float64, 
        MQRankSum: float64, 
        VarDP: int32, 
        AS_ReadPosRankSum: float64, 
        AS_pab_max: float64, 
        AS_QD: float64, 
        AS_MQ: float64, 
        QD: float64, 
        AS_MQRankSum: float64, 
        FS: float64, 
        AS_FS: float64, 
        ReadPosRankSum: float64, 
        AS_QUALapprox: int32, 
        AS_SB_TABLE: array<int32>, 
        AS_VarDP: int32, 
        AS_SOR: float64, 
        SOR: float64, 
        transmitted_singleton: bool, 
        omni: bool, 
        mills: bool, 
        monoallelic: bool, 
        AS_VQSLOD: float64, 
        Inbreeding

In [152]:
populations_table = hl.import_table(f'{data_path}/HGDP_annotations.txt', key='s')
populations_table.show()

s,pop,continental_pop,sex_karyotype
str,str,str,str
"""HG00096""","""gbr""","""nfe""","""XY"""
"""HG00097""","""gbr""","""nfe""","""XX"""
"""HG00099""","""gbr""","""nfe""","""XX"""
"""HG00100""","""gbr""","""nfe""","""XX"""
"""HG00101""","""gbr""","""nfe""","""XY"""
"""HG00102""","""gbr""","""nfe""","""XX"""
"""HG00103""","""gbr""","""nfe""","""XY"""
"""HG00105""","""gbr""","""nfe""","""XY"""
"""HG00106""","""gbr""","""nfe""","""XX"""
"""HG00107""","""gbr""","""nfe""","""XY"""


In [153]:
mt = mt.annotate_cols(**populations_table[mt.s])

In [154]:
hl.summarize_variants(mt)

Number of alleles,Count
2,10441

Allele type,Count
SNP,10441

Metric,Value
Transitions,6602.0
Transversions,3839.0
Ratio,1.72

Contig,Count
chr1,881
chr2,799
chr3,728
chr4,659
chr5,618
chr6,572
chr7,576
chr8,525
chr9,476
chr10,516


In [155]:
import random
import pandas as pd

rsid_list = mt.aggregate_rows(hl.agg.collect_as_set(mt.rsid))
selected_snps = random.choices(list(rsid_list), k=1000)
mt_filtered = mt.filter_rows(hl.literal(selected_snps).contains(mt.rsid))

selected_snps

['rs2612663',
 'rs28481921',
 'rs11103730',
 'rs9369601',
 'rs79938568',
 'rs1340597476',
 'rs113683215',
 'rs6757804',
 'rs7012443',
 'rs10843003',
 'rs577043690',
 'rs153449',
 'rs6903768',
 'rs78688158',
 'rs184903933',
 'rs183860502',
 'rs3768045',
 'rs2471158',
 'rs4941551',
 'rs8139499',
 'rs1114543',
 'rs946724808',
 'rs141773591',
 'rs1752648',
 'rs1337880708',
 'rs12392493',
 'rs9480260',
 'rs1752648',
 'rs1206162660',
 'rs144234746',
 'rs7460118',
 'rs115747995',
 'rs147674105',
 'rs2670019',
 'rs9652413',
 'rs1828407',
 'rs117312584',
 'rs2093290',
 'rs11233481',
 'rs1316622721',
 'rs553262939',
 'rs2526885',
 'rs1962227',
 'rs1935716',
 'rs2291465',
 'rs2514522',
 'rs144981874',
 'rs4699670',
 'rs112883860',
 'rs190590637',
 'rs10192210',
 'rs116599778',
 'rs153449',
 'rs114506121',
 'rs73823032',
 'rs142073136',
 'rs11129727',
 'rs78946980',
 'rs2731878',
 'rs796711064',
 'rs544584204',
 'rs11588944',
 'rs11974351',
 'rs1278787618',
 'rs1613949',
 'rs112971114',
 'rs145729

In [156]:
mt_filtered.count_rows()

922

In [157]:
def gt_freq_estimates(mt, group):
    p_ML = ((2 * hl.agg.sum(mt.GT.is_hom_ref())) + hl.agg.sum(mt.GT.is_het())) / (2 * hl.agg.count_where(hl.is_defined(mt.GT)))
    return mt.aggregate_entries(
        hl.agg.group_by(
            mt.rsid,
            hl.agg.group_by(
                group,
                hl.struct(
                    p_ML = p_ML,
                    p_AA = p_ML ** 2,
                    p_AB = 2 * p_ML * (1 - p_ML),
                    p_BB = (1 - p_ML) ** 2,
                )
            )
        )
    )

def create_freqs_dataframe(snp_freqs):
    rows = [
        {
            'rsid': rsid,
            'pop_freqs': pop_freqs
        }
        for rsid, pop_freqs in snp_freqs.items()
    ]
    df = pd.DataFrame(rows)
    return df

snp_freqs = gt_freq_estimates(mt_filtered, mt_filtered.pop)
freqs_ht = hl.Table.from_pandas(create_freqs_dataframe(snp_freqs), key='rsid')
freqs_ht.show()

rsid,pop_freqs
str,"dict<str, struct{p_ML: float64, p_AA: float64, p_AB: float64, p_BB: float64}>"
"""rs10014702""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(9.17e-01,8.40e-01,1.53e-01,6.94e-03),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(9.50e-01,9.03e-01,9.50e-02,2.50e-03),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(9.75e-01,9.51e-01,4.88e-02,6.25e-04),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(9.41e-01,8.86e-01,1.11e-01,3.46e-03),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs10017861""","{""cdx"":(7.27e-01,5.29e-01,3.97e-01,7.44e-02),""gih"":(4.09e-01,1.67e-01,4.83e-01,3.49e-01),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.25e-01,1.56e-02,2.19e-01,7.66e-01),""burusho"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""palestinian"":(3.75e-01,1.41e-01,4.69e-01,3.91e-01),""han"":(9.00e-01,8.10e-01,1.80e-01,1.00e-02),""ibs"":(3.64e-01,1.32e-01,4.63e-01,4.05e-01),""beb"":(7.78e-01,6.05e-01,3.46e-01,4.94e-02),""french"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""basque"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""itu"":(7.14e-01,5.10e-01,4.08e-01,8.16e-02),""dai"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""ceu"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""makrani"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tsi"":(4.62e-01,2.13e-01,4.97e-01,2.90e-01),""fin"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pjl"":(7.69e-01,5.92e-01,3.55e-01,5.33e-02),""gwd"":(1.88e-01,3.52e-02,3.05e-01,6.60e-01),""mandenka"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""surui"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pel"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""pur"":(4.50e-01,2.03e-01,4.95e-01,3.03e-01),""orcadian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""acb"":(1.50e-01,2.25e-02,2.55e-01,7.22e-01),""bedouin"":(6.43e-01,4.13e-01,4.59e-01,1.28e-01),""she"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""daur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""russian"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""clm"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""maya"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yoruba"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""colombian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""karitiana"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""yizu"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pathan"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""hazara"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""cambodian"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""kalash"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(7.50e-02,5.63e-03,1.39e-01,8.56e-01),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(3.00e-01,9.00e-02,4.20e-01,4.90e-01),""gbr"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""chb"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""uygur"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""esn"":(8.82e-02,7.79e-03,1.61e-01,8.31e-01),""sindhi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""druze"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""japanese"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""jpt"":(9.38e-01,8.79e-01,1.17e-01,3.91e-03),""khv"":(6.82e-01,4.65e-01,4.34e-01,1.01e-01),""stu"":(4.55e-01,2.07e-01,4.96e-01,2.98e-01),""oroqen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01)}"
"""rs10047111""","{""cdx"":(5.45e-01,2.98e-01,4.96e-01,2.07e-01),""gih"":(6.36e-01,4.05e-01,4.63e-01,1.32e-01),""adygei"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""msl"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""burusho"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""palestinian"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""han"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""ibs"":(5.91e-01,3.49e-01,4.83e-01,1.67e-01),""beb"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""french"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""basque"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""itu"":(7.14e-01,5.10e-01,4.08e-01,8.16e-02),""dai"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""ceu"":(5.94e-01,3.53e-01,4.82e-01,1.65e-01),""makrani"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tsi"":(5.77e-01,3.33e-01,4.88e-01,1.79e-01),""fin"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""tujia"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mozabite"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""pjl"":(7.69e-01,5.92e-01,3.55e-01,5.33e-02),""gwd"":(5.63e-01,3.16e-01,4.92e-01,1.91e-01),""mandenka"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""surui"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""pel"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""pur"":(6.75e-01,4.56e-01,4.39e-01,1.06e-01),""orcadian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""acb"":(5.50e-01,3.03e-01,4.95e-01,2.02e-01),""bedouin"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""she"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""chs"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""brahui"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""naxi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mxl"":(6.43e-01,4.13e-01,4.59e-01,1.28e-01),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(5.63e-01,3.16e-01,4.92e-01,1.91e-01),""clm"":(7.22e-01,5.22e-01,4.01e-01,7.72e-02),""maya"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yoruba"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yizu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""pathan"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""hazara"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""cambodian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""kalash"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""yakut"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yri"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""hezhen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mongola"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tuscan"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""lwk"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""gbr"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""chb"":(8.00e-01,6.40e-01,3.20e-01,4.00e-02),""uygur"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""esn"":(5.59e-01,3.12e-01,4.93e-01,1.95e-01),""sindhi"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""druze"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""japanese"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""jpt"":(5.63e-01,3.16e-01,4.92e-01,1.91e-01),""khv"":(5.91e-01,3.49e-01,4.83e-01,1.67e-01),""stu"":(7.27e-01,5.29e-01,3.97e-01,7.44e-02),""oroqen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01)}"
"""rs10063535""","{""cdx"":(8.64e-01,7.46e-01,2.36e-01,1.86e-02),""gih"":(6.36e-01,4.05e-01,4.63e-01,1.32e-01),""adygei"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""msl"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""burusho"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""palestinian"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""han"":(8.00e-01,6.40e-01,3.20e-01,4.00e-02),""ibs"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""beb"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""french"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(7.14e-01,5.10e-01,4.08e-01,8.16e-02),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""makrani"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tsi"":(6.92e-01,4.79e-01,4.26e-01,9.47e-02),""fin"":(7.78e-01,6.05e-01,3.46e-01,4.94e-02),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pjl"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""gwd"":(8.44e-01,7.12e-01,2.64e-01,2.44e-02),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(9.17e-01,8.40e-01,1.53e-01,6.94e-03),""pur"":(8.00e-01,6.40e-01,3.20e-01,4.00e-02),""orcadian"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""acb"":(8.00e-01,6.40e-01,3.20e-01,4.00e-02),""bedouin"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(8.89e-01,7.90e-01,1.98e-01,1.23e-02),""brahui"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(8.13e-01,6.60e-01,3.05e-01,3.52e-02),""clm"":(7.22e-01,5.22e-01,4.01e-01,7.72e-02),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""cambodian"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""kalash"":(6.25e-01,3.91e-01,4.69e-01,1.41e-01),""yakut"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""yri"":(8.25e-01,6.81e-01,2.89e-01,3.06e-02),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""gbr"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""chb"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""uygur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""esn"":(7.94e-01,6.31e-01,3.27e-01,4.24e-02),""sindhi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(8.13e-01,6.60e-01,3.05e-01,3.52e-02),""khv"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""stu"":(6.82e-01,4.65e-01,4.34e-01,1.01e-01),""oroqen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01)}"
"""rs1007018596""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs1008328363""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs10103328""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(7.73e-01,5.97e-01,3.51e-01,5.17e-02),""adygei"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""msl"":(9.58e-01,9.18e-01,7.99e-02,1.74e-03),""burusho"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""palestinian"":(9.38e-01,8.79e-01,1.17e-01,3.91e-03),""han"":(8.00e-01,6.40e-01,3.20e-01,4.00e-02),""ibs"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""beb"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(7.86e-01,6.17e-01,3.37e-01,4.59e-02),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""makrani"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tsi"":(9.23e-01,8.52e-01,1.42e-01,5.92e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""pjl"":(8.46e-01,7.16e-01,2.60e-01,2.37e-02),""gwd"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""mandenka"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""surui"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pel"":(9.17e-01,8.40e-01,1.53e-01,6.94e-03),""pur"":(9.25e-01,8.56e-01,1.39e-01,5.62e-03),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(9.00e-01,8.10e-01,1.80e-01,1.00e-02),""bedouin"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""brahui"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(8.13e-01,6.60e-01,3.05e-01,3.52e-02),""clm"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(9.00e-01,8.10e-01,1.80e-01,1.00e-02),""gbr"":(7.78e-01,6.05e-01,3.46e-01,4.94e-02),""chb"":(9.00e-01,8.10e-01,1.80e-01,1.00e-02),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(8.82e-01,7.79e-01,2.08e-01,1.38e-02),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""japanese"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""jpt"":(9.38e-01,8.79e-01,1.17e-01,3.91e-03),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(7.73e-01,5.97e-01,3.51e-01,5.17e-02),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs10120006""","{""cdx"":(1.82e-01,3.31e-02,2.98e-01,6.69e-01),""gih"":(3.64e-01,1.32e-01,4.63e-01,4.05e-01),""adygei"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""msl"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""burusho"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""palestinian"":(6.25e-02,3.91e-03,1.17e-01,8.79e-01),""han"":(2.00e-01,4.00e-02,3.20e-01,6.40e-01),""ibs"":(1.82e-01,3.31e-02,2.98e-01,6.69e-01),""beb"":(2.78e-01,7.72e-02,4.01e-01,5.22e-01),""french"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""basque"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""tu"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""itu"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""dai"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""ceu"":(9.38e-02,8.79e-03,1.70e-01,8.21e-01),""makrani"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""tsi"":(1.15e-01,1.33e-02,2.04e-01,7.83e-01),""fin"":(1.11e-01,1.23e-02,1.98e-01,7.90e-01),""tujia"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""mozabite"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""pjl"":(3.08e-01,9.47e-02,4.26e-01,4.79e-01),""gwd"":(3.13e-01,9.77e-02,4.30e-01,4.73e-01),""mandenka"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""surui"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""pel"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pur"":(1.50e-01,2.25e-02,2.55e-01,7.22e-01),""orcadian"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""acb"":(4.00e-01,1.60e-01,4.80e-01,3.60e-01),""bedouin"":(1.43e-01,2.04e-02,2.45e-01,7.35e-01),""she"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""chs"":(2.78e-01,7.72e-02,4.01e-01,5.22e-01),""brahui"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""naxi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mxl"":(2.86e-01,8.16e-02,4.08e-01,5.10e-01),""daur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""russian"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""balochi"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""asw"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""clm"":(5.56e-02,3.09e-03,1.05e-01,8.92e-01),""maya"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""yoruba"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yizu"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""pathan"":(1.67e-01,2.78e-02,2.78e-01,6.94e-01),""hazara"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""cambodian"":(1.67e-01,2.78e-02,2.78e-01,6.94e-01),""kalash"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""yakut"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yri"":(4.75e-01,2.26e-01,4.99e-01,2.76e-01),""hezhen"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""mongola"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""tuscan"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""lwk"":(4.00e-01,1.60e-01,4.80e-01,3.60e-01),""gbr"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""chb"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""uygur"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""esn"":(2.94e-01,8.65e-02,4.15e-01,4.98e-01),""sindhi"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00),""druze"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""japanese"":(3.75e-01,1.41e-01,4.69e-01,3.91e-01),""jpt"":(3.13e-01,9.77e-02,4.30e-01,4.73e-01),""khv"":(4.55e-02,2.07e-03,8.68e-02,9.11e-01),""stu"":(9.09e-02,8.26e-03,1.65e-01,8.26e-01),""oroqen"":(0.00e+00,0.00e+00,0.00e+00,1.00e+00)}"
"""rs1015455864""","{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(8.89e-01,7.90e-01,1.98e-01,1.23e-02),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.80e-01,9.60e-01,3.92e-02,4.00e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""oroqen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00)}"
"""rs10192210""","{""cdx"":(5.91e-01,3.49e-01,4.83e-01,1.67e-01),""gih"":(6.82e-01,4.65e-01,4.34e-01,1.01e-01),""adygei"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""msl"":(6.67e-01,4.44e-01,4.44e-01,1.11e-01),""burusho"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""palestinian"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""han"":(8.00e-01,6.40e-01,3.20e-01,4.00e-02),""ibs"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""beb"":(7.22e-01,5.22e-01,4.01e-01,7.72e-02),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tu"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""itu"":(5.71e-01,3.27e-01,4.90e-01,1.84e-01),""dai"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""ceu"":(7.19e-01,5.17e-01,4.04e-01,7.91e-02),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(7.69e-01,5.92e-01,3.55e-01,5.33e-02),""fin"":(7.78e-01,6.05e-01,3.46e-01,4.94e-02),""tujia"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mozabite"":(8.75e-01,7.66e-01,2.19e-01,1.56e-02),""pjl"":(7.88e-01,6.22e-01,3.34e-01,4.47e-02),""gwd"":(7.19e-01,5.17e-01,4.04e-01,7.91e-02),""mandenka"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""surui"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""pel"":(5.83e-01,3.40e-01,4.86e-01,1.74e-01),""pur"":(7.75e-01,6.01e-01,3.49e-01,5.06e-02),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""bedouin"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""she"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""chs"":(7.22e-01,5.22e-01,4.01e-01,7.72e-02),""brahui"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""naxi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mxl"":(8.57e-01,7.35e-01,2.45e-01,2.04e-02),""daur"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""russian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""balochi"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""asw"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""clm"":(6.11e-01,3.73e-01,4.75e-01,1.51e-01),""maya"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""yoruba"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""colombian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""pathan"":(8.33e-01,6.94e-01,2.78e-01,2.78e-02),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""kalash"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""yakut"":(3.33e-01,1.11e-01,4.44e-01,4.44e-01),""yri"":(7.75e-01,6.01e-01,3.49e-01,5.06e-02),""hezhen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""mongola"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""tuscan"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01),""lwk"":(6.00e-01,3.60e-01,4.80e-01,1.60e-01),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(7.00e-01,4.90e-01,4.20e-01,9.00e-02),""uygur"":(2.50e-01,6.25e-02,3.75e-01,5.63e-01),""esn"":(6.76e-01,4.58e-01,4.38e-01,1.05e-01),""sindhi"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(7.50e-01,5.63e-01,3.75e-01,6.25e-02),""jpt"":(8.13e-01,6.60e-01,3.05e-01,3.52e-02),""khv"":(5.91e-01,3.49e-01,4.83e-01,1.67e-01),""stu"":(7.27e-01,5.29e-01,3.97e-01,7.44e-02),""oroqen"":(5.00e-01,2.50e-01,5.00e-01,2.50e-01)}"


In [158]:
total_subjects = mt.count_cols()
ethnicity_counts = mt.aggregate_cols(hl.agg.counter(mt.pop))
ethnicity_proportions = {k: v / total_subjects for k, v in ethnicity_counts.items()}

ethnicity_proportions, sum(ethnicity_proportions.values())

({'acb': 0.025510204081632654,
  'adygei': 0.00510204081632653,
  'asw': 0.02040816326530612,
  'balochi': 0.002551020408163265,
  'basque': 0.00510204081632653,
  'beb': 0.02295918367346939,
  'bedouin': 0.017857142857142856,
  'brahui': 0.007653061224489796,
  'burusho': 0.01020408163265306,
  'cambodian': 0.007653061224489796,
  'cdx': 0.02806122448979592,
  'ceu': 0.04081632653061224,
  'chb': 0.012755102040816327,
  'chs': 0.02295918367346939,
  'clm': 0.02295918367346939,
  'colombian': 0.002551020408163265,
  'dai': 0.00510204081632653,
  'daur': 0.002551020408163265,
  'druze': 0.01020408163265306,
  'esn': 0.04336734693877551,
  'fin': 0.02295918367346939,
  'french': 0.00510204081632653,
  'gbr': 0.02295918367346939,
  'gih': 0.02806122448979592,
  'gwd': 0.04081632653061224,
  'han': 0.012755102040816327,
  'hazara': 0.007653061224489796,
  'hezhen': 0.002551020408163265,
  'ibs': 0.02806122448979592,
  'itu': 0.017857142857142856,
  'japanese': 0.01020408163265306,
  'jpt':

In [159]:
def annotate_rows_with_freqs(mt, freqs_ht):
    mt = mt.annotate_rows(
        freqs=freqs_ht[mt_filtered.rsid].pop_freqs
    )
    return mt

mt_annotated = annotate_rows_with_freqs(mt_filtered, freqs_ht)

In [160]:
mt_annotated.entries().show(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,Unnamed: 34_level_0,Unnamed: 35_level_0,Unnamed: 36_level_0,Unnamed: 37_level_0,Unnamed: 38_level_0,Unnamed: 39_level_0
locus,alleles,rsid,qual,filters,QUALapprox,SB,MQ,MQRankSum,VarDP,AS_ReadPosRankSum,AS_pab_max,AS_QD,AS_MQ,QD,AS_MQRankSum,FS,AS_FS,ReadPosRankSum,AS_QUALapprox,AS_SB_TABLE,AS_VarDP,AS_SOR,SOR,transmitted_singleton,omni,mills,monoallelic,AS_VQSLOD,InbreedingCoeff,freqs,s,pop,continental_pop,sex_karyotype,GT,DP,GQ,AD,PL
locus<GRCh38>,array<str>,str,float64,set<str>,int32,array<int32>,float64,float64,int32,float64,float64,float64,float64,float64,float64,float64,float64,float64,int32,array<int32>,int32,float64,float64,bool,bool,bool,bool,float64,float64,"dict<str, struct{p_ML: float64, p_AA: float64, p_AB: float64, p_BB: float64}>",str,str,str,str,call,int32,int32,array<int32>,array<int32>
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00107""","""gbr""","""nfe""","""XY""",0/0,22,21,,
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00114""","""gbr""","""nfe""","""XY""",0/0,60,76,,
chr1:17379,"[""G"",""A""]","""rs754322362""",-10.0,{},783830,"[195198,94196,41845,15576]",39.4,0.322,346815,0.41,1.0,2.26,39.4,2.26,0.304,2.73,2.73,0.376,783830,"[195198,94196,41845,15576]",346815,0.986,0.986,False,False,False,False,-2.04,-0.0194,"{""cdx"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gih"":(9.55e-01,9.11e-01,8.68e-02,2.07e-03),""adygei"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""msl"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""burusho"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""palestinian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""han"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ibs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""beb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""french"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""basque"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""itu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""dai"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""ceu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""makrani"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tsi"":(9.62e-01,9.25e-01,7.40e-02,1.48e-03),""fin"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""tujia"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mozabite"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pjl"":(9.81e-01,9.62e-01,3.77e-02,3.70e-04),""gwd"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mandenka"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""surui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pel"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""orcadian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""acb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""bedouin"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""she"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""chs"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""brahui"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""naxi"":(NaN,NaN,NaN,NaN),""mxl"":(9.29e-01,8.62e-01,1.33e-01,5.10e-03),""daur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""russian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""balochi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""asw"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""clm"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""maya"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yoruba"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""colombian"":(NaN,NaN,NaN,NaN),""karitiana"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yizu"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""pathan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hazara"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""cambodian"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""kalash"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yakut"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""yri"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""hezhen"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""mongola"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""tuscan"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""lwk"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""gbr"":(9.44e-01,8.92e-01,1.05e-01,3.09e-03),""chb"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""uygur"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""esn"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""sindhi"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""druze"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""japanese"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""jpt"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""khv"":(1.00e+00,1.00e+00,0.00e+00,0.00e+00),""stu"":(8.18e-01,6.69e-01,2.98e-01,3.31e-02),""oroqen"":(NaN,NaN,NaN,NaN)}","""HG00121""","""gbr""","""nfe""","""XX""",0/0,31,52,,


In [169]:
def log_likelihood(genotype, freqs, pop):
    return (
        hl.case()
        .when(genotype.is_hom_ref(), hl.log(freqs[pop].p_AA))
        .when(genotype.is_het(), hl.log(freqs[pop].p_AB))
        .when(genotype.is_hom_var(), hl.log(freqs[pop].p_BB))
        .default(0.0)
    )

def get_log_likelihoods(mt, proportions):
    mt = mt.annotate_entries(
        log_likelihood=log_likelihood(mt.GT, mt.freqs, mt.pop)
    )
    mt = mt.annotate_cols(
        log_likelihoods=hl.dict({
            pop: hl.agg.sum(mt.log_likelihood) + hl.log(proportions[pop])
            for pop in proportions.keys()
        })
    )
    return mt


mt_likelihood = get_log_likelihoods(mt_annotated, ethnicity_proportions)
mt_likelihood.cols().show()

s,pop,continental_pop,sex_karyotype,log_likelihoods
str,str,str,str,"dict<str, float64>"
"""HG00107""","""gbr""","""nfe""","""XY""","{""cdx"":-3.00e+02,""gih"":-3.00e+02,""adygei"":-3.02e+02,""msl"":-3.00e+02,""burusho"":-3.01e+02,""palestinian"":-3.00e+02,""han"":-3.01e+02,""ibs"":-3.00e+02,""beb"":-3.00e+02,""french"":-3.02e+02,""basque"":-3.02e+02,""tu"":-3.02e+02,""itu"":-3.01e+02,""dai"":-3.02e+02,""ceu"":-3.00e+02,""makrani"":-3.02e+02,""tsi"":-3.00e+02,""fin"":-3.00e+02,""tujia"":-3.02e+02,""mozabite"":-3.01e+02,""pjl"":-2.99e+02,""gwd"":-3.00e+02,""mandenka"":-3.02e+02,""surui"":-3.02e+02,""pel"":-3.01e+02,""pur"":-2.99e+02,""orcadian"":-3.02e+02,""acb"":-3.00e+02,""bedouin"":-3.01e+02,""she"":-3.02e+02,""chs"":-3.00e+02,""brahui"":-3.01e+02,""naxi"":-3.02e+02,""mxl"":-3.01e+02,""daur"":-3.02e+02,""russian"":-3.02e+02,""balochi"":-3.02e+02,""asw"":-3.00e+02,""clm"":-3.00e+02,""maya"":-3.02e+02,""yoruba"":-3.02e+02,""colombian"":-3.02e+02,""karitiana"":-3.02e+02,""yizu"":-3.02e+02,""pathan"":-3.01e+02,""hazara"":-3.01e+02,""cambodian"":-3.01e+02,""kalash"":-3.01e+02,""yakut"":-3.01e+02,""yri"":-2.99e+02,""hezhen"":-3.02e+02,""mongola"":-3.02e+02,""tuscan"":-3.02e+02,""lwk"":-3.01e+02,""gbr"":-3.00e+02,""chb"":-3.01e+02,""uygur"":-3.02e+02,""esn"":-3.00e+02,""sindhi"":-3.02e+02,""druze"":-3.01e+02,""japanese"":-3.01e+02,""jpt"":-3.00e+02,""khv"":-3.00e+02,""stu"":-3.00e+02,""oroqen"":-3.02e+02}"
"""HG00114""","""gbr""","""nfe""","""XY""","{""cdx"":-3.32e+02,""gih"":-3.32e+02,""adygei"":-3.34e+02,""msl"":-3.32e+02,""burusho"":-3.33e+02,""palestinian"":-3.33e+02,""han"":-3.33e+02,""ibs"":-3.32e+02,""beb"":-3.33e+02,""french"":-3.34e+02,""basque"":-3.34e+02,""tu"":-3.35e+02,""itu"":-3.33e+02,""dai"":-3.34e+02,""ceu"":-3.32e+02,""makrani"":-3.34e+02,""tsi"":-3.32e+02,""fin"":-3.33e+02,""tujia"":-3.35e+02,""mozabite"":-3.33e+02,""pjl"":-3.32e+02,""gwd"":-3.32e+02,""mandenka"":-3.35e+02,""surui"":-3.34e+02,""pel"":-3.33e+02,""pur"":-3.32e+02,""orcadian"":-3.34e+02,""acb"":-3.33e+02,""bedouin"":-3.33e+02,""she"":-3.34e+02,""chs"":-3.33e+02,""brahui"":-3.34e+02,""naxi"":-3.35e+02,""mxl"":-3.33e+02,""daur"":-3.35e+02,""russian"":-3.35e+02,""balochi"":-3.35e+02,""asw"":-3.33e+02,""clm"":-3.33e+02,""maya"":-3.34e+02,""yoruba"":-3.34e+02,""colombian"":-3.35e+02,""karitiana"":-3.34e+02,""yizu"":-3.34e+02,""pathan"":-3.34e+02,""hazara"":-3.34e+02,""cambodian"":-3.34e+02,""kalash"":-3.33e+02,""yakut"":-3.34e+02,""yri"":-3.32e+02,""hezhen"":-3.35e+02,""mongola"":-3.34e+02,""tuscan"":-3.35e+02,""lwk"":-3.33e+02,""gbr"":-3.33e+02,""chb"":-3.33e+02,""uygur"":-3.34e+02,""esn"":-3.32e+02,""sindhi"":-3.34e+02,""druze"":-3.33e+02,""japanese"":-3.33e+02,""jpt"":-3.33e+02,""khv"":-3.32e+02,""stu"":-3.32e+02,""oroqen"":-3.35e+02}"
"""HG00121""","""gbr""","""nfe""","""XX""","{""cdx"":-3.20e+02,""gih"":-3.20e+02,""adygei"":-3.22e+02,""msl"":-3.20e+02,""burusho"":-3.21e+02,""palestinian"":-3.20e+02,""han"":-3.21e+02,""ibs"":-3.20e+02,""beb"":-3.20e+02,""french"":-3.22e+02,""basque"":-3.22e+02,""tu"":-3.22e+02,""itu"":-3.20e+02,""dai"":-3.22e+02,""ceu"":-3.20e+02,""makrani"":-3.22e+02,""tsi"":-3.20e+02,""fin"":-3.20e+02,""tujia"":-3.22e+02,""mozabite"":-3.21e+02,""pjl"":-3.19e+02,""gwd"":-3.20e+02,""mandenka"":-3.22e+02,""surui"":-3.22e+02,""pel"":-3.21e+02,""pur"":-3.19e+02,""orcadian"":-3.22e+02,""acb"":-3.20e+02,""bedouin"":-3.20e+02,""she"":-3.22e+02,""chs"":-3.20e+02,""brahui"":-3.21e+02,""naxi"":-3.22e+02,""mxl"":-3.20e+02,""daur"":-3.22e+02,""russian"":-3.22e+02,""balochi"":-3.22e+02,""asw"":-3.20e+02,""clm"":-3.20e+02,""maya"":-3.22e+02,""yoruba"":-3.22e+02,""colombian"":-3.22e+02,""karitiana"":-3.22e+02,""yizu"":-3.22e+02,""pathan"":-3.21e+02,""hazara"":-3.21e+02,""cambodian"":-3.21e+02,""kalash"":-3.21e+02,""yakut"":-3.21e+02,""yri"":-3.19e+02,""hezhen"":-3.22e+02,""mongola"":-3.22e+02,""tuscan"":-3.22e+02,""lwk"":-3.21e+02,""gbr"":-3.20e+02,""chb"":-3.21e+02,""uygur"":-3.22e+02,""esn"":-3.20e+02,""sindhi"":-3.22e+02,""druze"":-3.21e+02,""japanese"":-3.21e+02,""jpt"":-3.20e+02,""khv"":-3.20e+02,""stu"":-3.20e+02,""oroqen"":-3.22e+02}"
"""HG00127""","""gbr""","""nfe""","""XX""","{""cdx"":-3.23e+02,""gih"":-3.23e+02,""adygei"":-3.25e+02,""msl"":-3.23e+02,""burusho"":-3.24e+02,""palestinian"":-3.24e+02,""han"":-3.24e+02,""ibs"":-3.23e+02,""beb"":-3.23e+02,""french"":-3.25e+02,""basque"":-3.25e+02,""tu"":-3.26e+02,""itu"":-3.24e+02,""dai"":-3.25e+02,""ceu"":-3.23e+02,""makrani"":-3.25e+02,""tsi"":-3.23e+02,""fin"":-3.23e+02,""tujia"":-3.26e+02,""mozabite"":-3.24e+02,""pjl"":-3.22e+02,""gwd"":-3.23e+02,""mandenka"":-3.26e+02,""surui"":-3.25e+02,""pel"":-3.24e+02,""pur"":-3.23e+02,""orcadian"":-3.25e+02,""acb"":-3.23e+02,""bedouin"":-3.24e+02,""she"":-3.25e+02,""chs"":-3.23e+02,""brahui"":-3.24e+02,""naxi"":-3.26e+02,""mxl"":-3.24e+02,""daur"":-3.26e+02,""russian"":-3.26e+02,""balochi"":-3.26e+02,""asw"":-3.24e+02,""clm"":-3.23e+02,""maya"":-3.25e+02,""yoruba"":-3.25e+02,""colombian"":-3.26e+02,""karitiana"":-3.25e+02,""yizu"":-3.25e+02,""pathan"":-3.24e+02,""hazara"":-3.24e+02,""cambodian"":-3.24e+02,""kalash"":-3.24e+02,""yakut"":-3.24e+02,""yri"":-3.23e+02,""hezhen"":-3.26e+02,""mongola"":-3.25e+02,""tuscan"":-3.26e+02,""lwk"":-3.24e+02,""gbr"":-3.23e+02,""chb"":-3.24e+02,""uygur"":-3.25e+02,""esn"":-3.23e+02,""sindhi"":-3.25e+02,""druze"":-3.24e+02,""japanese"":-3.24e+02,""jpt"":-3.24e+02,""khv"":-3.23e+02,""stu"":-3.23e+02,""oroqen"":-3.26e+02}"
"""HG00132""","""gbr""","""nfe""","""XX""","{""cdx"":-3.15e+02,""gih"":-3.15e+02,""adygei"":-3.17e+02,""msl"":-3.15e+02,""burusho"":-3.16e+02,""palestinian"":-3.16e+02,""han"":-3.16e+02,""ibs"":-3.15e+02,""beb"":-3.15e+02,""french"":-3.17e+02,""basque"":-3.17e+02,""tu"":-3.18e+02,""itu"":-3.16e+02,""dai"":-3.17e+02,""ceu"":-3.15e+02,""makrani"":-3.17e+02,""tsi"":-3.15e+02,""fin"":-3.15e+02,""tujia"":-3.18e+02,""mozabite"":-3.16e+02,""pjl"":-3.14e+02,""gwd"":-3.15e+02,""mandenka"":-3.18e+02,""surui"":-3.17e+02,""pel"":-3.16e+02,""pur"":-3.15e+02,""orcadian"":-3.17e+02,""acb"":-3.15e+02,""bedouin"":-3.16e+02,""she"":-3.17e+02,""chs"":-3.15e+02,""brahui"":-3.16e+02,""naxi"":-3.18e+02,""mxl"":-3.16e+02,""daur"":-3.18e+02,""russian"":-3.18e+02,""balochi"":-3.18e+02,""asw"":-3.16e+02,""clm"":-3.15e+02,""maya"":-3.17e+02,""yoruba"":-3.17e+02,""colombian"":-3.18e+02,""karitiana"":-3.17e+02,""yizu"":-3.17e+02,""pathan"":-3.16e+02,""hazara"":-3.16e+02,""cambodian"":-3.16e+02,""kalash"":-3.16e+02,""yakut"":-3.16e+02,""yri"":-3.15e+02,""hezhen"":-3.18e+02,""mongola"":-3.17e+02,""tuscan"":-3.18e+02,""lwk"":-3.16e+02,""gbr"":-3.15e+02,""chb"":-3.16e+02,""uygur"":-3.17e+02,""esn"":-3.15e+02,""sindhi"":-3.17e+02,""druze"":-3.16e+02,""japanese"":-3.16e+02,""jpt"":-3.16e+02,""khv"":-3.15e+02,""stu"":-3.15e+02,""oroqen"":-3.18e+02}"
"""HG00149""","""gbr""","""nfe""","""XY""","{""cdx"":-3.18e+02,""gih"":-3.18e+02,""adygei"":-3.20e+02,""msl"":-3.18e+02,""burusho"":-3.19e+02,""palestinian"":-3.19e+02,""han"":-3.19e+02,""ibs"":-3.18e+02,""beb"":-3.18e+02,""french"":-3.20e+02,""basque"":-3.20e+02,""tu"":-3.21e+02,""itu"":-3.19e+02,""dai"":-3.20e+02,""ceu"":-3.18e+02,""makrani"":-3.20e+02,""tsi"":-3.18e+02,""fin"":-3.18e+02,""tujia"":-3.21e+02,""mozabite"":-3.19e+02,""pjl"":-3.17e+02,""gwd"":-3.18e+02,""mandenka"":-3.21e+02,""surui"":-3.20e+02,""pel"":-3.19e+02,""pur"":-3.18e+02,""orcadian"":-3.20e+02,""acb"":-3.18e+02,""bedouin"":-3.19e+02,""she"":-3.20e+02,""chs"":-3.18e+02,""brahui"":-3.20e+02,""naxi"":-3.21e+02,""mxl"":-3.19e+02,""daur"":-3.21e+02,""russian"":-3.21e+02,""balochi"":-3.21e+02,""asw"":-3.19e+02,""clm"":-3.18e+02,""maya"":-3.20e+02,""yoruba"":-3.20e+02,""colombian"":-3.21e+02,""karitiana"":-3.20e+02,""yizu"":-3.20e+02,""pathan"":-3.20e+02,""hazara"":-3.20e+02,""cambodian"":-3.20e+02,""kalash"":-3.19e+02,""yakut"":-3.20e+02,""yri"":-3.18e+02,""hezhen"":-3.21e+02,""mongola"":-3.20e+02,""tuscan"":-3.21e+02,""lwk"":-3.19e+02,""gbr"":-3.18e+02,""chb"":-3.19e+02,""uygur"":-3.20e+02,""esn"":-3.18e+02,""sindhi"":-3.20e+02,""druze"":-3.19e+02,""japanese"":-3.19e+02,""jpt"":-3.19e+02,""khv"":-3.18e+02,""stu"":-3.18e+02,""oroqen"":-3.21e+02}"
"""HG00177""","""fin""","""fin""","""XX""","{""cdx"":-3.21e+02,""gih"":-3.21e+02,""adygei"":-3.23e+02,""msl"":-3.21e+02,""burusho"":-3.22e+02,""palestinian"":-3.22e+02,""han"":-3.22e+02,""ibs"":-3.21e+02,""beb"":-3.21e+02,""french"":-3.23e+02,""basque"":-3.23e+02,""tu"":-3.24e+02,""itu"":-3.22e+02,""dai"":-3.23e+02,""ceu"":-3.21e+02,""makrani"":-3.23e+02,""tsi"":-3.21e+02,""fin"":-3.21e+02,""tujia"":-3.24e+02,""mozabite"":-3.22e+02,""pjl"":-3.20e+02,""gwd"":-3.21e+02,""mandenka"":-3.24e+02,""surui"":-3.23e+02,""pel"":-3.22e+02,""pur"":-3.21e+02,""orcadian"":-3.23e+02,""acb"":-3.21e+02,""bedouin"":-3.22e+02,""she"":-3.23e+02,""chs"":-3.21e+02,""brahui"":-3.23e+02,""naxi"":-3.24e+02,""mxl"":-3.22e+02,""daur"":-3.24e+02,""russian"":-3.24e+02,""balochi"":-3.24e+02,""asw"":-3.22e+02,""clm"":-3.21e+02,""maya"":-3.23e+02,""yoruba"":-3.23e+02,""colombian"":-3.24e+02,""karitiana"":-3.23e+02,""yizu"":-3.23e+02,""pathan"":-3.23e+02,""hazara"":-3.23e+02,""cambodian"":-3.23e+02,""kalash"":-3.22e+02,""yakut"":-3.23e+02,""yri"":-3.21e+02,""hezhen"":-3.24e+02,""mongola"":-3.23e+02,""tuscan"":-3.24e+02,""lwk"":-3.22e+02,""gbr"":-3.21e+02,""chb"":-3.22e+02,""uygur"":-3.23e+02,""esn"":-3.21e+02,""sindhi"":-3.23e+02,""druze"":-3.22e+02,""japanese"":-3.22e+02,""jpt"":-3.22e+02,""khv"":-3.21e+02,""stu"":-3.21e+02,""oroqen"":-3.24e+02}"
"""HG00190""","""fin""","""fin""","""XY""","{""cdx"":-3.12e+02,""gih"":-3.12e+02,""adygei"":-3.14e+02,""msl"":-3.12e+02,""burusho"":-3.13e+02,""palestinian"":-3.12e+02,""han"":-3.13e+02,""ibs"":-3.12e+02,""beb"":-3.12e+02,""french"":-3.14e+02,""basque"":-3.14e+02,""tu"":-3.15e+02,""itu"":-3.13e+02,""dai"":-3.14e+02,""ceu"":-3.12e+02,""makrani"":-3.14e+02,""tsi"":-3.12e+02,""fin"":-3.12e+02,""tujia"":-3.15e+02,""mozabite"":-3.13e+02,""pjl"":-3.11e+02,""gwd"":-3.12e+02,""mandenka"":-3.15e+02,""surui"":-3.14e+02,""pel"":-3.13e+02,""pur"":-3.12e+02,""orcadian"":-3.14e+02,""acb"":-3.12e+02,""bedouin"":-3.13e+02,""she"":-3.14e+02,""chs"":-3.12e+02,""brahui"":-3.13e+02,""naxi"":-3.15e+02,""mxl"":-3.13e+02,""daur"":-3.15e+02,""russian"":-3.15e+02,""balochi"":-3.15e+02,""asw"":-3.12e+02,""clm"":-3.12e+02,""maya"":-3.14e+02,""yoruba"":-3.14e+02,""colombian"":-3.15e+02,""karitiana"":-3.14e+02,""yizu"":-3.14e+02,""pathan"":-3.13e+02,""hazara"":-3.13e+02,""cambodian"":-3.13e+02,""kalash"":-3.13e+02,""yakut"":-3.13e+02,""yri"":-3.12e+02,""hezhen"":-3.15e+02,""mongola"":-3.14e+02,""tuscan"":-3.15e+02,""lwk"":-3.13e+02,""gbr"":-3.12e+02,""chb"":-3.13e+02,""uygur"":-3.14e+02,""esn"":-3.12e+02,""sindhi"":-3.14e+02,""druze"":-3.13e+02,""japanese"":-3.13e+02,""jpt"":-3.12e+02,""khv"":-3.12e+02,""stu"":-3.12e+02,""oroqen"":-3.15e+02}"
"""HG00233""","""gbr""","""nfe""","""XX""","{""cdx"":-3.21e+02,""gih"":-3.21e+02,""adygei"":-3.23e+02,""msl"":-3.21e+02,""burusho"":-3.22e+02,""palestinian"":-3.21e+02,""han"":-3.22e+02,""ibs"":-3.21e+02,""beb"":-3.21e+02,""french"":-3.23e+02,""basque"":-3.23e+02,""tu"":-3.23e+02,""itu"":-3.21e+02,""dai"":-3.23e+02,""ceu"":-3.21e+02,""makrani"":-3.23e+02,""tsi"":-3.21e+02,""fin"":-3.21e+02,""tujia"":-3.23e+02,""mozabite"":-3.22e+02,""pjl"":-3.20e+02,""gwd"":-3.21e+02,""mandenka"":-3.23e+02,""surui"":-3.23e+02,""pel"":-3.22e+02,""pur"":-3.20e+02,""orcadian"":-3.23e+02,""acb"":-3.21e+02,""bedouin"":-3.21e+02,""she"":-3.23e+02,""chs"":-3.21e+02,""brahui"":-3.22e+02,""naxi"":-3.23e+02,""mxl"":-3.21e+02,""daur"":-3.23e+02,""russian"":-3.23e+02,""balochi"":-3.23e+02,""asw"":-3.21e+02,""clm"":-3.21e+02,""maya"":-3.23e+02,""yoruba"":-3.23e+02,""colombian"":-3.23e+02,""karitiana"":-3.23e+02,""yizu"":-3.23e+02,""pathan"":-3.22e+02,""hazara"":-3.22e+02,""cambodian"":-3.22e+02,""kalash"":-3.22e+02,""yakut"":-3.22e+02,""yri"":-3.20e+02,""hezhen"":-3.23e+02,""mongola"":-3.23e+02,""tuscan"":-3.23e+02,""lwk"":-3.22e+02,""gbr"":-3.21e+02,""chb"":-3.22e+02,""uygur"":-3.23e+02,""esn"":-3.21e+02,""sindhi"":-3.23e+02,""druze"":-3.22e+02,""japanese"":-3.22e+02,""jpt"":-3.21e+02,""khv"":-3.21e+02,""stu"":-3.21e+02,""oroqen"":-3.23e+02}"
"""HG00252""","""gbr""","""nfe""","""XY""","{""cdx"":-3.22e+02,""gih"":-3.22e+02,""adygei"":-3.24e+02,""msl"":-3.22e+02,""burusho"":-3.23e+02,""palestinian"":-3.22e+02,""han"":-3.23e+02,""ibs"":-3.22e+02,""beb"":-3.22e+02,""french"":-3.24e+02,""basque"":-3.24e+02,""tu"":-3.24e+02,""itu"":-3.22e+02,""dai"":-3.24e+02,""ceu"":-3.22e+02,""makrani"":-3.24e+02,""tsi"":-3.22e+02,""fin"":-3.22e+02,""tujia"":-3.24e+02,""mozabite"":-3.23e+02,""pjl"":-3.21e+02,""gwd"":-3.22e+02,""mandenka"":-3.24e+02,""surui"":-3.24e+02,""pel"":-3.23e+02,""pur"":-3.21e+02,""orcadian"":-3.24e+02,""acb"":-3.22e+02,""bedouin"":-3.22e+02,""she"":-3.24e+02,""chs"":-3.22e+02,""brahui"":-3.23e+02,""naxi"":-3.24e+02,""mxl"":-3.22e+02,""daur"":-3.24e+02,""russian"":-3.24e+02,""balochi"":-3.24e+02,""asw"":-3.22e+02,""clm"":-3.22e+02,""maya"":-3.24e+02,""yoruba"":-3.24e+02,""colombian"":-3.24e+02,""karitiana"":-3.24e+02,""yizu"":-3.24e+02,""pathan"":-3.23e+02,""hazara"":-3.23e+02,""cambodian"":-3.23e+02,""kalash"":-3.23e+02,""yakut"":-3.23e+02,""yri"":-3.21e+02,""hezhen"":-3.24e+02,""mongola"":-3.24e+02,""tuscan"":-3.24e+02,""lwk"":-3.23e+02,""gbr"":-3.22e+02,""chb"":-3.23e+02,""uygur"":-3.24e+02,""esn"":-3.22e+02,""sindhi"":-3.24e+02,""druze"":-3.23e+02,""japanese"":-3.23e+02,""jpt"":-3.22e+02,""khv"":-3.22e+02,""stu"":-3.22e+02,""oroqen"":-3.24e+02}"


In [189]:
def calculate_total_likelihood(mt, proportions):
    log_total_likelihood = hl.log(hl.sum([hl.exp(mt.log_likelihoods[pop] + hl.log(proportions[pop])) for pop in proportions]))
    return mt.annotate_cols(log_total_likelihood=log_total_likelihood)

mt_likelihood = calculate_total_likelihood(mt_likelihood, ethnicity_proportions)

In [191]:
mt_likelihood.cols().show(5)

KeyboardInterrupt: 

In [185]:
def calculate_posteriors(mt, proportions):
    mt = mt.annotate_cols(
        posteriors=hl.dict(
            {pop: mt.log_likelihoods[pop] - mt.log_total_likelihood
               for pop in proportions.keys()}
        )
    )
    return mt

mt_probs = calculate_posteriors(mt_likelihood, ethnicity_proportions)
mt_probs.cols().select('posteriors').show(5)

s,posteriors
str,"dict<str, float64>"
"""HG00107""","{""cdx"":-7.85e-03,""gih"":-7.85e-03,""adygei"":-1.71e+00,""msl"":7.92e-02,""burusho"":-1.02e+00,""palestinian"":-3.26e-01,""han"":-7.96e-01,""ibs"":-7.85e-03,""beb"":-2.09e-01,""french"":-1.71e+00,""basque"":-1.71e+00,""tu"":-2.41e+00,""itu"":-4.60e-01,""dai"":-1.71e+00,""ceu"":3.67e-01,""makrani"":-1.71e+00,""tsi"":1.59e-01,""fin"":-2.09e-01,""tujia"":-2.41e+00,""mozabite"":-1.02e+00,""pjl"":8.52e-01,""gwd"":3.67e-01,""mandenka"":-2.41e+00,""surui"":-1.71e+00,""pel"":-6.14e-01,""pur"":5.90e-01,""orcadian"":-1.71e+00,""acb"":-1.03e-01,""bedouin"":-4.60e-01,""she"":-1.71e+00,""chs"":-2.09e-01,""brahui"":-1.31e+00,""naxi"":-2.41e+00,""mxl"":-4.60e-01,""daur"":-2.41e+00,""russian"":-2.41e+00,""balochi"":-2.41e+00,""asw"":-3.26e-01,""clm"":-2.09e-01,""maya"":-1.71e+00,""yoruba"":-1.71e+00,""colombian"":-2.41e+00,""karitiana"":-1.71e+00,""yizu"":-1.71e+00,""pathan"":-1.31e+00,""hazara"":-1.31e+00,""cambodian"":-1.31e+00,""kalash"":-1.02e+00,""yakut"":-1.31e+00,""yri"":5.90e-01,""hezhen"":-2.41e+00,""mongola"":-1.71e+00,""tuscan"":-2.41e+00,""lwk"":-7.96e-01,""gbr"":-2.09e-01,""chb"":-7.96e-01,""uygur"":-1.71e+00,""esn"":4.27e-01,""sindhi"":-1.71e+00,""druze"":-1.02e+00,""japanese"":-1.02e+00,""jpt"":-3.26e-01,""khv"":-7.85e-03,""stu"":-7.85e-03,""oroqen"":-2.41e+00}"
"""HG00114""","{""cdx"":-7.85e-03,""gih"":-7.85e-03,""adygei"":-1.71e+00,""msl"":7.92e-02,""burusho"":-1.02e+00,""palestinian"":-3.26e-01,""han"":-7.96e-01,""ibs"":-7.85e-03,""beb"":-2.09e-01,""french"":-1.71e+00,""basque"":-1.71e+00,""tu"":-2.41e+00,""itu"":-4.60e-01,""dai"":-1.71e+00,""ceu"":3.67e-01,""makrani"":-1.71e+00,""tsi"":1.59e-01,""fin"":-2.09e-01,""tujia"":-2.41e+00,""mozabite"":-1.02e+00,""pjl"":8.52e-01,""gwd"":3.67e-01,""mandenka"":-2.41e+00,""surui"":-1.71e+00,""pel"":-6.14e-01,""pur"":5.90e-01,""orcadian"":-1.71e+00,""acb"":-1.03e-01,""bedouin"":-4.60e-01,""she"":-1.71e+00,""chs"":-2.09e-01,""brahui"":-1.31e+00,""naxi"":-2.41e+00,""mxl"":-4.60e-01,""daur"":-2.41e+00,""russian"":-2.41e+00,""balochi"":-2.41e+00,""asw"":-3.26e-01,""clm"":-2.09e-01,""maya"":-1.71e+00,""yoruba"":-1.71e+00,""colombian"":-2.41e+00,""karitiana"":-1.71e+00,""yizu"":-1.71e+00,""pathan"":-1.31e+00,""hazara"":-1.31e+00,""cambodian"":-1.31e+00,""kalash"":-1.02e+00,""yakut"":-1.31e+00,""yri"":5.90e-01,""hezhen"":-2.41e+00,""mongola"":-1.71e+00,""tuscan"":-2.41e+00,""lwk"":-7.96e-01,""gbr"":-2.09e-01,""chb"":-7.96e-01,""uygur"":-1.71e+00,""esn"":4.27e-01,""sindhi"":-1.71e+00,""druze"":-1.02e+00,""japanese"":-1.02e+00,""jpt"":-3.26e-01,""khv"":-7.85e-03,""stu"":-7.85e-03,""oroqen"":-2.41e+00}"
"""HG00121""","{""cdx"":-7.85e-03,""gih"":-7.85e-03,""adygei"":-1.71e+00,""msl"":7.92e-02,""burusho"":-1.02e+00,""palestinian"":-3.26e-01,""han"":-7.96e-01,""ibs"":-7.85e-03,""beb"":-2.09e-01,""french"":-1.71e+00,""basque"":-1.71e+00,""tu"":-2.41e+00,""itu"":-4.60e-01,""dai"":-1.71e+00,""ceu"":3.67e-01,""makrani"":-1.71e+00,""tsi"":1.59e-01,""fin"":-2.09e-01,""tujia"":-2.41e+00,""mozabite"":-1.02e+00,""pjl"":8.52e-01,""gwd"":3.67e-01,""mandenka"":-2.41e+00,""surui"":-1.71e+00,""pel"":-6.14e-01,""pur"":5.90e-01,""orcadian"":-1.71e+00,""acb"":-1.03e-01,""bedouin"":-4.60e-01,""she"":-1.71e+00,""chs"":-2.09e-01,""brahui"":-1.31e+00,""naxi"":-2.41e+00,""mxl"":-4.60e-01,""daur"":-2.41e+00,""russian"":-2.41e+00,""balochi"":-2.41e+00,""asw"":-3.26e-01,""clm"":-2.09e-01,""maya"":-1.71e+00,""yoruba"":-1.71e+00,""colombian"":-2.41e+00,""karitiana"":-1.71e+00,""yizu"":-1.71e+00,""pathan"":-1.31e+00,""hazara"":-1.31e+00,""cambodian"":-1.31e+00,""kalash"":-1.02e+00,""yakut"":-1.31e+00,""yri"":5.90e-01,""hezhen"":-2.41e+00,""mongola"":-1.71e+00,""tuscan"":-2.41e+00,""lwk"":-7.96e-01,""gbr"":-2.09e-01,""chb"":-7.96e-01,""uygur"":-1.71e+00,""esn"":4.27e-01,""sindhi"":-1.71e+00,""druze"":-1.02e+00,""japanese"":-1.02e+00,""jpt"":-3.26e-01,""khv"":-7.85e-03,""stu"":-7.85e-03,""oroqen"":-2.41e+00}"
"""HG00127""","{""cdx"":-7.85e-03,""gih"":-7.85e-03,""adygei"":-1.71e+00,""msl"":7.92e-02,""burusho"":-1.02e+00,""palestinian"":-3.26e-01,""han"":-7.96e-01,""ibs"":-7.85e-03,""beb"":-2.09e-01,""french"":-1.71e+00,""basque"":-1.71e+00,""tu"":-2.41e+00,""itu"":-4.60e-01,""dai"":-1.71e+00,""ceu"":3.67e-01,""makrani"":-1.71e+00,""tsi"":1.59e-01,""fin"":-2.09e-01,""tujia"":-2.41e+00,""mozabite"":-1.02e+00,""pjl"":8.52e-01,""gwd"":3.67e-01,""mandenka"":-2.41e+00,""surui"":-1.71e+00,""pel"":-6.14e-01,""pur"":5.90e-01,""orcadian"":-1.71e+00,""acb"":-1.03e-01,""bedouin"":-4.60e-01,""she"":-1.71e+00,""chs"":-2.09e-01,""brahui"":-1.31e+00,""naxi"":-2.41e+00,""mxl"":-4.60e-01,""daur"":-2.41e+00,""russian"":-2.41e+00,""balochi"":-2.41e+00,""asw"":-3.26e-01,""clm"":-2.09e-01,""maya"":-1.71e+00,""yoruba"":-1.71e+00,""colombian"":-2.41e+00,""karitiana"":-1.71e+00,""yizu"":-1.71e+00,""pathan"":-1.31e+00,""hazara"":-1.31e+00,""cambodian"":-1.31e+00,""kalash"":-1.02e+00,""yakut"":-1.31e+00,""yri"":5.90e-01,""hezhen"":-2.41e+00,""mongola"":-1.71e+00,""tuscan"":-2.41e+00,""lwk"":-7.96e-01,""gbr"":-2.09e-01,""chb"":-7.96e-01,""uygur"":-1.71e+00,""esn"":4.27e-01,""sindhi"":-1.71e+00,""druze"":-1.02e+00,""japanese"":-1.02e+00,""jpt"":-3.26e-01,""khv"":-7.85e-03,""stu"":-7.85e-03,""oroqen"":-2.41e+00}"
"""HG00132""","{""cdx"":-7.85e-03,""gih"":-7.85e-03,""adygei"":-1.71e+00,""msl"":7.92e-02,""burusho"":-1.02e+00,""palestinian"":-3.26e-01,""han"":-7.96e-01,""ibs"":-7.85e-03,""beb"":-2.09e-01,""french"":-1.71e+00,""basque"":-1.71e+00,""tu"":-2.41e+00,""itu"":-4.60e-01,""dai"":-1.71e+00,""ceu"":3.67e-01,""makrani"":-1.71e+00,""tsi"":1.59e-01,""fin"":-2.09e-01,""tujia"":-2.41e+00,""mozabite"":-1.02e+00,""pjl"":8.52e-01,""gwd"":3.67e-01,""mandenka"":-2.41e+00,""surui"":-1.71e+00,""pel"":-6.14e-01,""pur"":5.90e-01,""orcadian"":-1.71e+00,""acb"":-1.03e-01,""bedouin"":-4.60e-01,""she"":-1.71e+00,""chs"":-2.09e-01,""brahui"":-1.31e+00,""naxi"":-2.41e+00,""mxl"":-4.60e-01,""daur"":-2.41e+00,""russian"":-2.41e+00,""balochi"":-2.41e+00,""asw"":-3.26e-01,""clm"":-2.09e-01,""maya"":-1.71e+00,""yoruba"":-1.71e+00,""colombian"":-2.41e+00,""karitiana"":-1.71e+00,""yizu"":-1.71e+00,""pathan"":-1.31e+00,""hazara"":-1.31e+00,""cambodian"":-1.31e+00,""kalash"":-1.02e+00,""yakut"":-1.31e+00,""yri"":5.90e-01,""hezhen"":-2.41e+00,""mongola"":-1.71e+00,""tuscan"":-2.41e+00,""lwk"":-7.96e-01,""gbr"":-2.09e-01,""chb"":-7.96e-01,""uygur"":-1.71e+00,""esn"":4.27e-01,""sindhi"":-1.71e+00,""druze"":-1.02e+00,""japanese"":-1.02e+00,""jpt"":-3.26e-01,""khv"":-7.85e-03,""stu"":-7.85e-03,""oroqen"":-2.41e+00}"


In [186]:
def predict_ancestry(posteriors):
    return hl.bind(
        lambda x: hl.sorted(x.items(), key=lambda item: item[1], reverse=True)[0][0],
        posteriors
    )

mt_probs = mt_probs.annotate_cols(predicted_ancestry=predict_ancestry(mt_probs.posteriors))
mt_probs.cols().select('predicted_ancestry', 'pop').show(5)

s,predicted_ancestry,pop
str,str,str
"""HG00107""","""pjl""","""gbr"""
"""HG00114""","""pjl""","""gbr"""
"""HG00121""","""pjl""","""gbr"""
"""HG00127""","""pjl""","""gbr"""
"""HG00132""","""pjl""","""gbr"""


In [187]:
tp, total = mt_probs.filter_cols(mt_probs.predicted_ancestry == mt_probs.pop).count_cols(), mt_probs.count_cols()

tp / total, tp, total

(0.0663265306122449, 26, 392)

In [188]:
def calculate_error_rate(mt):
    correct_predictions = mt.filter_cols(mt.predicted_ancestry == mt.pop)
    total_samples = mt.count_cols()
    error_rate = correct_predictions.aggregate_cols(
        hl.agg.sum(1 - hl.exp(correct_predictions.posteriors[correct_predictions.predicted_ancestry]))
    ) / total_samples
    return error_rate


calculate_error_rate(mt_probs)

-0.08921879842594872