# Example Definition File Construction 

### Feel free to adapt this script for your own use!

In [1]:
import pandas as pd 
import numpy as np

# Ancestry labels. Each label is substituted into the per-ancestry .fam/.bim
# file names read below, and is written out as the ANCES value and as a
# column header in the generated files.
ANCESTRIES = ["EAS", "EUR"]

In [2]:
# --ances path first

# Read one PLINK .fam per ancestry. The separator is a regex, so it is written
# as a raw string: a plain "\s+" is an invalid string escape and triggers a
# warning on current Python versions.
fams = {
    ances: pd.read_csv(f"chr22_mind02_geno02_maf01_{ances}.fam", sep=r"\s+", header=None)
    for ances in ANCESTRIES
}

# Build one (IID, ANCES) table per ancestry and concatenate across ancestries.
# The per-file row index is kept as-is; it is dropped when the table is saved.
# NOTE(review): column 0 of a PLINK .fam is the family ID (FID) and column 1 is
# the within-family ID. Using column 0 is fine when FID == IID -- confirm this
# holds for your own data.
ances_path = pd.concat(
    [pd.DataFrame({"IID": fams[ances][0], "ANCES": ances}) for ances in ANCESTRIES]
)
ances_path

Unnamed: 0,IID,ANCES
0,HG00403,EAS
1,HG00404,EAS
2,HG00406,EAS
3,HG00407,EAS
4,HG00409,EAS
0,HG00096,EUR
1,HG00097,EUR
2,HG00099,EUR
3,HG00101,EUR
4,HG00103,EUR


### This looks good -- we can save this. Note that we're excluding the header from this file.

In [3]:
# Write the IID/ancestry table as a tab-separated file with neither a header
# row nor the row index.
ances_path.to_csv(
    "iid_ances_file",
    sep="\t",
    header=False,
    index=False,
)

In [4]:
# --snp-ances next

# Read one PLINK .bim per ancestry; column 1 holds the SNP identifier.
bims = {
    ances: pd.read_csv(f"chr22_mind02_geno02_maf01_{ances}.bim", sep="\t", header=None)
    for ances in ANCESTRIES
}

# Union of SNP identifiers across all ancestries (first occurrence kept).
all_snps = pd.DataFrame(pd.concat(bims.values())[1].drop_duplicates())

# Add one 0/1 indicator column per ancestry: 1 if the SNP is present in that
# ancestry's .bim, else 0. Membership is tested directly rather than through an
# outer merge, because the merge result is not guaranteed to share row order
# (outer joins sort their keys) or row count (duplicate SNP IDs) with
# `all_snps`, and a positional assignment would then mislabel SNPs or fail.
for ances in ANCESTRIES:
    # .to_numpy() keeps the assignment positional, so a non-unique row index
    # left over from the concat cannot interfere.
    all_snps[ances] = all_snps[1].isin(bims[ances][1]).to_numpy().astype(int)

In [5]:
# Give the SNP identifier column (positional label 1 from the .bim files) a
# readable name, then display the table.
all_snps = all_snps.rename(columns={1: "SNP"})
all_snps

Unnamed: 0,SNP,EAS,EUR
0,rs200766023,1,1
1,rs566863180,1,1
2,rs9617249,1,1
3,rs533485114,1,0
4,rs12163443,1,0
...,...,...,...
839,rs35138863,0,1
843,rs532372350,0,1
845,rs113111175,0,1
846,rs151333479,0,1


### This looks good -- we can save this. Note that the ancestry names in the column header match the ancestry strings in the previous file.

In [6]:
# Write the SNP-by-ancestry indicator table as a tab-separated file. The header
# row is kept; the row index is not written.
all_snps.to_csv(
    "snp_ances_file",
    sep="\t",
    index=False,
)