In [None]:
import gzip

In [None]:
# Per-chromosome lookup: position (kept as a string) -> 1-based row number of
# that position in the chromosome's global sites file.
chrom_site_dict = {chrom: {} for chrom in snakemake.params["chrom"]}

for chrom in snakemake.params["chrom"]:
    sites_path = f"/research/projects/trifolium/glue/glue_ps/results/program_resources/angsd_sites/{chrom}/{chrom}_af_allSamples.sites"
    site_rows = chrom_site_dict[chrom]
    with open(sites_path, "r") as sites_file:
        # The sites file has no header; rows are numbered starting from 1.
        for row_number, record in enumerate(sites_file, start=1):
            fields = record.strip().split("\t")
            # Second tab-separated field is the position.
            site_rows[fields[1]] = row_number

In [None]:
def _read_pos_index(pos_path):
    """Map each position in a gzipped, tab-separated ``.pos.gz`` file to its line index.

    The file is streamed line by line rather than loaded whole. The first line
    (index 0) is skipped -- presumably a header row; confirm against the ANGSD
    output. For every other line, the second tab-separated field is decoded as
    UTF-8 and used as the key; the value is the 0-based line index in the file.

    Parameters
    ----------
    pos_path : str
        Path to the gzip-compressed positions file.

    Returns
    -------
    dict[str, int]
        Position string -> line index. If a position occurs more than once,
        the last occurrence wins.
    """
    pos_index = {}
    with gzip.open(pos_path, "rb") as pos_file:
        for line_idx, raw_line in enumerate(pos_file):
            if line_idx == 0:
                continue
            fields = raw_line.strip().split(b"\t")
            pos_index[fields[1].decode("utf-8")] = line_idx
    return pos_index


city = snakemake.wildcards["city"]

# Per-chromosome lookups: position string -> line index in the city's urban /
# rural positions file. The same index is used later to address the matching
# line of the corresponding .counts.gz file.
urban_pos_index_dict = {}
rural_pos_index_dict = {}

for chrom in snakemake.params["chrom"]:
    urb_pos_path = f"/research/projects/trifolium/glue/glue_ps/results/angsd/maf/byCity/{city}/{chrom}/{city}_urban_{chrom}_snps.pos.gz"
    rur_pos_path = f"/research/projects/trifolium/glue/glue_ps/results/angsd/maf/byCity/{city}/{chrom}/{city}_rural_{chrom}_snps.pos.gz"

    urban_pos_index_dict[chrom] = _read_pos_index(urb_pos_path)
    rural_pos_index_dict[chrom] = _read_pos_index(rur_pos_path)

In [None]:
# For every position in the global sites table, record the line index of the
# same position in the urban and in the rural positions table (when present).
global_to_urban_mapping = {chrom: {} for chrom in snakemake.params["chrom"]}
global_to_rural_mapping = {chrom: {} for chrom in snakemake.params["chrom"]}

for chrom in snakemake.params["chrom"]:
    urban_lookup = urban_pos_index_dict.get(chrom, {})
    rural_lookup = rural_pos_index_dict.get(chrom, {})
    urban_hits = global_to_urban_mapping[chrom]
    rural_hits = global_to_rural_mapping[chrom]

    # Only the positions (keys) of the global table are needed here.
    for gp in chrom_site_dict[chrom]:
        if gp in urban_lookup:
            urban_hits[gp] = urban_lookup[gp]
        else:
            print(f"{chrom}: {gp} missing from urban populations")

        if gp in rural_lookup:
            rural_hits[gp] = rural_lookup[gp]
        else:
            print(f"{chrom}: {gp} missing from rural populations")

In [None]:
# Sanity check: per chromosome, how many global sites were also found in the
# urban positions file. Showing counts rather than the full nested dict keeps
# the saved notebook output small (the dict holds one entry per matched site).
{chrom: len(pos_to_idx) for chrom, pos_to_idx in global_to_urban_mapping.items()}

In [None]:
# Column index, within a tab-split line of a .counts.gz file, that is read for
# each nucleotide.
nucl_index_dict = {b'A': 0, b'C': 1, b'G': 2, b'T': 3}


def _read_gz_lines(path):
    """Return all lines of a gzip file as a list of bytes, closing the file afterwards."""
    with gzip.open(path, "rb") as handle:
        return handle.readlines()


city = snakemake.wildcards["city"]

# Write one line per site that is present in both the urban and the rural
# tables of this city: "<urban_ref> <urban_alt> <rural_ref> <rural_alt>".
with open(snakemake.output["perCity_geno"], "w") as fout:
    for chrom in snakemake.params["chrom"]:
        print(chrom)  # progress indicator
        glob_maf_path = f"/research/projects/trifolium/glue/glue_ps/results/angsd/maf/allSamples/{chrom}/{chrom}_allSamples_snps.mafs.gz"
        urb_path = f"/research/projects/trifolium/glue/glue_ps/results/angsd/maf/byCity/{city}/{chrom}/{city}_urban_{chrom}_snps.counts.gz"
        rur_path = f"/research/projects/trifolium/glue/glue_ps/results/angsd/maf/byCity/{city}/{chrom}/{city}_rural_{chrom}_snps.counts.gz"

        # Lines are addressed by index below, so each file is read fully, but
        # through a context manager so the gzip handles are closed promptly.
        glob_mafs = _read_gz_lines(glob_maf_path)
        urban_counts = _read_gz_lines(urb_path)
        rural_counts = _read_gz_lines(rur_path)

        urban_map = global_to_urban_mapping[chrom]
        rural_map = global_to_rural_mapping[chrom]

        for gp, gi in chrom_site_dict[chrom].items():
            urban_idx = urban_map.get(gp)
            rural_idx = rural_map.get(gp)
            # Explicit None test: a site is skipped only when it is absent from
            # one of the habitats, never because an index happens to be 0.
            if urban_idx is None or rural_idx is None:
                continue

            # Fields 2 and 3 of the global .mafs line are treated as the REF
            # and ALT alleles -- NOTE(review): confirm against the ANGSD
            # column layout used for this run.
            glob_mafs_sl = glob_mafs[gi].strip().split(b"\t")
            ref_col = nucl_index_dict[glob_mafs_sl[2]]
            alt_col = nucl_index_dict[glob_mafs_sl[3]]

            # Split each counts line once and pick both allele columns from it.
            urban_fields = urban_counts[urban_idx].strip().split(b"\t")
            rural_fields = rural_counts[rural_idx].strip().split(b"\t")

            urban_ref = urban_fields[ref_col].decode('utf-8')
            urban_alt = urban_fields[alt_col].decode('utf-8')
            rural_ref = rural_fields[ref_col].decode('utf-8')
            rural_alt = rural_fields[alt_col].decode('utf-8')

            fout.write(f"{urban_ref} {urban_alt} {rural_ref} {rural_alt}\n")


In [None]:
# Write the literal text "1 -1" (no trailing newline) to the contrast output.
contrast_path = snakemake.output["perCity_cont"]
with open(contrast_path, "w") as contrast_file:
    contrast_file.write("1 -1")