## Split genotype files for BayPass

- Divide the genome-wide read count data into multiple subsets ("splits"), each of which is run independently through BayPass

In [None]:
# Load genotype file with all sites as a list of lines.
# Read inside a context manager so the file handle is closed as soon as the
# lines are in memory instead of being left open for the rest of the session.
with open(snakemake.input["as_geno"], "r") as geno_in:
    allSamples_geno = geno_in.readlines()

In [None]:
# Generate dictionary with the split labels as keys (the entries of
# snakemake.params["splits"]) and the marker indices for the split as
# values (list).
splits = snakemake.params["splits"]
num_splits = len(splits)
num_markers = len(allSamples_geno)

# Markers are dealt out round-robin: the split at position `pos` receives
# markers pos, pos + num_splits, pos + 2 * num_splits, ...
# The dictionary is keyed by the split label itself rather than by its
# 0-based position, so every label in `splits` gets its markers and no extra,
# unlabeled key is created when the labels do not start at 0.
split_index_dict = {
    split: list(range(pos, num_markers, num_splits))
    for pos, split in enumerate(splits)
}

In [None]:
# Write file with read count data for markers in each split
# NOTE(review): `os` is not imported in any visible cell; importing it here
# keeps this cell runnable even if nothing else brings it into scope.
import os

out_prefix = snakemake.params["out_prefix"]
splits_out = f"{out_prefix}/allSamples/splits"
# exist_ok=True avoids the check-then-create race of a separate exists() test.
os.makedirs(splits_out, exist_ok=True)
for split, geno in split_index_dict.items():
    as_geno_out = f"{splits_out}/allSamples_{split}.geno"
    with open(as_geno_out, "w") as fout:
        # Lines are written in the order of the split's marker indices.
        fout.writelines(allSamples_geno[g] for g in geno)

In [None]:
# Write file with the order of markers in the above genotype files
order_out = f"{out_prefix}/allSamples/site_order"
# exist_ok=True avoids the check-then-create race of a separate exists() test.
os.makedirs(order_out, exist_ok=True)
# Read the site-order lines inside a context manager so the handle is closed.
with open(snakemake.input["site_order"], "r") as order_in:
    site_order = order_in.readlines()
for split, geno in split_index_dict.items():
    site_order_out = f"{order_out}/site_order_{split}.txt"
    with open(site_order_out, "w") as fout:
        # Same marker indices as the genotype splits, so line i of this file
        # corresponds to line i of the matching split genotype file.
        fout.writelines(site_order[g] for g in geno)

In [None]:
# Write marker files for each city individually for city-by-city BayPass runs
in_prefix = snakemake.params["in_prefix"]
byCity_out = f"{out_prefix}/byCity"
os.makedirs(byCity_out, exist_ok=True)
for city in snakemake.params["cities"]:
    in_path = f"{in_prefix}/{city}/{city}.geno"
    # Read the city's genotype lines inside a context manager so the handle
    # is closed before moving on to the next city.
    with open(in_path, "r") as city_in:
        city_geno = city_in.readlines()
    # The city directory does not depend on the split, so create it once per
    # city instead of re-checking it for every split.
    city_path_out = f"{byCity_out}/{city}"
    os.makedirs(city_path_out, exist_ok=True)
    for split, geno in split_index_dict.items():
        city_geno_out = f"{city_path_out}/{city}_{split}.geno"
        with open(city_geno_out, "w") as fout:
            # Uses the same marker indices as the all-samples splits.
            fout.writelines(city_geno[g] for g in geno)

In [None]:
# Peek at the first "random100" input file: read all of its lines and print
# the first one. Only the first file is read because the loop exits right
# after handling it.
random_100_indices = dict()
for sites_file in snakemake.input["random100"]:
    with open(sites_file, "r") as sites:
        all_sites = sites.readlines()
    first_site = all_sites[0]
    print(first_site)
    break

In [None]:
# Show how many lines were read from the site-order file (notebook cell output).
len(site_order)