In [1]:
import folium
import pandas as pd
import pickle
import os
import h3
from label_samples_time_hexa import label_samples
from vizualize import draw_hexagons, draw_migration_for_time_bin, draw_hexagons_with_values, draw_barriers
from func import rename_time_bins, calc_dist_time_bin, normalize_distances, get_time_bin_hexagons, get_min_max_dist, get_isolated_hex_and_barriers, find_closest_population

In [2]:
matrix = pd.read_pickle("/home/jaro/BINP29/Project_Eran/1_dist_matrix/eucl_dist.pkl")

In [3]:
time_bins = 18
resolution = 3
same_age_range = True
df = label_samples("/home/jaro/BINP29/Project_Eran/", time_bins, resolution, same_age_range)
time_bins = rename_time_bins(df)
time_bins_hexagons = get_time_bin_hexagons(df)

In [4]:
# calculate the average distances between neighboring hexagons for each time bin with the given parameters
time_bins_dist = calc_dist_time_bin(df, matrix,
                                        10,
                                        False)

In [14]:
selected_time_bin = time_bins[0]
hexagons = time_bins_hexagons[selected_time_bin]
time_bin = time_bins_dist[selected_time_bin]
time_bin = normalize_distances(time_bin)
threshold = 0.38

In [15]:
isolated_hex, barrier_lines, barrier_hex = get_isolated_hex_and_barriers(time_bin, hexagons, threshold)

print(f"Number of isolated hexagons: {len(isolated_hex)}")
print(f"Number of barrier lines: {len(barrier_lines)}")
print(f"Number of barrier hexagons: {len(barrier_hex)}")

Number of isolated hexagons: 31
Number of barrier lines: 165
Number of barrier hexagons: 1498


In [27]:
import numpy as np
# function that calculates the average distance between two groups of samples
def calc_avg_dist(samples_hex1, samples_hex2, dist_matrix):
    return np.mean(dist_matrix.loc[samples_hex1, samples_hex2].values.flatten())

def find_closest_population(df, time_bin, isolated_hex, dist_matrix, threshold):
    # Convert the 'AgeGroup' column values to tuples of integers representing the start and end years,
    df['AgeGroupTuple'] = df['AgeGroup'].apply(lambda x: tuple(map(int, x.split('-'))))
    # Sort the unique age group tuples to process them in a chronological order.
    time_bins = sorted(df['AgeGroupTuple'].unique())
    # get the samples in the time bin of interest
    time_bin_df = df[df['AgeGroupTuple'] == time_bins[time_bin]]
    # get column name for the hexagons (it should be the only column with 'hex' in the name)
    hex_col = time_bin_df.filter(like='hex').columns[0]
    # get all unique hexagons from the dataframe
    hexagons = time_bin_df[hex_col].unique()
    # get the samples in each hexagon
    samples_in_hex = time_bin_df.groupby(hex_col)['ID'].apply(list).to_dict()
    all_samples = [sample for samples in samples_in_hex.values() for sample in samples]
    # create a submatrix of the distance matrix for the samples in the hexagons
    dist_matrix = dist_matrix.loc[all_samples, all_samples]
    # empty dictrionary to hold the distances between the hexagons
    closest_populations = {}
    # empty list to hold the isolated hexagons that have no migration
    new_isolated_hex = []
    
    # loop over all isolated hexagons
    for iso in isolated_hex:
        # reset the min_dist and closest_hex
        closest_hex = None
        min_dist = threshold
        # check the distance to every hexagon in that time bin
        for hex in hexagons:
            # skip if the hexagon is the same as the isolated hexagon
            if hex == iso:
                continue
            
            Ids_in_hexagon = samples_in_hex.get(iso, [])
            Ids_in_neighbor = samples_in_hex.get(hex, [])
            # calculate the average distance between the hexagon and its neighbor
            distance = calc_avg_dist(Ids_in_hexagon, Ids_in_neighbor, dist_matrix)
            # check if the distance is lower than the current minimum distance
            if distance < min_dist:
                min_dist = distance
                closest_hex = hex
        # Add closest hexagon
        if closest_hex is not None:
            pair = frozenset([iso, closest_hex])
            closest_populations[pair] = round(min_dist, 2)
        else:
            new_isolated_hex.append(iso)
    
    return closest_populations, new_isolated_hex

In [28]:
# get the index of the time bin of interest
time_bin = time_bins.index(time_bins[0])
# get the closest population for each isolated hexagon
closest_populations, new_isolated_hex = find_closest_population(df, time_bin, isolated_hex, matrix, threshold)

In [29]:
m = draw_hexagons(hexagons, color = "darkgreen")
m = draw_hexagons(new_isolated_hex, color = "red", m = m)
m = draw_migration_for_time_bin(closest_populations, m)
m = draw_hexagons_with_values(barrier_hex, m, threshold = 0.0)
m = draw_barriers(barrier_lines, m)
m

In [17]:
# create two hexagons using h3
hex1 = h3.geo_to_h3(37.3615593, -122.0553238, 3)
hex2 = h3.geo_to_h3(36.3615593, -122.0553238, 3)
print(h3.distance(hex1, hex2))

hex1_dots = h3.h3_to_geo_boundary(hex1)
hex2_dots = h3.h3_to_geo_boundary(hex2)
# get the pair of dots that the two hexagons share
dot = [x for x in hex1_dots if x in hex2_dots]
dot
# m = draw_hexagons([hex1, hex2], color = 'blue')
# # draw the line between the two hexagons
# folium.PolyLine(dot, color = 'red').add_to(m)
# m




AttributeError: module 'h3' has no attribute 'distance'