In [4]:
import folium
import pandas as pd
import pickle
import os
import h3
from label_samples_time_hexa import label_samples
from vizualize import draw_hexagons, draw_all_boarders_for_time_bin, draw_hexagons_with_values, draw_barriers
from func import rename_time_bins, calc_dist_time_bin, normalize_distances, get_time_bin_hexagons, get_min_max_dist

In [5]:
matrix = pd.read_pickle("/home/jaro/BINP29/Project_Eran/1_dist_matrix/eucl_dist.pkl")

In [6]:
time_bins = 18
resolution = 3
same_age_range = True
df = label_samples("/home/jaro/BINP29/Project_Eran/", time_bins, resolution, same_age_range)
time_bins = rename_time_bins(df)
time_bins_hexagons = get_time_bin_hexagons(df)

In [7]:
# calculate the average distances between neighboring hexagons for each time bin with the given parameters
time_bins_dist = calc_dist_time_bin(df, matrix,
                                        8,
                                        True,
                                        False)

In [8]:
selected_time_bin = time_bins[0]
hexagons = time_bins_hexagons[selected_time_bin]
time_bin = time_bins_dist[selected_time_bin]
time_bin = normalize_distances(time_bin)
threshold = 0.4

In [11]:
# directory to save the barrier lines and hexagons with their distances
barrier_hex = {}
barrier_lines = {}
# dictionary to save hexagons and their direct neighbor distances to check if their was a migration barrier
hex_dist_to_direct_neighbors = {}
# loop over all pairs of hexagons in the time bin
for pair in time_bin:
    distance = time_bin[pair]
    pair = list(pair)
    # check if the pair are direct neighbors
    if pair[0] in h3.k_ring_distances(pair[1], 1)[1]:
        # get the line between the two hexagons
        boundary1 = h3.h3_to_geo_boundary(pair[0])
        boundary2 = h3.h3_to_geo_boundary(pair[1])
        # get the pair of dots that the two hexagons share
        shared_boundary = frozenset([x for x in boundary1 if x in boundary2])
        # add the line and its distance to the dictionary
        barrier_lines[shared_boundary] = distance
        if pair[0] in hex_dist_to_direct_neighbors:
            hex_dist_to_direct_neighbors[pair[0]].append(distance)
        else:
            hex_dist_to_direct_neighbors[pair[0]] = [distance]
        if pair[1] in hex_dist_to_direct_neighbors:
            hex_dist_to_direct_neighbors[pair[1]].append(distance)
        else:
            hex_dist_to_direct_neighbors[pair[1]] = [distance]
    # if the hexagons are further appart
    else:
        # try to draw a line between the two hexagons
        try:
            line = h3.h3_line(pair[0], pair[1])
        except:
            continue
        # exclude the first and last hexagon
        line = line[1:-1]
        # add all elements of line as a key to the dictionary and give it the value of the pair
        for hex in line:
            if hex in hexagons:
                continue
            if hex in barrier_hex:
                barrier_hex[hex].append(distance)
            else:
                barrier_hex[hex] = [distance]

# calc the average distance for each hexagon and round it to 2 decimal places
for hex in barrier_hex:
    barrier_hex[hex] = round(sum(barrier_hex[hex])/len(barrier_hex[hex]), 2)
    
# create a list of isolated hexagons
isolated_hex = []
for hex in hex_dist_to_direct_neighbors:
    # check if all neighbors are further than a threshold
    if any([x < threshold for x in hex_dist_to_direct_neighbors[hex]]):
        continue
    else:
        isolated_hex.append(hex)
        
# check if their are isloated populations without direct neighbors
hex_without_direct_neighbors = [hex for hex in hexagons if hex not in hex_dist_to_direct_neighbors]
for hex in hex_without_direct_neighbors:
    # list of all neighboring hexagons
    neighbors = h3.k_ring_distances(hex, 1)[1]
    # check if all neighbors are further than a threshold
    flag = True
    for neighbor in neighbors:
        if neighbor in barrier_hex:
            if barrier_hex[neighbor] < threshold:
                flag = False
                break
    if flag:
        isolated_hex.append(hex)

print(f"Number of isolated hexagons: {len(isolated_hex)}")
print(f"Number of barrier lines: {len(barrier_lines)}")
print(f"Number of barrier hexagons: {len(barrier_hex)}")

Number of isolated hexagons: 28
Number of barrier lines: 165
Number of barrier hexagons: 1186


In [19]:
# function that calculates the average distance between two groups of samples
def calc_avg_dist(samples_hex1, samples_hex2, dist_matrix):
    return dist_matrix.loc[samples_hex1, samples_hex2].values.flatten().mean()

def find_closest_population(time_bin_df, isolated_hex, dist_matrix, threshold):
    # get column name for the hexagons (it should be the only column with 'hex' in the name)
    hex_col = str(time_bin_df.columns[df.columns.str.contains('hex')][0])
    # get all unique hexagons from the dataframe
    hexagons = time_bin_df[hex_col].unique()
    # get the samples in each hexagon
    samples_in_hex = time_bin_df.groupby(hex_col)['ID'].apply(list).to_dict()
    # empty dictrionary to hold the distances between the hexagons
    closest_populations = {}
    
    # loop over all isolated hexagons
    for iso in isolated_hex:
        # reset the min_dist
        min_dist = threshold
        # delete the hexagon from the list
        hexagons = hexagons[hexagons != iso]
        # check the distance to every hexagon in that time bin
        for hex in hexagons:
            Ids_in_hexagon = samples_in_hex.get(iso, [])
            Ids_in_neighbor = samples_in_hex.get(hex, [])
            # calculate the average distance between the hexagon and its neighbor
            distance = calc_avg_dist(Ids_in_hexagon, Ids_in_neighbor, dist_matrix)
            # check if the distance is lower than the current minimum distance
            if distance < min_dist:
                min_dist = distance
                closest_hex = hex
        # if the min_dist is lower than the treshold add it to the dictionary
        if min_dist < threshold:
            pair = frozenset([iso, closest_hex])
            closest_populations[pair] = min_dist
    
    return closest_populations

In [20]:
    # Convert the 'AgeGroup' column values to tuples of integers representing the start and end years,
    df['AgeGroupTuple'] = df['AgeGroup'].apply(lambda x: tuple(map(int, x.split('-'))))
    
    # Sort the unique age group tuples to process them in a chronological order.
    time_bins = sorted(df['AgeGroupTuple'].unique())
    time_bin_df = df[df['AgeGroupTuple'] == time_bins[0]]
    
    # get the closest population for each isolated hexagon
    closest_populations = find_closest_population(time_bin_df, isolated_hex, matrix, threshold)


In [22]:
m = draw_hexagons(hexagons, color = "darkgreen")
m = draw_hexagons(isolated_hex, m, color = "red")
m = draw_all_boarders_for_time_bin(closest_populations, m)
m = draw_hexagons_with_values(barrier_hex, m, threshold = 0.5)
m = draw_barriers(barrier_lines, m)
m

In [17]:
# create two hexagons using h3
hex1 = h3.geo_to_h3(37.3615593, -122.0553238, 3)
hex2 = h3.geo_to_h3(36.3615593, -122.0553238, 3)
print(h3.distance(hex1, hex2))

hex1_dots = h3.h3_to_geo_boundary(hex1)
hex2_dots = h3.h3_to_geo_boundary(hex2)
# get the pair of dots that the two hexagons share
dot = [x for x in hex1_dots if x in hex2_dots]
dot
# m = draw_hexagons([hex1, hex2], color = 'blue')
# # draw the line between the two hexagons
# folium.PolyLine(dot, color = 'red').add_to(m)
# m




AttributeError: module 'h3' has no attribute 'distance'