## Map Distance Creation

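This notebook builds the distance metric used by the Cannibalism feature: for every pair of locations it queries the Google Maps Directions API for the driving distance and travel time, then keeps the pairs that are close to each other.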

In [None]:
import pandas as pd
import numpy as np
import googlemaps
from datetime import datetime
from tqdm import tqdm

In [None]:
# Build the Google Maps API client used by the distance lookups.
# Replace the placeholder below with a valid API key before running.
maps_api_key = 'Insert API Key'
gmaps = googlemaps.Client(key=maps_api_key)

# Look up the driving distance and duration between two coordinates.
def get_distance_duration(lat1, lon1, lat2, lon2):
    """Return the driving distance and duration between two points.

    Queries the Google Maps Directions API through the module-level
    ``gmaps`` client, asking for a driving route that avoids ferries and
    departs at the current time.

    Args:
        lat1: Latitude of the origin.
        lon1: Longitude of the origin.
        lat2: Latitude of the destination.
        lon2: Longitude of the destination.

    Returns:
        A ``(distance, duration)`` tuple holding the ``text`` fields of the
        first leg of the first returned route, or ``("N/A", "N/A")`` when
        the API returns no route.
    """
    departure = datetime.now()
    origin = f"{lat1}, {lon1}"
    destination = f"{lat2}, {lon2}"
    routes = gmaps.directions(
        origin,
        destination,
        mode="driving",
        avoid="ferries",
        departure_time=departure,
    )

    # An empty result means no route was found between the two points.
    if not routes:
        return "N/A", "N/A"

    first_leg = routes[0]['legs'][0]
    return first_leg['distance']['text'], first_leg['duration']['text']

# Query the driving distance and duration for every ordered pair of locations.
# NOTE(review): `dir` is assumed to be a DataFrame created in another cell,
# with 'Latitud' / 'Longitud' columns and a 0..n-1 index (rows are addressed
# with .loc[i]). The name also shadows the builtin dir() - confirm.
n_locations = len(dir)
distances = []
durations = []

for i in tqdm(range(n_locations)):
    lat1 = dir.loc[i, 'Latitud']
    lon1 = dir.loc[i, 'Longitud']
    for j in range(n_locations):
        if i == j:
            # A location paired with itself needs no API request.
            distances.append("0 km")
            durations.append("0 mins")
            continue
        lat2 = dir.loc[j, 'Latitud']
        lon2 = dir.loc[j, 'Longitud']
        distance, duration = get_distance_duration(lat1, lon1, lat2, lon2)
        distances.append(distance)
        durations.append(duration)

# Reshape the flat result lists into n x n matrices. Results were appended
# origin by origin, so row = origin index and column = destination index.
matrix_shape = (n_locations, n_locations)
distance_matrix = pd.DataFrame(np.array(distances).reshape(matrix_shape))
# The original line was missing its closing parenthesis (SyntaxError).
duration_matrix = pd.DataFrame(np.array(durations).reshape(matrix_shape))


In [None]:
# Print the distance matrix, then the duration matrix, for a visual check.
for matrix in (distance_matrix, duration_matrix):
    print(matrix)

In [None]:
# Rebuild the square distance / duration matrices from the flat result lists.
n_points = len(dir)
square = (n_points, n_points)
distance_matrix = pd.DataFrame(np.array(distances).reshape(square))
duration_matrix = pd.DataFrame(np.array(durations).reshape(square))

# Create the connections dataframe: one record per ordered
# (origin, destination) pair, including each location paired with itself.
connections = [
    {
        'From_Latitud': dir.loc[src, 'Latitud'],
        'From_Longitud': dir.loc[src, 'Longitud'],
        'From_Location': dir.loc[src, 'Location'],
        'To_Latitud': dir.loc[dst, 'Latitud'],
        'To_Longitud': dir.loc[dst, 'Longitud'],
        'To_Location': dir.loc[dst, 'Location'],
        'Distance': distance_matrix.iloc[src, dst],
        'Duration': duration_matrix.iloc[src, dst],
    }
    for src in range(n_points)
    for dst in range(n_points)
]

connections_df = pd.DataFrame(connections)

In [None]:
# Convert the human-readable Google Maps strings into numeric columns.
# Taking only the first run of digits mis-parses multi-unit durations
# ("1 hour 5 mins" -> 1), decimal or thousands-separated distances
# ("12.5 km" -> 12, "1,234 km" -> 1) and short distances given in metres
# ("350 m" -> 350), and it raises on the "N/A" placeholder.

# Duration -> whole minutes. Every unit is optional, so "N/A" matches none
# of them and yields a row of NaN.
duration_parts = connections_df['Duration'].str.extract(
    r'^\s*(?:(?P<days>\d+)\s*days?)?'
    r'\s*(?:(?P<hours>\d+)\s*hours?)?'
    r'\s*(?:(?P<mins>\d+)\s*mins?)?'
).astype(float)
minutes_per_unit = pd.Series({'days': 24 * 60, 'hours': 60, 'mins': 1})
# min_count=1 keeps a row NaN when no unit was found instead of summing to 0.
total_minutes = duration_parts.mul(minutes_per_unit, axis='columns').sum(
    axis=1, min_count=1
)
# Nullable integer dtype keeps unparseable rows as <NA> instead of raising.
connections_df['min'] = total_minutes.astype('Int64')

# Distance -> kilometres. The number may carry thousands separators and a
# decimal part. NOTE(review): miles / feet are handled because the API may
# report imperial units depending on the request region - confirm.
distance_parts = connections_df['Distance'].str.extract(
    r'^\s*(?P<value>\d[\d,]*(?:\.\d+)?)\s*(?P<unit>km|m|mi|ft)\b'
)
km_per_unit = {'km': 1.0, 'm': 0.001, 'mi': 1.609344, 'ft': 0.0003048}
distance_value = (
    distance_parts['value'].str.replace(',', '', regex=False).astype(float)
)
# Rows with no recognisable value / unit (e.g. "N/A") become NaN.
connections_df['km'] = distance_value * distance_parts['unit'].map(km_per_unit)


In [None]:
# Drop every pair whose parsed distance or duration equals zero, such as a
# location paired with itself.
has_distance = connections_df['km'].ne(0)
has_duration = connections_df['min'].ne(0)
connections_df = connections_df.loc[has_distance & has_duration]
connections_df

In [None]:
# Keep only the pairs with 'km' below 40 and 'min' of at most 24.
is_near = connections_df['km'].lt(40)
is_quick = connections_df['min'].le(24)
selected_rows = connections_df[is_near & is_quick]
selected_rows