# Modules

In [25]:
import pandas as pd
from io import StringIO
import numpy as np
import time
import re
import os

import requests
import json
import concurrent.futures

# Variables

In [27]:
# SECURITY: API keys must never be committed to source control. The key is read
# from the GOOGLE_MAPS_API_KEY environment variable instead; any key that was
# previously hard-coded here should be treated as leaked and revoked.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "")

# Root folder for every file this notebook writes.
output_files = '../data/raw_data'

# Processed IPUMS table that provides the municipality list (read further below).
ipums_processed_file = 'ipums_relation_processed.csv'

######################
## Create directories
######################
# Scratch folder that receives one partial CSV per departamento.
directory = f'{output_files}/tmp'
if not os.path.exists(directory):
    # exist_ok guards against the folder appearing between the check and the call.
    os.makedirs(directory, exist_ok=True)
    print(f"Directory '{directory}' created")

# Functions

In [5]:
def get_distance_matrix(origins, destinations, mode="driving", api_key="--", timeout=30):
    """Query the Google Distance Matrix API for the given origins and destinations.

    Args:
        origins: Iterable of origin strings; joined with "|" into one parameter.
        destinations: Iterable of destination strings; joined with "|".
        mode: Value sent as the API's ``mode`` parameter (e.g. "driving").
        api_key: Value sent as the API's ``key`` parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection would block the
            calling thread indefinitely.

    Returns:
        The decoded JSON body when the HTTP status is 200, otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: On connection errors or timeouts.
    """
    # Base URL for the Distance Matrix API
    base_url = "https://maps.googleapis.com/maps/api/distancematrix/json"

    # Parameters for the API
    params = {
        "origins": "|".join(origins),
        "destinations": "|".join(destinations),
        "mode": mode,
        "key": api_key,
    }

    # Make the API request; the timeout turns a hung connection into an exception.
    response = requests.get(base_url, params=params, timeout=timeout)

    # Callers treat None as "no data", so non-200 answers are not raised.
    if response.status_code != 200:
        return None
    return response.json()

def get_duration_in_minutes(duration_seconds):
    """Convert a duration in seconds to a whole number of minutes.

    Uses the built-in ``round``, so exact halves go to the nearest even
    minute (e.g. 30 s -> 0, 90 s -> 2).
    """
    minutes = duration_seconds / 60
    return round(minutes)

In [6]:
def retriable_get_distance_matrix(origin, destination, mode, api_key, max_retries=5):
    """Call ``get_distance_matrix``, retrying with exponential backoff on errors.

    Args:
        origin: Origins argument forwarded to ``get_distance_matrix``.
        destination: Destinations argument forwarded to ``get_distance_matrix``.
        mode: Travel mode forwarded as ``mode``.
        api_key: API key forwarded as ``api_key``.
        max_retries: Number of retries after the first attempt, so at most
            ``max_retries + 1`` calls are made.

    Returns:
        Whatever ``get_distance_matrix`` returns. A ``None`` result is returned
        as-is and is not retried. ``None`` is also returned when
        ``max_retries`` is negative, in which case no call is made.

    Raises:
        Exception: The last exception is re-raised once the retries are used up.
    """
    for attempt in range(max_retries + 1):
        try:
            return get_distance_matrix(origin, destination, mode=mode, api_key=api_key)
        except Exception:
            # Catch Exception rather than everything, so that KeyboardInterrupt
            # and SystemExit stop the run instead of being retried.
            if attempt == max_retries:
                raise  # Retries exhausted: surface the failure to the caller.
            # Exponential backoff starting at 100 ms: 0.1 s, 0.2 s, 0.4 s, ...
            time.sleep((2 ** attempt) * 0.1)
    return None


# Load info

In [7]:
# Read the processed IPUMS table and keep one row per
# (divipola, MUN, DPTO) combination, exposing the code as 'CODE'.
df_mun_list = pd.read_csv(ipums_processed_file)[['divipola', 'MUN', 'DPTO']]
df_mun_list = df_mun_list.drop_duplicates()
df_mun_list = df_mun_list.rename(columns={'divipola': 'CODE'})

# Title-case the department and municipality names.
for name_column in ('DPTO', 'MUN'):
    df_mun_list[name_column] = df_mun_list[name_column].str.title()

# "<Municipality>, <Department>" is the string used as origin/destination below.
df_mun_list['FULL_NAME'] = df_mun_list['MUN'] + ', ' + df_mun_list['DPTO']

# Lookup from the full name back to its code.
code_by_full_name = df_mun_list.set_index('FULL_NAME')['CODE']
get_code = code_by_full_name.to_dict()

# Get distance and time info

In [8]:
modes = ["driving", "walking"]  # Add other modes as needed

# Column order of each per-departamento CSV (before the code columns are added).
travel_columns = ['Origin', 'Destination']
for mode in modes:
    travel_columns += [f'Distance_{mode}', f'Time_minutes_{mode}']


def _query_travel_info(origin, destination, mode):
    """Return ``(distance_text, duration_minutes)`` for one origin/destination pair.

    ``(None, None)`` is returned when the request yields no result, or when the
    top-level or per-element ``status`` is anything other than 'OK'.
    """
    result = retriable_get_distance_matrix([origin], [destination], mode, api_key)
    if not result or result.get('status') != 'OK':
        return None, None

    # One origin and one destination were sent, so the answer is at [0][0].
    element = result['rows'][0]['elements'][0]
    if element['status'] != 'OK':
        return None, None

    # Distance keeps the API's 'text' field; duration is converted to minutes.
    return element['distance']['text'], get_duration_in_minutes(element['duration']['value'])


def _process_origin(origin, destinations):
    """Build one row (dict) per destination with the travel info for every mode."""
    print(f'--> Origin : {origin}')
    rows = []
    for destination in destinations:
        row = {'Origin': origin, 'Destination': destination}
        for mode in modes:
            distance, duration_minutes = _query_travel_info(origin, destination, mode)
            row[f'Distance_{mode}'] = distance
            row[f'Time_minutes_{mode}'] = duration_minutes
        rows.append(row)
    return rows


for departamento in np.unique(df_mun_list.DPTO):
    print(f'**** Processing departamento : {departamento}')
    df_mun_partial = df_mun_list[df_mun_list.DPTO == departamento]

    # Department code = municipality code without its last three characters.
    codigo_departamento = np.unique(df_mun_partial.CODE.astype(str).str[:-3])[0]

    # Every municipality of the departamento is both an origin and a destination.
    origins      = df_mun_partial['FULL_NAME'].to_list()
    destinations = df_mun_partial['FULL_NAME'].to_list()

    # One task per origin. Executor.map yields results in the order of
    # `origins`, so the row order of the CSV is the same on every run
    # (collecting with as_completed made it depend on thread timing).
    rows = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        per_origin_rows = executor.map(
            lambda origin: _process_origin(origin, destinations), origins
        )
        for origin_rows in per_origin_rows:
            rows.extend(origin_rows)

    df = pd.DataFrame(rows, columns=travel_columns)
    df['Code_Origin'] = df['Origin'].map(get_code)
    df['Code_Destination'] = df['Destination'].map(get_code)

    df.to_csv(f'{output_files}/tmp/tmp_departamento_{codigo_departamento}_travel_info.csv', index=False)
    print('\n')

**** Processing departamento : Amazonas
--> Origin : Leticia, Amazonas
--> Origin : El Encanto, Amazonas
--> Origin : La Chorrera, Amazonas
--> Origin : Puerto Arica, Amazonas
--> Origin : La Pedrera, Amazonas
--> Origin : Mirití - Paraná, Amazonas
--> Origin : Puerto Nariño, Amazonas
--> Origin : Puerto Santander, Amazonas
--> Origin : Tarapacá, Amazonas
--> Origin : La Victoria, Amazonas
--> Origin : Puerto Alegría, Amazonas


**** Processing departamento : Antioquia
--> Origin : Medellín, Antioquia
--> Origin : Abejorral, Antioquia
--> Origin : La Unión, Antioquia
--> Origin : Amagá, Antioquia
--> Origin : Andes, Antioquia
--> Origin : Apartadó, Antioquia
--> Origin : Arboletes, Antioquia
--> Origin : San Juan De Urabá, Antioquia
--> Origin : Barbosa, Antioquia
--> Origin : Bello, Antioquia
--> Origin : Betania, Antioquia
--> Origin : Ciudad Bolívar, Antioquia
--> Origin : Caldas, Antioquia
--> Origin : Carepa, Antioquia
--> Origin : El Carmen De Viboral, Antioquia
--> Origin : Cauc

In [28]:
# Gather every per-departamento CSV from the tmp folder into one frame.
# - Only '.csv' entries are read, so stray files or folders (for example
#   editor checkpoints) in tmp do not break read_csv.
# - Files are sorted so the row order does not depend on os.listdir's order.
# - All frames are concatenated in a single call rather than growing a frame
#   inside the loop, which copied the accumulated data on every iteration.
tmp_directory = f'{output_files}/tmp'
partial_frames = [
    pd.read_csv(os.path.join(tmp_directory, file_name))
    for file_name in sorted(os.listdir(tmp_directory))
    if file_name.endswith('.csv')
]
# pd.concat raises on an empty list; fall back to an empty frame as before.
df_ = pd.concat(partial_frames) if partial_frames else pd.DataFrame()

# Save file

In [30]:
# Write the combined table, without the index column, next to the tmp folder.
final_csv_path = f'{output_files}/travel_times_colombia_municipios.csv'
df_.to_csv(final_csv_path, index=False)

# Clean

In [None]:
def remove_directory_tree(start_directory: str):
    """Recursively and permanently remove a directory and everything inside it.

    Symbolic links are removed themselves and never followed, so data that
    lives outside ``start_directory`` is left untouched.

    Args:
        start_directory: Path of the directory to delete.

    Raises:
        OSError: If an entry cannot be listed or removed.
    """
    # Take a snapshot of the entries first, so nothing is deleted while the
    # directory is still being iterated.
    with os.scandir(start_directory) as scan:
        entries = list(scan)

    for entry in entries:
        if entry.is_dir(follow_symlinks=False):
            # A real sub-directory: empty it first, then remove it.
            remove_directory_tree(entry.path)
        else:
            # Regular files and links of any kind (including links to
            # directories and broken links) are simply unlinked.
            os.remove(entry.path)
    os.rmdir(start_directory)
    
######################
## Remove old directories
######################
# Scratch folder holding the per-departamento CSV files; delete it if present.
directory = f'{output_files}/tmp'
if os.path.exists(directory):
    try:
        remove_directory_tree(directory)
        print(f"Directory '{directory}' has been removed successfully")
    except OSError as removal_error:
        # Best effort: report the problem and carry on without raising.
        print(removal_error)
        print(f"Directory '{directory}' can not be removed")