In [10]:
# Request the municipality list from the www.datos.gov.co open-data portal
# (dataset xdk5-pm3f), geocode each municipality, and save the results to
# location_info.csv and municipios.csv
import pandas as pd
from sodapy import Socrata
import requests
import time

def _normalize_dane_code(code):
    """Return a municipality code as 2 department digits + 3 municipality digits."""
    department, separator, municipality = code.partition(".")
    if not separator:
        # No dot to split on: only left-pad to the five-digit width.
        return code.zfill(5)
    # The dotted form reads like a float ("5.03"), so the department may have
    # lost its leading zero and the municipality part its trailing zeros.
    # Pad each part on its own side instead of relying on the total length.
    municipality = municipality.replace(".", "")
    return department.zfill(2) + municipality.ljust(3, "0")


def get_municipios():
    """
    Download the municipality list from the www.datos.gov.co Socrata API
    (dataset ``xdk5-pm3f``, up to 2000 records).

    The ``c_digo_dane_del_municipio`` field of every record is rewritten as a
    dot-free, five-character code, e.g. ``"5.001"`` -> ``"05001"``,
    ``"5.03"`` -> ``"05030"`` and ``"17.44"`` -> ``"17440"``.

    NOTE(review): this assumes the API delivers the code as a dotted,
    float-like string, which is what the original padding logic implies;
    confirm against the live dataset.

    Returns
    -------
    Municipios:
        pd.DataFrame: DataFrame with one row per record returned by the API
    """
    client = Socrata("www.datos.gov.co", None)
    results = client.get("xdk5-pm3f", limit=2000)
    for result in results:
        result["c_digo_dane_del_municipio"] = _normalize_dane_code(
            result["c_digo_dane_del_municipio"]
        )
    results_df = pd.DataFrame.from_records(results)
    return results_df

# Manual corrections for search strings built from the source data, keyed by
# the string as generated and mapped to the string that is searched instead.
_SEARCH_QUERY_OVERRIDES = {
    'Dibula, La Guajira, Colombia': 'Dibulla, La Guajira, Colombia',
    'Tolú Viejo, Sucre, Colombia': 'Tolúviejo, Sucre, Colombia',
    'San Juan de Río Seco, Cundinamarca, Colombia': 'San Juan de Ríoseco, Cundinamarca, Colombia',
    # San Luis de Gaceno, Casanare is not found; San Luis de Gaceno, Boyacá
    # was chosen as the closest municipality.
    'San Luis de Gaceno, Casanare, Colombia': 'San Luis de Gaceno, Boyacá, Colombia',
    'Villa de San Diego de Ubate, Cundinamarca, Colombia': 'Ubaté, Provincia de Ubaté, Colombia',
    'El Cantón del San Pablo, Chocó, Colombia': 'El Cantón de San Pablo, Chocó, Colombia',
    'Valle de Guamez, Putumayo, Colombia': 'Valle Del Guamuez, Putumayo, Colombia',
    # Could be San Pablo de Borbur, Boyacá or San Pablo, Bolívar; the latter
    # was chosen because San Pablo de Borbur, Boyacá is already in the data.
    'San Pablo de Borbur, Bolívar, Colombia': 'San Pablo, Bolívar, Colombia',
    'San Andrés de Tumaco, Nariño, Colombia': 'Tumaco, Nariño, Colombia',
}


def get_search_query(municipios: pd.DataFrame)->pd.DataFrame:
    """
    Build the geocoder search string for every municipality.

    Each row yields ``"<municipio>, <departamento>, Colombia"``. Strings listed
    in ``_SEARCH_QUERY_OVERRIDES`` are replaced by their corrected form.

    Parameters
    ----------
    municipios : pd.DataFrame
        DataFrame with the municipios
            columns: ['municipio', 'departamento'], pais is 'Colombia'

    Returns
    -------
    search_query:
        pd.DataFrame: DataFrame with the search query, one row per input row
        in the same order, indexed 0..n-1
            columns: ['search_query']
    """
    if len(municipios) == 0:
        # Keep the documented column even when there is nothing to convert.
        return pd.DataFrame(columns=['search_query'])

    raw_queries = (
        f'{municipio}, {departamento}, Colombia'
        for municipio, departamento in zip(municipios['municipio'], municipios['departamento'])
    )
    queries = [_SEARCH_QUERY_OVERRIDES.get(query, query) for query in raw_queries]
    # Build the frame once; concatenating one row at a time is quadratic.
    return pd.DataFrame({'search_query': queries})

def get_location_info(search_query: pd.DataFrame)->pd.DataFrame:
    """
    Geocode every search string with the Nominatim search API
    (https://nominatim.openstreetmap.org/search.php, ``format=jsonv2``) and
    collect the first match of each query.

    Queries whose request fails, whose response is not valid JSON, or that
    return no match are reported with an ``Error with ...`` line and skipped,
    so the result can have fewer rows than ``search_query``.

    Parameters
    ----------
    search_query : pd.DataFrame
        DataFrame with the search strings
            columns: ['search_query']

    Returns
    -------
    location_info:
        pd.DataFrame: one row per successfully geocoded query, in input order,
        indexed 0..k-1. Columns are the keys of the Nominatim result;
        'boundingbox' is kept as a list in a single cell.
    """
    total = len(search_query)
    if total == 0:
        return pd.DataFrame()

    # NOTE(review): the public Nominatim service publishes a usage policy
    # (request rate, identifying User-Agent); confirm this loop complies.
    url = 'https://nominatim.openstreetmap.org/search.php'
    records = []
    # Count positions with enumerate so the progress line does not depend on
    # the frame having an integer index.
    for position, query in enumerate(search_query['search_query'], start=1):
        start_time = time.time()
        try:
            # Passing the query through `params` lets requests URL-encode it.
            response = requests.get(url, params={'q': query, 'format': 'jsonv2'}, timeout=30)
            response.raise_for_status()
            matches = response.json()
        except (requests.RequestException, ValueError) as error:
            print(f'Error with {query}: {error}')
            continue
        if not isinstance(matches, list) or not matches:
            # Nothing found for this search string.
            print(f'Error with {query}')
            continue
        # get only the first result
        first_match = matches[0]
        print(f'Getting info from {position}/{total} {query}')
        print(first_match)
        # print the number of fields in the result
        print(len(first_match))
        records.append(first_match)
        # print the time
        print("--- %s seconds ---" % (time.time() - start_time))
    # Building the frame once keeps list-valued fields such as 'boundingbox'
    # in a single cell and avoids a concat per row.
    return pd.DataFrame(records)

def get_distance_and_time_matrix(locations_info: pd.DataFrame)->"tuple[pd.DataFrame, pd.DataFrame]":
    """
    Get the driving distance and travel-time matrices between all locations
    with a single request to the OSRM table service.

    Parameters
    --------
    locations_info : pd.DataFrame
        DataFrame with the location info
            columns: ['name', 'lat', 'lon']

    Returns
    -------
    distance_matrix:
        pd.DataFrame: n x n matrix of distances as returned by OSRM (metres
        according to the OSRM API documentation). Row i is the origin
        ``locations_info`` row i (integer index 0..n-1); columns are labelled
        with the destination names.

    time_matrix:
        pd.DataFrame: n x n matrix of durations as returned by OSRM (seconds
        according to the OSRM API documentation), same layout.

    Both frames are empty when ``locations_info`` has no rows.

    Raises
    ------
    requests.HTTPError
        If the OSRM server answers with an error status.

    Examples
    --------

    >>> locations_info = pd.DataFrame({'name': ['Bogotá', 'Medellín', 'Cali'], 'lat': [4.60971, 6.25184, 3.43722], 'lon': [-74.08175, -75.56359, -76.5225]})
    >>> distance_matrix, time_matrix = get_distance_and_time_matrix(locations_info)  # doctest: +SKIP
    >>> distance_matrix.shape  # doctest: +SKIP
    (3, 3)
    """
    if len(locations_info) == 0:
        return pd.DataFrame(), pd.DataFrame()

    url_base = 'http://router.project-osrm.org/table/v1/driving/'
    # OSRM expects "lon,lat" pairs separated by ';'.
    coordinates = ';'.join(
        f'{lon},{lat}' for lon, lat in zip(locations_info['lon'], locations_info['lat'])
    )
    # One table request already yields the full matrix. 'distances' is only
    # part of the response when it is requested through `annotations`.
    url = url_base + coordinates + '?annotations=distance,duration'
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    response_json = response.json()
    distance_matrix = pd.DataFrame(response_json['distances'], columns=locations_info['name'])
    time_matrix = pd.DataFrame(response_json['durations'], columns=locations_info['name'])
    return distance_matrix, time_matrix



# Download the municipality list and show it.
municipios = get_municipios()
display(municipios)
# add the search query to the municipios dataframe
search_query = get_search_query(municipios)
municipios = pd.concat([municipios, search_query], axis=1)
# get the location info from the search query
location_info = get_location_info(search_query)
location_info.reset_index(drop=True, inplace=True)
# save the raw geocoder output; it stays available even if the merge is skipped
location_info.to_csv('location_info.csv', index=False)
# The two frames are joined by row position, and get_location_info skips the
# queries it could not resolve. With different lengths the rows no longer line
# up, so merging would attach coordinates to the wrong municipality.
if len(municipios) != len(location_info):
    print('Error: municipios and location_info have different length')
else:
    # add the location info to the municipios dataframe
    municipios = pd.concat([municipios, location_info], axis=1)
    # save the data
    municipios.to_csv('municipios.csv', index=False)






Unnamed: 0,region,c_digo_dane_del_departamento,departamento,c_digo_dane_del_municipio,municipio
0,Región Eje Cafetero - Antioquia,5,Antioquia,05001,Medellín
1,Región Eje Cafetero - Antioquia,5,Antioquia,05002,Abejorral
2,Región Eje Cafetero - Antioquia,5,Antioquia,05004,Abriaquí
3,Región Eje Cafetero - Antioquia,5,Antioquia,05021,Alejandría
4,Región Eje Cafetero - Antioquia,5,Antioquia,05030,Amagá
...,...,...,...,...,...
1118,Región Eje Cafetero - Antioquia,17,Caldas,17444,Marquetalia
1119,Región Pacífico,52,Nariño,52051,Arboleda
1120,Región Pacífico,76,Valle del Cauca,76109,Buenaventura
1121,Región Caribe,47,Magdalena,47189,Ciénaga


Getting info from 1/1123 Medellín, Antioquia, Colombia
{'place_id': 269992877, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 1343264, 'lat': '6.2697018', 'lon': '-75.60252574475943', 'category': 'boundary', 'type': 'administrative', 'place_rank': 12, 'importance': 0.5934777156602727, 'addresstype': 'city', 'name': 'Medellín', 'display_name': 'Medellín, Valle de Aburrá, Antioquia, RAP del Agua y la Montaña, 0500, Colombia', 'boundingbox': ['6.1626165', '6.3764208', '-75.7194224', '-75.4734083']}
14
--- 2.20137095451355 seconds ---
Getting info from 2/1123 Abejorral, Antioquia, Colombia
{'place_id': 269620863, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 1307016, 'lat': '5.80498375', 'lon': '-75.4304641628398', 'category': 'boundary', 'type': 'administrative', 'place_rank': 12, 'importance': 0.45000999999999997, 'addresstype': 'county', 'name': 

KeyboardInterrupt: 

In [None]:

# TODO: #3 Matriz de Distancias

# TODO: #4 Matriz de Tiempos
# TODO: #5 Merge Habitantes

# TODO: #6 Merge Demanda
# TODO: #7 Merge Origenes

In [36]:
import pandas as pd
import requests
def get_distance_and_time_matrix(locations_info: pd.DataFrame)->"tuple[pd.DataFrame | None, pd.DataFrame | None]":
    """
    Get the driving distance and travel-time matrices between all locations
    with a single request to the OSRM table service.

    The request URL is printed and both matrices are displayed before they
    are returned.

    Parameters
    --------
    locations_info : pd.DataFrame
        DataFrame with the location info
            columns: ['name', 'lat', 'lon']

    Returns
    -------
    distance_matrix:
        pd.DataFrame: n x n matrix of distances as returned by OSRM (metres
        according to the OSRM API documentation); rows (origins) and columns
        (destinations) are both labelled with the location names.

    time_matrix:
        pd.DataFrame: n x n matrix of durations as returned by OSRM (seconds
        according to the OSRM API documentation), same layout.

    Both values are ``None`` when the server does not answer with HTTP 200,
    and both frames are empty when ``locations_info`` has no rows.

    Examples
    --------

    >>> locations_info = pd.DataFrame({'name': ['Bogotá', 'Medellín', 'Cali'], 'lat': [4.60971, 6.25184, 3.43722], 'lon': [-74.08175, -75.56359, -76.5225]})
    >>> distance_matrix, time_matrix = get_distance_and_time_matrix(locations_info)  # doctest: +SKIP
    >>> distance_matrix.shape  # doctest: +SKIP
    (3, 3)
    """
    if len(locations_info) == 0:
        # Nothing to route; skip the request, which would carry no coordinates.
        return pd.DataFrame(), pd.DataFrame()

    url_base = 'http://router.project-osrm.org/table/v1/driving/'
    # OSRM expects "lon,lat" pairs separated by ';'.
    url_coordinates = ';'.join(
        f'{lon},{lat}' for lon, lat in zip(locations_info['lon'], locations_info['lat'])
    )
    # create the url
    url = url_base + url_coordinates+'?annotations=distance,duration'
    print(url)
    # get the response
    response = requests.get(url, timeout=60)
    # check if the response is ok
    if response.status_code != 200:
        print(f'Error: {response.status_code}')
        return None, None
    # get the distance and time matrix
    response_json = response.json()
    distance_matrix = pd.DataFrame(response_json['distances'], index=locations_info['name'], columns=locations_info['name'])
    time_matrix = pd.DataFrame(response_json['durations'], index=locations_info['name'], columns=locations_info['name'])
    display(distance_matrix)
    display(time_matrix)
    # Hand the matrices back; previously the success path returned nothing.
    return distance_matrix, time_matrix
        
# Reload the table saved as municipios.csv and request the matrices for its
# first 100 rows only; the function prints the request URL and displays both
# matrices itself.
location_info = pd.read_csv('municipios.csv')
get_distance_and_time_matrix(location_info.iloc[:100])

http://router.project-osrm.org/table/v1/driving/-75.60252574475943,6.2697018;-75.4304641628398,5.80498375;-76.0645541,6.6319273;-75.09860996491068,6.355965449999999;-75.70093,6.035448199999999;-74.95163388200078,7.0201156;-75.94516744951865,5.627871750000001;-75.72075627092343,6.131153899999999;-75.35301149467134,6.8693472;-75.1466433,7.0736127;-73.93126949176407,5.71736945;-75.91804564673755,6.31394255;-76.6227461,7.8849011;-76.4273168,8.8528266;-75.1428525,5.7310785;-75.7870472,6.1561777;-75.33912589828495,6.438483250000001;-75.5582665,6.334997;-75.9768362,5.7451779;-75.9845433,6.1134294;-76.0208666,5.8500342;-75.58269735773685,7.12021975;-75.92084701105912,6.8110809;-75.3514383,7.577089;-75.99210811258797,6.42595665;-75.63035824143908,6.05168765;-75.29439599536364,7.07754205;-76.0261638,6.7519854;-74.7566979,6.4092756;-75.6444056,5.5487065;-76.74603894250129,7.79845155;-73.37766150522931,5.6124101500000005;-75.3869174,7.1050317;-75.04524822749215,7.8276894;-76.63620320861776,7.61293

name,Medellín,Abejorral,Abriaquí,Alejandría,Amagá,Amalfi,Andes,Angelópolis,Angostura,Anorí,...,San Jerónimo,Puerto Wilches,Puerto Parra,San Luis,Vereda La San Pedro,San Rafael,San Roque,San Vicente,Santa Bárbara,Santo Domingo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Medellín,0.0,86361.7,134450.8,90408.1,41218.2,174751.1,133763.2,43076.8,147665.7,182161.0,...,33920.9,356334.8,251643.9,129384.7,94172.0,107030.9,119708.9,60345.7,59519.3,71141.3
Abejorral,91214.1,0.0,224680.0,133735.3,107066.2,241147.0,148093.6,108924.7,214061.6,248556.9,...,124150.2,422730.7,318039.8,142538.4,160567.9,122788.0,186104.8,88346.7,41514.4,137537.2
Abriaquí,133242.4,219637.5,0.0,224310.2,174262.8,308653.2,220927.7,176121.3,248203.0,282698.3,...,101817.7,490236.8,385546.0,262660.5,194709.2,240932.9,253611.0,193621.5,192563.8,205043.4
Alejandría,91666.1,125038.3,223780.5,0.0,125258.6,162501.2,217803.6,127117.2,113698.7,161591.3,...,123250.7,321851.3,217160.5,66264.7,116801.7,18710.7,72924.8,45388.6,143559.7,27888.0
Amagá,41981.6,107280.4,175447.6,126257.9,0.0,210600.9,93035.8,15574.9,183515.4,218010.7,...,74917.7,392184.5,287493.6,156939.7,130021.7,132729.1,155558.6,87900.7,48304.4,106991.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
San Rafael,108288.8,122809.4,240403.2,18710.7,141881.4,191081.6,234426.3,143739.9,142279.0,190171.6,...,139873.4,315185.6,210494.8,73449.3,133424.4,0.0,54214.1,62011.3,126866.8,44510.8
San Roque,120179.6,199146.8,252294.0,72918.8,153772.1,168328.8,246317.1,155630.7,119526.2,167418.9,...,151764.2,262414.4,157723.5,113662.2,145337.8,54208.1,0.0,110886.8,172073.2,37816.7
San Vicente,60755.3,90132.8,192869.7,45388.6,87319.5,166121.5,179864.4,89178.0,144520.8,165211.6,...,92339.8,347705.1,243014.2,115999.5,91027.1,62011.3,111079.2,0.0,98401.0,54859.8
Santa Bárbara,60400.7,41514.4,193866.7,141903.9,48316.4,229020.0,121000.9,63662.3,201934.5,236429.9,...,93336.9,410603.6,305912.8,150707.0,148440.8,130956.6,173977.8,96515.3,0.0,125410.2


name,Medellín,Abejorral,Abriaquí,Alejandría,Amagá,Amalfi,Andes,Angelópolis,Angostura,Anorí,...,San Jerónimo,Puerto Wilches,Puerto Parra,San Luis,Vereda La San Pedro,San Rafael,San Roque,San Vicente,Santa Bárbara,Santo Domingo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Medellín,0.0,6332.6,7536.2,7244.6,2537.5,13895.3,9583.1,3519.0,8877.8,11908.9,...,1908.8,18487.8,13804.2,8584.9,5730.6,8493.1,8188.1,4438.0,6622.4,4825.9
Abejorral,6343.5,0.0,13639.7,9647.7,6938.1,18915.8,10856.8,7919.6,13898.3,16929.4,...,8012.3,23508.3,18824.7,8358.9,10751.1,9329.8,13208.6,6069.4,4014.2,9846.4
Abriaquí,7504.3,13629.2,0.0,14544.1,9829.5,21194.8,14045.4,10811.0,15923.9,18955.0,...,6038.8,25787.3,21103.7,15881.5,12776.7,15792.6,15487.6,11734.6,13914.4,12125.4
Alejandría,6421.7,9836.6,13666.9,0.0,8300.2,14020.3,15345.8,9281.7,9998.0,12044.8,...,8039.5,17356.5,12672.9,6846.2,7711.8,1556.4,6275.4,3572.5,12385.1,2568.2
Amagá,2379.3,6911.7,9675.5,8929.7,0.0,15580.4,7149.1,1508.0,10562.9,13594.0,...,4048.1,20172.9,15489.3,9700.4,7415.7,10079.0,9873.2,5553.5,4883.9,6511.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
San Rafael,7670.1,9389.7,14915.3,1554.4,9548.6,15079.8,16594.2,10530.1,11057.5,13104.3,...,9287.9,16660.6,11977.0,5317.4,8960.2,0.0,4719.0,4820.9,12048.1,3816.6
San Roque,7326.4,13004.6,14571.6,6289.2,9204.9,13849.7,16250.5,10186.4,9827.4,11874.2,...,8944.2,14008.3,9324.7,9136.2,8629.9,4734.8,0.0,7792.2,13289.8,3728.8
San Vicente,4070.6,6562.5,11315.8,3578.3,5528.1,13524.8,12573.7,6509.6,8798.4,11549.3,...,5688.4,18117.3,13433.7,7235.2,5651.2,4826.8,7817.6,0.0,8843.6,4354.8
Santa Bárbara,6153.5,4014.6,13449.7,12123.5,4731.7,19354.6,9878.8,6175.1,14337.1,17368.2,...,7822.3,23947.1,19263.5,10834.7,11189.9,11805.6,13647.4,8545.2,0.0,10285.2
