In [20]:
import requests
from requests.exceptions import HTTPError
import json
import pandas as pd
import numpy as np
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter


In [2]:
# Location of the raw deliveries dataset (JSON) downloaded by this cell.
URL = "https://raw.githubusercontent.com/andre-marcos-perez/ebac-course-utils/main/dataset/deliveries.json"
# Seconds to wait for the server to connect / send data before giving up.
REQUEST_TIMEOUT_SECONDS = 30
# Stays None when the server answers with an HTTP error status.
data = None

try:
    # requests applies no timeout by default, so a stalled connection would
    # block this cell (and any "Run All") forever; bound the wait explicitly.
    resposta = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
    # Turn 4xx/5xx responses into an HTTPError instead of parsing an error page.
    resposta.raise_for_status()
except HTTPError as exc:
    # Report the failed status; `data` keeps its None placeholder.
    print(exc)
else:
    # Only reached on a successful response: decode the JSON payload.
    data = json.loads(resposta.text)

In [7]:
# Load the parsed JSON records into a DataFrame; the `origin` and `deliveries`
# columns still hold the raw nested objects at this stage.
deliveries_df = pd.DataFrame(data=data)
deliveries_df.head(n=5)

Unnamed: 0,name,region,origin,vehicle_capacity,deliveries
0,cvrp-2-df-33,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'p..."
1,cvrp-2-df-73,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': 'bf3fc630b1c29601a4caf1bdd474b85', 'po..."
2,cvrp-2-df-20,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': 'b30f1145a2ba4e0b9ac0162b68d045c3', 'p..."
3,cvrp-1-df-71,df-1,"{'lng': -47.89366206897872, 'lat': -15.8051175...",180,"[{'id': 'be3ed547394196c12c7c27c89ac74ed6', 'p..."
4,cvrp-2-df-87,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': 'a6328fb4dc0654eb28a996a270b0f6e4', 'p..."


In [8]:
# Flatten each `origin` dict into its own columns (`lng`, `lat`), one row per
# row of deliveries_df.
origin_df = pd.json_normalize(data=deliveries_df.loc[:, 'origin'])
origin_df.head(n=5)

Unnamed: 0,lng,lat
0,-48.054989,-15.838145
1,-48.054989,-15.838145
2,-48.054989,-15.838145
3,-47.893662,-15.805118
4,-48.054989,-15.838145


In [9]:
# Replace each origin dict with a "lat, lng" text label.
# NOTE: this overwrites the column in place, so the cell is not re-runnable:
# on a second run the values are already strings and cannot be indexed by key.
deliveries_df['origin'] = deliveries_df['origin'].map(
    lambda point: f"{point['lat']}, {point['lng']}"
)
deliveries_df.head()

Unnamed: 0,name,region,origin,vehicle_capacity,deliveries
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'p..."
1,cvrp-2-df-73,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'bf3fc630b1c29601a4caf1bdd474b85', 'po..."
2,cvrp-2-df-20,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'b30f1145a2ba4e0b9ac0162b68d045c3', 'p..."
3,cvrp-1-df-71,df-1,"-15.80511751066334, -47.89366206897872",180,"[{'id': 'be3ed547394196c12c7c27c89ac74ed6', 'p..."
4,cvrp-2-df-87,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'a6328fb4dc0654eb28a996a270b0f6e4', 'p..."


In [10]:
# Attach the flattened origin coordinates; both frames are matched row by row
# through their index.
deliveries_df = deliveries_df.merge(
    origin_df,
    how='inner',
    left_index=True,
    right_index=True,
)
deliveries_df.head()

Unnamed: 0,name,region,origin,vehicle_capacity,deliveries,lng,lat
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'p...",-48.054989,-15.838145
1,cvrp-2-df-73,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'bf3fc630b1c29601a4caf1bdd474b85', 'po...",-48.054989,-15.838145
2,cvrp-2-df-20,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'b30f1145a2ba4e0b9ac0162b68d045c3', 'p...",-48.054989,-15.838145
3,cvrp-1-df-71,df-1,"-15.80511751066334, -47.89366206897872",180,"[{'id': 'be3ed547394196c12c7c27c89ac74ed6', 'p...",-47.893662,-15.805118
4,cvrp-2-df-87,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'a6328fb4dc0654eb28a996a270b0f6e4', 'p...",-48.054989,-15.838145


In [11]:
# Prefix the coordinate columns with `origin_` so they are distinguishable
# from the destination coordinates added later in the notebook.
origin_column_names = {'lng': 'origin_lng', 'lat': 'origin_lat'}
deliveries_df = deliveries_df.rename(columns=origin_column_names)
deliveries_df.head()

Unnamed: 0,name,region,origin,vehicle_capacity,deliveries,origin_lng,origin_lat
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'p...",-48.054989,-15.838145
1,cvrp-2-df-73,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'bf3fc630b1c29601a4caf1bdd474b85', 'po...",-48.054989,-15.838145
2,cvrp-2-df-20,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'b30f1145a2ba4e0b9ac0162b68d045c3', 'p...",-48.054989,-15.838145
3,cvrp-1-df-71,df-1,"-15.80511751066334, -47.89366206897872",180,"[{'id': 'be3ed547394196c12c7c27c89ac74ed6', 'p...",-47.893662,-15.805118
4,cvrp-2-df-87,df-2,"-15.83814451122274, -48.05498915846707",180,"[{'id': 'a6328fb4dc0654eb28a996a270b0f6e4', 'p...",-48.054989,-15.838145


In [12]:
# Expand every list in `deliveries` into one row per delivery dict. The
# original row label is repeated for each element, which is what later lets
# the deliveries be matched back to their parent row.
deliveries_exploded_df = (
    deliveries_df
    .loc[:, ['deliveries']]
    .explode(column='deliveries')
)
deliveries_exploded_df.head()

Unnamed: 0,deliveries
0,"{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'po..."
0,"{'id': '320c94b17aa685c939b3f3244c3099de', 'po..."
0,"{'id': '3663b42f4b8decb33059febaba46d5c8', 'po..."
0,"{'id': 'e11ab58363c38d6abc90d5fba87b7d7', 'poi..."
0,"{'id': '54cb45b7bbbd4e34e7150900f92d7f4b', 'po..."


In [13]:
def _delivery_field(extract, column_name):
    """Apply `extract` to every exploded delivery dict and name the result.

    Returns a Series that keeps the (repeated) index of
    `deliveries_exploded_df` and is labelled `column_name`.
    """
    return deliveries_exploded_df['deliveries'].map(extract).rename(column_name)


# Pull the size, the destination coordinates and a "lat, lng" text label out
# of each delivery dict, then place the four Series side by side.
deliveries_normalized_df = pd.concat(
    [
        _delivery_field(lambda delivery: delivery['size'], 'delivery_size'),
        _delivery_field(lambda delivery: delivery['point']['lng'], 'destiny_lng'),
        _delivery_field(lambda delivery: delivery['point']['lat'], 'destiny_lat'),
        _delivery_field(
            lambda delivery: f"{delivery['point']['lat']}, {delivery['point']['lng']}",
            'destiny',
        ),
    ],
    axis=1,
)

deliveries_normalized_df.head()

Unnamed: 0,delivery_size,destiny_lng,destiny_lat,destiny
0,9,-48.116189,-15.848929,"-15.848929154862294, -48.11618888384239"
0,2,-48.118195,-15.850772,"-15.850772371049631, -48.11819489551"
0,1,-48.112483,-15.847871,"-15.84787055941764, -48.11248339849675"
0,2,-48.118023,-15.846471,"-15.846471025281456, -48.11802268617869"
0,7,-48.114898,-15.858055,"-15.85805462185708, -48.114898174591026"


In [14]:
# Swap the nested `deliveries` column for its flattened form. The right join
# on the index keeps one output row per delivery and repeats the parent row's
# attributes next to it.
deliveries_df = (
    deliveries_df
    .drop(columns='deliveries')
    .merge(
        deliveries_normalized_df,
        how='right',
        left_index=True,
        right_index=True,
    )
)
deliveries_df.head()

Unnamed: 0,name,region,origin,vehicle_capacity,origin_lng,origin_lat,delivery_size,destiny_lng,destiny_lat,destiny
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,-48.054989,-15.838145,9,-48.116189,-15.848929,"-15.848929154862294, -48.11618888384239"
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,-48.054989,-15.838145,2,-48.118195,-15.850772,"-15.850772371049631, -48.11819489551"
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,-48.054989,-15.838145,1,-48.112483,-15.847871,"-15.84787055941764, -48.11248339849675"
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,-48.054989,-15.838145,2,-48.118023,-15.846471,"-15.846471025281456, -48.11802268617869"
0,cvrp-2-df-33,df-2,"-15.83814451122274, -48.05498915846707",180,-48.054989,-15.838145,7,-48.114898,-15.858055,"-15.85805462185708, -48.114898174591026"


In [17]:
# Put the columns in reading order (route, origin, vehicle, delivery,
# destination) and replace the repeated row labels with a fresh 0..n-1 index.
deliveries_df = (
    deliveries_df
    .loc[:, [
        'name',
        'region',
        'origin_lat',
        'origin_lng',
        'origin',
        'vehicle_capacity',
        'delivery_size',
        'destiny_lat',
        'destiny_lng',
        'destiny',
    ]]
    .reset_index(drop=True)
)
deliveries_df.head()

Unnamed: 0,name,region,origin_lat,origin_lng,origin,vehicle_capacity,delivery_size,destiny_lat,destiny_lng,destiny
0,cvrp-2-df-33,df-2,-15.838145,-48.054989,"-15.83814451122274, -48.05498915846707",180,9,-15.848929,-48.116189,"-15.848929154862294, -48.11618888384239"
1,cvrp-2-df-33,df-2,-15.838145,-48.054989,"-15.83814451122274, -48.05498915846707",180,2,-15.850772,-48.118195,"-15.850772371049631, -48.11819489551"
2,cvrp-2-df-33,df-2,-15.838145,-48.054989,"-15.83814451122274, -48.05498915846707",180,1,-15.847871,-48.112483,"-15.84787055941764, -48.11248339849675"
3,cvrp-2-df-33,df-2,-15.838145,-48.054989,"-15.83814451122274, -48.05498915846707",180,2,-15.846471,-48.118023,"-15.846471025281456, -48.11802268617869"
4,cvrp-2-df-33,df-2,-15.838145,-48.054989,"-15.83814451122274, -48.05498915846707",180,7,-15.858055,-48.114898,"-15.85805462185708, -48.114898174591026"


In [38]:
# One row per distinct (region, origin) pair, ordered by region, so each hub
# only has to be geocoded once.
hub_df = (
    deliveries_df
    .loc[:, ['region', 'origin']]
    .drop_duplicates()
    .sort_values(by='region')
    .reset_index(drop=True)
)
hub_df.head()

Unnamed: 0,region,origin
0,df-0,"-15.657013854445248, -47.802664728268745"
1,df-1,"-15.80511751066334, -47.89366206897872"
2,df-2,"-15.83814451122274, -48.05498915846707"


In [39]:
# Nominatim client identified by a custom user agent string.
geolocator = Nominatim(user_agent='geoloc/1.0 (tiicoliro@gmail.com)')
# Wrap the reverse-geocoding call so consecutive requests are spaced at least
# one second apart.
geocoder = RateLimiter(geolocator.reverse, min_delay_seconds=1)

# Reverse-geocode every hub's "lat, lng" label and keep the returned object
# in a new `geodata` column.
hub_df = hub_df.assign(geodata=hub_df['origin'].apply(geocoder))
hub_df.head()

Unnamed: 0,region,origin,geodata
0,df-0,"-15.657013854445248, -47.802664728268745","(Rua 7, Quadra 2, Sobradinho, Região Geográfic..."
1,df-1,"-15.80511751066334, -47.89366206897872","(SQS 303, Asa Sul, Brasília, Plano Piloto, Reg..."
2,df-2,"-15.83814451122274, -48.05498915846707","(Armazém do Bolo, lote 4/8, CSB 4/5, Taguating..."


In [37]:
# Flatten the raw geocoder payload of every hub into dotted columns
# (e.g. "address.city").
hub_geodata_df = pd.json_normalize(hub_df['geodata'].apply(lambda location: location.raw))

# json_normalize only creates a column for keys present in at least one
# payload, so plain `[[...]]` selection raises KeyError when no hub has, say,
# "address.town". reindex() keeps the same three columns and fills any that is
# absent with NaN, which the fallbacks below already handle. It also returns
# an independent frame, so the assignments below do not write into a slice.
hub_geodata_df = (
    hub_geodata_df
    .reindex(columns=["address.town", "address.suburb", "address.city"])
    .rename(columns={
        "address.town": "origin_town",
        "address.suburb": "origin_suburb",
        "address.city": "origin_city",
    })
)

# City falls back to town when the city field is missing...
hub_geodata_df["origin_city"] = np.where(
    hub_geodata_df["origin_city"].notna(),
    hub_geodata_df["origin_city"],
    hub_geodata_df["origin_town"],
)
# ...and suburb falls back to the (already resolved) city.
hub_geodata_df["origin_suburb"] = np.where(
    hub_geodata_df["origin_suburb"].notna(),
    hub_geodata_df["origin_suburb"],
    hub_geodata_df["origin_city"],
)
# The town column was only needed as a fallback source.
hub_geodata_df = hub_geodata_df.drop(columns="origin_town")
hub_geodata_df.head()

Unnamed: 0,origin_suburb,origin_city
0,Sobradinho,Sobradinho
1,Asa Sul,Brasília
2,Taguatinga,Taguatinga


In [40]:
# Attach the resolved suburb/city to each hub (matched by index) and keep only
# the region key plus those two fields.
# NOTE: this drops `origin` and `geodata` from hub_df, so the geocoding cells
# must be re-run before this one can be executed again.
hub_df = (
    hub_df
    .merge(hub_geodata_df, left_index=True, right_index=True)
    .loc[:, ["region", "origin_suburb", "origin_city"]]
)
hub_df.head()

Unnamed: 0,region,origin_suburb,origin_city
0,df-0,Sobradinho,Sobradinho
1,df-1,Asa Sul,Brasília
2,df-2,Taguatinga,Taguatinga


In [41]:
# Add each hub's suburb and city to every delivery row of the same region.
deliveries_df = deliveries_df.merge(hub_df, how="inner", on="region")

In [44]:
# Final column layout: the textual `origin` label is dropped in favour of the
# resolved suburb/city, which are placed right after the region.
deliveries_df = deliveries_df.loc[:, [
    'name',
    'region',
    'origin_suburb',
    'origin_city',
    'origin_lat',
    'origin_lng',
    'vehicle_capacity',
    'delivery_size',
    'destiny_lat',
    'destiny_lng',
    'destiny',
]]
deliveries_df.head()

Unnamed: 0,name,region,origin_suburb,origin_city,origin_lat,origin_lng,vehicle_capacity,delivery_size,destiny_lat,destiny_lng,destiny
0,cvrp-2-df-33,df-2,Taguatinga,Taguatinga,-15.838145,-48.054989,180,9,-15.848929,-48.116189,"-15.848929154862294, -48.11618888384239"
1,cvrp-2-df-33,df-2,Taguatinga,Taguatinga,-15.838145,-48.054989,180,2,-15.850772,-48.118195,"-15.850772371049631, -48.11819489551"
2,cvrp-2-df-33,df-2,Taguatinga,Taguatinga,-15.838145,-48.054989,180,1,-15.847871,-48.112483,"-15.84787055941764, -48.11248339849675"
3,cvrp-2-df-33,df-2,Taguatinga,Taguatinga,-15.838145,-48.054989,180,2,-15.846471,-48.118023,"-15.846471025281456, -48.11802268617869"
4,cvrp-2-df-33,df-2,Taguatinga,Taguatinga,-15.838145,-48.054989,180,7,-15.858055,-48.114898,"-15.85805462185708, -48.114898174591026"


In [13]:
# Export the flattened deliveries to the parent directory as a
# semicolon-separated file without the index column.
# A forward slash is used because a backslash is only a path separator on
# Windows; elsewhere '..\\deliveries_loggi.csv' would create a file literally
# named "..\deliveries_loggi.csv" in the current directory. Windows accepts
# the forward slash too, so the target there is unchanged.
deliveries_df.to_csv('../deliveries_loggi.csv', sep=';', index=False)