In [24]:
import requests
from requests.exceptions import HTTPError
import json
import pandas as pd

In [25]:
# Location of the raw deliveries dataset (a JSON document hosted on GitHub).
URL = "https://raw.githubusercontent.com/andre-marcos-perez/ebac-course-utils/main/dataset/deliveries.json"

# Parsed JSON payload; stays None when the download fails so that later cells
# can tell "not loaded" apart from "loaded".
data = None

try:
    # requests never times out on its own, so an unresponsive server would
    # hang this cell forever. The timeout turns that into a reported failure.
    resposta = requests.get(URL, timeout=30)
    # Turn 4xx/5xx responses into an HTTPError instead of parsing an error page.
    resposta.raise_for_status()
except requests.exceptions.RequestException as exc:
    # RequestException is the base class of HTTPError, ConnectionError and
    # Timeout, so every transport-level failure is reported the same way.
    print(exc)
else:
    data = json.loads(resposta.text)

In [27]:
# Load the parsed JSON payload into a DataFrame and preview the first rows.
deliveries_df = pd.DataFrame(data=data)
deliveries_df.head()

Unnamed: 0,name,region,origin,vehicle_capacity,deliveries
0,cvrp-2-df-33,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'p..."
1,cvrp-2-df-73,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': 'bf3fc630b1c29601a4caf1bdd474b85', 'po..."
2,cvrp-2-df-20,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': 'b30f1145a2ba4e0b9ac0162b68d045c3', 'p..."
3,cvrp-1-df-71,df-1,"{'lng': -47.89366206897872, 'lat': -15.8051175...",180,"[{'id': 'be3ed547394196c12c7c27c89ac74ed6', 'p..."
4,cvrp-2-df-87,df-2,"{'lng': -48.05498915846707, 'lat': -15.8381445...",180,"[{'id': 'a6328fb4dc0654eb28a996a270b0f6e4', 'p..."


In [28]:
# Flatten the dict stored in each 'origin' cell into one column per key.
origin_column = deliveries_df['origin']
origin_df = pd.json_normalize(origin_column)
origin_df.head()

Unnamed: 0,lng,lat
0,-48.054989,-15.838145
1,-48.054989,-15.838145
2,-48.054989,-15.838145
3,-47.893662,-15.805118
4,-48.054989,-15.838145


In [29]:
# Swap the nested 'origin' column for the flattened columns of origin_df,
# pairing rows by index label on both sides.
deliveries_df = (
    deliveries_df
    .drop(columns='origin')
    .merge(origin_df, how='inner', left_index=True, right_index=True)
)
deliveries_df.head()

Unnamed: 0,name,region,vehicle_capacity,deliveries,lng,lat
0,cvrp-2-df-33,df-2,180,"[{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'p...",-48.054989,-15.838145
1,cvrp-2-df-73,df-2,180,"[{'id': 'bf3fc630b1c29601a4caf1bdd474b85', 'po...",-48.054989,-15.838145
2,cvrp-2-df-20,df-2,180,"[{'id': 'b30f1145a2ba4e0b9ac0162b68d045c3', 'p...",-48.054989,-15.838145
3,cvrp-1-df-71,df-1,180,"[{'id': 'be3ed547394196c12c7c27c89ac74ed6', 'p...",-47.893662,-15.805118
4,cvrp-2-df-87,df-2,180,"[{'id': 'a6328fb4dc0654eb28a996a270b0f6e4', 'p...",-48.054989,-15.838145


In [31]:
# Prefix the coordinate columns with 'origin_' so they cannot be confused with
# any other lng/lat pair added to the frame later on.
deliveries_df.rename(
    columns={
        'lng': 'origin_lng',
        'lat': 'origin_lat',
    },
    inplace=True,
)
deliveries_df.head()

Unnamed: 0,name,region,vehicle_capacity,deliveries,origin_lng,origin_lat
0,cvrp-2-df-33,df-2,180,"[{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'p...",-48.054989,-15.838145
1,cvrp-2-df-73,df-2,180,"[{'id': 'bf3fc630b1c29601a4caf1bdd474b85', 'po...",-48.054989,-15.838145
2,cvrp-2-df-20,df-2,180,"[{'id': 'b30f1145a2ba4e0b9ac0162b68d045c3', 'p...",-48.054989,-15.838145
3,cvrp-1-df-71,df-1,180,"[{'id': 'be3ed547394196c12c7c27c89ac74ed6', 'p...",-47.893662,-15.805118
4,cvrp-2-df-87,df-2,180,"[{'id': 'a6328fb4dc0654eb28a996a270b0f6e4', 'p...",-48.054989,-15.838145


In [33]:
# Give every element of each 'deliveries' list its own row. The index label of
# the source row is repeated for all of its elements.
deliveries_only_df = deliveries_df[['deliveries']]
deliveries_exploded_df = deliveries_only_df.explode(column='deliveries')
deliveries_exploded_df.head()

Unnamed: 0,deliveries
0,"{'id': '313483a19d2f8d65cd5024c8d215cfbd', 'po..."
0,"{'id': '320c94b17aa685c939b3f3244c3099de', 'po..."
0,"{'id': '3663b42f4b8decb33059febaba46d5c8', 'po..."
0,"{'id': 'e11ab58363c38d6abc90d5fba87b7d7', 'poi..."
0,"{'id': '54cb45b7bbbd4e34e7150900f92d7f4b', 'po..."


In [34]:
# One delivery record (a dict) per row after the explode step.
delivery_records = deliveries_exploded_df['deliveries']

# Output column name -> how to pull that value out of a single delivery record.
field_extractors = {
    'delivery_size': lambda record: record['size'],
    'destiny_lng': lambda record: record['point']['lng'],
    'destiny_lat': lambda record: record['point']['lat'],
}

# Build one single-column frame per extracted field and place them side by
# side; all of them share the index of deliveries_exploded_df.
deliveries_normalized_df = pd.concat(
    [
        delivery_records.apply(extract).to_frame(name=column_name)
        for column_name, extract in field_extractors.items()
    ],
    axis=1,
)

deliveries_normalized_df.head()

Unnamed: 0,delivery_size,destiny_lng,destiny_lat
0,9,-48.116189,-15.848929
0,2,-48.118195,-15.850772
0,1,-48.112483,-15.847871
0,2,-48.118023,-15.846471
0,7,-48.114898,-15.858055


In [36]:
# Replace the nested 'deliveries' column with the flattened per-delivery
# columns. The right-hand frame repeats each index label once per delivery, so
# a right merge on the index yields one output row per delivery.
deliveries_df = (
    deliveries_df
    .drop(columns='deliveries')
    .merge(deliveries_normalized_df, how='right', left_index=True, right_index=True)
)
deliveries_df.head()

Unnamed: 0,name,region,vehicle_capacity,origin_lng,origin_lat,delivery_size,destiny_lng,destiny_lat
0,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,9,-48.116189,-15.848929
0,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,2,-48.118195,-15.850772
0,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,1,-48.112483,-15.847871
0,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,2,-48.118023,-15.846471
0,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,7,-48.114898,-15.858055


In [38]:
# The index still carries the repeated labels from before the explode step.
# Replace it with a fresh 0..n-1 range and discard the old labels.
deliveries_df.reset_index(
    drop=True,
    inplace=True,
)
deliveries_df.head()

Unnamed: 0,name,region,vehicle_capacity,origin_lng,origin_lat,delivery_size,destiny_lng,destiny_lat
0,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,9,-48.116189,-15.848929
1,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,2,-48.118195,-15.850772
2,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,1,-48.112483,-15.847871
3,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,2,-48.118023,-15.846471
4,cvrp-2-df-33,df-2,180,-48.054989,-15.838145,7,-48.114898,-15.858055


In [40]:
# Sanity check of the final frame: index range, per-column non-null counts,
# dtypes, and memory usage.
deliveries_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 636149 entries, 0 to 636148
Data columns (total 8 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   name              636149 non-null  object 
 1   region            636149 non-null  object 
 2   vehicle_capacity  636149 non-null  int64  
 3   origin_lng        636149 non-null  float64
 4   origin_lat        636149 non-null  float64
 5   delivery_size     636149 non-null  int64  
 6   destiny_lng       636149 non-null  float64
 7   destiny_lat       636149 non-null  float64
dtypes: float64(4), int64(2), object(2)
memory usage: 38.8+ MB


In [41]:
# Persist the flattened frame as a semicolon-separated CSV in the working
# directory, leaving the row index out of the file.
deliveries_df.to_csv(
    'deliveries_loggi.csv',
    index=False,
    sep=';',
)