# Create a Dataset from a JSON file

This example shows how to create a CartoDataFrame from a remote JSON file with pandas, walking through the data-cleaning steps along the way.

In [1]:
import pandas
import requests

from cartoframes import CartoDataFrame
from pandas.io.json import json_normalize

# NOTE(review): the same request is issued again in the "Download the JSON file"
# step below; one of the two is redundant.
remote_file_path = 'http://opendata.paris.fr/api/records/1.0/search/?dataset=arbresremarquablesparis&rows=200'

# Bound the wait so a stalled server cannot hang the kernel, and fail loudly on
# an HTTP error instead of surfacing it later as a KeyError / JSON decode error.
response = requests.get(remote_file_path, timeout=30)
response.raise_for_status()

# The payload keeps the individual entries under its 'records' key.
data_json = response.json()['records']

# Show the top-level keys of the first record.
data_json[0].keys()

dict_keys(['datasetid', 'recordid', 'fields', 'geometry', 'record_timestamp'])

## 1. Download the JSON file

In [2]:
remote_file_path = 'http://opendata.paris.fr/api/records/1.0/search/?dataset=arbresremarquablesparis&rows=200'

# Bound the wait so a stalled server cannot hang the kernel, and fail loudly on
# an HTTP error instead of surfacing it later as a KeyError / JSON decode error.
response = requests.get(remote_file_path, timeout=30)
response.raise_for_status()

# The payload keeps the individual entries under its 'records' key.
data_json = response.json()['records']

# Show the top-level keys of the first record.
data_json[0].keys()

dict_keys(['datasetid', 'recordid', 'fields', 'geometry', 'record_timestamp'])

## 2. Normalize the JSON object

In [3]:
# Flatten the list of nested records into a table; nested keys show up as
# dotted column names (e.g. 'fields.genre', 'geometry.coordinates').
df = json_normalize(data=data_json)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,datasetid,recordid,record_timestamp,fields.geom_x_y,fields.libellefrancais,fields.objectid,fields.idemplacement,fields.arrondissement,fields.circonferenceencm,fields.hauteurenm,...,fields.stadedeveloppement,fields.remarquable,fields.idbase,fields.genre,fields.complementadresse,fields.typeemplacement,fields.dateplantation,geometry.type,geometry.coordinates,fields.varieteoucultivar
0,arbresremarquablesparis,e487da020b610c6486f5091d3763f7527c5979bb,2019-11-15T11:01:10.674000+00:00,"[48.8740821646, 2.25916497406]",Catalpa,117258,001301001,BOIS DE BOULOGNE,397.0,14.0,...,M,1,2009236.0,Catalpa,16-62,Arbre,1910-01-01T00:09:21+00:00,Point,"[2.25916497406, 48.8740821646]",
1,arbresremarquablesparis,15098ee0bf77d00043de65e6fa0d13d62f8d9492,2019-11-15T11:01:10.674000+00:00,"[48.8830293243, 2.37006279721]",Tilleul,125473,000103001,PARIS 19E ARRDT,206.0,20.0,...,M,1,235972.0,Tilia,19-05,Arbre,1945-01-01T01:00:00+00:00,Point,"[2.37006279721, 48.8830293243]",
2,arbresremarquablesparis,7e6691b002c010bc69864dec1d7edf629ad872d6,2019-11-15T11:01:10.674000+00:00,"[48.8729000638, 2.29056540474]",Mûrier,178971,000103010,PARIS 16E ARRDT,150.0,8.0,...,M,1,114685.0,Morus,PELOUSE 14 - 11 à 27,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.29056540474, 48.8729000638]",
3,arbresremarquablesparis,64d09421623741a5820e258899dd6ef3b751c55d,2019-11-15T11:01:10.674000+00:00,"[48.8597425245, 2.39998313637]",Marronnier,207385,D00000076032,PARIS 20E ARRDT,347.0,22.0,...,M,1,147672.0,Aesculus,20-15,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.39998313637, 48.8597425245]",
4,arbresremarquablesparis,ea046c1897897ea2237cbf1315024276e773bc91,2019-11-15T11:01:10.674000+00:00,"[48.839886234, 2.43388312679]",Hêtre,104549,12-22,BOIS DE VINCENNES,414.0,16.0,...,M,1,2002372.0,Fagus,12-22,Arbre,1864-01-01T00:09:21+00:00,Point,"[2.43388312679, 48.839886234]",''Pendula''


## 3. Add Latitude and Longitude columns

In [4]:
# Each 'geometry.coordinates' value is a [longitude, latitude] pair.
# Extract both with the vectorised .str accessor instead of a row-wise apply:
# a record with no geometry (NaN in this column) then yields NaN rather than
# raising a TypeError when it is indexed.
coordinates = df['geometry.coordinates']
df['lng'] = coordinates.str[0].astype(float)
df['lat'] = coordinates.str[1].astype(float)

# Preview the frame with the two new columns appended.
df.head()

Unnamed: 0,datasetid,recordid,record_timestamp,fields.geom_x_y,fields.libellefrancais,fields.objectid,fields.idemplacement,fields.arrondissement,fields.circonferenceencm,fields.hauteurenm,...,fields.idbase,fields.genre,fields.complementadresse,fields.typeemplacement,fields.dateplantation,geometry.type,geometry.coordinates,fields.varieteoucultivar,lng,lat
0,arbresremarquablesparis,e487da020b610c6486f5091d3763f7527c5979bb,2019-11-15T11:01:10.674000+00:00,"[48.8740821646, 2.25916497406]",Catalpa,117258,001301001,BOIS DE BOULOGNE,397.0,14.0,...,2009236.0,Catalpa,16-62,Arbre,1910-01-01T00:09:21+00:00,Point,"[2.25916497406, 48.8740821646]",,2.259165,48.874082
1,arbresremarquablesparis,15098ee0bf77d00043de65e6fa0d13d62f8d9492,2019-11-15T11:01:10.674000+00:00,"[48.8830293243, 2.37006279721]",Tilleul,125473,000103001,PARIS 19E ARRDT,206.0,20.0,...,235972.0,Tilia,19-05,Arbre,1945-01-01T01:00:00+00:00,Point,"[2.37006279721, 48.8830293243]",,2.370063,48.883029
2,arbresremarquablesparis,7e6691b002c010bc69864dec1d7edf629ad872d6,2019-11-15T11:01:10.674000+00:00,"[48.8729000638, 2.29056540474]",Mûrier,178971,000103010,PARIS 16E ARRDT,150.0,8.0,...,114685.0,Morus,PELOUSE 14 - 11 à 27,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.29056540474, 48.8729000638]",,2.290565,48.8729
3,arbresremarquablesparis,64d09421623741a5820e258899dd6ef3b751c55d,2019-11-15T11:01:10.674000+00:00,"[48.8597425245, 2.39998313637]",Marronnier,207385,D00000076032,PARIS 20E ARRDT,347.0,22.0,...,147672.0,Aesculus,20-15,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.39998313637, 48.8597425245]",,2.399983,48.859743
4,arbresremarquablesparis,ea046c1897897ea2237cbf1315024276e773bc91,2019-11-15T11:01:10.674000+00:00,"[48.839886234, 2.43388312679]",Hêtre,104549,12-22,BOIS DE VINCENNES,414.0,16.0,...,2002372.0,Fagus,12-22,Arbre,1864-01-01T00:09:21+00:00,Point,"[2.43388312679, 48.839886234]",''Pendula'',2.433883,48.839886


In [5]:
# Wrap the pandas frame in a CartoDataFrame and run its conversion step.
# In the output shown for this cell, the lng/lat columns no longer appear and
# a 'geometry' column holding POINT values is present instead.
cdf = (
    CartoDataFrame(df)
    .convert()
)

# Preview the first five rows.
cdf.head(n=5)

Unnamed: 0,datasetid,recordid,record_timestamp,fields.geom_x_y,fields.libellefrancais,fields.objectid,fields.idemplacement,fields.arrondissement,fields.circonferenceencm,fields.hauteurenm,...,fields.remarquable,fields.idbase,fields.genre,fields.complementadresse,fields.typeemplacement,fields.dateplantation,geometry.type,geometry.coordinates,fields.varieteoucultivar,geometry
0,arbresremarquablesparis,e487da020b610c6486f5091d3763f7527c5979bb,2019-11-15T11:01:10.674000+00:00,"[48.8740821646, 2.25916497406]",Catalpa,117258,001301001,BOIS DE BOULOGNE,397.0,14.0,...,1,2009236.0,Catalpa,16-62,Arbre,1910-01-01T00:09:21+00:00,Point,"[2.25916497406, 48.8740821646]",,POINT (2.25916 48.87408)
1,arbresremarquablesparis,15098ee0bf77d00043de65e6fa0d13d62f8d9492,2019-11-15T11:01:10.674000+00:00,"[48.8830293243, 2.37006279721]",Tilleul,125473,000103001,PARIS 19E ARRDT,206.0,20.0,...,1,235972.0,Tilia,19-05,Arbre,1945-01-01T01:00:00+00:00,Point,"[2.37006279721, 48.8830293243]",,POINT (2.37006 48.88303)
2,arbresremarquablesparis,7e6691b002c010bc69864dec1d7edf629ad872d6,2019-11-15T11:01:10.674000+00:00,"[48.8729000638, 2.29056540474]",Mûrier,178971,000103010,PARIS 16E ARRDT,150.0,8.0,...,1,114685.0,Morus,PELOUSE 14 - 11 à 27,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.29056540474, 48.8729000638]",,POINT (2.29057 48.87290)
3,arbresremarquablesparis,64d09421623741a5820e258899dd6ef3b751c55d,2019-11-15T11:01:10.674000+00:00,"[48.8597425245, 2.39998313637]",Marronnier,207385,D00000076032,PARIS 20E ARRDT,347.0,22.0,...,1,147672.0,Aesculus,20-15,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.39998313637, 48.8597425245]",,POINT (2.39998 48.85974)
4,arbresremarquablesparis,ea046c1897897ea2237cbf1315024276e773bc91,2019-11-15T11:01:10.674000+00:00,"[48.839886234, 2.43388312679]",Hêtre,104549,12-22,BOIS DE VINCENNES,414.0,16.0,...,1,2002372.0,Fagus,12-22,Arbre,1864-01-01T00:09:21+00:00,Point,"[2.43388312679, 48.839886234]",''Pendula'',POINT (2.43388 48.83989)


In [6]:
# Layer comes from cartoframes' visualization module and is imported right
# before it is used.
from cartoframes.viz import Layer

# Build a layer from the converted frame; as the cell's last expression, the
# resulting object is what the notebook displays.
Layer(cdf)