# Create a Dataset from a JSON file

This example illustrates how to create a CartoDataFrame from a remote JSON file. It uses pandas to normalize and clean the downloaded data before building the geometry column.

In [1]:
import requests

# Download the JSON file.
# The timeout keeps this cell from hanging indefinitely if the server stalls,
# and raise_for_status() reports HTTP errors here instead of letting them
# surface as a confusing JSON/KeyError failure when the payload is parsed.
remote_file_path = 'http://opendata.paris.fr/api/records/1.0/search/?dataset=arbresremarquablesparis&rows=200'
response = requests.get(remote_file_path, timeout=30)
response.raise_for_status()

# Keep only the list of records from the response payload
data_json = response.json()['records']

# Show the top-level keys of the first record
data_json[0].keys()

dict_keys(['datasetid', 'recordid', 'fields', 'geometry', 'record_timestamp'])

In [2]:
# json_normalize lives in the top-level pandas namespace since pandas 1.0;
# the old pandas.io.json location is deprecated and is not importable on
# recent pandas releases. Prefer the current location and fall back to the
# legacy one only for older pandas versions.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

# Normalize the data: nested keys of each record are flattened into dotted
# column names (e.g. 'fields.genre', 'geometry.coordinates')
df = json_normalize(data_json)
df.head()

Unnamed: 0,datasetid,recordid,record_timestamp,fields.geom_x_y,fields.libellefrancais,fields.objectid,fields.idemplacement,fields.arrondissement,fields.circonferenceencm,fields.hauteurenm,...,fields.stadedeveloppement,fields.remarquable,fields.idbase,fields.genre,fields.complementadresse,fields.typeemplacement,fields.dateplantation,geometry.type,geometry.coordinates,fields.varieteoucultivar
0,arbresremarquablesparis,1e31d3f0c53902b8852d1406cbb485ab5a591688,2019-11-29T11:01:13.717000+00:00,"[48.8460598206, 2.25295516084]",Ailante,11972,60035,PARIS 16E ARRDT,495.0,22.0,...,M,1,112968.0,Ailanthus,16-47,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25295516084, 48.8460598206]",
1,arbresremarquablesparis,790790e4517a8ff950ea800c14b9b41a619f39f7,2019-11-29T11:01:13.717000+00:00,"[48.8462426895, 2.25137540594]",Erable,15499,40005,PARIS 16E ARRDT,210.0,16.0,...,M,1,123330.0,Acer,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25137540594, 48.8462426895]",
2,arbresremarquablesparis,6f417a150db8f783294fea063fe39bb8e324e68a,2019-11-29T11:01:13.717000+00:00,"[48.8460398085, 2.25406628275]",Micocoulier,24985,60082,PARIS 16E ARRDT,172.0,9.0,...,A,1,114452.0,Celtis,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25406628275, 48.8460398085]",
3,arbresremarquablesparis,3a381464d5fac7746b2d3fe1ed63d3614a236099,2019-11-29T11:01:13.717000+00:00,"[48.8709369341, 2.24803445349]",If,172685,701001,BOIS DE BOULOGNE,246.0,15.0,...,A,1,2002352.0,Taxus,16-21,Arbre,1772-01-01T00:09:21+00:00,Point,"[2.24803445349, 48.8709369341]",
4,arbresremarquablesparis,6f5f941f740f94230b51dec33c63f7cd155e0feb,2019-11-29T11:01:13.717000+00:00,"[48.876921106, 2.34671864206]",Platane,166794,108001,PARIS 9E ARRDT,465.0,25.0,...,M,1,317250.0,Platanus,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.34671864206, 48.876921106]",


In [3]:
# Split each [longitude, latitude] coordinate pair into two dedicated columns
coordinates = df['geometry.coordinates']
df['lng'] = coordinates.map(lambda point: point[0])  # first element: longitude
df['lat'] = coordinates.map(lambda point: point[1])  # second element: latitude
df.head()

Unnamed: 0,datasetid,recordid,record_timestamp,fields.geom_x_y,fields.libellefrancais,fields.objectid,fields.idemplacement,fields.arrondissement,fields.circonferenceencm,fields.hauteurenm,...,fields.idbase,fields.genre,fields.complementadresse,fields.typeemplacement,fields.dateplantation,geometry.type,geometry.coordinates,fields.varieteoucultivar,lng,lat
0,arbresremarquablesparis,1e31d3f0c53902b8852d1406cbb485ab5a591688,2019-11-29T11:01:13.717000+00:00,"[48.8460598206, 2.25295516084]",Ailante,11972,60035,PARIS 16E ARRDT,495.0,22.0,...,112968.0,Ailanthus,16-47,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25295516084, 48.8460598206]",,2.252955,48.84606
1,arbresremarquablesparis,790790e4517a8ff950ea800c14b9b41a619f39f7,2019-11-29T11:01:13.717000+00:00,"[48.8462426895, 2.25137540594]",Erable,15499,40005,PARIS 16E ARRDT,210.0,16.0,...,123330.0,Acer,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25137540594, 48.8462426895]",,2.251375,48.846243
2,arbresremarquablesparis,6f417a150db8f783294fea063fe39bb8e324e68a,2019-11-29T11:01:13.717000+00:00,"[48.8460398085, 2.25406628275]",Micocoulier,24985,60082,PARIS 16E ARRDT,172.0,9.0,...,114452.0,Celtis,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25406628275, 48.8460398085]",,2.254066,48.84604
3,arbresremarquablesparis,3a381464d5fac7746b2d3fe1ed63d3614a236099,2019-11-29T11:01:13.717000+00:00,"[48.8709369341, 2.24803445349]",If,172685,701001,BOIS DE BOULOGNE,246.0,15.0,...,2002352.0,Taxus,16-21,Arbre,1772-01-01T00:09:21+00:00,Point,"[2.24803445349, 48.8709369341]",,2.248034,48.870937
4,arbresremarquablesparis,6f5f941f740f94230b51dec33c63f7cd155e0feb,2019-11-29T11:01:13.717000+00:00,"[48.876921106, 2.34671864206]",Platane,166794,108001,PARIS 9E ARRDT,465.0,25.0,...,317250.0,Platanus,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.34671864206, 48.876921106]",,2.346719,48.876921


In [4]:
from cartoframes import CartoDataFrame

# Wrap the cleaned pandas DataFrame in a CartoDataFrame
cdf = CartoDataFrame(df)

# Set a geometry column from the coordinates: 'lng' is passed as x and 'lat'
# as y. The call's return value is not used; inplace=True is relied on to
# update `cdf` itself, which then carries a `geometry` column of points.
cdf.set_geometry_from_xy('lng', 'lat', inplace=True)

cdf.head()

Unnamed: 0,datasetid,recordid,record_timestamp,fields.geom_x_y,fields.libellefrancais,fields.objectid,fields.idemplacement,fields.arrondissement,fields.circonferenceencm,fields.hauteurenm,...,fields.genre,fields.complementadresse,fields.typeemplacement,fields.dateplantation,geometry.type,geometry.coordinates,fields.varieteoucultivar,lng,lat,geometry
0,arbresremarquablesparis,1e31d3f0c53902b8852d1406cbb485ab5a591688,2019-11-29T11:01:13.717000+00:00,"[48.8460598206, 2.25295516084]",Ailante,11972,60035,PARIS 16E ARRDT,495.0,22.0,...,Ailanthus,16-47,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25295516084, 48.8460598206]",,2.252955,48.84606,POINT (2.25296 48.84606)
1,arbresremarquablesparis,790790e4517a8ff950ea800c14b9b41a619f39f7,2019-11-29T11:01:13.717000+00:00,"[48.8462426895, 2.25137540594]",Erable,15499,40005,PARIS 16E ARRDT,210.0,16.0,...,Acer,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25137540594, 48.8462426895]",,2.251375,48.846243,POINT (2.25138 48.84624)
2,arbresremarquablesparis,6f417a150db8f783294fea063fe39bb8e324e68a,2019-11-29T11:01:13.717000+00:00,"[48.8460398085, 2.25406628275]",Micocoulier,24985,60082,PARIS 16E ARRDT,172.0,9.0,...,Celtis,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.25406628275, 48.8460398085]",,2.254066,48.84604,POINT (2.25407 48.84604)
3,arbresremarquablesparis,3a381464d5fac7746b2d3fe1ed63d3614a236099,2019-11-29T11:01:13.717000+00:00,"[48.8709369341, 2.24803445349]",If,172685,701001,BOIS DE BOULOGNE,246.0,15.0,...,Taxus,16-21,Arbre,1772-01-01T00:09:21+00:00,Point,"[2.24803445349, 48.8709369341]",,2.248034,48.870937,POINT (2.24803 48.87094)
4,arbresremarquablesparis,6f5f941f740f94230b51dec33c63f7cd155e0feb,2019-11-29T11:01:13.717000+00:00,"[48.876921106, 2.34671864206]",Platane,166794,108001,PARIS 9E ARRDT,465.0,25.0,...,Platanus,,Arbre,1700-01-01T00:09:21+00:00,Point,"[2.34671864206, 48.876921106]",,2.346719,48.876921,POINT (2.34672 48.87692)


In [5]:
# Display the CartoDataFrame through its viz() helper.
# NOTE(review): presumably renders a map of the geometry column; no output is
# captured in this notebook, so confirm against the cartoframes documentation.
cdf.viz()