# JSON

In [19]:
from pprint import pprint

import os
import json
import pandas as pd

## Working with Files

To read a JSON file (local path or URL) into a DataFrame, use `pd.read_json()`.

In [7]:
# Sample dataset hosted on GitHub; read_json accepts a URL as well as a local path.
url = 'https://raw.githubusercontent.com/chrisalbon/simulated_datasets/master/data.json'
first_json = pd.read_json(path_or_buf=url)
# Preview only the first five rows rather than the whole frame.
first_json.head(n=5)

Unnamed: 0,integer,datetime,category
0,5,2015-01-01 00:00:00,0
1,5,2015-01-01 00:00:01,0
2,9,2015-01-01 00:00:02,0
3,6,2015-01-01 00:00:03,0
4,6,2015-01-01 00:00:04,0


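To read nested JSON files, `pd.read_json()` may not be enough: load the file with the `json` module and flatten it with `pd.json_normalize()`.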

In [15]:
# Demonstration: pandas raises ValueError when asked to read this nested file
# directly, so catch it and report the failure instead of stopping the notebook.
nested_path = './_data/nested.json'
try:
    df = pd.read_json(nested_path)
except ValueError:
    print('Cannot read nested JSON.')

Cannot read nested JSON.


In [20]:
# Parse the file with the standard-library json module to get plain Python objects.
# JSON text is UTF-8 by specification, so state the encoding explicitly instead of
# relying on the platform-dependent default that open() would otherwise pick.
with open('./_data/nested.json', encoding='utf-8') as f:
    nested_json = json.load(f)

# Show the container type and the nested structure before flattening it.
print(type(nested_json))
pprint(nested_json)

<class 'dict'>
{'article': [{'author': 'Allen',
              'edition': 'first',
              'id': '01',
              'language': 'JSON'},
             {'author': 'Aditya Sharma',
              'edition': 'second',
              'id': '02',
              'language': 'Python'}],
 'blog': [{'URL': 'datacamp.com', 'name': 'Datacamp'}]}


In [21]:
# Without a record_path, each top-level key becomes a single column and the
# lists stored under those keys are left unexpanded.
pd.json_normalize(data=nested_json)

Unnamed: 0,article,blog
0,"[{'id': '01', 'language': 'JSON', 'edition': '...","[{'name': 'Datacamp', 'URL': 'datacamp.com'}]"


In [27]:
# Flatten each top-level key separately: pointing record_path at a key turns
# the list stored under it into its own table.
blog, article = (
    pd.json_normalize(nested_json, record_path=key)
    for key in ('blog', 'article')
)
print(blog, '\n')
print(article)

       name           URL
0  Datacamp  datacamp.com 

   id language edition         author
0  01     JSON   first          Allen
1  02   Python  second  Aditya Sharma


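To write a DataFrame to a JSON file, use `DataFrame.to_json()`; the `orient` parameter controls the layout of the output.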

In [9]:
# Make sure the output directory exists; exist_ok avoids an error on re-runs.
os.makedirs('./_data', exist_ok=True)

# Write the same frame once per orient so the resulting layouts can be compared.
orient_targets = {
    'columns': './_data/json_columns.json',
    'index': './_data/json_index.json',
}
for orient, json_path in orient_targets.items():
    first_json.to_json(json_path, orient=orient)

`json_normalize()` has 3 main parameters:
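- data: the input data (a dict or a list of dicts)
- record_path: the path to the nested list whose items become the rows of the result
- meta: fields of the parent record to carry along unchanged as extra columns on every row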

In [29]:
# Sample records: one dict per state, each with scalar fields, a nested
# "info" dict, and a "counties" list holding one dict per county.
data = [
    {
        "state": "Florida",
        "shortname": "FL",
        "info": {"governor": "Rick Scott"},
        "counties": [
            {"name": "Dade", "population": 12345},
            {"name": "Broward", "population": 40000},
            {"name": "Palm Beach", "population": 60000},
        ],
    },
    {
        "state": "Ohio",
        "shortname": "OH",
        "info": {"governor": "John Kasich"},
        "counties": [
            {"name": "Summit", "population": 1234},
            {"name": "Cuyahoga", "population": 1337},
        ],
    },
]

In [30]:
# Nested dicts are flattened into dotted column names (info.governor), while
# the list stored under "counties" is kept as a single unexpanded column.
pd.json_normalize(data=data)

Unnamed: 0,state,shortname,counties,info.governor
0,Florida,FL,"[{'name': 'Dade', 'population': 12345}, {'name...",Rick Scott
1,Ohio,OH,"[{'name': 'Summit', 'population': 1234}, {'nam...",John Kasich


In [31]:
# Produce one row per entry of "counties" and attach the parent record's
# state, shortname and nested info.governor to every row.
pd.json_normalize(
    data,
    record_path='counties',
    meta=['state', 'shortname', ['info', 'governor']],
)


Unnamed: 0,name,population,state,shortname,info.governor
0,Dade,12345,Florida,FL,Rick Scott
1,Broward,40000,Florida,FL,Rick Scott
2,Palm Beach,60000,Florida,FL,Rick Scott
3,Summit,1234,Ohio,OH,John Kasich
4,Cuyahoga,1337,Ohio,OH,John Kasich
