In [1]:
import pandas as pd 

In [2]:
## Pandas provides the function read_json(), which can load JSON from either a file or a URL.

In [3]:
# Location of the sample dataset; read_json() is given the URL directly.
url = "https://raw.githubusercontent.com/chrisalbon/simulated_datasets/master/data.json"
# Load the JSON at that URL into a DataFrame.
first_json = pd.read_json(url)
# Preview the first rows as the cell's output.
first_json.head()

Unnamed: 0,integer,datetime,category
0,5,2015-01-01 00:00:00,0
1,5,2015-01-01 00:00:01,0
2,9,2015-01-01 00:00:02,0
3,6,2015-01-01 00:00:03,0
4,6,2015-01-01 00:00:04,0


In [4]:
## Writing JSON data is as simple as reading it and takes one line of code: instead of read_json(), you call to_json() with a filename, and that's all!

In [5]:
# Write the same DataFrame once per orientation so the two layouts can be
# compared side by side: json_columns.json and json_index.json.
for orient in ("columns", "index"):
    first_json.to_json(f"json_{orient}.json", orient=orient)

In [6]:
## If no output directory is specified, to_json() stores the file in the same directory as our notebook.
## Find the two files there, open them, and compare how their contents differ.
## These functions are the best option for dealing with JSON. However, they don't always work.

In [9]:
## So what about the nested JSON files?

## Open the file nested.json to see what it looks like, then try to load it into pandas with pd.read_json()

# read_json() cannot build a DataFrame from this nested file and raises a
# ValueError. The failure is the point of this cell, so catch it and show the
# message instead of letting it halt a "Restart & Run All".
try:
    df = pd.read_json("nested.json")
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: All arrays must be of the same length

In [None]:
## We can see that it doesn't work. Fortunately, we have another approach.
## It is not a pandas function but one from the json package, which ships with core Python.



In [None]:
import json


In [None]:
# Parse nested.json into plain Python objects with the standard-library json module.
# JSON text is UTF-8, so pin the encoding rather than relying on the
# platform-dependent default that open() would otherwise use.
with open('nested.json', encoding='utf-8') as f:
    nested_json = json.load(f)
# Show the parsed content and its Python type before handing it to pandas.
print(nested_json)
print(type(nested_json))

In [None]:
# Flatten the parsed JSON object into a DataFrame and display it.
pd.json_normalize(nested_json)

In [None]:
## We can see from above that the primary keys are the columns of the DataFrame. We were able to load it as a Pandas DataFrame but it still looks weird.

## We are going to add the record_path parameter to json_normalize() to focus on a specific key from the file:

In [None]:
# Expand only the records stored under the 'blog' key into rows.
# json_normalize is accessed through the pandas namespace (pd), because the
# bare name is never imported in this notebook and would raise a NameError.
blog = pd.json_normalize(nested_json, record_path='blog')
blog.head()

In [None]:
# Expand only the records stored under the 'article' key into rows.
# json_normalize is accessed through the pandas namespace (pd), because the
# bare name is never imported in this notebook and would raise a NameError.
article = pd.json_normalize(nested_json, record_path='article')
article.head()

In [None]:
## json_normalize() has 3 main parameters:

## 1) data - input data
## 2) record_path - nested elements
## 3) meta - elements to keep as they are (carried along as metadata for each record)

In [None]:
# Sample records as a list of dicts (already-parsed JSON, not a string).
# Each state has flat fields, a nested "info" object and a list of counties.
data = [
    {
        "state": "Florida",
        "shortname": "FL",
        "info": {"governor": "Rick Scott"},
        "counties": [
            {"name": "Dade", "population": 12345},
            {"name": "Broward", "population": 40000},
            {"name": "Palm Beach", "population": 60000},
        ],
    },
    {
        "state": "Ohio",
        "shortname": "OH",
        "info": {"governor": "John Kasich"},
        "counties": [
            {"name": "Summit", "population": 1234},
            {"name": "Cuyahoga", "population": 1337},
        ],
    },
]

In [None]:
# json_normalize is accessed through the pandas namespace (pd), because the
# bare name is never imported in this notebook and would raise a NameError.

# Plain flattening of the top-level records. Only the last expression of a
# cell is rendered, so this result is computed but not displayed.
pd.json_normalize(data)

# One row per county (record_path), with the state-level fields and the nested
# governor name repeated on each row (meta).
pd.json_normalize(data=data, record_path='counties', meta=['state', 'shortname', ['info', 'governor']])