In [21]:
import pandas as pd

# JSON

JavaScript Object Notation 

```
{"menu": {
  "id": "file",
  "value": "File",
  "popup": {
    "menuitem": [
      {"value": "New", "onclick": "CreateNewDoc()"},
      {"value": "Open", "onclick": "OpenDoc()"},
      {"value": "Close", "onclick": "CloseDoc()"}
    ]
  }
}}
```

Can have these types in it:

- objects
- arrays
- strings
- numbers
- boolean (true/false)
- null

## Using Python

https://docs.python.org/3.6/library/json.html

In [24]:
import json
f = open('sample.json')
data = json.load(f)
# What does this look like?
data

{'meta': {'view': {'id': '8dhd-zvi6',
   'name': '2001 Campaign Payments',
   'attribution': 'Campaign Finance Board (CFB)',
   'averageRating': 0,
   'category': 'City Government',
   'createdAt': 1315950830,
   'description': 'A listing of public funds payments for candidates for City office during the 2001 election cycle',
   'displayType': 'table',
   'downloadCount': 1470,
   'hideFromCatalog': False,
   'hideFromDataJson': False,
   'indexUpdatedAt': 1536596254,
   'newBackend': False,
   'numberOfComments': 0,
   'oid': 4140996,
   'provenance': 'official',
   'publicationAppendEnabled': False,
   'publicationDate': 1371845179,
   'publicationGroup': 240370,
   'publicationStage': 'published',
   'rowClass': '',
   'rowsUpdatedAt': 1371845177,
   'rowsUpdatedBy': '5fuc-pqz2',
   'tableId': 932968,
   'totalTimesRated': 0,
   'viewCount': 233,
   'viewLastModified': 1536605717,
   'viewType': 'tabular',
   'columns': [{'id': -1,
     'name': 'sid',
     'dataTypeName': 'meta_data

type(data)

## Creating a DF wiht JSON

In [32]:
# Need to see what's available
data.keys()

dict_keys(['meta', 'data'])

In [31]:
df_meta = pd.DataFrame.from_dict(data['meta'])
df_data = pd.DataFrame.from_dict(data['data'])

# XML

Extensible Markup Language

```
<note>
  <to>Yona</to>
  <from>Bani</from>
  <heading>Don't Forget!</heading>
  <body>We going to the party tomorrow?</body>
</note>
```

## Parsing with Python

https://docs.python.org/3.6/library/xml.html#

Parsing through nested nodes

### Getting our XML Tree

In [3]:
# Coding example
import xml.etree.ElementTree as ET
tree = ET.parse('sample.xml')
root = tree.getroot()

In [17]:
# Going though the tree
first_children = [node  for node in root]
grandchildren = [grandchild for node in root for grandchild in node]

{'_id': '2',
 '_uuid': '9D257416-581A-4C42-85CC-B6EAD9DED97F',
 '_position': '2',
 '_address': 'https://data.cityofnewyork.us/resource/_8dhd-zvi6/2'}

### Checking out our tree

In [None]:
print(first_children[0].tag)
print(first_children[0].row)

In [19]:
print(len(grandchildren))
print(grandchildren[0].tag)
print(grandchildren[0].attrib)

285
row
{'_id': '1', '_uuid': 'E3E9CC9F-7443-43F6-94AF-B5A0F802DBA1', '_position': '1', '_address': 'https://data.cityofnewyork.us/resource/_8dhd-zvi6/1'}


In [11]:
# Going through the whole treee
count = 0
cutoff = 10

for element in root.iter():
    count += 1
    if count < cutoff:
        print(element.tag, element.attrib)

response {}
row {}
row {'_id': '1', '_uuid': 'E3E9CC9F-7443-43F6-94AF-B5A0F802DBA1', '_position': '1', '_address': 'https://data.cityofnewyork.us/resource/_8dhd-zvi6/1'}
candid {}
candname {}
officeboro {}
canclass {}
row {'_id': '2', '_uuid': '9D257416-581A-4C42-85CC-B6EAD9DED97F', '_position': '2', '_address': 'https://data.cityofnewyork.us/resource/_8dhd-zvi6/2'}
election {}


## Creating a DF from XML

In [23]:
dfs = []
tree = ET.parse('sample.xml')
root = tree.getroot()

for n, element in enumerate(root.iter('row')):
    if n > 0:
        dfs.append(pd.DataFrame.from_dict(element.attrib, orient='index').transpose())
df = pd.concat(dfs)
print(len(df))
df.head()

285


Unnamed: 0,_id,_uuid,_position,_address
0,1,E3E9CC9F-7443-43F6-94AF-B5A0F802DBA1,1,https://data.cityofnewyork.us/resource/_8dhd-z...
0,2,9D257416-581A-4C42-85CC-B6EAD9DED97F,2,https://data.cityofnewyork.us/resource/_8dhd-z...
0,3,B80D7891-93CF-49E8-86E8-182B618E68F2,3,https://data.cityofnewyork.us/resource/_8dhd-z...
0,4,BB012003-78F5-406D-8A87-7FF8A425EE3F,4,https://data.cityofnewyork.us/resource/_8dhd-z...
0,5,945825F9-2F5D-47C2-A16B-75B93E61E1AD,5,https://data.cityofnewyork.us/resource/_8dhd-z...


# Comparing the two formats

From https://json.org/example.html

## JSON
```
{"menu": {
  "id": "file",
  "value": "File",
  "popup": {
    "menuitem": [
      {"value": "New", "onclick": "CreateNewDoc()"},
      {"value": "Open", "onclick": "OpenDoc()"},
      {"value": "Close", "onclick": "CloseDoc()"}
    ]
  }
}}
```
## XML
```
<menu id="file" value="File">
  <popup>
    <menuitem value="New" onclick="CreateNewDoc()" />
    <menuitem value="Open" onclick="OpenDoc()" />
    <menuitem value="Close" onclick="CloseDoc()" />
  </popup>
</menu>
```