# Read JSON Format Data

JSON stands for JavaScript Object Notation (although it is not JavaScript!). It is a text-based data format whose structure closely resembles Python dictionaries and lists.

### Creating JSON Data

In [None]:
import json

In [None]:
#create sample data: a dict with a single top-level key, "pets", that holds
#a list of pet records; every record uses the same keys and value types

data = {
    "pets": [
        {
            "name": "Goku",
            "type": "dog",
            "age": 4,
            "fav_food": "bone",
            "hobbies": ["whining", "licking butts"],
        },
        {
            "name": "Mika",
            "type": "dog",
            "age": 5,
            "fav_food": "bacon",
            "hobbies": ["relaxing", "ripping toys"],
        },
        {
            "name": "Brunson",
            "type": "cat",
            "age": 5,
            "fav_food": "undecided",
            #kept as a one-element list so "hobbies" is a list for every pet
            #(a bare string would be iterated character by character)
            "hobbies": ["being outside"],
        },
        {
            "name": "Tiny",
            "type": "dog",
            "age": 4,
            "fav_food": "steak",
            "hobbies": ["barking", "sitting outside"],
        },
    ]
}

In [None]:
#serialize the data dict as JSON and save it to petdata.json
#(the with-block closes the file once the dump is finished)
with open('petdata.json', mode='w') as json_out:
    json.dump(data, json_out)

In [None]:
#alternatively, serialize the data to a Python string with json.dumps
#so the JSON text can be worked with in memory instead of going through a file
pet_string = json.dumps(data)
pet_string

In [None]:
#re-serialize with indent=4 so nested objects are spread over indented lines,
#then print the string so the line breaks are rendered
pet_string = json.dumps(data, indent=4)
print(pet_string)

### Load in JSON Data

In [None]:
#read petdata.json back in and parse its contents into Python objects
with open("petdata.json", mode='r') as json_in:
    data = json.load(json_in)

#display the parsed result
data

In [None]:
#or parse the JSON text held in the in-memory string with json.loads
data = json.loads(pet_string)
data

## NBA JSON Data

Data collected from Sports Reference LLC that has NBA games played from 1985-2013.

Data source: https://data.mendeley.com/datasets/ct8f9skv97/1

In [1]:
import pandas as pd
import json

In [14]:
#use bash head command to see a little bit of the JSON file
#look at data structure

%%bash

head ./nbagames.json'

SyntaxError: invalid syntax (<ipython-input-14-087a9e1de782>, line 4)

In [9]:
#alternate dataset location, left commented out
#filepath = "datasets/nbagames.json"
#path to the NBA games file, relative to the current working directory
filepath = "./nbagames.json"

In [10]:
#the file holds individual JSON objects, one per line, not separated by commas
#each line is parsed on its own and collected into one big list
#so values can be extracted from the keys of every game

with open(filepath, encoding="utf-8") as file:
    #skip blank lines: json.loads on an empty string raises JSONDecodeError
    data = [json.loads(line) for line in file if line.strip()]

In [11]:
#verify that the loaded data is a list
type(data)

list

In [12]:
#check the number of items in the data list
#each item is one game, so this is the total number of games in the dataset
len(data)

31686

#### Verify values within the data structure

In [15]:
#each element of the list is one basketball game; show the first one
data[0]

{'_id': {'$oid': '52f29f91ddbd75540aba6dae'},
 'date': {'$date': '1985-10-25T04:00:00.000+0000'},
 'teams': [{'abbreviation': 'WSB',
   'city': 'Washington',
   'home': False,
   'name': 'Washington Bullets',
   'players': [{'ast': 9,
     'blk': 2,
     'drb': 8,
     'fg': 8,
     'fg3': 0,
     'fg3_pct': '',
     'fg3a': 0,
     'fg_pct': '.533',
     'fga': 15,
     'ft': 3,
     'ft_pct': '.750',
     'fta': 4,
     'mp': '41:00',
     'orb': 6,
     'pf': 3,
     'player': 'Jeff Ruland',
     'pts': 19,
     'stl': 1,
     'tov': 5,
     'trb': 14},
    {'ast': 2,
     'blk': 1,
     'drb': 7,
     'fg': 9,
     'fg3': 0,
     'fg3_pct': '',
     'fg3a': 0,
     'fg_pct': '.643',
     'fga': 14,
     'ft': 4,
     'ft_pct': '.667',
     'fta': 6,
     'mp': '36:00',
     'orb': 1,
     'pf': 1,
     'player': 'Cliff Robinson',
     'pts': 22,
     'stl': 3,
     'tov': 5,
     'trb': 8},
    {'ast': 4,
     'blk': 0,
     'drb': 0,
     'fg': 8,
     'fg3': 0,
     'fg3_pct': '.

In [16]:
#the 'teams' key of a game holds a list of team dicts; show it for the first game
data[0]['teams']

[{'abbreviation': 'WSB',
  'city': 'Washington',
  'home': False,
  'name': 'Washington Bullets',
  'players': [{'ast': 9,
    'blk': 2,
    'drb': 8,
    'fg': 8,
    'fg3': 0,
    'fg3_pct': '',
    'fg3a': 0,
    'fg_pct': '.533',
    'fga': 15,
    'ft': 3,
    'ft_pct': '.750',
    'fta': 4,
    'mp': '41:00',
    'orb': 6,
    'pf': 3,
    'player': 'Jeff Ruland',
    'pts': 19,
    'stl': 1,
    'tov': 5,
    'trb': 14},
   {'ast': 2,
    'blk': 1,
    'drb': 7,
    'fg': 9,
    'fg3': 0,
    'fg3_pct': '',
    'fg3a': 0,
    'fg_pct': '.643',
    'fga': 14,
    'ft': 4,
    'ft_pct': '.667',
    'fta': 6,
    'mp': '36:00',
    'orb': 1,
    'pf': 1,
    'player': 'Cliff Robinson',
    'pts': 22,
    'stl': 3,
    'tov': 5,
    'trb': 8},
   {'ast': 4,
    'blk': 0,
    'drb': 0,
    'fg': 8,
    'fg3': 0,
    'fg3_pct': '.000',
    'fg3a': 2,
    'fg_pct': '.571',
    'fga': 14,
    'ft': 5,
    'ft_pct': '1.000',
    'fta': 5,
    'mp': '30:00',
    'orb': 0,
    'pf': 0,
   

In [17]:
#get the date of the first game; the value is nested under 'date' -> '$date'
data[0]['date']['$date']

'1985-10-25T04:00:00.000+0000'

In [18]:
#get the abbreviation of the first team in the first game
data[0]['teams'][0]['abbreviation']

'WSB'

In [19]:
#get the name of the first player listed for the first team in the first game
data[0]['teams'][0]['players'][0]['player']

'Jeff Ruland'

In [40]:
#get the name of the second player listed for the second team in the second game
data[1]['teams'][1]['players'][1]['player']

'John Bagley'

In [20]:
#create one independent empty list per piece of information to collect:
#  datels    - date of game
#  abrvls    - city abbreviation
#  cityls    - name of city
#  homels    - T/F if home game
#  namels    - team name
#  playersls - list of players that played in the game
#  scorels   - final score for game
#  wonls     - 0/1 if won

datels, abrvls, cityls, homels = [], [], [], []
namels, playersls, scorels, wonls = [], [], [], []

In [21]:
#fill lists with data: every list receives exactly one entry per team per game
#NOTE: this cell only appends, so running it again without re-creating the
#empty lists first will duplicate every entry

for game in data:  #data[index]

    game_date = game['date']['$date']

    for team in game['teams']:  #data[index]['teams'][index]
        #record the date once per team (instead of twice per game) so datels
        #stays aligned with the other lists however many teams a game lists
        datels.append(game_date)
        abrvls.append(team['abbreviation'])
        cityls.append(team['city'])
        homels.append(team['home'])
        namels.append(team['name'])
        scorels.append(team['score'])
        wonls.append(team['won'])

        #names of the players listed for this team in this game
        #data[index]['teams'][index]['players'][index]
        playersls.append([player['player'] for player in team['players']])

In [22]:
#verify the number of entries in the date list
len(datels)

63372

In [23]:
#see the first 20 values in the date list
datels[:20]

['1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-25T04:00:00.000+0000',
 '1985-10-26T04:00:00.000+0000',
 '1985-10-26T04:00:00.000+0000',
 '1985-10-26T04:00:00.000+0000',
 '1985-10-26T04:00:00.000+0000']

In [43]:
#check that players is a list of lists (one inner list of names per team entry)
#by showing the first 5 items in the players list
playersls[:5]

[['Jeff Ruland',
  'Cliff Robinson',
  'Gus Williams',
  'Jeff Malone',
  'Charles Jones',
  'Dan Roundfield',
  'Perry Moss',
  'Dudley Bradley',
  'Darren Daye',
  'Tom McMillen',
  'Manute Bol'],
 ['Dominique Wilkins',
  'Tree Rollins',
  'Cliff Levingston',
  'Spud Webb',
  'Kevin Willis',
  'Randy Wittman',
  'Scott Hastings',
  'Lorenzo Charles',
  'Sedric Toney',
  'Ray Williams',
  'Jon Koncak'],
 ['Kyle Macy',
  'Orlando Woolridge',
  'Michael Jordan',
  'Jawann Oldham',
  'Sidney Green',
  'Dave Corzine',
  'Charles Oakley',
  'George Gervin',
  'Gene Banks'],
 ['Roy Hinson',
  'John Bagley',
  'World B. Free',
  'Phil Hubbard',
  'Mark West',
  'Ben Poquette',
  'Melvin Turpin',
  'Ron Anderson',
  'Dirk Minniefield',
  'Edgar Jones'],
 ['Bill Laimbeer',
  'Kelly Tripucka',
  'Earl Cureton',
  'Isiah Thomas',
  'Vinnie Johnson',
  'Joe Dumars',
  'John Long',
  'Rick Mahorn',
  'Tony Campbell',
  'Kent Benson']]

In [54]:
#print the length of every list, one per line; all eight numbers should match
#order: date of game, city abbreviation, name of city, T/F if home game,
#       team name, list of players, final score, 0/1 if won

for column in (datels, abrvls, cityls, homels, namels, playersls, scorels, wonls):
    print(len(column))

63372
63372
63372
63372
63372
63372
63372
63372


In [55]:
#zip lists together into one list of tuples (one tuple per team per game)
#the argument order is the order I want my columns to be in
NBAlist1 = list(zip(namels, cityls, scorels, wonls))

#make list of column names, in the same order as the zipped lists
names = ['team_name', 'city', 'score', 'won_game']

#### Make gathered information into dataframe

In [56]:
#make the dataframe from the zipped rows, labelling the columns with names
df = pd.DataFrame(NBAlist1, columns=names)

#preview the first 5 rows
df.head(5)

Unnamed: 0,team_name,city,score,won_game
0,Washington Bullets,Washington,100,1
1,Atlanta Hawks,Atlanta,91,0
2,Chicago Bulls,Chicago,116,1
3,Cleveland Cavaliers,Cleveland,115,0
4,Detroit Pistons,Detroit,118,1


In [57]:
#zip all eight lists together into one list of tuples (one tuple per team per game)
#the argument order is the order I want my columns to be in
NBAlist = list(zip(datels, namels, abrvls, cityls, homels, scorels, wonls, playersls))

#make list of column names, in the same order as the zipped lists
#NOTE: this reassigns the names variable used for the smaller dataframe
names = ['date', 'team_name', 'abbrv', 'city', 'home_game', 'score', 'won_game', 'players']

In [59]:
#make the full dataframe from the zipped rows (this replaces the earlier df)
df = pd.DataFrame(NBAlist, columns=names)

#preview the first 10 rows
df.head(10)

Unnamed: 0,date,team_name,abbrv,city,home_game,score,won_game,players
0,1985-10-25T04:00:00.000+0000,Washington Bullets,WSB,Washington,False,100,1,"[Jeff Ruland, Cliff Robinson, Gus Williams, Je..."
1,1985-10-25T04:00:00.000+0000,Atlanta Hawks,ATL,Atlanta,True,91,0,"[Dominique Wilkins, Tree Rollins, Cliff Leving..."
2,1985-10-25T04:00:00.000+0000,Chicago Bulls,CHI,Chicago,True,116,1,"[Kyle Macy, Orlando Woolridge, Michael Jordan,..."
3,1985-10-25T04:00:00.000+0000,Cleveland Cavaliers,CLE,Cleveland,False,115,0,"[Roy Hinson, John Bagley, World B. Free, Phil ..."
4,1985-10-25T04:00:00.000+0000,Detroit Pistons,DET,Detroit,True,118,1,"[Bill Laimbeer, Kelly Tripucka, Earl Cureton, ..."
5,1985-10-25T04:00:00.000+0000,Milwaukee Bucks,MIL,Milwaukee,False,116,0,"[Sidney Moncrief, Paul Pressey, Craig Hodges, ..."
6,1985-10-25T04:00:00.000+0000,Denver Nuggets,DEN,Denver,False,119,1,"[Alex English, Calvin Natt, T.R. Dunn, Fat Lev..."
7,1985-10-25T04:00:00.000+0000,Golden State Warriors,GSW,Oakland,True,105,0,"[Joe Barry Carroll, Sleepy Floyd, Terry Teagle..."
8,1985-10-25T04:00:00.000+0000,New Jersey Nets,NJN,Brooklyn,True,113,1,"[Buck Williams, Darryl Dawkins, Mike O'Koren, ..."
9,1985-10-25T04:00:00.000+0000,Boston Celtics,BOS,Boston,False,109,0,"[Dennis Johnson, Kevin McHale, Danny Ainge, La..."


In [None]:
#make the dataframe
#NOTE(review): this cell repeats the dataframe construction from the cell
#directly above and has not been executed; consider removing it
df = pd.DataFrame(NBAlist, columns=names)

#preview the first 10 rows
df.head(10)