In [1]:
import csv
import json # today's topic!

In [2]:
# Warmup 0: Recall how to read in the csv data and the cell function.

# source:  Automate the Boring Stuff with Python
def process_csv(filename):
    exampleFile = open(filename, encoding="utf-8")  
    exampleReader = csv.reader(exampleFile) 
    exampleData = list(exampleReader)        
    exampleFile.close()  
    return exampleData

survey_data = process_csv('cs220_survey_data.csv')
survey_header = survey_data[0]
survey_rows = survey_data[1:]

def cell(row_idx, col_name):
    col_idx = survey_header.index(col_name)
    val = survey_rows[row_idx][col_idx]
    if val == "":
        return None
    elif col_name in ["Age", "Zip Code"]:
        return int(val)
    elif col_name in ["Latitude", "Longitude"]:
        return float(val)
    else:
        return val


In [3]:
# Warmup 1: Put survey_data into buckets by lecture
#   Make a dictionary of lists
#   Key is the lecture
#   Value is the list of students, where each student is a dictionary

lecture_dict = {}
for i in range(len(survey_rows)):
    current_student = survey_rows[i]
    current_lecture = cell(i, 'Lecture')
    if current_lecture not in lecture_dict:
        lecture_dict[current_lecture] = []
    current_student = {}
    for header_value in survey_header:
        current_student[header_value] = cell(i, header_value)
    lecture_dict[current_lecture].append(current_student)
# lecture_dict

In [4]:
# Warmup 2: Find the average, min, and max age for our lecture (LEC005)

people_age = []
for person in lecture_dict["LEC005"]:
    current_age = person['Age']
    if current_age != None:
        current_age = int(current_age)
        people_age.append(current_age)
    
print(min(people_age))
print(max(people_age))
print(sum(people_age) / len(people_age))


17
39
19.13235294117647


In [5]:
# Warmup 3: Make a dictionary of each lecture's average age
# The key is the lecture name
# The value is the average age

lec_age_dict = {}
for lec in lecture_dict:
    current_lec_ages = []
    for person in lecture_dict[lec]:
        current_age = person['Age']
        if current_age != None:
            current_age = int(current_age)
            current_lec_ages.append(current_age)
    lec_age_dict[lec] = sum(current_lec_ages) / len(current_lec_ages)
lec_age_dict

{'LEC003': 19.320197044334975,
 'LEC004': 19.50785340314136,
 'LEC005': 19.13235294117647,
 'LEC001': 19.508130081300813,
 'LEC002': 19.47107438016529}

In [6]:
# Warmup 4: Same thing as before except...
# The key is the lecture name
# The value is a dictionary (nested dictionary!)
#  ... with keys 'avg', 'min', and 'max'

lec_age_dict = {}
for lec in lecture_dict:
    current_lec_ages = []
    for person in lecture_dict[lec]:
        current_age = person['Age']
        if current_age != None:
            current_age = int(current_age)
            current_lec_ages.append(current_age)
    lec_age_dict[lec] = {}
    lec_age_dict[lec]['avg'] = sum(current_lec_ages) / len(current_lec_ages)
    lec_age_dict[lec]['min'] = min(current_lec_ages)
    lec_age_dict[lec]['max'] = max(current_lec_ages)
lec_age_dict

{'LEC003': {'avg': 19.320197044334975, 'min': 17, 'max': 27},
 'LEC004': {'avg': 19.50785340314136, 'min': 17, 'max': 40},
 'LEC005': {'avg': 19.13235294117647, 'min': 17, 'max': 39},
 'LEC001': {'avg': 19.508130081300813, 'min': 17, 'max': 32},
 'LEC002': {'avg': 19.47107438016529, 'min': 17, 'max': 30}}

# CS220: Lecture 19


## Learning Objectives
After this lecture you will be able to...
- Interpret JSON formatted data and recognize differences between JSON and Python
- Deserialize data from JSON for use in Python programs (read)
- Serialize data into JSON for long term storage (write) 

In [7]:
# We will be looking at lecture slides to understand JSON

In [8]:
# Deserialize
def read_json(path):
    with open(path, encoding="utf-8") as f: # f is a variable 
        return json.load(f)                 # f represents a reference the JSON file
    
# Serialize
def write_json(path, data):
    with open(path, 'w', encoding="utf-8") as f:
        json.dump(data, f, indent=2)

In [9]:
# first, let's take a look at the file score_history.json

In [10]:
# now let's read it in and investigate the data
scores_dict = read_json('score_history.json')
print(type(scores_dict))
print(scores_dict.keys())
print(scores_dict['bob'])
print(scores_dict)

scores_dict['cole'] = [50.0, 20.0]
print(scores_dict)

<class 'dict'>
dict_keys(['bob', 'alice', 'meena'])
[20.0, 10.0]
{'bob': [20.0, 10.0], 'alice': [30.0, 20.0], 'meena': [100.0, 10.0]}
{'bob': [20.0, 10.0], 'alice': [30.0, 20.0], 'meena': [100.0, 10.0], 'cole': [50.0, 20.0]}


In [11]:
# Let's practice writing to a JSON file
# when I'm testing code, I like to name my output file differently from my input file
# so that I don't accidentally erase or overwrite my data
write_json('score_history2.json', scores_dict)  

### We can make JSON files in many varied ways
### This makes a list of dictionaries

In [12]:
# Code from last lecture, 
# reads in the survey data into a list of dicts
table_dict_list = []
for i in range(len(survey_rows)):
    row = survey_rows[i]
    row_dict = {}
    for item in survey_header: # iterate through each column name
        row_dict[item] = row[survey_header.index(item)] # find the value in the row using .index
    
    # add row_dict to table_dict_list
    table_dict_list.append(row_dict)
    
# table_dict_list # what is this? A list of dictionaries

In [13]:
# Let's write this list of dictionaries into a JSON file
write_json('cs220_as_json_list.json', table_dict_list)  

In [14]:
# Verify:  can you find this file in your directory? 

In [15]:
# let's write our original dictionary of lists - buckets into a JSON file
write_json('cs220_as_json_dict.json', lecture_dict)  

In [16]:
# Verify: can you find this file in your directory? 

### Many Web Sites have APIs that allow you to get their data

#### cs571.org

In [17]:
# Read cs571.json data
cs571_data = read_json('cs571.json')
# cs571_data

In [18]:
# What are each of the messages?
cs571_messages = cs571_data['messages']
for msg in cs571_messages:
    print(msg['content'])

hello!
hello!
test
vroom
test
Test
Even MORE generic content
More generic posting content
I need this to be generic content
tttt
i <3 react
so I went running for answers
The evils of lucy was all around me
I didn't want to self destruct
found myself screaming in a hotel room
resentment that turned into a great depressions
abusing my power full of resentment
sometimes I did the same
I remember you was conflicted misusing your influence
r
Hello
hello
You got this!
test again
test


In [19]:
# What are the unique posters?
posters = []
for msg in cs571_messages:
    posters.append(msg['poster'])
posters = list(set(posters))
posters

['k.dot',
 'krirk1',
 'robert',
 'testaccmc1234',
 'chase',
 'car',
 'w',
 'newAnkit',
 'b',
 'q',
 'g',
 'testtesttest1',
 'user']

#### Kiva.com Micro-lending site

In [20]:
# Take a look at kiva.json

# read it into a dictionary
kiva_dict = read_json('kiva.json')
# kiva_dict

In [21]:
# Plumbing the data
loan_list = kiva_dict['data']['lend']['loans']['values'] # this gives us a list of dicts
loan_list

[{'name': 'Polikseni',
  'description': "Polikseni is 70 years old and married. She and her husband are both retired and their main income is a retirement pension of $106 a month for Polikseni and disability income for her husband of $289 a month. <br /><br />Polikseni's husband, even though disabled, works in a very small shop as a watchmaker on short hours, just to provide additional income for his family and to feel useful. Polikseni's husband needs constant medical treatment due to his health problems. She requested another loan, which she will use to continue paying for the therapy her husband needs. With a part of the loan, she is going to pay the remainder of the previous loan.",
  'loanAmount': '1325.00',
  'geocode': {'city': 'Korce',
   'country': {'name': 'Albania',
    'region': 'Eastern Europe',
    'fundsLentInCountry': 9051250}}},
 {'name': 'Safarmo',
  'description': "Safarmo is 47 years old. She lives with her husband and her children in Khuroson district. <br /><br />

In [22]:
# what can we learn from this data?
for loan_dict in loan_list:
    print(type(loan_dict))
    for key in loan_dict:
        print(key)

<class 'dict'>
name
description
loanAmount
geocode
<class 'dict'>
name
description
loanAmount
geocode
<class 'dict'>
name
description
loanAmount
geocode
<class 'dict'>
name
description
loanAmount
geocode
<class 'dict'>
name
description
loanAmount
geocode


In [23]:
# print out all the names
for loan_dict in loan_list:
    print(loan_dict['name'])

Polikseni
Safarmo
Elizabeth
Ester
Cherifa


In [24]:
# print out the total amount to loan
tot_loan_amount = 0.0
for loan_dict in loan_list:
    tot_loan_amount += float(loan_dict['loanAmount'])
tot_loan_amount



4350.0

In [25]:
loan_amounts = []
for loan_dict in loan_list:
    loan_amounts.append(float(loan_dict['loanAmount']))
print(min(loan_amounts))
print(max(loan_amounts))
print(sum(loan_amounts) / len(loan_amounts))

275.0
1325.0
870.0


In [26]:
# print out all the country names
for loan_dict in loan_list:
    print(loan_dict['geocode']['country']['name'])

Albania
Tajikistan
Kenya
Kenya
Togo


In [27]:
# more complex APIs...
# https://static01.nyt.com/elections-assets/2020/data/api/2020-11-03/state-page/wisconsin.json

## After Lecture: Worksheet