In [1]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import requests
import json
from pandas import DataFrame

# requests module

## DEMO 1: Simple string example
- URL: https://www.msyamkumar.com/hello.txt

In [3]:
# Simple string example: download a plain-text file and look at its body.
url = "https://www.msyamkumar.com/hello.txt"
r = requests.get(url)

# Manual status check; requests.codes.ok is the named constant for HTTP 200.
assert r.status_code == requests.codes.ok

# The body is exposed as a str through the .text attribute.
print(type(r.text))
r.text

<class 'str'>


'Hello CS220 students! Welcome to my website. Hope you are staying safe and healthy!\n'

In [4]:
# Deliberately misspelled URL, used to show how a failed request is reported.
typo_url = "https://www.msyamkumar.com/hello.txttttttt"
r = requests.get(typo_url)

try:
    # raise_for_status() raises requests.HTTPError when the response carries
    # a 4xx or 5xx status code, and does nothing otherwise.
    r.raise_for_status()
except requests.HTTPError as err:
    # Show the error instead of letting it propagate, so that the remaining
    # cells still execute under "Restart Kernel -> Run All".
    print("HTTPError:", err)
else:
    print(r.text)

HTTPError: 404 Client Error: Not Found for url: https://www.msyamkumar.com/hello.txttttttt

## DEMO 2: JSON file example
- URL: https://www.msyamkumar.com/scores.json
- json.load(FILE_OBJECT)
- json.loads(STRING)

In [5]:
# JSON example: download a JSON document and decode it in two ways.
url = "https://www.msyamkumar.com/scores.json"
r = requests.get(url)
r.raise_for_status()
# Keep the response body as a plain str so it can be passed to json.loads.
urltext = r.text

# Long form: json.loads(STRING) turns the JSON text into a Python object.
print(type(json.loads(urltext)))

# Short form: Response.json() decodes the body without the explicit
# json.loads call.
d = r.json()
print(d)

<class 'dict'>
{'alice': 100, 'bob': 200, 'cindy': 300}


## DEMO 3: reddit json processing
- URL: https://www.reddit.com/r/UWMadison.json or https://www.msyamkumar.com/cs220/f20/materials/lectureDemo_code/lec-30/UWMadison.json

In [6]:
# Reddit sometimes answers with status 429 when the listing is downloaded
# too often, so request it exactly once. If that single request comes back
# with an error status, fall back to the copy of the same JSON hosted with
# the course materials instead of requiring a manual edit of this cell.
url = "https://www.reddit.com/r/UWMadison.json"
r = requests.get(url)

if not r.ok:  # Response.ok is False for 4xx/5xx status codes
    print("Reddit returned status", r.status_code, "- using the course copy instead")
    url = "https://www.msyamkumar.com/cs220/f20/materials/lectureDemo_code/lec-30/UWMadison.json"
    r = requests.get(url)

# Stop here if even the fallback download failed.
r.raise_for_status()
print(type(r.json()))

<class 'dict'>


### How to explore an unknown JSON?
- If you run into a dict, call its .keys() method to see which keys it contains
- If you run into a list, iterate over the list and print each item

In [7]:
# Decode the response held in `r` and explore the result one level at a time.
d = r.json()
# Earlier exploration steps, kept for reference: list the top-level keys,
# then look at the value stored under "kind".
#print(d.keys())
#print(d["kind"])
# Print everything stored under the "data" key.
print(d["data"])

{'modhash': '', 'dist': 25, 'children': [{'kind': 't3', 'data': {'approved_at_utc': None, 'subreddit': 'UWMadison', 'selftext': '', 'author_fullname': 't2_3r6u0pqt', 'saved': False, 'mod_reason_title': None, 'gilded': 0, 'clicked': False, 'title': 'This person clearly doesn’t care about attendance smh. /s', 'link_flair_richtext': [], 'subreddit_name_prefixed': 'r/UWMadison', 'hidden': False, 'pwls': 6, 'link_flair_css_class': None, 'downs': 0, 'top_awarded_type': None, 'hide_score': False, 'name': 't3_jrzboi', 'quarantine': False, 'link_flair_text_color': 'dark', 'upvote_ratio': 0.99, 'author_flair_background_color': None, 'subreddit_type': 'public', 'ups': 126, 'total_awards_received': 0, 'media_embed': {}, 'author_flair_template_id': None, 'is_original_content': False, 'user_reports': [], 'secure_media': None, 'is_reddit_media_domain': True, 'is_meta': False, 'category': None, 'secure_media_embed': {}, 'link_flair_text': None, 'can_mod_post': False, 'score': 126, 'approved_by': None,

In [8]:
# Only the last expression of a cell is echoed, so the type has to be printed
# explicitly; as a bare expression on the first line it was silently discarded.
print(type(d["data"]))
d["data"].keys()

dict_keys(['modhash', 'dist', 'children', 'after', 'before'])

In [9]:
print(type(d["data"]["children"]))

# Calling .keys() here is the point of the demo: it fails with AttributeError
# when the value is not a dict. Catch and print the error so the remaining
# cells still execute under "Restart Kernel -> Run All".
try:
    d["data"]["children"].keys()
except AttributeError as err:
    print("AttributeError:", err)

<class 'list'>


AttributeError: 'list' object has no attribute 'keys'

In [10]:
# Each element of "children" wraps the actual post under its own "data" key;
# print the score and title of every post.
for child in d["data"]["children"]:
    post = child["data"]
    print(post["score"], post["title"])

126 This person clearly doesn’t care about attendance smh. /s
98 Here we go again
60 Breaking my lease for next semester?
117 What resources does UW provide that you would otherwise have to pay for (and that people don’t know about)?
12 Sophomore Dorm
5 The Nick track
5 I want to cancel my housing contract and get an apartment for spring but they say theyre not releasing kids for that reason. They will release me if I say Im going to live back home, though. So what if I said I was studying remotely at home and got a campus apartment anyway?
22 UW Thanksgiving To Go - Including free meals for students in need
7 How can I meet people
51 What to do when I can’t afford food in college?
10 The DoIT Help Desk is hiring for remote student jobs!
3 Fresh Market - I'm looking for a job
9 For anyone who is in one of the bands/orchestras how is that going this semester?
8 Math 340 Professors
247 I've been procrastinating this morning/afternoon by making this graphic of our Capitol
5 Need advice
6 

## DEMO 4: State populations

In [11]:
# Base URL of the directory that holds the index file and the per-state files.
prefixURL = "https://www.msyamkumar.com/cs220/f20/materials/lectureDemo_code/lec-30/data/"
r = requests.get(prefixURL + "state_files.txt")
r.raise_for_status()

# One file name per line. splitlines() copes with both "\n" and "\r\n" line
# endings, and blank lines are dropped so that a trailing newline in the index
# file cannot produce an empty file name (which would later be requested as
# the bare directory URL).
stateFiles = [line.strip() for line in r.text.splitlines() if line.strip()]
stateFiles

['Alabama.json',
 'Alaska.json',
 'Arizona.json',
 'Arkansas.json',
 'California.json',
 'Colorado.json',
 'Connecticut.json',
 'Delaware.json',
 'Florida.json',
 'Georgia.json',
 'Hawaii.json',
 'Idaho.json',
 'Illinois.json',
 'Indiana.json',
 'Iowa.json',
 'Kansas.json',
 'Kentucky.json',
 'Louisiana.json',
 'Maine.json',
 'Maryland.json',
 'Massachusetts.json',
 'Michigan.json',
 'Minnesota.json',
 'Mississippi.json',
 'Missouri.json',
 'Montana.json',
 'Nebraska.json',
 'Nevada.json',
 'New_Hampshire.json',
 'New_Jersey.json',
 'New_Mexico.json',
 'New_York.json',
 'North_Carolina.json',
 'North_Dakota.json',
 'Ohio.json',
 'Oklahoma.json',
 'Oregon.json',
 'Pennsylvania.json',
 'Rhode_Island.json',
 'South_Carolina.json',
 'South_Dakota.json',
 'Tennessee.json',
 'Texas.json',
 'Utah.json',
 'Vermont.json',
 'Virginia.json',
 'Washington.json',
 'West_Virginia.json',
 'Wisconsin.json',
 'Wyoming.json']

In [12]:
# Download every file named in stateFiles and collect one record per file.
rows = []

for state in stateFiles:
    r = requests.get(prefixURL + state)
    r.raise_for_status()
    # Tag each decoded record with the file name it came from; this value
    # ends up in the "name" column of the frame.
    rows.append({**r.json(), "name": state})

# Build the frame once from the full list of records.
df = DataFrame(rows)
df

Unnamed: 0,2000,2010,2015,name
0,4447100,4779736,4846411,Alabama.json
1,626932,710231,737046,Alaska.json
2,5130632,6392017,6728783,Arizona.json
3,2673400,2915918,2966835,Arkansas.json
4,33871648,37253956,38792291,California.json
5,4301261,5029196,5355588,Colorado.json
6,3405565,3574097,3594762,Connecticut.json
7,783600,897934,935968,Delaware.json
8,15982378,18801310,19905569,Florida.json
9,8186453,9687653,10097132,Georgia.json


In [13]:
# Rebuild the frame from `rows` before indexing it by "name". Re-assigning
# `df = df.set_index("name")` made this cell fail with a KeyError when it was
# executed a second time, because "name" was no longer a column.
df = DataFrame(rows).set_index("name")
df.head()

Unnamed: 0_level_0,2000,2010,2015
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama.json,4447100,4779736,4846411
Alaska.json,626932,710231,737046
Arizona.json,5130632,6392017,6728783
Arkansas.json,2673400,2915918,2966835
California.json,33871648,37253956,38792291


In [14]:
# Add up each column (axis=0), giving one total per column.
df.sum(axis=0)

2000    280849847
2010    308143815
2015    318247565
dtype: int64

In [15]:
# Swap rows and columns: index labels become the column labels.
df.transpose()

name,Alabama.json,Alaska.json,Arizona.json,Arkansas.json,California.json,Colorado.json,Connecticut.json,Delaware.json,Florida.json,Georgia.json,...,South_Dakota.json,Tennessee.json,Texas.json,Utah.json,Vermont.json,Virginia.json,Washington.json,West_Virginia.json,Wisconsin.json,Wyoming.json
2000,4447100,626932,5130632,2673400,33871648,4301261,3405565,783600,15982378,8186453,...,754844,5689283,20851820,2233169,608827,7078515,5894121,1808344,5363675,493782
2010,4779736,710231,6392017,2915918,37253956,5029196,3574097,897934,18801310,9687653,...,814180,6346105,25145561,2763885,625741,8001024,6724540,1852994,5686986,563626
2015,4846411,737046,6728783,2966835,38792291,5355588,3594762,935968,19905569,10097132,...,853304,6547779,26979078,2944498,626767,8328098,7063166,1848751,5759432,584304


In [17]:
# Largest value in each row (axis=1), i.e. one maximum per index label.
df.max(axis=1)

name
Alabama.json            4846411
Alaska.json              737046
Arizona.json            6728783
Arkansas.json           2966835
California.json        38792291
Colorado.json           5355588
Connecticut.json        3594762
Delaware.json            935968
Florida.json           19905569
Georgia.json           10097132
Hawaii.json             1420257
Idaho.json              1634806
Illinois.json          12882189
Indiana.json            6597880
Iowa.json               3109481
Kansas.json             2902507
Kentucky.json           4412617
Louisiana.json          4648990
Maine.json              1330256
Maryland.json           5975346
Massachusetts.json      6755124
Michigan.json           9938444
Minnesota.json          5457125
Mississippi.json        2993443
Missouri.json           6063827
Montana.json            1023252
Nebraska.json           1882980
Nevada.json             2838281
New_Hampshire.json      1327996
New_Jersey.json         8938844
New_Mexico.json         2085567
New