# Data Collection and Processing with Python

In [6]:
# Lists with complex items: the last two elements of `data` are themselves lists
data = [
    'bagel', 'cream cheese', 'breakfast', 'grits', 'eggs', 'bacon',
    [34, 9, 73, []],
    [['willow', 'birch', 'elm'], 'apple', 'peach', 'cherry'],
]
# Last element -> its first item (the list of trees) -> that list's first item
plant = data[-1][0][0]


In [7]:
# Nested dictionaries: each top-level key maps to an inner dictionary
d = {
    'key1': {'a': 5, 'c': 90, 5: 50},  # note: the last key is the int 5, not a string
    'key2': {'b': 3, 'c': "yes"},
}

# Processing JSON results

**Site:**

https://jsoneditoronline.org/

In [25]:
# Parse a JSON-formatted string into Python objects with json.loads
import json

# One string literal, split across source lines at each "\n" of the JSON text
a_string = (
    '\n\n\n{\n'
    ' "resultCount":25,\n'
    ' "results": [\n'
    '{"wrapperType":"track", "kind":"podcast", "collectionId":10892}]}'
)
print(a_string)
d = json.loads(a_string)
print("------")
# Show the parsed object's type, its keys, the whole object, then one looked-up value
print(type(d), d.keys(), d, d['resultCount'], sep="\n")




{
 "resultCount":25,
 "results": [
{"wrapperType":"track", "kind":"podcast", "collectionId":10892}]}
------
<class 'dict'>
dict_keys(['resultCount', 'results'])
{'resultCount': 25, 'results': [{'wrapperType': 'track', 'kind': 'podcast', 'collectionId': 10892}]}
25


In [26]:
# Serialize a Python dictionary to a JSON-formatted string with json.dumps
import json

d = {
    'key1': {'c': True, 'a': 90, 5: 50},
    'key2': {'b': 3, 'c': "yes"},
}

# Python repr first, then the JSON text (True becomes true, the int key 5 becomes "5")
print(d, json.dumps(d, indent=2), sep="\n")

{'key1': {'c': True, 'a': 90, 5: 50}, 'key2': {'b': 3, 'c': 'yes'}}
{
  "key1": {
    "c": true,
    "a": 90,
    "5": 50
  },
  "key2": {
    "b": 3,
    "c": "yes"
  }
}


# Map, Filter, and List Comprehensions

## Map

In [29]:
abbrevs = ["usa", "esp", "chn", "jpn", "mex", "can", "rus", "rsa", "jam"]
# map() is lazy: the upper-cased strings are produced only when list() consumes it
abbrevs_upper = map(str.upper, abbrevs)
print(list(abbrevs_upper))

['USA', 'ESP', 'CHN', 'JPN', 'MEX', 'CAN', 'RUS', 'RSA', 'JAM']


In [30]:
things = [2, 5, 9]

# Quadruple every element; map() returns an iterator, so wrap it in list() to print
things4 = map(lambda n: n * 4, things)
print(list(things4))

# The same pattern written inline, without intermediate names
print(list(map(lambda n: n * 5, [1, 2, 3])))

[8, 20, 36]
[5, 10, 15]


## Filter

In [31]:
def keep_evens(nums):
    """Return a list of the even numbers in ``nums``, in their original order."""
    def is_even(num):
        return num % 2 == 0

    return list(filter(is_even, nums))

print(keep_evens([3, 4, 6, 7, 0, 1]))

[4, 6, 0]


In [34]:
lst_check = [
    'plums', 'watermelon', 'kiwi', 'strawberries', 'blueberries',
    'peaches', 'apples', 'mangos', 'papaya',
]
# Keep only the words that contain the letter "w"
filter_testing = filter(lambda fruit: "w" in fruit, lst_check)
list(filter_testing)

['watermelon', 'kiwi', 'strawberries']

## List Comprehensions

Python provides an alternative way to do map and filter operations, called a list comprehension. Many programmers find them easier to understand and write. List comprehensions are concise ways to create lists from other lists. The general syntax is:
`[<transformer_expression> for <loop_var> in <sequence> if <filtration_expression>]`
where the if clause is optional. For example,

In [35]:
things = [2, 5, 9]
# Comprehension equivalent of map(): double each element of `things`
yourlist = [2 * n for n in things]
print(yourlist)

[4, 10, 18]


In [39]:
import json

# One record per student; key order inside each record is kept as written
tester = {
    'info': [
        {"name": "Lauren", 'class standing': 'Junior', 'major': "Information Science"},
        {'name': 'Ayo', 'class standing': "Bachelor's", 'major': 'Information Science'},
        {'name': 'Kathryn', 'class standing': 'Senior', 'major': 'Sociology'},
        {'name': 'Nick', 'class standing': 'Junior', 'major': 'Computer Science'},
        {'name': 'Gladys', 'class standing': 'Sophomore', 'major': 'History'},
        {'name': 'Adam', 'major': 'Violin Performance', 'class standing': 'Senior'},
    ]
}
print(json.dumps(tester, indent=4))
# Pull the "name" value out of each student record, preserving list order
compri = [student['name'] for student in tester['info']]


{
    "info": [
        {
            "name": "Lauren",
            "class standing": "Junior",
            "major": "Information Science"
        },
        {
            "name": "Ayo",
            "class standing": "Bachelor's",
            "major": "Information Science"
        },
        {
            "name": "Kathryn",
            "class standing": "Senior",
            "major": "Sociology"
        },
        {
            "name": "Nick",
            "class standing": "Junior",
            "major": "Computer Science"
        },
        {
            "name": "Gladys",
            "class standing": "Sophomore",
            "major": "History"
        },
        {
            "name": "Adam",
            "major": "Violin Performance",
            "class standing": "Senior"
        }
    ]
}


In [41]:
things = [3, 4, 6, 7, 0, 1]

# Chain filter and map: filter() keeps only the even numbers,
# then map() doubles each one that survived.
print(list(map(lambda n: 2 * n, filter(lambda n: n % 2 == 0, things))))

# Equivalent list comprehension: the `if` clause filters, the leading expression maps
print([2 * n for n in things if n % 2 == 0])

[8, 12, 0]
[8, 12, 0]


## Zip

In [47]:
L1 = [3, 4, 5]
L2 = [1, 2, 3]
# Pair up elements by position; zip() is lazy, so unpack it into a list
L4 = [*zip(L1, L2)]
L4

[(3, 1), (4, 2), (5, 3)]

In [48]:
L1 = [3, 4, 5]
L2 = [1, 2, 3]
# Element-wise sums: unpack each zipped pair directly in the comprehension
L3 = [a + b for a, b in zip(L1, L2)]
L3

[4, 6, 8]

In [49]:
L1 = [3, 4, 5]
L2 = [1, 2, 3]
# Same sums via map(): each `pair` is an (L1 item, L2 item) tuple produced by zip()
L3 = map(lambda pair: pair[0] + pair[1], zip(L1, L2))
list(L3)

[4, 6, 8]

In [51]:
species = [
    'golden retriever', 'white tailed deer', 'black rhino', 'brown squirrel',
    'field mouse', 'orangutan', 'sumatran elephant', 'rainbow trout',
    'black bear', 'blue whale', 'water moccasin', 'giant panda',
    'green turtle', 'blue jay', 'japanese beetle',
]

population = [
    10000, 90000, 1000, 2000000, 500000, 500, 1200, 8000,
    12000, 2300, 7500, 100, 1800, 9500, 125000,
]
# Pair each species with its count, then keep the names whose count is below 2500
pop_info = zip(species, population)
endangered = [name for name, count in pop_info if count < 2500]
endangered


['black rhino',
 'orangutan',
 'sumatran elephant',
 'blue whale',
 'giant panda',
 'green turtle']

# Internet APIs

## Anatomy of URLs
`<scheme>://<host>:<port>/<path>`

## Requests module

In [53]:
import requests
def requestURL(baseurl, params=None):
    """Build, without sending, the full URL that a GET request would use.

    Args:
        baseurl: The base URL, without a query string.
        params: Optional dictionary of query parameters. Defaults to no
            parameters.

    Returns:
        The ``url`` attribute of the prepared request.

    Example:
        print(requestURL(some_base_url, some_params_dictionary))
    """
    # Use None as the default instead of a shared mutable {} literal.
    if params is None:
        params = {}
    # Only a Request object is constructed and prepared here; requests.get()
    # is not called by this function.
    prepared = requests.Request(method='GET', url=baseurl, params=params).prepare()
    return prepared.url

# print(requestURL(some_base_url, some_params_dictionary))

ModuleNotFoundError: No module named 'requests'