In [5]:
import requests
import json

In [3]:
# Root of the MusicBrainz web service and the artist endpoint that hangs off it.
BASE_URL = "http://musicbrainz.org/ws/2/"
ARTIST_URL = "{0}artist/".format(BASE_URL)


# Starter query-string parameters, keyed by the kind of lookup. requests.get
# takes its query parameters as a dictionary, so these can be passed directly.
query_type = dict(
    simple={},
    atr={"inc": "aliases+tags+ratings"},
    aliases={"inc": "aliases"},
    releases={"inc": "releases"},
)


def query_site(url, params, uid="", fmt="json"):
    """
    Send a GET request to the musicbrainz API and return the decoded reply.

    Args:
        url: endpoint URL; ``uid`` is appended to it verbatim.
        params: dictionary of query-string parameters. It is copied before the
            format is added, so the caller's dictionary is never modified.
        uid: optional string appended to ``url`` (e.g. an artist id).
        fmt: value sent as the ``fmt`` query parameter.

    Returns:
        The parsed JSON document when the response status is OK. None when the
        status is not OK and raise_for_status() does not raise for it.

    Raises:
        Whatever ``Response.raise_for_status()`` raises for an error status.
    """
    # Work on a copy: callers hand in the shared starter dictionaries from
    # query_type, and writing "fmt" into those would leak into later queries.
    request_params = dict(params)
    request_params["fmt"] = fmt
    r = requests.get(url + uid, params=request_params)
    print("requesting", r.url)

    if r.status_code == requests.codes.ok:
        return r.json()
    r.raise_for_status()
    return None


def query_by_name(url, params, name):
    """
    Search for artists by name.

    Builds the search expression "artist:<name>", adds it to a copy of
    ``params`` under the "query" key and forwards the call to query_site().

    Args:
        url: endpoint URL passed through to query_site().
        params: dictionary of starter query parameters; left unmodified.
        name: artist name to search for.

    Returns:
        Whatever query_site() returns for the request.
    """
    # Copy first: ``params`` is typically one of the shared query_type
    # dictionaries, and storing the artist filter in it would make every
    # later query that reuses the same dictionary carry this search term.
    search_params = dict(params)
    search_params["query"] = "artist:" + name
    return query_site(url, search_params)


def pretty_print(data, indent=4):
    """
    Print ``data`` in a more readable form.

    Dictionaries (including dict subclasses such as OrderedDict) are written
    as JSON with sorted keys and ``indent`` spaces per nesting level. Any
    other value is handed to print() unchanged.
    """
    # isinstance() instead of an exact type comparison, so that mappings
    # derived from dict get the same formatted output as plain dictionaries.
    if isinstance(data, dict):
        print(json.dumps(data, indent=indent, sort_keys=True))
    else:
        print(data)

In [6]:
# Search the database for every band called Nirvana and show the raw reply
results = query_by_name(url=ARTIST_URL, params=query_type["simple"], name="Nirvana")
pretty_print(data=results)

requesting http://musicbrainz.org/ws/2/artist/?query=artist%3ANirvana&fmt=json
{
    "artists": [
        {
            "area": {
                "id": "6a264f94-6ff1-30b1-9a81-41f7bfabd616",
                "name": "Finland",
                "sort-name": "Finland"
            },
            "country": "FI",
            "disambiguation": "Early 1980's Finnish punk band",
            "id": "85af0709-95db-4fbc-801a-120e9f4766d0",
            "life-span": {
                "ended": null
            },
            "name": "Nirvana",
            "score": "100",
            "sort-name": "Nirvana",
            "tags": [
                {
                    "count": 1,
                    "name": "punk"
                },
                {
                    "count": 1,
                    "name": "finland"
                }
            ],
            "type": "Group"
        },
        {
            "disambiguation": "French band from Martigues, activ during the 70s.",
            "id": "c49

In [8]:
# Show only the fourth search hit (list index 3)
print("\nARTIST:")
pretty_print(data=results["artists"][3])


ARTIST:
{
    "area": {
        "id": "8a754a16-0027-3a29-b6d7-2b40ea0481ed",
        "name": "United Kingdom",
        "sort-name": "United Kingdom"
    },
    "begin-area": {
        "id": "f03d09b3-39dc-4083-afd6-159e3f0d462f",
        "name": "London",
        "sort-name": "London"
    },
    "country": "GB",
    "disambiguation": "60s band from the UK",
    "id": "9282c8b4-ca0b-4c6b-b7e3-4f7762dfc4d6",
    "life-span": {
        "begin": "1967",
        "ended": null
    },
    "name": "Nirvana",
    "score": "100",
    "sort-name": "Nirvana",
    "tags": [
        {
            "count": 1,
            "name": "rock"
        },
        {
            "count": 1,
            "name": "pop"
        },
        {
            "count": 1,
            "name": "progressive rock"
        },
        {
            "count": 1,
            "name": "orchestral"
        },
        {
            "count": 1,
            "name": "british"
        },
        {
            "count": 1,
            "nam

In [9]:
# Fetch the releases of that band, looked up through its artist id
artist_id = results["artists"][3]["id"]
artist_data = query_site(url=ARTIST_URL, params=query_type["releases"], uid=artist_id)
releases = artist_data["releases"]

requesting http://musicbrainz.org/ws/2/artist/9282c8b4-ca0b-4c6b-b7e3-4f7762dfc4d6?inc=releases&fmt=json


In [10]:
# Show the first release returned for the selected band
print("\nONE RELEASE:")
pretty_print(data=releases[0], indent=2)


ONE RELEASE:
{
  "barcode": null,
  "country": "GB",
  "date": "1969",
  "disambiguation": "",
  "id": "0b44cb36-550a-491d-bfd9-8751271f9de7",
  "packaging": null,
  "packaging-id": null,
  "quality": "normal",
  "release-events": [
    {
      "area": {
        "disambiguation": "",
        "id": "8a754a16-0027-3a29-b6d7-2b40ea0481ed",
        "iso-3166-1-codes": [
          "GB"
        ],
        "name": "United Kingdom",
        "sort-name": "United Kingdom"
      },
      "date": "1969"
    }
  ],
  "status": "Official",
  "status-id": "4e304316-386d-3409-af2e-78857eec5cfe",
  "text-representation": {
    "language": "eng",
    "script": "Latn"
  },
  "title": "To Markos III"
}


In [12]:
# Collect the title of every release, then list them one per line
release_titles = [release["title"] for release in releases]
print("\nALL TITLES:")
for title in release_titles:
    print(title)


ALL TITLES:
To Markos III
Travelling on a Cloud
Songs Of Love And Praise
Songs of Love and Praise
Songs of Love and Praise
Secret Theatre
The Story of Simon Simopath
Me And My Friend
All of Us
The Story of Simon Simopath
To Markos III
Chemistry
The Story of Simon Simopath
Local Anaesthetic
Orange & Blue
Pentecost Hotel
Black Flower
All of Us
Local Anaesthetic


In [28]:
## json project
"""
This exercise shows some important concepts that you should be aware of:
- using codecs module to write unicode files
- using authentication with web APIs
- using offset when accessing web APIs
"""

import json
import codecs
import requests

In [29]:
# Base address of the NYTimes web services and the "most popular" API under it.
URL_MAIN = "http://api.nytimes.com/svc/"
URL_POPULAR = "{0}mostpopular/v2/".format(URL_MAIN)
# One API key per service; both are blank until you fill in your own.
API_KEY = dict(popular="", article="")

In [30]:
"""
Your task is to modify the article_overview() function to process the saved
file that represents the most popular articles (by view count) from the last
day, and return a tuple of variables containing the following data:
- titles: list of dictionaries, one per retrieved article, each mapping the
  article's "section" value to its "title" value.
- urls: list of URLs for all 'media' entries with "format": "Standard Thumbnail"
"""

def get_from_file(kind, period):
    """
    Load a previously saved result file and return its parsed JSON content.

    The file name is built as "popular-<kind>-<period>.json" and is opened
    relative to the current working directory.

    Args:
        kind: value inserted as the first part of the file name.
        period: value inserted as the second part of the file name.

    Returns:
        The Python object decoded from the JSON text in the file.
    """
    filename = "popular-{0}-{1}.json".format(kind, period)
    # Decode as UTF-8 explicitly instead of relying on the platform default;
    # save_file() in this notebook writes these files with UTF-8 encoding.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

In [None]:
"""
All your changes should be in the article_overview() function. See the test() 
function for examples of the elements of the output lists.
The rest of the functions are provided for your convenience, in case you want
to access the API yourself.
article_overview("viewed", 1)
"""

def article_overview(kind, period):
    """
    Summarise the saved most-popular articles file for ``kind`` / ``period``.

    Args:
        kind: list kind used to locate the saved file (e.g. "viewed").
        period: time period used to locate the saved file (e.g. 1).

    Returns:
        A tuple ``(titles, urls)``:
        - titles: one single-entry dictionary per article, mapping the
          article's "section" value to its "title" value, in file order;
        - urls: the "url" of every media rendition whose "format" is
          "Standard Thumbnail", in file order.
    """
    data = get_from_file(kind, period)
    titles = []
    urls = []
    for article in data:
        titles.append({article["section"]: article["title"]})
        # NOTE(review): assumes each "media" item carries its renditions in a
        # "media-metadata" list (NYTimes Most Popular layout) - confirm
        # against the saved file. Articles whose "media" value is missing or
        # empty contribute no URLs.
        for media in article.get("media") or []:
            for rendition in media.get("media-metadata", []):
                if rendition.get("format") == "Standard Thumbnail":
                    urls.append(rendition["url"])

    return (titles, urls)

In [None]:
def query_site(url, target, offset):
    """
    Query a NYTimes endpoint with the API key for ``target`` and an offset.

    Web services often use an offset parameter to return data in small
    chunks. NYTimes returns 20 articles per request, so to get the next 20
    you have to provide the offset parameter.

    Returns the decoded JSON reply, or False (after printing a notice) when
    either of the two API keys is still blank.
    """
    if any(API_KEY[name] == "" for name in ("popular", "article")):
        print("You need to register for NYTimes Developer account to run this program.")
        print("See Intructor notes for information")
        return False

    response = requests.get(url, params={"api-key": API_KEY[target], "offset": offset})

    if response.status_code != requests.codes.ok:
        response.raise_for_status()
        return None
    return response.json()


def get_popular(url, kind, days, section="all-sections", offset=0):
    """
    Build the most-popular query for the given arguments and run it.

    Args:
        url: base URL to which "most<kind>/<section>/<days>.json" is appended.
        kind: one of "viewed", "shared" or "emailed".
        days: time period in days; must be 1, 7 or 30.
        section: section name placed in the URL path.
        offset: paging offset handed to query_site().

    Returns:
        Whatever query_site() returns for the constructed URL, or False
        (after printing a message) when ``days`` or ``kind`` is not allowed.
    """
    # print() calls rather than Python 2 print statements: the rest of this
    # notebook uses the print function, and the statement form is a
    # SyntaxError under Python 3.
    if days not in [1, 7, 30]:
        print("Time period can be 1,7, 30 days only")
        return False
    if kind not in ["viewed", "shared", "emailed"]:
        print("kind can be only one of viewed/shared/emailed")
        return False

    url += "most{0}/{1}/{2}.json".format(kind, section, days)
    data = query_site(url, "popular", offset)

    return data


def save_file(kind, period):
    """
    Download every page of the most-popular list for ``kind`` / ``period``
    and write the combined "results" entries to
    "popular-<kind>-<period>.json" as UTF-8 encoded, indented JSON.

    The API is called repeatedly with an increasing offset and the pages are
    concatenated in order. Nothing is written when get_popular() signals a
    failure by returning a falsy value for any page.

    Args:
        kind: list kind passed to get_popular() and used in the file name.
        period: time period passed to get_popular() and used in the file name.
    """
    page_size = 20  # NYTimes returns 20 articles per request

    # Probe with the requested kind/period (not a fixed "viewed"/1) so that
    # num_results describes the list that is actually being saved. The probe
    # doubles as the first page, which avoids requesting offset 0 twice.
    first_page = get_popular(URL_POPULAR, kind, period)
    if not first_page:
        return
    num_results = first_page["num_results"]
    full_data = list(first_page["results"])

    for offset in range(page_size, num_results, page_size):
        page = get_popular(URL_POPULAR, kind, period, offset=offset)
        if not page:
            return
        full_data += page["results"]

    # Open the file only once all pages are in hand, so a failed download
    # does not leave an empty or truncated file behind.
    with codecs.open("popular-{0}-{1}.json".format(kind, period), encoding='utf-8', mode='w') as v:
        v.write(json.dumps(full_data, indent=2))

In [None]:
# Check the overview of yesterday's most viewed articles against known values
titles, urls = article_overview("viewed", 1)
assert (len(titles), len(urls)) == (20, 30)
assert {'Opinion': 'Professors, We Need You!'} == titles[2]
assert 'http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg' == urls[20]