In [58]:
import json
import codecs
import requests

# Root of the NYTimes web-service API and the "most popular" endpoint below it.
URL_MAIN = "http://api.nytimes.com/svc/"
URL_POPULAR = "{0}mostpopular/v2/".format(URL_MAIN)

# One API key per NYTimes service, looked up by service name.
# NOTE(review): these credentials are stored in plain text in the notebook;
# consider loading them from the environment instead of committing them.
API_KEY = dict(
    popular="cc1879752f4c1ea1e53bf8317c18b7fa:15:70221872",
    article="17b3f581e9a519e843cae5197f3ae819:18:70221872",
)


def get_from_file(kind, period):
    """Return the parsed contents of ``popular-<kind>-<period>.json``.

    The file name is built from ``kind`` and ``period`` and opened relative
    to the current working directory; the JSON it contains is decoded into
    Python objects and returned.
    """
    path = "popular-{kind}-{period}.json".format(kind=kind, period=period)
    with open(path, "r") as handle:
        return json.load(handle)


def article_overview(kind, period):
    """Summarise the articles stored in the ``popular-<kind>-<period>.json`` file.

    Returns a ``(titles, urls)`` tuple:
      * ``titles`` - one single-entry dict ``{section: title}`` per article,
        in file order;
      * ``urls``   - the ``url`` of every media-metadata entry whose
        ``format`` is ``"Standard Thumbnail"``, in file order.
    """
    articles = get_from_file(kind, period)
    titles = []
    thumbnails = []

    for entry in articles:
        titles.append({entry["section"]: entry["title"]})

        # Articles without a "media" key contribute a title only.
        if "media" not in entry:
            continue

        for media_item in entry["media"]:
            thumbnails.extend(
                meta["url"]
                for meta in media_item["media-metadata"]
                if meta["format"] == "Standard Thumbnail"
            )

    return (titles, thumbnails)


def query_site(url, target, offset):
    """Send a GET request to ``url`` with the API key and offset, return the JSON.

    Web services often use an offset parameter to return data in small
    chunks. NYTimes returns 20 articles per request; to get the next 20 the
    caller has to provide the offset parameter.

    Args:
        url: Full endpoint URL to query.
        target: Key into ``API_KEY`` selecting which API key to send.
        offset: Value sent as the ``offset`` query parameter.

    Returns:
        The decoded JSON body of the response, or ``False`` (after printing
        a notice) when either API key is still an empty string.

    Raises:
        Whatever ``requests`` raises for a failed request, including the
        HTTP error raised by ``raise_for_status()`` for 4xx/5xx responses.
    """
    if API_KEY["popular"] == "" or API_KEY["article"] == "":
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("You need to register for NYTimes Developer account to run this program.")
        print("See Intructor notes for information")
        return False

    params = {"api-key": API_KEY[target], "offset": offset}
    # Without a timeout an unresponsive server would block the caller forever.
    r = requests.get(url, params=params, timeout=30)

    # raise_for_status() only raises for 4xx/5xx. Calling it unconditionally
    # and then returning the body means any non-error status yields the
    # payload instead of silently falling through and returning None.
    r.raise_for_status()
    return r.json()


def get_popular(url, kind, days, section="all-sections", offset=0):
    """Build the "most popular" query and return the data from ``query_site``.

    Args:
        url: Base URL of the most-popular API. The endpoint path is appended
            to it. A falsy value falls back to ``URL_POPULAR``.
        kind: One of ``"viewed"``, ``"shared"`` or ``"emailed"``.
        days: Time period in days; must be 1, 7 or 30.
        section: Section name placed in the endpoint path.
        offset: Offset forwarded to ``query_site``.

    Returns:
        Whatever ``query_site`` returns for the constructed endpoint, or
        ``False`` (after printing a message) when ``days`` or ``kind`` is
        not one of the accepted values.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    if days not in [1, 7, 30]:
        print("Time period can be 1,7, 30 days only")
        return False
    if kind not in ["viewed", "shared", "emailed"]:
        print("kind can be only one of viewed/shared/emailed")
        return False

    # Honour the caller-supplied base URL instead of discarding it; existing
    # callers pass URL_POPULAR, so the resulting endpoint is unchanged for them.
    base_url = url if url else URL_POPULAR
    endpoint = base_url + "most{0}/{1}/{2}.json".format(kind, section, days)

    return query_site(endpoint, "popular", offset)


def save_file(kind, period):
    """Download every page of a most-popular list and write it to a JSON file.

    Calls ``get_popular`` repeatedly with increasing offsets, concatenates
    the ``"results"`` lists of all pages and writes the combined list to
    ``popular-<kind>-<period>-full.json`` (UTF-8, indented by 2).

    Args:
        kind: Passed to ``get_popular`` and used in the file name.
        period: Passed to ``get_popular`` as the number of days and used in
            the file name.

    Returns:
        None. Nothing is written when the first request is refused
        (``get_popular`` returned a falsy value after printing its message).
    """
    page_size = 20  # the loop below assumes 20 results per request

    # Ask for the list that was actually requested (not a fixed "viewed"/1
    # query) so that num_results matches the data being downloaded.
    first_page = get_popular(URL_POPULAR, kind, period)
    if not first_page:
        return

    num_results = first_page["num_results"]
    full_data = []
    for offset in range(0, num_results, page_size):
        # The first page is already in hand; only later pages need a request.
        if offset == 0:
            page = first_page
        else:
            page = get_popular(URL_POPULAR, kind, period, offset=offset)
        full_data += page["results"]

    # Open the output only after all requests succeeded, so a failed
    # download does not leave an empty or truncated file behind.
    filename = "popular-{0}-{1}-full.json".format(kind, period)
    with codecs.open(filename, encoding='utf-8', mode='w') as out:
        out.write(json.dumps(full_data, indent=2))


def test():
    """Check ``article_overview("viewed", 1)`` against known values.

    Expects 20 titles and 30 thumbnail URLs, and compares the third title
    and the twenty-first URL with fixed expected values.
    """
    expected_title = {'Opinion': 'Professors, We Need You!'}
    expected_thumbnail = 'http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg'

    overview = article_overview("viewed", 1)
    titles, urls = overview

    # Lengths are checked before indexing, so a short list fails an assert
    # instead of raising IndexError.
    assert len(titles) == 20
    assert len(urls) == 30
    assert titles[2] == expected_title
    assert urls[20] == expected_thumbnail


# Run the self-check only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    test()

get_from_file 和 article_overview 函数用于从 popular-viewed-1.json
文件中提取数据。

query_site、get_popular、save_file 这三个函数用于从网站获取热门文章数据并保存为 JSON 文件
（save_file 写入的文件名形如 popular-viewed-1-full.json）。

In [1]:
import json
import codecs
import requests

# Root of the NYTimes web-service API and the "most popular" endpoint below it.
URL_MAIN = "http://api.nytimes.com/svc/"
URL_POPULAR = "{0}mostpopular/v2/".format(URL_MAIN)

# One API key per NYTimes service, looked up by service name.
# NOTE(review): these credentials are stored in plain text in the notebook;
# consider loading them from the environment instead of committing them.
API_KEY = dict(
    popular="cc1879752f4c1ea1e53bf8317c18b7fa:15:70221872",
    article="17b3f581e9a519e843cae5197f3ae819:18:70221872",
)


In [59]:
def get_from_file(kind, period):
    """Print the name of, then load, the ``popular-<kind>-<period>.json`` file.

    Args:
        kind: First component of the file name.
        period: Second component of the file name.

    Returns:
        The Python objects decoded from the JSON text in the file.
    """
    filename = "popular-{0}-{1}.json".format(kind, period)
    # Echo which file is being read. The parenthesised single-argument form
    # prints the same text on Python 2 and is also valid on Python 3, where
    # the bare print statement is a SyntaxError.
    print(filename)
    # The file must exist at this path, relative to the working directory.
    with open(filename, "r") as f:
        # json.load converts the JSON text into Python objects.
        return json.load(f)
    


def article_overview(kind, period):
    """Collect section/title pairs and standard-thumbnail URLs for a file.

    Loads the articles via ``get_from_file(kind, period)`` and returns a
    ``(titles, urls)`` tuple: ``titles`` holds one ``{section: title}`` dict
    per article, ``urls`` holds the ``url`` of each media-metadata entry
    whose ``format`` equals ``"Standard Thumbnail"``.
    """
    articles = get_from_file(kind, period)
    titles = []
    thumbnails = []

    for entry in articles:
        titles.append({entry["section"]: entry["title"]})

        # Only articles carrying a "media" key can contribute thumbnails.
        if "media" not in entry:
            continue

        for media_item in entry["media"]:
            thumbnails.extend(
                meta["url"]
                for meta in media_item["media-metadata"]
                if meta["format"] == "Standard Thumbnail"
            )

    return (titles, thumbnails)

In [60]:

# Display the (titles, urls) tuple built from the popular-viewed-1.json file.
article_overview("viewed", 1)

popular-viewed-1.json


([{u'Opinion': u'The All-or-Nothing Marriage'},
  {u'Opinion': u'Marry First, Then Cheat'},
  {u'Opinion': u'Professors, We Need You!'},
  {u'N.Y. / Region': u'Common Core Curriculum Now Has Critics on the Left'},
  {u'Fashion & Style': u'For Interns, All Work and No Payoff'},
  {u'Sports': u'NBC Pushes Too Far in Bringing Bode Miller to Tears'},
  {u'Opinion': u'Barons of Broadband'},
  {u'Sports': u'A Mad Dash for Salt Rescues Olympic Slopes'},
  {u'U.S.': u'Spying by N.S.A. Ally Entangled U.S. Law Firm'},
  {u'N.Y. / Region': u'Addicted on Staten Island'},
  {u'Business Day': u'Saving an Endangered British Species: The Pub'},
  {u'Sports': u'Behind Meryl Davis and Charlie White, U.S. Is Close to Its First Ice Dance Gold'},
  {u'Technology': u'Intel\u2019s Sharp-Eyed Social Scientist'},
  {u'Sports': u'Hanyu Falls Twice, but Still Wins Gold'},
  {u'Opinion': u'Is the Universe a Simulation?'},
  {u'Opinion': u'Parental Pity Party'},
  {u'Opinion': u'Behind Bars'},
  {u'Opinion': u'A W

In [61]:

def article_overvie(kind, period):
    """Return spot-check values from the article overview of a file.

    Builds the same ``titles`` and thumbnail ``urls`` lists as
    ``article_overview`` and returns the 4-tuple
    ``(titles[2], urls[20], len(titles), len(urls))``.
    """
    articles = get_from_file(kind, period)
    titles = []
    thumbnails = []

    for entry in articles:
        titles.append({entry["section"]: entry["title"]})

        if "media" not in entry:
            continue

        for media_item in entry["media"]:
            thumbnails.extend(
                meta["url"]
                for meta in media_item["media-metadata"]
                if meta["format"] == "Standard Thumbnail"
            )

    # Third title, twenty-first thumbnail URL, then the two list sizes.
    return (titles[2], thumbnails[20], len(titles), len(thumbnails))


# Display the spot-check tuple (third title, 21st thumbnail URL, list lengths)
# for the popular-viewed-1.json file.
article_overvie("viewed", 1)

popular-viewed-1.json


({u'Opinion': u'Professors, We Need You!'},
 u'http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg',
 20,
 30)

In [None]:
# Step 1 done: extracting data from the popular-viewed-1.json file works.
# Step 2: try to fetch the popular-viewed-1.json data from the website.
