## New York Times Article Search 
---
#### To df

In [1]:
from pynytimes import NYTAPI
import pandas as pd
import datetime

In [None]:
nyt = NYTAPI("mwnXWF71cgmtnfQdbR5hSBDc1ivbObUX", parse_dates=True, https=False)

articles = nyt.article_search(
    query = "Election",
    results = 5,
    dates = {
        "2010": datetime.datetime(2022, 1, 1),
        "2022": datetime.datetime(2022, 12, 31)
    },
    options = {
        "sort": "oldest",
        "sources": [
            "New York Times",
            "AP",
            "Reuters",
            "International Herald Tribune"
        ],
        "news_desk": [
            "Politics"
        ],
        "type_of_material": [
            "News Analysis"
        ]
    }
)

print(articles)

[{'abstract': 'After $6 billion, two dozen presidential primary days, four general election debates and more TV ads than anyone could watch, the two parties essentially fought to a standstill.', 'web_url': 'https://www.nytimes.com/2012/11/08/us/politics/a-divided-nation-keeps-the-status-quo.html', 'snippet': 'After $6 billion, two dozen presidential primary days, four general election debates and more TV ads than anyone could watch, the two parties essentially fought to a standstill.', 'lead_paragraph': 'After $6 billion, two dozen presidential primary election days, a pair of national conventions, four general election debates, hundreds of Congressional contests and more television advertisements than anyone would ever want to watch, the two major political parties in America essentially fought to a standstill.', 'print_section': 'P', 'print_page': '3', 'source': 'The New York Times', 'multimedia': [{'rank': 0, 'subtype': 'xlarge', 'caption': None, 'credit': None, 'type': 'image', 'ur

In [3]:
# for each of the articles in the list, get the information that is stored in a nested dictionary:
headline = map(lambda x: x["headline"]["main"], articles)
author = map(lambda x: x["headline"]["kicker"], articles)
leadparagraph = map(lambda x: x["lead_paragraph"], articles)
pubdate = map(lambda x: x["pub_date"], articles)
weburl = map(lambda x: x["web_url"],articles)

# since keywords are a branch down in the nested dictionary, we need to add an additional for loop to collect all keywords:
keywords = map(lambda x:list(i["value"] for i in x["keywords"]), articles)


In [4]:
data={'headline': list(headline), 'author': list(author), 'leadparagraph':list(leadparagraph),
     'publication date': list(pubdate), "keywords": list(keywords),"weburl":list(weburl)}
df = pd.DataFrame(data)
df

Unnamed: 0,headline,author,leadparagraph,publication date,keywords,weburl
0,"Obama Wins a Clear Victory, but Balance of Pow...",News Analysis,"After $6 billion, two dozen presidential prima...",2012-11-07 16:44:30+00:00,"[House of Representatives, Democratic Party, S...",https://www.nytimes.com/2012/11/08/us/politics...
1,Senate Republicans Are Newly Hopeful About the...,,WASHINGTON — After a turbulent first year conf...,2018-06-02 09:00:06+00:00,"[Republican Party, Midterm Elections (2018), E...",https://www.nytimes.com/2018/06/02/us/politics...
2,"For Red-State Democrats, Accusation Against Ka...",News Analysis,WASHINGTON — The sexual assault accusation aga...,2018-09-19 14:01:01+00:00,"[Midterm Elections (2018), Kavanaugh, Brett M,...",https://www.nytimes.com/2018/09/19/us/politics...
3,Kavanaugh Could Help G.O.P. in Senate Midterms...,News Analysis,WASHINGTON — By agreeing to delay Judge Brett ...,2018-09-29 14:32:32+00:00,"[Midterm Elections (2018), Elections, House of...",https://www.nytimes.com/2018/09/29/us/politics...
4,How Trump-Fed Conspiracy Theories About Migran...,News Analysis,"MURPHYSBORO, Ill. — Alicia Hooten thinks the c...",2018-10-29 21:34:05+00:00,"[Midterm Elections (2018), Trump, Donald J, Im...",https://www.nytimes.com/2018/10/29/us/politics...
5,Inside the Republican Strategy to Discredit th...,News Analysis,"TALLAHASSEE, Fla. — The concerted effort by Re...",2018-11-13 01:34:32+00:00,"[Midterm Elections (2018), Florida, Elections,...",https://www.nytimes.com/2018/11/12/us/politics...
6,Trump Behaves One Way on Race. Democrats Deman...,News Analysis,WASHINGTON — The irony was difficult to miss: ...,2019-02-04 01:02:16+00:00,"[Presidential Election of 2020, Democratic Par...",https://www.nytimes.com/2019/02/03/us/politics...
7,"On Health Care, 2020 Democrats Find Their Firs...",News Analysis,"The debate unfolded over a period of days, on ...",2019-02-20 10:00:08+00:00,"[Democratic Party, Presidential Election of 20...",https://www.nytimes.com/2019/02/20/us/politics...
8,Mueller Report Puts Pressure on 2020 Candidate...,News Analysis,The conclusion by the special counsel that Pre...,2019-03-25 09:00:05+00:00,"[Presidential Election of 2016, Russian Interf...",https://www.nytimes.com/2019/03/25/us/politics...
9,Kamala Harris Makes the Case That Joe Biden Sh...,News Analysis,This was the moment Joseph R. Biden Jr. had to...,2019-06-28 03:55:14+00:00,"[Presidential Election of 2020, Debates (Polit...",https://www.nytimes.com/2019/06/27/us/politics...


In [5]:
df.weburl[0]


'https://www.nytimes.com/2012/11/08/us/politics/a-divided-nation-keeps-the-status-quo.html'

## All Article Search from min year
---
#### Can only access Paragraph

In [6]:
from datetime import date
import requests
import time 
import sys

KEY = "mwnXWF71cgmtnfQdbR5hSBDc1ivbObUX"

# earliest year supported by NYT API
NYT_MIN_YEAR = 2010

# now 
YEAR = (date.today()).year 

# But to keep things rational, maybe just do 100 years
MIN_YEAR = NYT_MIN_YEAR

# Given a term, I return the total in a year
def countForTerm(term,year):
	# Note, we filter to one key cuz we only really care about the count
	api = f"https://api.nytimes.com/svc/search/v2/articlesearch.json?&api-key={KEY}&fl=web_url"
	# pass term here, requests will url encode
	response = requests.get(api, params=[("fq",f"pub_year:{year} AND headline:{term}")], verify=False)
	data = response.json()

	if "fault" in data:
		print("ERROR:",data["fault"]["faultstring"])
		sys.exit()

	total = data["response"]["meta"]["hits"]

	return total

def main(term):
	# it's an array, but reduce to a string
	term = " ".join(term)
	print(f"Searching for {term} from {MIN_YEAR} to {YEAR}")
	for year in range(MIN_YEAR, YEAR):
		total = countForTerm(term, year)
		print(year,total)
		time.sleep(6)

main('election usa')

Searching for e l e c t i o n   u s a from 2010 to 2022




2010 261




2011 257




2012 242




2013 186




2014 184




2015 119




2016 74


KeyboardInterrupt: 

## Article Search different approach 
---
#### to .csv

In [None]:
import requests as req
import time
API_KEY='mwnXWF71cgmtnfQdbR5hSBDc1ivbObUX' # your API key
TOPIC='Election' # keyword


In [None]:
for i in range(10):
  url='https://api.nytimes.com/svc/search/v2/articlesearch.json?q='+TOPIC+'&api-key='+API_KEY+'&page='+str(i)
  response = req.get(url, verify=False).json()
  time.sleep(6)
response

In [None]:
# Extract the necessary fields from the response.
articles = []
docs = response['response']['docs']
print(response[0])
for doc in docs:
        filteredDoc = {}
        filteredDoc['title'] = doc['headline']['main']
        filteredDoc['abstract'] = doc['abstract']
        filteredDoc['paragraph']=doc['lead_paragraph']
        articles.append(filteredDoc)
articles[:10]

In [None]:
import pandas as pd
df = pd.DataFrame(data=articles)
df.to_csv('TechArticles.csv')