In [1]:
# Dependencies
from pprint import pprint
import pandas as pd
import time
import json

### Get Primary Business Information
- Import ticker_categories.json
- Narrow down to only the needed columns and rename them for readability
- Add tickers to a list for use in other DataFrames

In [2]:
# Load the per-ticker reference data scraped into ticker_categories.json
ticker_cat = pd.read_json(path_or_buf="../DataScraping/ticker_categories.json")
ticker_cat.head()

Unnamed: 0,ticker,name,market,locale,primary_exchange,type,active,currency_name,cik,composite_figi,...,sic_description,ticker_root,homepage_url,total_employees,list_date,branding,share_class_shares_outstanding,weighted_shares_outstanding,round_lot,ticker_suffix
0,MMM,3M Company,stocks,us,XNYS,CS,True,usd,66740,BBG000BP52R2,...,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,MMM,https://www.3m.com,95000,1946-01-14,{'logo_url': 'https://api.polygon.io/v1/refere...,552740000,552742915,100,
1,AOS,A.O. Smith Corporation,stocks,us,XNYS,CS,True,usd,91142,BBG000BC1L02,...,HOUSEHOLD APPLIANCES,AOS,https://www.aosmith.com,13700,1984-09-07,{'logo_url': 'https://api.polygon.io/v1/refere...,126870000,152775527,100,
2,ABT,Abbott Laboratories,stocks,us,XNYS,CS,True,usd,1800,BBG000B9ZXB4,...,PHARMACEUTICAL PREPARATIONS,ABT,https://www.abbottinvestor.com,113000,1949-04-19,{'logo_url': 'https://api.polygon.io/v1/refere...,1743570000,1743573777,100,
3,ABBV,ABBVIE INC.,stocks,us,XNYS,CS,True,usd,1551152,BBG0025Y4RY4,...,PHARMACEUTICAL PREPARATIONS,ABBV,http://www.abbvieinvestor.com,50000,2012-12-10,{'icon_url': 'https://api.polygon.io/v1/refere...,1768480000,1768480508,100,
4,ABMD,Abiomed Inc,stocks,us,XNAS,CS,True,usd,815094,BBG000C101X4,...,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,ABMD,https://www.abiomed.com,2003,1987-07-30,{'logo_url': 'https://api.polygon.io/v1/refere...,45090000,45091184,100,


In [3]:
# Keep only the descriptive columns needed later
ticker_fixed = ticker_cat.loc[
    :,
    ["ticker", "name", "locale", "primary_exchange", "sic_description", "homepage_url"],
]
ticker_fixed.head()

Unnamed: 0,ticker,name,locale,primary_exchange,sic_description,homepage_url
0,MMM,3M Company,us,XNYS,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,https://www.3m.com
1,AOS,A.O. Smith Corporation,us,XNYS,HOUSEHOLD APPLIANCES,https://www.aosmith.com
2,ABT,Abbott Laboratories,us,XNYS,PHARMACEUTICAL PREPARATIONS,https://www.abbottinvestor.com
3,ABBV,ABBVIE INC.,us,XNYS,PHARMACEUTICAL PREPARATIONS,http://www.abbvieinvestor.com
4,ABMD,Abiomed Inc,us,XNAS,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,https://www.abiomed.com


In [4]:
# Give the kept columns readable, capitalised names
ticker_renamed = ticker_fixed.rename(
    columns={
        "ticker": "Ticker",
        "name": "Name",
        "locale": "Locale",
        "primary_exchange": "Primary_Exchange",
        "sic_description": "Industry",
        "homepage_url": "Homepage",
    }
)
ticker_renamed.head()

Unnamed: 0,Ticker,Name,Locale,Primary_Exchange,Industry,Homepage
0,MMM,3M Company,us,XNYS,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,https://www.3m.com
1,AOS,A.O. Smith Corporation,us,XNYS,HOUSEHOLD APPLIANCES,https://www.aosmith.com
2,ABT,Abbott Laboratories,us,XNYS,PHARMACEUTICAL PREPARATIONS,https://www.abbottinvestor.com
3,ABBV,ABBVIE INC.,us,XNYS,PHARMACEUTICAL PREPARATIONS,http://www.abbvieinvestor.com
4,ABMD,Abiomed Inc,us,XNAS,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,https://www.abiomed.com


In [5]:
# Add tickers to a list for later use
#
# NOTE(review): each element of this list is the *entire* Ticker column (a pandas Series),
# not a single symbol -- the list holds one reference to that column per row of
# ticker_renamed. The later "Add tickers" cells assign tickers_list[0] to each monthly
# frame and depend on it being the Series (so it is matched to rows by index), so that
# shape is kept here. Only the row-by-row iterrows() loop, whose loop variable was never
# used, has been replaced by an equivalent one-liner.
tickers_list = [ticker_renamed["Ticker"]] * len(ticker_renamed)

### NYT API Information Cleaning (Separated)
- Import json files (June through Nov)
- Add tickers to the DataFrame for merging later
- Drop all columns except the tickers and the first result returned by the API call, then rename them
- Open the Article dictionary to get information, then drop unnecessary information
- Open the Headline dictionary to get the main headline, then drop the rest
- Rearrange and rename columns for readability

### November

In [6]:
# Load the raw November NYT results
nov_df = pd.read_json(path_or_buf="Outputs/NYT_Nov.json")
nov_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,{'abstract': 'Jack Welch spent his last few ye...,"{'abstract': 'If the Federal Reserve’s chair, ...",,,,,,,,
1,,,,,,,,,,
2,{'abstract': 'Every year along the Texas borde...,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [7]:
# Attach the ticker symbols as a new column.
# tickers_list[0] is the whole Ticker column (a Series), so pandas matches it to rows by index.
nov_df = nov_df.assign(Ticker=tickers_list[0])
nov_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Ticker
0,{'abstract': 'Jack Welch spent his last few ye...,"{'abstract': 'If the Federal Reserve’s chair, ...",,,,,,,,,MMM
1,,,,,,,,,,,AOS
2,{'abstract': 'Every year along the Texas borde...,,,,,,,,,,ABT
3,,,,,,,,,,,ABBV
4,,,,,,,,,,,ABMD


In [8]:
# Keep just the ticker and the first returned article (column 0)
nov_columns = nov_df.loc[:, ["Ticker", 0]]
nov_columns.head()

Unnamed: 0,Ticker,0
0,MMM,{'abstract': 'Jack Welch spent his last few ye...
1,AOS,
2,ABT,{'abstract': 'Every year along the Texas borde...
3,ABBV,
4,ABMD,


In [9]:
# Give the first-article column a descriptive name ("Ticker" is already named correctly)
nov_renamed = nov_columns.rename(columns={0: "Article"})
nov_renamed.head()

Unnamed: 0,Ticker,Article
0,MMM,{'abstract': 'Jack Welch spent his last few ye...
1,AOS,
2,ABT,{'abstract': 'Every year along the Texas borde...
3,ABBV,
4,ABMD,


In [10]:
# Expand the Article dictionaries into one column per article field:
# to_dict() keys each article by its row index, DataFrame(...).T turns that back into
# one row per index, and assign(**...) adds every field as a column next to Ticker.
nov_article_fields = pd.DataFrame(nov_renamed["Article"].to_dict()).T
nov_series = nov_renamed.loc[:, ["Ticker"]].assign(**nov_article_fields)
nov_series

Unnamed: 0,Ticker,abstract,web_url,snippet,lead_paragraph,source,multimedia,headline,keywords,pub_date,...,news_desk,section_name,byline,type_of_material,_id,word_count,uri,print_section,print_page,subsection_name
0,MMM,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...,Jack Welch spent his last few years regretting...,"Jack Welch, one of the most celebrated corpora...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'How One of the Country’s Most Storie...,"[{'name': 'subject', 'value': 'Appointments an...",2022-11-21T10:00:21+0000,...,OpEd,Opinion,"{'original': 'By William D. Cohan', 'person': ...",Op-Ed,nyt://article/0fa6c9ca-c1a0-542d-802e-7d09db83...,1112,nyt://article/0fa6c9ca-c1a0-542d-802e-7d09db83...,,,
1,AOS,,,,,,,,,,...,,,,,,,,,,
2,ABT,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...,"Every year along the Texas border, high school...","On a hot Monday in late August 2021, Marcos Zá...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'A Championship Season in Mariachi Co...,"[{'name': 'subject', 'value': 'Latin Music', '...",2022-11-03T09:00:52+0000,...,Magazine,Magazine,"{'original': 'By Cecilia Ballí', 'person': [{'...",News,nyt://article/65d9b48a-c408-5af8-9358-6d2f15ec...,9204,nyt://article/65d9b48a-c408-5af8-9358-6d2f15ec...,MM,36,
3,ABBV,,,,,,,,,,...,,,,,,,,,,
4,ABMD,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,YUM,,,,,,,,,,...,,,,,,,,,,
498,ZBRA,,,,,,,,,,...,,,,,,,,,,
499,ZBH,,,,,,,,,,...,,,,,,,,,,
500,ZION,,,,,,,,,,...,,,,,,,,,,


In [11]:
# Keep the ticker plus the article fields used in the following cells
nov_dropped = nov_series.loc[:, ["Ticker", "headline", "abstract", "web_url", "pub_date"]]
nov_dropped.head()

Unnamed: 0,Ticker,headline,abstract,web_url,pub_date
0,MMM,{'main': 'How One of the Country’s Most Storie...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...,2022-11-21T10:00:21+0000
1,AOS,,,,
2,ABT,{'main': 'A Championship Season in Mariachi Co...,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...,2022-11-03T09:00:52+0000
3,ABBV,,,,
4,ABMD,,,,


In [12]:
# Expand the headline dictionaries into one column per headline field,
# placed next to the ticker and the remaining article fields
nov_headline_fields = pd.DataFrame(nov_dropped["headline"].to_dict()).T
nov_headlines = (
    nov_dropped.loc[:, ["Ticker", "abstract", "web_url", "pub_date"]]
    .assign(**nov_headline_fields)
)
nov_headlines.head()

Unnamed: 0,Ticker,abstract,web_url,pub_date,main,kicker,content_kicker,print_headline,name,seo,sub
0,MMM,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...,2022-11-21T10:00:21+0000,How One of the Country’s Most Storied C.E.O.s ...,,,,,,
1,AOS,,,,,,,,,,
2,ABT,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...,2022-11-03T09:00:52+0000,A Championship Season in Mariachi Country,,,Mariachi Country,,,
3,ABBV,,,,,,,,,,
4,ABMD,,,,,,,,,,


In [13]:
# Keep the ticker, main headline, abstract and URL, in that order.
# pub_date is dropped because the date already sits in a readable spot near the start of web_url.
nov_fixed = nov_headlines.loc[:, ["Ticker", "main", "abstract", "web_url"]]
nov_fixed.head()

Unnamed: 0,Ticker,main,abstract,web_url
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,,,
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


In [14]:
# Give the remaining columns their final display names ("Ticker" already has its final name)
nov_final = nov_fixed.rename(
    columns={"main": "Headline", "abstract": "Abstract", "web_url": "Web_URL"}
)
nov_final.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,,,
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


### October

In [15]:
# Load the raw October NYT results
oct_df = pd.read_json(path_or_buf="Outputs/NYT_Oct.json")
oct_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,,,,,,,,,,
1,,,,,,,,,,
2,,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [16]:
# Attach the ticker symbols as a new column.
# tickers_list[0] is the whole Ticker column (a Series), so pandas matches it to rows by index.
oct_df = oct_df.assign(Ticker=tickers_list[0])
oct_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Ticker
0,,,,,,,,,,,MMM
1,,,,,,,,,,,AOS
2,,,,,,,,,,,ABT
3,,,,,,,,,,,ABBV
4,,,,,,,,,,,ABMD


In [17]:
# Something weird happened with October's pull, so it added a lot of extra rows.
# Narrow down to only the needed rows: the Ticker column was attached by index, so any
# row beyond the ticker list has no ticker (NaN). Filtering on that replaces the previous
# hard-coded row count (502) and keeps working if the number of tickers changes.
new_oct_df = oct_df.loc[oct_df["Ticker"].notna()]

In [18]:
# Keep just the ticker and the first returned article (column 0)
oct_columns = new_oct_df.loc[:, ["Ticker", 0]]
oct_columns.head()

Unnamed: 0,Ticker,0
0,MMM,
1,AOS,
2,ABT,
3,ABBV,
4,ABMD,


In [19]:
# Give the first-article column a descriptive name ("Ticker" is already named correctly)
oct_renamed = oct_columns.rename(columns={0: "Article"})
oct_renamed.head()

Unnamed: 0,Ticker,Article
0,MMM,
1,AOS,
2,ABT,
3,ABBV,
4,ABMD,


In [20]:
# Expand the Article dictionaries into one column per article field:
# to_dict() keys each article by its row index, DataFrame(...).T turns that back into
# one row per index, and assign(**...) adds every field as a column next to Ticker.
oct_article_fields = pd.DataFrame(oct_renamed["Article"].to_dict()).T
oct_series = oct_renamed.loc[:, ["Ticker"]].assign(**oct_article_fields)
oct_series

Unnamed: 0,Ticker,abstract,web_url,snippet,lead_paragraph,print_section,print_page,source,multimedia,headline,...,pub_date,document_type,news_desk,section_name,byline,type_of_material,_id,word_count,uri,subsection_name
0,MMM,,,,,,,,,,...,,,,,,,,,,
1,AOS,,,,,,,,,,...,,,,,,,,,,
2,ABT,,,,,,,,,,...,,,,,,,,,,
3,ABBV,,,,,,,,,,...,,,,,,,,,,
4,ABMD,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,YUM,"After a few up-and-down years at Netflix, the ...",https://www.nytimes.com/2022/10/29/style/ryan-...,"After a few up-and-down years at Netflix, the ...","When he was 14, Ryan Murphy made a cigarette h...",ST,10,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Ryan Murphy Is Having a Very Happy H...,...,2022-10-29T09:00:19+0000,article,Styles,Style,"{'original': 'By Maureen Dowd', 'person': [{'f...",News,nyt://article/973ed72b-3226-5844-9123-da6da905...,4906,nyt://article/973ed72b-3226-5844-9123-da6da905...,
498,ZBRA,,,,,,,,,,...,,,,,,,,,,
499,ZBH,,,,,,,,,,...,,,,,,,,,,
500,ZION,,,,,,,,,,...,,,,,,,,,,


In [21]:
# Keep the ticker plus the article fields used in the following cells
oct_dropped = oct_series.loc[:, ["Ticker", "headline", "abstract", "web_url", "pub_date"]]
oct_dropped.head()

Unnamed: 0,Ticker,headline,abstract,web_url,pub_date
0,MMM,,,,
1,AOS,,,,
2,ABT,,,,
3,ABBV,,,,
4,ABMD,,,,


In [22]:
# Expand the headline dictionaries into one column per headline field,
# placed next to the ticker and the remaining article fields
oct_headline_fields = pd.DataFrame(oct_dropped["headline"].to_dict()).T
oct_headlines = (
    oct_dropped.loc[:, ["Ticker", "abstract", "web_url", "pub_date"]]
    .assign(**oct_headline_fields)
)
oct_headlines.head()

Unnamed: 0,Ticker,abstract,web_url,pub_date,main,kicker,content_kicker,print_headline,name,seo,sub
0,MMM,,,,,,,,,,
1,AOS,,,,,,,,,,
2,ABT,,,,,,,,,,
3,ABBV,,,,,,,,,,
4,ABMD,,,,,,,,,,


In [23]:
# Keep the ticker, main headline, abstract and URL, in that order.
# pub_date is dropped because the date already sits in a readable spot near the start of web_url.
oct_fixed = oct_headlines.loc[:, ["Ticker", "main", "abstract", "web_url"]]
oct_fixed.head()

Unnamed: 0,Ticker,main,abstract,web_url
0,MMM,,,
1,AOS,,,
2,ABT,,,
3,ABBV,,,
4,ABMD,,,


In [24]:
# Give the remaining columns their final display names ("Ticker" already has its final name)
oct_final = oct_fixed.rename(
    columns={"main": "Headline", "abstract": "Abstract", "web_url": "Web_URL"}
)
oct_final.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,,,
1,AOS,,,
2,ABT,,,
3,ABBV,,,
4,ABMD,,,


### September

In [25]:
# Load the raw September NYT results
sept_df = pd.read_json(path_or_buf="Outputs/NYT_Sept.json")
sept_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,{'abstract': 'At her galleries and with her fi...,,,,,,,,,
1,{'abstract': 'At her galleries and with her fi...,,,,,,,,,
2,{'abstract': 'At her galleries and with her fi...,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [26]:
# Attach the ticker symbols as a new column.
# tickers_list[0] is the whole Ticker column (a Series), so pandas matches it to rows by index.
sept_df = sept_df.assign(Ticker=tickers_list[0])
sept_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Ticker
0,{'abstract': 'At her galleries and with her fi...,,,,,,,,,,MMM
1,{'abstract': 'At her galleries and with her fi...,,,,,,,,,,AOS
2,{'abstract': 'At her galleries and with her fi...,,,,,,,,,,ABT
3,,,,,,,,,,,ABBV
4,,,,,,,,,,,ABMD


In [27]:
# Narrow down to only the needed rows: the Ticker column was attached by index, so any
# row beyond the ticker list has no ticker (NaN). Filtering on that replaces the previous
# hard-coded row count (502) and keeps working if the number of tickers changes.
new_sept_df = sept_df.loc[sept_df["Ticker"].notna()]

In [28]:
# Keep just the ticker and the first returned article (column 0)
sept_columns = new_sept_df.loc[:, ["Ticker", 0]]
sept_columns.head()

Unnamed: 0,Ticker,0
0,MMM,{'abstract': 'At her galleries and with her fi...
1,AOS,{'abstract': 'At her galleries and with her fi...
2,ABT,{'abstract': 'At her galleries and with her fi...
3,ABBV,
4,ABMD,


In [29]:
# Give the first-article column a descriptive name ("Ticker" is already named correctly)
sept_renamed = sept_columns.rename(columns={0: "Article"})
sept_renamed.head()

Unnamed: 0,Ticker,Article
0,MMM,{'abstract': 'At her galleries and with her fi...
1,AOS,{'abstract': 'At her galleries and with her fi...
2,ABT,{'abstract': 'At her galleries and with her fi...
3,ABBV,
4,ABMD,


In [30]:
# Expand the Article dictionaries into one column per article field:
# to_dict() keys each article by its row index, DataFrame(...).T turns that back into
# one row per index, and assign(**...) adds every field as a column next to Ticker.
sept_article_fields = pd.DataFrame(sept_renamed["Article"].to_dict()).T
sept_series = sept_renamed.loc[:, ["Ticker"]].assign(**sept_article_fields)
sept_series

Unnamed: 0,Ticker,abstract,web_url,snippet,lead_paragraph,print_section,print_page,source,multimedia,headline,...,pub_date,document_type,news_desk,section_name,byline,type_of_material,_id,word_count,uri,subsection_name
0,MMM,At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,At her galleries and with her financial resour...,"Virginia Dwan, who through her galleries in Lo...",B,11,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'Virginia Dwan, Behind-the-Scenes For...",...,2022-09-18T17:30:43+0000,article,Obits,Arts,"{'original': 'By Neil Genzlinger', 'person': [...",Obituary (Obit),nyt://article/50ace9d8-d710-5963-8a42-5ef73f5b...,1257,nyt://article/50ace9d8-d710-5963-8a42-5ef73f5b...,
1,AOS,At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,At her galleries and with her financial resour...,"Virginia Dwan, who through her galleries in Lo...",B,11,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'Virginia Dwan, Behind-the-Scenes For...",...,2022-09-18T17:30:43+0000,article,Obits,Arts,"{'original': 'By Neil Genzlinger', 'person': [...",Obituary (Obit),nyt://article/50ace9d8-d710-5963-8a42-5ef73f5b...,1257,nyt://article/50ace9d8-d710-5963-8a42-5ef73f5b...,
2,ABT,At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,At her galleries and with her financial resour...,"Virginia Dwan, who through her galleries in Lo...",B,11,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'Virginia Dwan, Behind-the-Scenes For...",...,2022-09-18T17:30:43+0000,article,Obits,Arts,"{'original': 'By Neil Genzlinger', 'person': [...",Obituary (Obit),nyt://article/50ace9d8-d710-5963-8a42-5ef73f5b...,1257,nyt://article/50ace9d8-d710-5963-8a42-5ef73f5b...,
3,ABBV,,,,,,,,,,...,,,,,,,,,,
4,ABMD,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,YUM,The victims of a bizarre cyberstalking operati...,https://www.nytimes.com/2022/09/16/technology/...,The victims of a bizarre cyberstalking operati...,“If you are ever going to take her down .. now...,BU,1,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'At eBay, Lurid Crimes and the Search...",...,2022-09-16T15:22:48+0000,article,SundayBusiness,Technology,"{'original': 'By David Streitfeld', 'person': ...",News,nyt://article/f0a4d99e-acf7-53f4-a869-86927f5a...,3657,nyt://article/f0a4d99e-acf7-53f4-a869-86927f5a...,
498,ZBRA,,,,,,,,,,...,,,,,,,,,,
499,ZBH,,,,,,,,,,...,,,,,,,,,,
500,ZION,,,,,,,,,,...,,,,,,,,,,


In [31]:
# Keep the ticker plus the article fields used in the following cells
sept_dropped = sept_series.loc[:, ["Ticker", "headline", "abstract", "web_url", "pub_date"]]
sept_dropped.head()

Unnamed: 0,Ticker,headline,abstract,web_url,pub_date
0,MMM,"{'main': 'Virginia Dwan, Behind-the-Scenes For...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,2022-09-18T17:30:43+0000
1,AOS,"{'main': 'Virginia Dwan, Behind-the-Scenes For...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,2022-09-18T17:30:43+0000
2,ABT,"{'main': 'Virginia Dwan, Behind-the-Scenes For...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,2022-09-18T17:30:43+0000
3,ABBV,,,,
4,ABMD,,,,


In [32]:
# Expand the headline dictionaries into one column per headline field,
# placed next to the ticker and the remaining article fields
sept_headline_fields = pd.DataFrame(sept_dropped["headline"].to_dict()).T
sept_headlines = (
    sept_dropped.loc[:, ["Ticker", "abstract", "web_url", "pub_date"]]
    .assign(**sept_headline_fields)
)
sept_headlines.head()

Unnamed: 0,Ticker,abstract,web_url,pub_date,main,kicker,content_kicker,print_headline,name,seo,sub
0,MMM,At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,2022-09-18T17:30:43+0000,"Virginia Dwan, Behind-the-Scenes Force in the ...",,,"Virginia Dwan, a Backer Of Artists at the For...",,,
1,AOS,At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,2022-09-18T17:30:43+0000,"Virginia Dwan, Behind-the-Scenes Force in the ...",,,"Virginia Dwan, a Backer Of Artists at the For...",,,
2,ABT,At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...,2022-09-18T17:30:43+0000,"Virginia Dwan, Behind-the-Scenes Force in the ...",,,"Virginia Dwan, a Backer Of Artists at the For...",,,
3,ABBV,,,,,,,,,,
4,ABMD,,,,,,,,,,


In [33]:
# Keep the ticker, main headline, abstract and URL, in that order.
# pub_date is dropped because the date already sits in a readable spot near the start of web_url.
sept_fixed = sept_headlines.loc[:, ["Ticker", "main", "abstract", "web_url"]]
sept_fixed.head()

Unnamed: 0,Ticker,main,abstract,web_url
0,MMM,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
1,AOS,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
3,ABBV,,,
4,ABMD,,,


In [34]:
# Give the remaining columns their final display names ("Ticker" already has its final name)
sept_final = sept_fixed.rename(
    columns={"main": "Headline", "abstract": "Abstract", "web_url": "Web_URL"}
)
sept_final.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
1,AOS,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
3,ABBV,,,
4,ABMD,,,


### August

In [35]:
# Load the raw August NYT results
aug_df = pd.read_json(path_or_buf="Outputs/NYT_Aug.json")
aug_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,{'abstract': 'The harmful molecules are everyw...,"{'abstract': 'When wildfires happen, these are...",{'abstract': 'Hyper-specific and unconventiona...,,,,,,,
1,{'abstract': 'The harmful molecules are everyw...,"{'abstract': 'When wildfires happen, these are...",{'abstract': 'Hyper-specific and unconventiona...,,,,,,,
2,{'abstract': 'The harmful molecules are everyw...,"{'abstract': 'When wildfires happen, these are...",{'abstract': 'Hyper-specific and unconventiona...,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [36]:
# Attach the ticker symbols as a new column.
# tickers_list[0] is the whole Ticker column (a Series), so pandas matches it to rows by index.
aug_df = aug_df.assign(Ticker=tickers_list[0])
aug_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Ticker
0,{'abstract': 'The harmful molecules are everyw...,"{'abstract': 'When wildfires happen, these are...",{'abstract': 'Hyper-specific and unconventiona...,,,,,,,,MMM
1,{'abstract': 'The harmful molecules are everyw...,"{'abstract': 'When wildfires happen, these are...",{'abstract': 'Hyper-specific and unconventiona...,,,,,,,,AOS
2,{'abstract': 'The harmful molecules are everyw...,"{'abstract': 'When wildfires happen, these are...",{'abstract': 'Hyper-specific and unconventiona...,,,,,,,,ABT
3,,,,,,,,,,,ABBV
4,,,,,,,,,,,ABMD


In [37]:
# Narrow down to only the needed rows: the Ticker column was attached by index, so any
# row beyond the ticker list has no ticker (NaN). Filtering on that replaces the previous
# hard-coded row count (502) and keeps working if the number of tickers changes.
new_aug_df = aug_df.loc[aug_df["Ticker"].notna()]

In [38]:
# Keep just the ticker and the first returned article (column 0)
aug_columns = new_aug_df.loc[:, ["Ticker", 0]]
aug_columns.head()

Unnamed: 0,Ticker,0
0,MMM,{'abstract': 'The harmful molecules are everyw...
1,AOS,{'abstract': 'The harmful molecules are everyw...
2,ABT,{'abstract': 'The harmful molecules are everyw...
3,ABBV,
4,ABMD,


In [39]:
# Give the first-article column a descriptive name ("Ticker" is already named correctly)
aug_renamed = aug_columns.rename(columns={0: "Article"})
aug_renamed.head()

Unnamed: 0,Ticker,Article
0,MMM,{'abstract': 'The harmful molecules are everyw...
1,AOS,{'abstract': 'The harmful molecules are everyw...
2,ABT,{'abstract': 'The harmful molecules are everyw...
3,ABBV,
4,ABMD,


In [40]:
# Expand the Article dictionaries into one column per article field:
# to_dict() keys each article by its row index, DataFrame(...).T turns that back into
# one row per index, and assign(**...) adds every field as a column next to Ticker.
aug_article_fields = pd.DataFrame(aug_renamed["Article"].to_dict()).T
aug_series = aug_renamed.loc[:, ["Ticker"]].assign(**aug_article_fields)
aug_series

Unnamed: 0,Ticker,abstract,web_url,snippet,lead_paragraph,print_section,print_page,source,multimedia,headline,...,pub_date,document_type,news_desk,section_name,byline,type_of_material,_id,word_count,uri,subsection_name
0,MMM,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,"The harmful molecules are everywhere, but chem...","A team of scientists has found a cheap, effect...",D,3,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Forever Chemicals No More? PFAS Are ...,...,2022-08-18T18:00:08+0000,article,Science,Science,"{'original': 'By Carl Zimmer', 'person': [{'fi...",News,nyt://article/1940831c-d4df-5d59-8f29-3cac12a3...,1332,nyt://article/1940831c-d4df-5d59-8f29-3cac12a3...,
1,AOS,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,"The harmful molecules are everywhere, but chem...","A team of scientists has found a cheap, effect...",D,3,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Forever Chemicals No More? PFAS Are ...,...,2022-08-18T18:00:08+0000,article,Science,Science,"{'original': 'By Carl Zimmer', 'person': [{'fi...",News,nyt://article/1940831c-d4df-5d59-8f29-3cac12a3...,1332,nyt://article/1940831c-d4df-5d59-8f29-3cac12a3...,
2,ABT,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,"The harmful molecules are everywhere, but chem...","A team of scientists has found a cheap, effect...",D,3,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Forever Chemicals No More? PFAS Are ...,...,2022-08-18T18:00:08+0000,article,Science,Science,"{'original': 'By Carl Zimmer', 'person': [{'fi...",News,nyt://article/1940831c-d4df-5d59-8f29-3cac12a3...,1332,nyt://article/1940831c-d4df-5d59-8f29-3cac12a3...,
3,ABBV,,,,,,,,,,...,,,,,,,,,,
4,ABMD,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,YUM,The star fashion editor has reinvented British...,https://www.nytimes.com/2022/08/26/style/edwar...,The star fashion editor has reinvented British...,"LONDON — Edward Enninful, the editor of Britis...",ST,8,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'Edward Enninful Also Wears Prada', '...",...,2022-08-26T21:10:19+0000,article,Styles,Style,"{'original': 'By Maureen Dowd', 'person': [{'f...",News,nyt://article/21336f2f-b633-5e7f-9fa7-08acbb04...,4668,nyt://article/21336f2f-b633-5e7f-9fa7-08acbb04...,
498,ZBRA,,,,,,,,,,...,,,,,,,,,,
499,ZBH,,,,,,,,,,...,,,,,,,,,,
500,ZION,,,,,,,,,,...,,,,,,,,,,


In [41]:
# Keep the ticker plus the article fields used in the following cells
aug_dropped = aug_series.loc[:, ["Ticker", "headline", "abstract", "web_url", "pub_date"]]
aug_dropped.head()

Unnamed: 0,Ticker,headline,abstract,web_url,pub_date
0,MMM,{'main': 'Forever Chemicals No More? PFAS Are ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,2022-08-18T18:00:08+0000
1,AOS,{'main': 'Forever Chemicals No More? PFAS Are ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,2022-08-18T18:00:08+0000
2,ABT,{'main': 'Forever Chemicals No More? PFAS Are ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,2022-08-18T18:00:08+0000
3,ABBV,,,,
4,ABMD,,,,


In [42]:
# Expand the headline dictionaries into one column per headline field,
# placed next to the ticker and the remaining article fields
aug_headline_fields = pd.DataFrame(aug_dropped["headline"].to_dict()).T
aug_headlines = (
    aug_dropped.loc[:, ["Ticker", "abstract", "web_url", "pub_date"]]
    .assign(**aug_headline_fields)
)
aug_headlines.head()

Unnamed: 0,Ticker,abstract,web_url,pub_date,main,kicker,content_kicker,print_headline,name,seo,sub
0,MMM,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,2022-08-18T18:00:08+0000,Forever Chemicals No More? PFAS Are Destroyed ...,Matter,,Fighting Forever Chemicals With Chemicals,,,
1,AOS,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,2022-08-18T18:00:08+0000,Forever Chemicals No More? PFAS Are Destroyed ...,Matter,,Fighting Forever Chemicals With Chemicals,,,
2,ABT,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...,2022-08-18T18:00:08+0000,Forever Chemicals No More? PFAS Are Destroyed ...,Matter,,Fighting Forever Chemicals With Chemicals,,,
3,ABBV,,,,,,,,,,
4,ABMD,,,,,,,,,,


In [43]:
# Keep the ticker, main headline, abstract and URL, in that order.
# pub_date is dropped because the date already sits in a readable spot near the start of web_url.
aug_fixed = aug_headlines.loc[:, ["Ticker", "main", "abstract", "web_url"]]
aug_fixed.head()

Unnamed: 0,Ticker,main,abstract,web_url
0,MMM,Forever Chemicals No More? PFAS Are Destroyed ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...
1,AOS,Forever Chemicals No More? PFAS Are Destroyed ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...
2,ABT,Forever Chemicals No More? PFAS Are Destroyed ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...
3,ABBV,,,
4,ABMD,,,


In [44]:
# Give the remaining columns their final display names ("Ticker" already has its final name)
aug_final = aug_fixed.rename(
    columns={"main": "Headline", "abstract": "Abstract", "web_url": "Web_URL"}
)
aug_final.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,Forever Chemicals No More? PFAS Are Destroyed ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...
1,AOS,Forever Chemicals No More? PFAS Are Destroyed ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...
2,ABT,Forever Chemicals No More? PFAS Are Destroyed ...,"The harmful molecules are everywhere, but chem...",https://www.nytimes.com/2022/08/18/science/pfa...
3,ABBV,,,
4,ABMD,,,


### July

In [45]:
# Load the raw July NYT results
july_df = pd.read_json(path_or_buf="Outputs/NYT_July.json")
july_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,{'abstract': 'A Times investigation shows how ...,,,,,,,,,
1,{'abstract': 'A Times investigation shows how ...,,,,,,,,,
2,{'abstract': 'A Times investigation shows how ...,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [46]:
# Attach the ticker symbols as a new column.
# tickers_list[0] is the whole Ticker column (a Series), so pandas matches it to rows by index.
july_df = july_df.assign(Ticker=tickers_list[0])
july_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Ticker
0,{'abstract': 'A Times investigation shows how ...,,,,,,,,,,MMM
1,{'abstract': 'A Times investigation shows how ...,,,,,,,,,,AOS
2,{'abstract': 'A Times investigation shows how ...,,,,,,,,,,ABT
3,,,,,,,,,,,ABBV
4,,,,,,,,,,,ABMD


In [47]:
# Narrow down to only the needed rows: the Ticker column was attached by index, so any
# row beyond the ticker list has no ticker (NaN). Filtering on that replaces the previous
# hard-coded row count (502) and keeps working if the number of tickers changes.
new_july_df = july_df.loc[july_df["Ticker"].notna()]

In [48]:
# Keep just the ticker and the first returned article (column 0)
july_columns = new_july_df.loc[:, ["Ticker", 0]]
july_columns.head()

Unnamed: 0,Ticker,0
0,MMM,{'abstract': 'A Times investigation shows how ...
1,AOS,{'abstract': 'A Times investigation shows how ...
2,ABT,{'abstract': 'A Times investigation shows how ...
3,ABBV,
4,ABMD,


In [49]:
# Give the first-article column a descriptive name ("Ticker" is already named correctly)
july_renamed = july_columns.rename(columns={0: "Article"})
july_renamed.head()

Unnamed: 0,Ticker,Article
0,MMM,{'abstract': 'A Times investigation shows how ...
1,AOS,{'abstract': 'A Times investigation shows how ...
2,ABT,{'abstract': 'A Times investigation shows how ...
3,ABBV,
4,ABMD,


In [50]:
# Expand the Article dictionaries into one column per article field:
# to_dict() keys each article by its row index, DataFrame(...).T turns that back into
# one row per index, and assign(**...) adds every field as a column next to Ticker.
july_article_fields = pd.DataFrame(july_renamed["Article"].to_dict()).T
july_series = july_renamed.loc[:, ["Ticker"]].assign(**july_article_fields)
july_series

Unnamed: 0,Ticker,abstract,web_url,snippet,lead_paragraph,print_section,print_page,source,multimedia,headline,...,pub_date,document_type,news_desk,section_name,byline,type_of_material,_id,word_count,uri,subsection_name
0,MMM,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,A Times investigation shows how a New York Cit...,A Times investigation shows how a New York Cit...,A,15,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'The Chain of Failures That Left 17 D...,...,2022-07-08T20:05:41+0000,multimedia,Graphics,New York,"{'original': 'By Anjali Singhvi, James Glanz, ...",Interactive Feature,nyt://interactive/c6517026-ac72-589c-a1e9-e195...,0,nyt://interactive/c6517026-ac72-589c-a1e9-e195...,
1,AOS,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,A Times investigation shows how a New York Cit...,A Times investigation shows how a New York Cit...,A,15,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'The Chain of Failures That Left 17 D...,...,2022-07-08T20:05:41+0000,multimedia,Graphics,New York,"{'original': 'By Anjali Singhvi, James Glanz, ...",Interactive Feature,nyt://interactive/c6517026-ac72-589c-a1e9-e195...,0,nyt://interactive/c6517026-ac72-589c-a1e9-e195...,
2,ABT,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,A Times investigation shows how a New York Cit...,A Times investigation shows how a New York Cit...,A,15,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'The Chain of Failures That Left 17 D...,...,2022-07-08T20:05:41+0000,multimedia,Graphics,New York,"{'original': 'By Anjali Singhvi, James Glanz, ...",Interactive Feature,nyt://interactive/c6517026-ac72-589c-a1e9-e195...,0,nyt://interactive/c6517026-ac72-589c-a1e9-e195...,
3,ABBV,,,,,,,,,,...,,,,,,,,,,
4,ABMD,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,YUM,Did tech win the pandemic or not? We likely wo...,https://www.nytimes.com/2022/07/28/technology/...,Did tech win the pandemic or not? We likely wo...,"More than two years into Pandemic Times, techn...",B,6,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'The Word of the Year Is ‘Uncertainty...,...,2022-07-28T16:04:00+0000,article,Business,Technology,"{'original': 'By Shira Ovide', 'person': [{'fi...",News,nyt://article/8b053cbe-4125-5f8a-bdca-6b091710...,1347,nyt://article/8b053cbe-4125-5f8a-bdca-6b091710...,
498,ZBRA,,,,,,,,,,...,,,,,,,,,,
499,ZBH,,,,,,,,,,...,,,,,,,,,,
500,ZION,,,,,,,,,,...,,,,,,,,,,


In [51]:
# Keep the ticker plus the four article fields used by the following cells

july_dropped = july_series.loc[:, ["Ticker", "headline", "abstract", "web_url", "pub_date"]]
july_dropped.head()

Unnamed: 0,Ticker,headline,abstract,web_url,pub_date
0,MMM,{'main': 'The Chain of Failures That Left 17 D...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,2022-07-08T20:05:41+0000
1,AOS,{'main': 'The Chain of Failures That Left 17 D...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,2022-07-08T20:05:41+0000
2,ABT,{'main': 'The Chain of Failures That Left 17 D...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,2022-07-08T20:05:41+0000
3,ABBV,,,,
4,ABMD,,,,


In [52]:
# Expand each headline dict into its own columns (one column per dict key),
# placed alongside the remaining article fields.

july_headlines = (
    july_dropped
    .loc[:, ["Ticker", "abstract", "web_url", "pub_date"]]
    .assign(**pd.DataFrame(july_dropped["headline"].to_dict()).transpose())
)
july_headlines.head()

Unnamed: 0,Ticker,abstract,web_url,pub_date,main,kicker,content_kicker,print_headline,name,seo,sub
0,MMM,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,2022-07-08T20:05:41+0000,The Chain of Failures That Left 17 Dead in a B...,,,A Chain of Catastrophic Safety Failures That K...,,,
1,AOS,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,2022-07-08T20:05:41+0000,The Chain of Failures That Left 17 Dead in a B...,,,A Chain of Catastrophic Safety Failures That K...,,,
2,ABT,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...,2022-07-08T20:05:41+0000,The Chain of Failures That Left 17 Dead in a B...,,,A Chain of Catastrophic Safety Failures That K...,,,
3,ABBV,,,,,,,,,,
4,ABMD,,,,,,,,,,


In [53]:
# Keep only the fields needed for the final output, in display order.
# pub_date is dropped because the date is already readable near the start of web_url.

july_fixed = july_headlines.loc[:, ["Ticker", "main", "abstract", "web_url"]]
july_fixed.head()

Unnamed: 0,Ticker,main,abstract,web_url
0,MMM,The Chain of Failures That Left 17 Dead in a B...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...
1,AOS,The Chain of Failures That Left 17 Dead in a B...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...
2,ABT,The Chain of Failures That Left 17 Dead in a B...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...
3,ABBV,,,
4,ABMD,,,


In [54]:
# Rename the article columns to their presentation names ('Ticker' keeps its name)

july_final = july_fixed.rename(
    columns={"main": "Headline", "abstract": "Abstract", "web_url": "Web_URL"}
)
july_final.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,The Chain of Failures That Left 17 Dead in a B...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...
1,AOS,The Chain of Failures That Left 17 Dead in a B...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...
2,ABT,The Chain of Failures That Left 17 Dead in a B...,A Times investigation shows how a New York Cit...,https://www.nytimes.com/interactive/2022/07/08...
3,ABBV,,,
4,ABMD,,,


### June

In [55]:
# Load the June NYT results from disk

june_df = pd.read_json(path_or_buf="Outputs/NYT_June.json")
june_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,{'abstract': 'A risky compensation plan at Tes...,,,,,,,,,
1,{'abstract': 'A risky compensation plan at Tes...,,,,,,,,,
2,{'abstract': 'A risky compensation plan at Tes...,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [56]:
# Attach the ticker symbols as a new "Ticker" column
# NOTE(review): relies on tickers_list[0] lining up with the rows of june_df — confirm.

june_df = june_df.assign(Ticker=tickers_list[0])
june_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Ticker
0,{'abstract': 'A risky compensation plan at Tes...,,,,,,,,,,MMM
1,{'abstract': 'A risky compensation plan at Tes...,,,,,,,,,,AOS
2,{'abstract': 'A risky compensation plan at Tes...,,,,,,,,,,ABT
3,,,,,,,,,,,ABBV
4,,,,,,,,,,,ABMD


In [57]:
# Keep at most the first 502 rows
# NOTE(review): 502 is a hard-coded cap — confirm it matches the intended number of tickers.

new_june_df = june_df.iloc[:502]

In [58]:
# Keep only the ticker and the first article column (label 0)

june_columns = new_june_df.loc[:, ["Ticker", 0]]
june_columns.head()

Unnamed: 0,Ticker,0
0,MMM,{'abstract': 'A risky compensation plan at Tes...
1,AOS,{'abstract': 'A risky compensation plan at Tes...
2,ABT,{'abstract': 'A risky compensation plan at Tes...
3,ABBV,
4,ABMD,


In [59]:
# Give the article column (label 0) a descriptive name; 'Ticker' keeps its name

june_renamed = june_columns.rename(columns={0: "Article"})
june_renamed.head()

Unnamed: 0,Ticker,Article
0,MMM,{'abstract': 'A risky compensation plan at Tes...
1,AOS,{'abstract': 'A risky compensation plan at Tes...
2,ABT,{'abstract': 'A risky compensation plan at Tes...
3,ABBV,
4,ABMD,


In [60]:
# Expand each article dict into its own columns (one column per dict key),
# keeping the ticker as the leading column.

june_series = (
    june_renamed
    .loc[:, ["Ticker"]]
    .assign(**pd.DataFrame(june_renamed["Article"].to_dict()).transpose())
)
june_series

Unnamed: 0,Ticker,abstract,web_url,snippet,lead_paragraph,source,multimedia,headline,keywords,pub_date,...,news_desk,section_name,subsection_name,byline,type_of_material,_id,word_count,uri,print_section,print_page
0,MMM,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,A risky compensation plan at Tesla paid off an...,"In 2018, Elon Musk unveiled a groundbreaking c...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Elon Musk Fuels Record C.E.O. Payday...,"[{'name': 'persons', 'value': 'Abrams, Stacey ...",2022-06-27T13:27:50+0000,...,Business,Business Day,DealBook,"{'original': 'By Andrew Ross Sorkin, Vivian Gi...",News,nyt://article/a6ccea76-3ab7-5e6d-b057-c4b5bb9a...,1805,nyt://article/a6ccea76-3ab7-5e6d-b057-c4b5bb9a...,,
1,AOS,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,A risky compensation plan at Tesla paid off an...,"In 2018, Elon Musk unveiled a groundbreaking c...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Elon Musk Fuels Record C.E.O. Payday...,"[{'name': 'persons', 'value': 'Abrams, Stacey ...",2022-06-27T13:27:50+0000,...,Business,Business Day,DealBook,"{'original': 'By Andrew Ross Sorkin, Vivian Gi...",News,nyt://article/a6ccea76-3ab7-5e6d-b057-c4b5bb9a...,1805,nyt://article/a6ccea76-3ab7-5e6d-b057-c4b5bb9a...,,
2,ABT,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,A risky compensation plan at Tesla paid off an...,"In 2018, Elon Musk unveiled a groundbreaking c...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Elon Musk Fuels Record C.E.O. Payday...,"[{'name': 'persons', 'value': 'Abrams, Stacey ...",2022-06-27T13:27:50+0000,...,Business,Business Day,DealBook,"{'original': 'By Andrew Ross Sorkin, Vivian Gi...",News,nyt://article/a6ccea76-3ab7-5e6d-b057-c4b5bb9a...,1805,nyt://article/a6ccea76-3ab7-5e6d-b057-c4b5bb9a...,,
3,ABBV,,,,,,,,,,...,,,,,,,,,,
4,ABMD,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,YUM,No cryptocurrency investor has been spared the...,https://www.nytimes.com/2022/06/29/technology/...,No cryptocurrency investor has been spared the...,"ENGLEWOOD, Colo. — The cryptocurrency market w...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Crypto Crash Widens a Divide: ‘Those...,"[{'name': 'subject', 'value': 'Virtual Currenc...",2022-06-29T09:00:47+0000,...,Business,Technology,,"{'original': 'By David Yaffe-Bellany', 'person...",News,nyt://article/58df07ea-445d-5582-8080-6fef9477...,1452,nyt://article/58df07ea-445d-5582-8080-6fef9477...,B,1
498,ZBRA,,,,,,,,,,...,,,,,,,,,,
499,ZBH,,,,,,,,,,...,,,,,,,,,,
500,ZION,,,,,,,,,,...,,,,,,,,,,


In [61]:
# Keep the ticker plus the four article fields used by the following cells

june_dropped = june_series.loc[:, ["Ticker", "headline", "abstract", "web_url", "pub_date"]]
june_dropped.head()

Unnamed: 0,Ticker,headline,abstract,web_url,pub_date
0,MMM,{'main': 'Elon Musk Fuels Record C.E.O. Payday...,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,2022-06-27T13:27:50+0000
1,AOS,{'main': 'Elon Musk Fuels Record C.E.O. Payday...,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,2022-06-27T13:27:50+0000
2,ABT,{'main': 'Elon Musk Fuels Record C.E.O. Payday...,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,2022-06-27T13:27:50+0000
3,ABBV,,,,
4,ABMD,,,,


In [62]:
# Expand each headline dict into its own columns (one column per dict key),
# placed alongside the remaining article fields.

june_headlines = (
    june_dropped
    .loc[:, ["Ticker", "abstract", "web_url", "pub_date"]]
    .assign(**pd.DataFrame(june_dropped["headline"].to_dict()).transpose())
)
june_headlines.head()

Unnamed: 0,Ticker,abstract,web_url,pub_date,main,kicker,content_kicker,print_headline,name,seo,sub
0,MMM,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,2022-06-27T13:27:50+0000,Elon Musk Fuels Record C.E.O. Paydays,DealBook Newsletter,,,,,
1,AOS,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,2022-06-27T13:27:50+0000,Elon Musk Fuels Record C.E.O. Paydays,DealBook Newsletter,,,,,
2,ABT,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...,2022-06-27T13:27:50+0000,Elon Musk Fuels Record C.E.O. Paydays,DealBook Newsletter,,,,,
3,ABBV,,,,,,,,,,
4,ABMD,,,,,,,,,,


In [63]:
# Keep only the fields needed for the final output, in display order.
# pub_date is dropped because the date is already readable near the start of web_url.

june_fixed = june_headlines.loc[:, ["Ticker", "main", "abstract", "web_url"]]
june_fixed.head()

Unnamed: 0,Ticker,main,abstract,web_url
0,MMM,Elon Musk Fuels Record C.E.O. Paydays,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...
1,AOS,Elon Musk Fuels Record C.E.O. Paydays,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...
2,ABT,Elon Musk Fuels Record C.E.O. Paydays,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...
3,ABBV,,,
4,ABMD,,,


In [64]:
# Rename the article columns to their presentation names ('Ticker' keeps its name)

june_final = june_fixed.rename(
    columns={"main": "Headline", "abstract": "Abstract", "web_url": "Web_URL"}
)
june_final.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,Elon Musk Fuels Record C.E.O. Paydays,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...
1,AOS,Elon Musk Fuels Record C.E.O. Paydays,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...
2,ABT,Elon Musk Fuels Record C.E.O. Paydays,A risky compensation plan at Tesla paid off an...,https://www.nytimes.com/2022/06/27/business/de...
3,ABBV,,,
4,ABMD,,,


## Merging

In [65]:
# Prefer November's values; cells that are null in November are filled from October.
# NOTE(review): combine_first aligns on the row index and fills cell by cell, so this
# presumably relies on every monthly frame listing tickers in the same row order — confirm.
oct_nov = nov_final.combine_first(other=oct_final)
oct_nov.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,,,
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


In [66]:
# Fill the remaining null cells from September (the result now spans Sept-Nov)
sept_oct = oct_nov.combine_first(other=sept_final)
sept_oct.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


In [67]:
# Fill the remaining null cells from August (the result now spans Aug-Nov)
aug_sept = sept_oct.combine_first(other=aug_final)
aug_sept.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


In [68]:
# Fill the remaining null cells from July (the result now spans July-Nov)
july_aug = aug_sept.combine_first(other=july_final)
july_aug.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


In [69]:
# Fill the remaining null cells from June; this is the combined June-Nov article table
final_article_df = july_aug.combine_first(other=june_final)
final_article_df.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


### Final DataFrame

In [70]:
# Join the business information with the combined articles on the shared 'Ticker' column
almost_final_df = ticker_renamed.merge(final_article_df, on='Ticker')
almost_final_df.head()

Unnamed: 0,Ticker,Name,Locale,Primary_Exchange,Industry,Homepage,Headline,Abstract,Web_URL
0,MMM,3M Company,us,XNYS,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,https://www.3m.com,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,A.O. Smith Corporation,us,XNYS,HOUSEHOLD APPLIANCES,https://www.aosmith.com,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,Abbott Laboratories,us,XNYS,PHARMACEUTICAL PREPARATIONS,https://www.abbottinvestor.com,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,ABBVIE INC.,us,XNYS,PHARMACEUTICAL PREPARATIONS,http://www.abbvieinvestor.com,,,
4,ABMD,Abiomed Inc,us,XNAS,SURGICAL & MEDICAL INSTRUMENTS & APPARATUS,https://www.abiomed.com,,,


In [71]:
# Name, Locale, Primary_Exchange, Industry, and Homepage are already present in the
# merged stock-performance JSON, so only the ticker and article columns are kept here.

final_df = almost_final_df.loc[:, ['Ticker', 'Headline', 'Abstract', 'Web_URL']]
final_df.head()

Unnamed: 0,Ticker,Headline,Abstract,Web_URL
0,MMM,How One of the Country’s Most Storied C.E.O.s ...,Jack Welch spent his last few years regretting...,https://www.nytimes.com/2022/11/21/opinion/jac...
1,AOS,"Virginia Dwan, Behind-the-Scenes Force in the ...",At her galleries and with her financial resour...,https://www.nytimes.com/2022/09/18/arts/virgin...
2,ABT,A Championship Season in Mariachi Country,"Every year along the Texas border, high school...",https://www.nytimes.com/2022/11/03/magazine/ma...
3,ABBV,,,
4,ABMD,,,


## Export to JSON and Reload to Clean Again

In [72]:
# Serialize to a 'table'-oriented JSON string (schema + data), leaving out the row index
final_df_export = final_df.to_json(orient='table', index=False)
final_df_export

'{"schema":{"fields":[{"name":"Ticker","type":"string"},{"name":"Headline","type":"string"},{"name":"Abstract","type":"string"},{"name":"Web_URL","type":"string"}],"pandas_version":"0.20.0"},"data":[{"Ticker":"MMM","Headline":"How One of the Country\\u2019s Most Storied C.E.O.s Destroyed His Legacy","Abstract":"Jack Welch spent his last few years regretting the most important decision of his career.","Web_URL":"https:\\/\\/www.nytimes.com\\/2022\\/11\\/21\\/opinion\\/jack-welch-ge-jeff-immelt.html"},{"Ticker":"AOS","Headline":"Virginia Dwan, Behind-the-Scenes Force in the Art World, Dies at 90","Abstract":"At her galleries and with her financial resources, she supported artists working on art\\u2019s frontiers and later donated works to museum collections.","Web_URL":"https:\\/\\/www.nytimes.com\\/2022\\/09\\/18\\/arts\\/virginia-dwan-dead.html"},{"Ticker":"ABT","Headline":"A Championship Season in Mariachi Country","Abstract":"Every year along the Texas border, high school teams battl

In [76]:
# Persist the serialized table so it can be reloaded as plain JSON below
with open('Outputs/NYT_Final.json', mode='w') as out_file:
    out_file.write(final_df_export)

In [160]:
# Reload the exported file as plain Python objects
with open('Outputs/NYT_Final.json') as in_file:
    datas = json.loads(in_file.read())

# The article records live under the 'data' key of the table-oriented export
data = datas['data']

In [161]:
# Split each exported record into its ticker and its remaining article fields.
# The records in `data` are left untouched, so this cell can be re-run safely
# (popping 'Ticker' out of them would raise KeyError on a second run).
ticker_list = [record['Ticker'] for record in data]
record_list = [
    {field: value for field, value in record.items() if field != 'Ticker'}
    for record in data
]

In [163]:
# Build the 'Ticker' column from the tickers read back out of NYT_Final.json, so every
# row lines up with the matching entry in record_list (both come from the same records).
ticker_series = pd.DataFrame({'Ticker': ticker_list})
ticker_series

Unnamed: 0,Ticker
0,MMM
1,AOS
2,ABT
3,ABBV
4,ABMD
...,...
497,YUM
498,ZBRA
499,ZBH
500,ZION


In [164]:
# Attach the per-ticker article dicts as the 'Article' column. Plain assignment is used
# instead of DataFrame.insert so re-running the cell overwrites the column rather than
# raising "cannot insert Article, already exists".
ticker_series['Article'] = record_list


In [165]:
# Preview the first five ticker/article rows
ticker_series.head(n=5)

Unnamed: 0,Ticker,Article
0,MMM,{'Headline': 'How One of the Country’s Most St...
1,AOS,"{'Headline': 'Virginia Dwan, Behind-the-Scenes..."
2,ABT,{'Headline': 'A Championship Season in Mariach...
3,ABBV,"{'Headline': None, 'Abstract': None, 'Web_URL'..."
4,ABMD,"{'Headline': None, 'Abstract': None, 'Web_URL'..."


In [173]:
# Convert to a list with one {'Ticker': ..., 'Article': {...}} dict per row
to_json = ticker_series.to_dict('records')
to_json

[{'Ticker': 'MMM',
  'Article': {'Headline': 'How One of the Country’s Most Storied C.E.O.s Destroyed His Legacy',
   'Abstract': 'Jack Welch spent his last few years regretting the most important decision of his career.',
   'Web_URL': 'https://www.nytimes.com/2022/11/21/opinion/jack-welch-ge-jeff-immelt.html'}},
 {'Ticker': 'AOS',
  'Article': {'Headline': 'Virginia Dwan, Behind-the-Scenes Force in the Art World, Dies at 90',
   'Abstract': 'At her galleries and with her financial resources, she supported artists working on art’s frontiers and later donated works to museum collections.',
   'Web_URL': 'https://www.nytimes.com/2022/09/18/arts/virginia-dwan-dead.html'}},
 {'Ticker': 'ABT',
  'Article': {'Headline': 'A Championship Season in Mariachi Country',
   'Abstract': 'Every year along the Texas border, high school teams battle it out in one of the nation’s most intense championship rivalries. But they’re not playing football.',
   'Web_URL': 'https://www.nytimes.com/2022/11/03/m

In [176]:
# Write the nested ticker/article records to the cleaned-data folder
with open('../CleanedData/NYT_Cleaned_Final.json', mode='w') as out_file:
    out_file.write(json.dumps(to_json))