In [14]:
import pandas as pd

from pathlib import Path

from IPython.display import display

In [3]:
# data directory
data_dir = Path("../data")
collection_dir = data_dir / "collection"

# soad news sources
news_sources = pd.read_csv(data_dir / "news_sources.csv")

venue_names = news_sources["short_name"].tolist()
feed_urls = news_sources["feed_url"].tolist()

In [4]:
jsonl_files = list(collection_dir.glob("*.jsonl"))

## Compare the returned fields for each RSS feed

In [7]:
dfs = {}
for f in jsonl_files:
    df = pd.read_json(f, orient="records", lines=True)
    dfs[f.name.split(".")[0]] = df
    
columns = []
sources = []
for k, df in dfs.items():
    sources.append(k)
    columns.append(df.columns)
all_vals = set([j for i in columns for j in i])

In [8]:
compare_dfs = pd.DataFrame(columns=sources, index=all_vals)

for source, df in dfs.items():
    for v in all_vals:
        compare_dfs.loc[v, source] = v in df.columns

In [16]:
compare_dfs

Unnamed: 0,sciblogs,popsci,nyt,sciline,wired,guardian
link,True,True,True,True,True,True
credit,False,False,True,False,False,True
dc_modified,False,False,False,False,True,False
title,True,True,True,True,True,True
published_parsed,True,True,True,True,True,True
content,False,True,True,False,False,False
wfw_commentrss,False,False,False,True,False,False
media_content,False,False,True,False,True,True
href,False,False,False,False,True,False
author,True,True,True,True,True,True


In [12]:
compare_dfs[compare_dfs.all(axis=1)]

Unnamed: 0,sciblogs,popsci,nyt,sciline,wired,guardian
link,True,True,True,True,True,True
title,True,True,True,True,True,True
published_parsed,True,True,True,True,True,True
author,True,True,True,True,True,True
author_detail,True,True,True,True,True,True
links,True,True,True,True,True,True
guidislink,True,True,True,True,True,True
id,True,True,True,True,True,True
published,True,True,True,True,True,True
title_detail,True,True,True,True,True,True


## Processing Methodology

We mainly focus on fields that are available for all news sources.

- `id`
    - is different for each news source (URL, text field, unique id)
- `tags`
    - guardian actually provides tags related to subcategories
    - sciline, popsci, wired provide tags without a schema 
    - not available for sciblogs, however, we can extract a category from the fulltext
- `summary`
    - 5/6 provide a short summary indeed.
    - sciblogs actually returns the full article content as HTML

In [25]:
dfs["sciblogs"]["summary"].iloc[4][-100:]

'item"><a href="/channel/medicine" hreflang="en">Medicine</a></div>\n              </div>\n      </div>'

In [15]:
for k, df in dfs.items():
    print(k)
    display(df.head())

sciblogs


Unnamed: 0,title,title_detail,links,link,summary,summary_detail,published,published_parsed,authors,author,author_detail,id,guidislink
0,COVID-19: The Downside To More Testing Could B...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceblogs.com/conversation/2020/03/...,<span>COVID-19: The Downside To More Testing C...,"{'type': 'text/html', 'language': 'en', 'base'...",2020-03-31T02:25:10.000Z,"[2020, 3, 31, 2, 25, 10, 1, 91, 0]",[{'name': 'The Conversation'}],The Conversation,{'name': 'The Conversation'},151446 at https://scienceblogs.com,False
1,The Biology Of Why Coronavirus Is So Deadly,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceblogs.com/conversation/2020/04/...,<span>The Biology Of Why Coronavirus Is So Dea...,"{'type': 'text/html', 'language': 'en', 'base'...",2020-04-02T18:02:27.000Z,"[2020, 4, 2, 18, 2, 27, 3, 93, 0]",[{'name': 'The Conversation'}],The Conversation,{'name': 'The Conversation'},151447 at https://scienceblogs.com,False
2,The Yeast All Around Us,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceblogs.com/sb-admin/2020/05/11/y...,<span>The Yeast All Around Us</span>\n\n ...,"{'type': 'text/html', 'language': 'en', 'base'...",2020-05-11T15:54:57.000Z,"[2020, 5, 11, 15, 54, 57, 0, 132, 0]",[{'name': 'sb admin'}],sb admin,{'name': 'sb admin'},151448 at https://scienceblogs.com,False
3,The 'Uplift of the Tibetan Plateau' Myth,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceblogs.com/sb-admin/2020/05/12/u...,<span>The &#039;Uplift of the Tibetan Plateau&...,"{'type': 'text/html', 'language': 'en', 'base'...",2020-05-12T14:40:03.000Z,"[2020, 5, 12, 14, 40, 3, 1, 133, 0]",[{'name': 'sb admin'}],sb admin,{'name': 'sb admin'},151449 at https://scienceblogs.com,False
4,NVX-CoV2373: Here's How The Coronavirus Vaccin...,"{'type': 'text/plain', 'language': 'en', 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceblogs.com/sb-admin/2020/05/27/n...,<span>NVX-CoV2373: Here&#039;s How The Coronav...,"{'type': 'text/html', 'language': 'en', 'base'...",2020-05-27T14:18:04.000Z,"[2020, 5, 27, 14, 18, 4, 2, 148, 0]",[{'name': 'sb admin'}],sb admin,{'name': 'sb admin'},151450 at https://scienceblogs.com,False


popsci


Unnamed: 0,title,title_detail,links,link,summary,summary_detail,id,guidislink,authors,author,author_detail,tags,published,published_parsed,content
0,"Best document scanner: Wireless, handheld, and...","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.popsci.com/story/reviews/best-docu...,Buy the best document scanner for your office ...,"{'type': 'text/html', 'language': None, 'base'...",https://www.popsci.com/story/reviews/best-docu...,False,[{'name': 'PopSci Commerce Team'}],PopSci Commerce Team,{'name': 'PopSci Commerce Team'},"[{'term': 'Reviews', 'scheme': None, 'label': ...",2021-02-24T17:59:00.000Z,"[2021, 2, 24, 17, 59, 0, 2, 55, 0]","[{'type': 'text/html', 'language': None, 'base..."
1,Why are we still disinfecting surfaces to stop...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.popsci.com/story/health/covid-19-s...,The question isn't if COVID-19 can spread from...,"{'type': 'text/html', 'language': None, 'base'...",https://www.popsci.com/story/health/covid-19-s...,False,[{'name': 'By Hassan Vally/The Conversation'}],By Hassan Vally/The Conversation,{'name': 'By Hassan Vally/The Conversation'},"[{'term': 'Health', 'scheme': None, 'label': N...",2021-02-24T19:23:42.000Z,"[2021, 2, 24, 19, 23, 42, 2, 55, 0]","[{'type': 'text/html', 'language': None, 'base..."
2,Best laser printer: For the home or for your o...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.popsci.com/story/reviews/best-lase...,Are you looking for the best laser printer for...,"{'type': 'text/html', 'language': None, 'base'...",https://www.popsci.com/story/reviews/best-lase...,False,[{'name': 'PopSci Commerce Team'}],PopSci Commerce Team,{'name': 'PopSci Commerce Team'},"[{'term': 'Reviews', 'scheme': None, 'label': ...",2021-02-24T19:59:00.000Z,"[2021, 2, 24, 19, 59, 0, 2, 55, 0]","[{'type': 'text/html', 'language': None, 'base..."
3,This Airbus prototype could deploy drones from...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.popsci.com/story/technology/airbus...,Here's why militaries are interested in airpla...,"{'type': 'text/html', 'language': None, 'base'...",https://www.popsci.com/story/technology/airbus...,False,[{'name': 'Kelsey D. Atherton'}],Kelsey D. Atherton,{'name': 'Kelsey D. Atherton'},"[{'term': 'Technology', 'scheme': None, 'label...",2021-02-24T21:00:00.000Z,"[2021, 2, 24, 21, 0, 0, 2, 55, 0]","[{'type': 'text/html', 'language': None, 'base..."
4,No cell service? These are the best offline apps.,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.popsci.com/story/diy/best-offline-...,These apps are the best in the biz at offline ...,"{'type': 'text/html', 'language': None, 'base'...",https://www.popsci.com/story/diy/best-offline-...,False,[{'name': 'David Nield'}],David Nield,{'name': 'David Nield'},"[{'term': 'Diy', 'scheme': None, 'label': None}]",2021-02-25T13:00:00.000Z,"[2021, 2, 25, 13, 0, 0, 3, 56, 0]","[{'type': 'text/html', 'language': None, 'base..."


nyt


Unnamed: 0,title,title_detail,links,link,id,guidislink,summary,summary_detail,authors,author,author_detail,published,published_parsed,tags,media_content,media_credit,credit,content
0,Octopuses Have a Secret Sense to Keep Their 8 ...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nytimes.com/2021/02/20/science/oct...,https://www.nytimes.com/2021/02/20/science/oct...,False,Even when an octopus can’t see light with its ...,"{'type': 'text/html', 'language': None, 'base'...",[{'name': 'Richard Sima'}],Richard Sima,{'name': 'Richard Sima'},2021-02-20T10:00:13.000Z,"[2021, 2, 20, 10, 0, 13, 5, 51, 0]","[{'term': 'Light', 'scheme': 'http://www.nytim...",,,,
1,Things To Do At Home,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nytimes.com/2021/02/20/at-home/thi...,https://www.nytimes.com/2021/02/20/at-home/thi...,False,"This week, celebrate the life of Malcolm X, br...","{'type': 'text/html', 'language': None, 'base'...",[{'name': 'Emma Grillo and Danya Issawi'}],Emma Grillo and Danya Issawi,{'name': 'Emma Grillo and Danya Issawi'},2021-02-21T04:59:05.000Z,"[2021, 2, 21, 4, 59, 5, 6, 52, 0]","[{'term': 'Quarantine (Life and Culture)', 'sc...","[{'height': '151', 'medium': 'image', 'url': '...",[{'content': 'Miguel Porlan'}],Miguel Porlan,
2,Texas Blackouts Point to Coast-to-Coast Crises...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nytimes.com/2021/02/20/climate/uni...,https://www.nytimes.com/2021/02/20/climate/uni...,False,Continent-spanning storms triggered blackouts ...,"{'type': 'text/html', 'language': None, 'base'...","[{'name': 'Christopher Flavelle, Brad Plumer a...","Christopher Flavelle, Brad Plumer and Hiroko T...","{'name': 'Christopher Flavelle, Brad Plumer an...",2021-02-21T17:44:08.000Z,"[2021, 2, 21, 17, 44, 8, 6, 52, 0]","[{'term': 'Power Failures and Blackouts', 'sch...","[{'height': '151', 'medium': 'image', 'url': '...",[{'content': 'Joe Raedle/Getty Images'}],Joe Raedle/Getty Images,"[{'type': 'text/plain', 'language': None, 'bas..."
3,Seven Hundred Leagues Beneath Titan’s Methane ...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nytimes.com/2021/02/21/science/sat...,https://www.nytimes.com/2021/02/21/science/sat...,False,"Mars, Shmars; this voyager is looking forward ...","{'type': 'text/html', 'language': None, 'base'...",[{'name': 'Dennis Overbye'}],Dennis Overbye,{'name': 'Dennis Overbye'},2021-02-21T22:21:34.000Z,"[2021, 2, 21, 22, 21, 34, 6, 52, 0]","[{'term': 'Space and Astronomy', 'scheme': 'ht...","[{'height': '151', 'medium': 'image', 'url': '...",[{'content': 'NASA/JPL-Caltech/Space Science I...,NASA/JPL-Caltech/Space Science Institute,
4,She Beat Cancer at 10. Now She'll Join SpaceX'...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nytimes.com/2021/02/22/science/spa...,https://www.nytimes.com/2021/02/22/science/spa...,False,"St. Jude Hospital and Jared Isaacman, a billio...","{'type': 'text/html', 'language': None, 'base'...",[{'name': 'Kenneth Chang'}],Kenneth Chang,{'name': 'Kenneth Chang'},2021-02-22T18:10:23.000Z,"[2021, 2, 22, 18, 10, 23, 0, 53, 0]","[{'term': 'Isaacman, Jared (1983- )', 'scheme'...",,,,


sciline


Unnamed: 0,title,title_detail,links,link,comments,authors,author,author_detail,published,published_parsed,tags,id,guidislink,summary,summary_detail,wfw_commentrss,slash_comments
0,Roundup may harm honeybee gut health,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceline.org/2021/01/roundup-may-ha...,https://scienceline.org/2021/01/roundup-may-ha...,[{'name': 'Casey Crownhart'}],Casey Crownhart,{'name': 'Casey Crownhart'},2021-01-25T13:00:45.000Z,"[2021, 1, 25, 13, 0, 45, 0, 25, 0]","[{'term': 'Environment', 'scheme': None, 'labe...",https://scienceline.org/?p=32739,False,<p>The popular weed killer could impair honeyb...,"{'type': 'text/html', 'language': None, 'base'...",https://scienceline.org/2021/01/roundup-may-ha...,0
1,Terra-farming: A possible step toward growing ...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceline.org/2021/01/terra-farming-...,https://scienceline.org/2021/01/terra-farming-...,[{'name': 'Jackie Appel'}],Jackie Appel,{'name': 'Jackie Appel'},2021-01-27T13:00:19.000Z,"[2021, 1, 27, 13, 0, 19, 2, 27, 0]","[{'term': 'Space, Physics, and Math', 'scheme'...",https://scienceline.org/?p=32916,False,<p>Scientists recently developed a substance t...,"{'type': 'text/html', 'language': None, 'base'...",https://scienceline.org/2021/01/terra-farming-...,4
2,Death of a sourdough,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceline.org/2021/01/death-of-a-sou...,https://scienceline.org/2021/01/death-of-a-sou...,[{'name': 'Ethan Freedman'}],Ethan Freedman,{'name': 'Ethan Freedman'},2021-01-28T17:00:47.000Z,"[2021, 1, 28, 17, 0, 47, 3, 28, 0]","[{'term': 'Audio', 'scheme': None, 'label': No...",https://scienceline.org/?p=32943,False,<p>How a neglected sourdough starter can go fr...,"{'type': 'text/html', 'language': None, 'base'...",https://scienceline.org/2021/01/death-of-a-sou...,0
3,A little blue pill that protects you from HIV ...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceline.org/2021/01/a-little-blue-...,https://scienceline.org/2021/01/a-little-blue-...,[{'name': 'Karen Kwon'}],Karen Kwon,{'name': 'Karen Kwon'},2021-01-29T13:00:39.000Z,"[2021, 1, 29, 13, 0, 39, 4, 29, 0]","[{'term': 'Health', 'scheme': None, 'label': N...",https://scienceline.org/?p=32934,False,<p>PrEP is close to 99% effective in preventin...,"{'type': 'text/html', 'language': None, 'base'...",https://scienceline.org/2021/01/a-little-blue-...,0
4,Novel Science: Steampunk and the history of pr...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://scienceline.org/2021/02/novel-science-...,https://scienceline.org/2021/02/novel-science-...,[{'name': 'Joanna Thompson'}],Joanna Thompson,{'name': 'Joanna Thompson'},2021-02-01T13:00:05.000Z,"[2021, 2, 1, 13, 0, 5, 0, 32, 0]","[{'term': 'Novel Science', 'scheme': None, 'la...",https://scienceline.org/?p=32975,False,"<p>Reimagining the past, now with 40% more gea...","{'type': 'text/html', 'language': None, 'base'...",https://scienceline.org/2021/02/novel-science-...,0


wired


Unnamed: 0,title,title_detail,links,link,id,guidislink,published,published_parsed,media_content,summary,...,tags,media_keywords,authors,author,author_detail,dc_modified,publisher,publisher_detail,media_thumbnail,href
0,The AI Research Paper Was Real. The ‘Coauthor’...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.wired.com/story/ai-research-paper-...,602e965d0498dc83a227e612,False,2021-02-19T12:00:00.000Z,"[2021, 2, 19, 12, 0, 0, 4, 50, 0]",[{}],An IBM researcher found his name on two papers...,...,"[{'term': 'Business', 'scheme': None, 'label':...","science, academia, research, artificial intell...",[{'name': 'Will Knight'}],Will Knight,{'name': 'Will Knight'},"Sat, 20 Feb 2021 18:52:44 +0000",Condé Nast,{'name': 'Condé Nast'},[{'url': 'https://media.wired.com/photos/602f1...,
1,"As Coronavirus Variants Spread, the US Struggl...","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.wired.com/story/as-coronavirus-var...,602d75c05b23975cb700ff5e,False,2021-02-19T12:00:00.000Z,"[2021, 2, 19, 12, 0, 0, 4, 50, 0]",[{}],The nation is a sequencing superpower. But wit...,...,"[{'term': 'Science', 'scheme': None, 'label': ...","coronavirus, COVID-19, Genomics, surveillance,...",[{'name': 'Megan Molteni'}],Megan Molteni,{'name': 'Megan Molteni'},"Mon, 22 Feb 2021 16:54:32 +0000",Condé Nast,{'name': 'Condé Nast'},[{'url': 'https://media.wired.com/photos/602ef...,
2,"Storm Delays for Vaccines, Expanded Sequencing...","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.wired.com/story/storm-delays-vacci...,602fd0a644e48281d964ca29,False,2021-02-19T20:09:42.000Z,"[2021, 2, 19, 20, 9, 42, 4, 50, 0]",[{}],Catch up on the most important updates from th...,...,"[{'term': 'Science', 'scheme': None, 'label': ...","COVID-19, coronavirus, vaccines",[{'name': 'Eve Sneider'}],Eve Sneider,{'name': 'Eve Sneider'},"Fri, 19 Feb 2021 22:11:49 +0000",Condé Nast,{'name': 'Condé Nast'},[{'url': 'https://media.wired.com/photos/602fd...,
3,Why France’s New Tech ‘Repairability Index’ Is...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.wired.com/story/frances-new-tech-r...,602c39295b23975cb700ff59,False,2021-02-20T13:00:00.000Z,"[2021, 2, 20, 13, 0, 0, 5, 51, 0]",[{}],"Liberté, égalité, reparabilité.",...,"[{'term': 'Science', 'scheme': None, 'label': ...","Climate Desk, recycling, trash, electronics, m...",[{'name': 'Maddie Stone'}],Maddie Stone,{'name': 'Maddie Stone'},"Fri, 19 Feb 2021 15:33:35 +0000",Condé Nast,{'name': 'Condé Nast'},[{'url': 'https://media.wired.com/photos/602fd...,
4,Can Hamburger Buns Save Your Pipes from Freezing?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.wired.com/story/can-hamburger-buns...,60302c7ef5a63ce68f38a76f,False,2021-02-20T14:00:00.000Z,"[2021, 2, 20, 14, 0, 0, 5, 51, 0]",[{}],"Water expands when it freezes, and that’s bad ...",...,"[{'term': 'Science', 'scheme': None, 'label': ...","Dot Physics, temperature, water, extreme weath...",[{'name': 'Rhett Allain'}],Rhett Allain,{'name': 'Rhett Allain'},"Sat, 20 Feb 2021 05:48:31 +0000",Condé Nast,{'name': 'Condé Nast'},[{'url': 'https://media.wired.com/photos/60305...,


guardian


Unnamed: 0,title,title_detail,links,link,summary,summary_detail,tags,published,published_parsed,id,guidislink,media_content,media_credit,credit,authors,author,author_detail,updated,updated_parsed
0,Can you solve it? Think of a number,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.theguardian.com/science/2021/feb/0...,<p>A new twist on the all time classic maths t...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Mathematics', 'scheme': 'https://ww...",2021-02-08T07:12:18.000Z,"[2021, 2, 8, 7, 12, 18, 0, 39, 0]",https://www.theguardian.com/science/2021/feb/0...,False,"[{'width': '140', 'url': 'https://i.guim.co.uk...","[{'scheme': 'urn:ebu', 'content': 'Photograph:...",Photograph: ITV/REX,[{'name': 'Alex Bellos'}],Alex Bellos,{'name': 'Alex Bellos'},2021-02-08T07:12:18Z,"[2021, 2, 8, 7, 12, 18, 0, 39, 0]"
1,Did you solve it? Think of a number,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.theguardian.com/science/2021/feb/0...,<p>The solution to today’s Q&amp;A puzzle</p><...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Mathematics', 'scheme': 'https://ww...",2021-02-08T17:00:48.000Z,"[2021, 2, 8, 17, 0, 48, 0, 39, 0]",https://www.theguardian.com/science/2021/feb/0...,False,"[{'width': '140', 'url': 'https://i.guim.co.uk...","[{'scheme': 'urn:ebu', 'content': 'Photograph:...",Photograph: Bernat Armangue/AP,[{'name': 'Alex Bellos'}],Alex Bellos,{'name': 'Alex Bellos'},2021-02-08T17:00:48Z,"[2021, 2, 8, 17, 0, 48, 0, 39, 0]"
2,Covid-19: love in lockdown – podcast,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.theguardian.com/science/audio/2021...,"<p>Valentine’s Day is fast approaching, and fo...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Science', 'scheme': 'https://www.th...",2021-02-11T09:40:35.000Z,"[2021, 2, 11, 9, 40, 35, 3, 42, 0]",https://www.theguardian.com/science/audio/2021...,False,"[{'width': '140', 'url': 'https://i.guim.co.uk...","[{'scheme': 'urn:ebu', 'content': 'Photograph:...",Photograph: Action Press/REX/Shutterstock,[{'name': 'Presented by Linda Geddes and produ...,Presented by Linda Geddes and produced by Made...,{'name': 'Presented by Linda Geddes and produc...,2021-02-11T09:40:35Z,"[2021, 2, 11, 9, 40, 35, 3, 42, 0]"
3,New Covid variant with potentially worrying mu...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.theguardian.com/world/2021/feb/15/...,<p>Researchers say 32 cases of B1525 in Britai...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Coronavirus', 'scheme': 'https://ww...",2021-02-15T16:48:44.000Z,"[2021, 2, 15, 16, 48, 44, 0, 46, 0]",https://www.theguardian.com/world/2021/feb/15/...,False,"[{'width': '140', 'url': 'https://i.guim.co.uk...","[{'scheme': 'urn:ebu', 'content': 'Photograph:...",Photograph: Ammar Awad/Reuters,[{'name': 'Nicola Davis Science correspondent'}],Nicola Davis Science correspondent,{'name': 'Nicola Davis Science correspondent'},2021-02-15T16:48:44Z,"[2021, 2, 15, 16, 48, 44, 0, 46, 0]"
4,Covid-19: why mix and match vaccines? – podcast,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.theguardian.com/science/audio/2021...,<p>The Com-Cov trial run by the Oxford Vaccine...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Science', 'scheme': 'https://www.th...",2021-02-16T05:00:41.000Z,"[2021, 2, 16, 5, 0, 41, 1, 47, 0]",https://www.theguardian.com/science/audio/2021...,False,"[{'width': '140', 'url': 'https://i.guim.co.uk...","[{'scheme': 'urn:ebu', 'content': 'Photograph:...",Photograph: Thomas Samson/AP,[{'name': 'Presented by Sarah Boseley and prod...,Presented by Sarah Boseley and produced by Tif...,{'name': 'Presented by Sarah Boseley and produ...,2021-02-16T05:00:41Z,"[2021, 2, 16, 5, 0, 41, 1, 47, 0]"


In [33]:
for k, df in dfs.items():
    print(k)
    try:
        print(df["tags"].iloc[0])
    except:
        pass
    print("==========")

sciblogs
popsci
[{'term': 'Reviews', 'scheme': None, 'label': None}]
nyt
[{'term': 'Light', 'scheme': 'http://www.nytimes.com/namespaces/keywords/des', 'label': None}, {'term': 'Research', 'scheme': 'http://www.nytimes.com/namespaces/keywords/des', 'label': None}, {'term': 'Octopus', 'scheme': 'http://www.nytimes.com/namespaces/keywords/des', 'label': None}, {'term': 'Animal Behavior', 'scheme': 'http://www.nytimes.com/namespaces/keywords/des', 'label': None}, {'term': 'Animal Cognition', 'scheme': 'http://www.nytimes.com/namespaces/keywords/des', 'label': None}, {'term': 'Journal of Experimental Biology', 'scheme': 'http://www.nytimes.com/namespaces/keywords/nyt_org', 'label': None}, {'term': 'your-feed-animals', 'scheme': 'http://www.nytimes.com/namespaces/keywords/des', 'label': None}, {'term': 'your-feed-science', 'scheme': 'http://www.nytimes.com/namespaces/keywords/des', 'label': None}]
sciline
[{'term': 'Environment', 'scheme': None, 'label': None}, {'term': 'animals', 'scheme':