# Easily Perform Advanced News Scraping with mediastack

## Get Started with Python's Requests Lib 

In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python 3
import requests

# Send GET request to mediastack API
r = requests.get("http://api.mediastack.com/v1/news?access_key=YOUR_ACCESS_KEY")

responses = r.json()
responses

{'pagination': {'limit': 25, 'offset': 0, 'count': 25, 'total': 10000},
 'data': [{'author': 'Hala Abdallah',
   'title': 'Taliban says its purge of officials unrelated to sanctioned ministers\xa0',
   'description': 'The Taliban appointed ministers sanctioned by the US in its interim government. The Taliban has refused calls by the international community to remove ministers from its interim government under US and UN sanction threats, a senior official said according to a Sputnik report. In comments made to the Russian outlet, Taliban Spokesman Mohammad Naeem confirmed [&#8230;](The post Taliban says its purge of officials unrelated to sanctioned ministers\xa0 is from Doha News | Qatar.)',
   'url': 'http://feedproxy.google.com/~r/DohaNews/~3/_3OlD8mmIAA/',
   'source': 'Doha News',
   'image': None,
   'category': 'general',
   'language': 'ar',
   'country': 'qa',
   'published_at': '2021-10-13T13:15:39+00:00'},
  {'author': None,
   'title': 'Le Polisario menace de «poursuivre la 

## Collect news with specific categories using Python and mediastack (e.g. Business & Technology)

In [2]:
# Collect business and technology news with Python and mediastack
r2 = requests.get("http://api.mediastack.com/v1/news?access_key=YOUR_ACCESS_KEY&categories=business,technology")

responses2 = r2.json()
responses2

{'pagination': {'limit': 25, 'offset': 0, 'count': 25, 'total': 10000},
 'data': [{'author': None,
   'title': 'Immobilien: Furcht vor steigenden Zinsen: Wie Experten das Umfeld für Immobilienanleger beurteilen',
   'description': 'Eine mögliche Zinswende und weiter steigende Preise könnten für Haus- und Wohnungskäufer zur gefährlichen Mischung werden. Worauf Investoren achten sollten.',
   'url': 'https://www.handelsblatt.com/finanzen/immobilien/immobilien-furcht-vor-steigenden-zinsen-wie-experten-das-umfeld-fuer-immobilienanleger-beurteilen/27660494.html',
   'source': 'Handelsblatt',
   'image': 'https://www.handelsblatt.com/images/immobilien/27670766/2-format2020.jpg',
   'category': 'business',
   'language': 'de',
   'country': 'de',
   'published_at': '2021-10-02T10:27:06+00:00'},
  {'author': None,
   'title': 'iOS 15.01: Update behebt nervigen Bug auf iPhone 13',
   'description': 'Apple bringt seine mobilen Betriebssysteme auf den neusten Stand und veröffentlicht iOS 15.01 un

## Collect news with specific language only (e.g. English) using Python and mediastack

In [3]:
# Collect news that use English with Python and mediastack
r3 = requests.get("http://api.mediastack.com/v1/news?access_key=YOUR_ACCESS_KEY&languages=en")

responses3 = r3.json()
responses3

{'pagination': {'limit': 25, 'offset': 0, 'count': 25, 'total': 10000},
 'data': [{'author': 'Emma Koehn',
   'title': 'ASX to follow Wall St higher, Bitcoin at record high',
   'description': 'ASX to follow Wall St higher, while Bitcoin trades at another record high.',
   'url': 'https://www.brisbanetimes.com.au/business/markets/asx-to-follow-wall-st-higher-bitcoin-at-record-high-20211020-p591pm.html?ref=rss&utm_medium=rss&utm_source=rss_business',
   'source': 'brisbanetimes',
   'image': None,
   'category': 'general',
   'language': 'en',
   'country': 'au',
   'published_at': '2021-10-20T21:51:46+00:00'},
  {'author': 'Nick Sestanovich',
   'title': 'Vacaville Planning Commission will revisit subcommittee discussion',
   'description': 'The Vacaville Planning Commission unanimously expressed interest in continuing a conversation about establishing subcommittees at its Tuesday meeting.',
   'url': 'https://www.thereporter.com/2021/10/20/vacaville-planning-commission-will-revisit-su

## Collect news from specific sources (e.g. BBC) with Python and mediastack

In [4]:
# Collect news from BBC with Python and mediastack
r4 = requests.get("http://api.mediastack.com/v1/news?access_key=YOUR_ACCESS_KEY&sources=bbc")

responses4 = r4.json()
responses4

{'pagination': {'limit': 25, 'offset': 0, 'count': 25, 'total': 2904},
 'data': [{'author': None,
   'title': 'UK agrees free trade deal with New Zealand',
   'description': 'The government says consumers and businesses will benefit from deal, but it is unlikely to boost growth.',
   'url': 'https://www.bbc.co.uk/news/business-58988711?at_medium=RSS&at_campaign=KARANGA',
   'source': 'BBC News - UK Politics',
   'image': None,
   'category': 'politics',
   'language': 'en',
   'country': 'gb',
   'published_at': '2021-10-20T22:06:04+00:00'},
  {'author': None,
   'title': 'The day a Scottish shopping street exploded',
   'description': 'Fifty years ago, a row of shops exploded killing 22 people and injuring more than 100, mostly women.',
   'url': 'https://www.bbc.co.uk/news/uk-scotland-58625427?at_medium=RSS&at_campaign=KARANGA',
   'source': 'BBC',
   'image': None,
   'category': 'general',
   'language': 'en',
   'country': 'gb',
   'published_at': '2021-10-21T01:25:40+00:00'},
  {

## Collect hundreds of news or more with Python and mediastack 

In [5]:
# Collect 100 of news from BBC with Python and mediastack
r5 = requests.get("http://api.mediastack.com/v1/news?access_key=YOUR_ACCESS_KEY&limit=100")

responses5 = r5.json()
responses5

{'pagination': {'limit': 100, 'offset': 0, 'count': 100, 'total': 10000},
 'data': [{'author': 'Hala Abdallah',
   'title': 'Taliban says its purge of officials unrelated to sanctioned ministers\xa0',
   'description': 'The Taliban appointed ministers sanctioned by the US in its interim government. The Taliban has refused calls by the international community to remove ministers from its interim government under US and UN sanction threats, a senior official said according to a Sputnik report. In comments made to the Russian outlet, Taliban Spokesman Mohammad Naeem confirmed [&#8230;](The post Taliban says its purge of officials unrelated to sanctioned ministers\xa0 is from Doha News | Qatar.)',
   'url': 'http://feedproxy.google.com/~r/DohaNews/~3/_3OlD8mmIAA/',
   'source': 'Doha News',
   'image': None,
   'category': 'general',
   'language': 'ar',
   'country': 'qa',
   'published_at': '2021-10-13T13:15:39+00:00'},
  {'author': None,
   'title': 'Le Polisario menace de «poursuivre l

## Collect specific news with their index 

Get the first news:

In [6]:
# Collect specific news with their index
responses5['data'][0]

{'author': 'Hala Abdallah',
 'title': 'Taliban says its purge of officials unrelated to sanctioned ministers\xa0',
 'description': 'The Taliban appointed ministers sanctioned by the US in its interim government. The Taliban has refused calls by the international community to remove ministers from its interim government under US and UN sanction threats, a senior official said according to a Sputnik report. In comments made to the Russian outlet, Taliban Spokesman Mohammad Naeem confirmed [&#8230;](The post Taliban says its purge of officials unrelated to sanctioned ministers\xa0 is from Doha News | Qatar.)',
 'url': 'http://feedproxy.google.com/~r/DohaNews/~3/_3OlD8mmIAA/',
 'source': 'Doha News',
 'image': None,
 'category': 'general',
 'language': 'ar',
 'country': 'qa',
 'published_at': '2021-10-13T13:15:39+00:00'}

Get the 27th news:

In [7]:
# Collect specific news with their index
responses5['data'][26]

{'author': None,
 'title': 'ليكون &quot;عبرة للآخرين&quot;.. السجن 10 أسابيع لمشجع (52 سنة) شتم &quot;ثلاثي&quot; منتخب إنجلترا بعبارات عنصرية',
 'description': 'أصدرت محكمة في إنجلترا، عقوبة "صارمة" في حق مشجع إنجليزي، استخدم عبارات عنصرية ضد الثلاثي جادون سانشو وماركوس راشفورد وبوكايو ساكا، بعد نهائي كأس أمم أوروبا "يورو 2020".وتم اعتقال مشجع يبلغ من العمر 5',
 'url': 'http://www.elbotola.com/article/2021-11-04-14-09-708.html',
 'source': 'Elbotola.com',
 'image': None,
 'category': 'general',
 'language': 'ar',
 'country': 'ma',
 'published_at': '2021-11-04T14:15:36+00:00'}

Get the 100th news:

In [8]:
# Collect specific news with their index
responses5['data'][99]

{'author': None,
 'title': 'Histoire du chiisme au Maroc : Quand les Fatimides ont fondé le premier pays chiite [2/4]',
 'description': 'Lhistoire se poursuit sur\xa0Idriss Ier et son adhésion au chiisme. Les historiens s’accordent sur le fait que les Fatimides, venus au Maroc par l’Est, étaient de mouvance chiite.',
 'url': 'https://www.yabiladi.com/articles/details/59523/histoire-chiisme-maroc-quand-fatimides.html',
 'source': 'yabiladi.com',
 'image': 'https://static.yabiladi.com/files/articles/81a2703913ea05fc8404eff6baf992c720171121162247150.jpg',
 'category': 'general',
 'language': 'ar',
 'country': 'ma',
 'published_at': '2021-11-21T07:00:00+00:00'}

## Get specific data individually using API response objects 

#### Author

In [9]:
# Get specific data individually using their API response objects
print("Author: "+responses['data'][0]['author'])

Author: Hala Abdallah


#### Category

In [10]:
print("Category: "+responses['data'][0]['category'])

Category: general


#### Country

In [11]:
print("Country: "+responses['data'][0]['country'])

Country: qa


#### Description

In [12]:
print("Description: "+responses['data'][0]['description'])

Description: The Taliban appointed ministers sanctioned by the US in its interim government. The Taliban has refused calls by the international community to remove ministers from its interim government under US and UN sanction threats, a senior official said according to a Sputnik report. In comments made to the Russian outlet, Taliban Spokesman Mohammad Naeem confirmed [&#8230;](The post Taliban says its purge of officials unrelated to sanctioned ministers  is from Doha News | Qatar.)


#### Image

In [13]:
print("Image: "+responses['data'][0]['image'])

TypeError: can only concatenate str (not "NoneType") to str

#### Language

In [14]:
print("Language: "+responses['data'][0]['language'])

Language: ar


#### Published at

In [15]:
print("Published at: "+responses['data'][0]['published_at'])

Published at: 2021-10-13T13:15:39+00:00


#### Source

In [16]:
print("Source: "+responses['data'][0]['source'])

Source: Doha News


#### Title

In [17]:
print("Title: "+responses['data'][0]['title'])

Title: Taliban says its purge of officials unrelated to sanctioned ministers 


#### URL

In [18]:
print("URL: "+responses['data'][0]['url'])

URL: http://feedproxy.google.com/~r/DohaNews/~3/_3OlD8mmIAA/


## Scrape hundreds or more news and save them into a CSV file

#### Save scraped news to CSV file

In [19]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python 3
import requests

# Collect 1000 of news or more with Python and mediastack
r = requests.get("http://api.mediastack.com/v1/news?access_key=YOUR_ACCESS_KEY&languages=en&categories=business,technology&limit=100")

responses = r.json()
responses

# Define the collected API responses
mediastackData = responses['data']

# Normalize or unnest the JSON response
from pandas.io.json import json_normalize
flatData = json_normalize(mediastackData)

# Create data frame, and save them to CSV
import pandas as pd
df = pd.DataFrame(flatData)
df.to_csv("mediastackSample.csv", encoding = 'utf-8', index = False)

#### Show the data frame

In [20]:
flatData

Unnamed: 0,author,title,description,url,source,image,category,language,country,published_at
0,GlobeNewswire,Center Coast Brookfield MLP & Energy Infrastru...,"NEW YORK, Oct. 20, 2021 (GLOBE NEWSWIRE) &#821...",https://financialpost.com/globe-newswire/cente...,Financial Post | Canada Business News,,business,en,us,2021-10-20T22:03:56+00:00
1,Reuters,Workers at Exxon Texas refinery reject contrac...,BEAUMONT &#8212; The two sides in the Exxon Mo...,https://financialpost.com/pmn/business-pmn/wor...,Financial Post | Canada Business News,,business,en,us,2021-10-20T22:11:57+00:00
2,,Cinemark enhances moviegoing experience with S...,Cinemark enhances moviegoing experience with S...,https://seekingalpha.com/news/3755883-cinemark...,Seeking Alpha,,business,en,us,2021-10-20T22:23:56+00:00
3,Aria Alamalhodaei,Tesla will only use iron-based batteries for s...,Tesla said Wednesday it will use iron-based ba...,https://techcrunch.com/2021/10/20/tesla-earnin...,TechCrunch,,technology,en,us,2021-10-20T22:55:23+00:00
4,Indu Bhan,Future Retail party to dispute with Amazon: Ar...,This stance of the arbitration panel further b...,https://www.financialexpress.com/industry/futu...,The Financial Express,https://images.financialexpress.com/2021/10/am...,business,en,us,2021-10-20T23:01:00+00:00
...,...,...,...,...,...,...,...,...,...,...
95,Alex Millson,Hundreds Sick as Onion-Linked Salmonella Outbr...,Hundreds Sick as Onion-Linked Salmonella Outbr...,https://www.bloombergquint.com/business/hundre...,Bloomberg | Latest And Live Business,,business,en,us,2021-10-21T09:56:01+00:00
96,,"Dow EPS beats by $0.19, beats on revenue","Dow EPS beats by $0.19, beats on revenue",https://seekingalpha.com/news/3755939-dow-eps-...,Seeking Alpha,,business,en,us,2021-10-21T10:06:23+00:00
97,,First Trust Mortgage Income Fund declares $0.0...,First Trust Mortgage Income Fund declares $0.0...,https://seekingalpha.com/news/3755934-first-tr...,Seeking Alpha,,business,en,us,2021-10-21T10:06:04+00:00
98,,KnowBe4 acquires SecurityAdvisor in stock and ...,KnowBe4 acquires SecurityAdvisor in stock and ...,https://seekingalpha.com/news/3755941-knowbe4-...,Seeking Alpha,,business,en,us,2021-10-21T10:11:53+00:00
