Skip to content

Commit a164133

Browse files
Merge pull request avinashkranjan#2924 from Mihan786Chistie/googleNews
added Google News Scraper
2 parents 8d97dcd + 993ad3e commit a164133

File tree

3 files changed

+84
-0
lines changed

3 files changed

+84
-0
lines changed

GoogleNews-Scraper/README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
## Google News
2+
3+
### Scrape articles about a topic, including each article's title, description, news source, date and link
4+
5+
Create an instance of `GoogleNews` class.
6+
7+
```python
8+
articles = GoogleNews("topic")
9+
```
10+
11+
| Methods | Details |
12+
| ---------------- | ---------------------------------------------------------------------------------------- |
13+
| `.getArticles()` | Returns a dictionary of the form `{"articles": [...]}`, where each article has a title, description, news source, date and link; returns `None` on failure |
14+
15+
---

GoogleNews-Scraper/googleNews.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
4+
5+
class GoogleNews:
    """Scrape Google News search results for a topic.

    Example:
        articles = GoogleNews("github")
        articles.getArticles()

    Methods:
        getArticles(): Return the articles found for the topic, each with
            its title, description, news source, date and link.
    """

    # Base address of the Google search endpoint queried by getArticles().
    SEARCH_URL = "https://www.google.com/search"
    # Seconds to wait for Google before giving up; without a timeout
    # requests.get() can block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, topic):
        """Store the search topic.

        Args:
            topic: The search term to look up on Google News.
        """
        self.topic = topic

    def _build_url(self):
        """Return the news-search URL for ``self.topic``, safely URL-encoded."""
        from urllib.parse import quote_plus

        # Encode the topic so spaces, "&", "#" etc. cannot break the query.
        return self.SEARCH_URL + "?q=" + quote_plus(str(self.topic)) + "&tbm=nws"

    @staticmethod
    def _clean_link(href):
        """Extract the article URL from a ``/url?q=<target>&...`` redirect href.

        Hrefs that are not redirect links are returned unchanged.
        """
        from urllib.parse import parse_qs, urlsplit

        if not href.startswith("/url?"):
            return href
        # parse_qs both isolates the "q" parameter from the other redirect
        # parameters and percent-decodes it.
        targets = parse_qs(urlsplit(href).query).get("q")
        return targets[0] if targets else href

    @staticmethod
    def _text(node, tag, css_class):
        """Return the text of the first matching child, or "" if it is absent."""
        element = node.find(tag, class_=css_class)
        return element.getText() if element is not None else ""

    def getArticles(self):
        """Fetch and parse the news results for the topic.

        Example:
            articles = GoogleNews("github")
            articles.getArticles()

        Returns:
            A dict ``{"articles": [...]}`` where every entry has the keys:

            - "title": Title of the article
            - "description": Description of the article
            - "news_source": News source of the article
            - "date": Date the article was posted
            - "link": Link to the article

            Fields that cannot be found in a result are empty strings.
            Returns ``None`` if the HTTP request fails.
        """
        try:
            res = requests.get(self._build_url(), timeout=self.REQUEST_TIMEOUT)
            # Treat 4xx/5xx (e.g. rate limiting) as a failure instead of
            # silently reporting "no articles".
            res.raise_for_status()
        except requests.RequestException:
            return None

        soup = BeautifulSoup(res.text, "html.parser")
        articles = []
        for a in soup.find_all("a", jsname="ACyKwe"):
            date = self._text(a, "span", "r0bn4c rQMQod")
            # The description element also contains the date text; strip it.
            desc = self._text(a, "div", "BNeawe s3v9rd AP7Wnd").replace(date, "")
            articles.append(
                {
                    "title": self._text(a, "div", "BNeawe vvjwJb AP7Wnd"),
                    "description": desc,
                    "news_source": self._text(
                        a, "div", "BNeawe UPmit AP7Wnd lRVwie"
                    ),
                    "date": date,
                    "link": self._clean_link(a.get("href", "")),
                }
            )
        return {"articles": articles}

GoogleNews-Scraper/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
bs4
2+
requests

0 commit comments

Comments
 (0)