|
| 1 | +import requests |
| 2 | +from bs4 import BeautifulSoup |
| 3 | + |
| 4 | + |
class GoogleNews:
    """Scrape Google News search results for a topic.

    Example:
        ```
        articles = GoogleNews(topic="topic")
        ```

    Methods:
        1. `.getArticles()` | Response - Articles with title, description,
           news source, date and link.
    """

    def __init__(self, topic):
        """Store the search topic used by `getArticles`."""
        self.topic = topic

    @staticmethod
    def _extract_link(href):
        """Return the article URL wrapped inside a Google `/url?q=...` href.

        Hrefs that are not Google redirect links are returned unchanged.
        """
        # Imported locally so the helper does not depend on the file's
        # top-level import block.
        from urllib.parse import parse_qs, urlsplit

        parts = urlsplit(href)
        if parts.path == "/url":
            # The redirect carries extra parameters after the target
            # (e.g. `&sa=...&ved=...`); only `q` holds the article URL, and
            # parse_qs also undoes the percent-encoding applied to it.
            targets = parse_qs(parts.query).get("q")
            if targets:
                return targets[0]
        return href

    @staticmethod
    def _parse_article(anchor):
        """Build one article dict from a result anchor, or None if any
        expected element is missing from the markup."""
        title_tag = anchor.find("div", class_="BNeawe vvjwJb AP7Wnd")
        date_tag = anchor.find("span", class_="r0bn4c rQMQod")
        desc_tag = anchor.find("div", class_="BNeawe s3v9rd AP7Wnd")
        source_tag = anchor.find("div", class_="BNeawe UPmit AP7Wnd lRVwie")
        href = anchor.get("href")

        required = (title_tag, date_tag, desc_tag, source_tag, href)
        if any(item is None for item in required):
            return None

        date = date_tag.get_text()
        return {
            "title": title_tag.get_text(),
            # The description element's text includes the date; strip it so
            # the date is only reported in its own field.
            "description": desc_tag.get_text().replace(date, ""),
            "news_source": source_tag.get_text(),
            "date": date,
            "link": GoogleNews._extract_link(href),
        }

    def getArticles(self, timeout=10):
        """Fetch and parse the news results for `self.topic`.

        Example:
            ```
            articles = GoogleNews("github")
            articles.getArticles()
            ```

        Args:
            timeout: Seconds to wait for the HTTP response before giving up.

        Returns:
            `None` if the HTTP request fails (connection error, timeout or
            non-2xx status). Otherwise a dict of the form
            ```
            {
                "articles": [
                    {
                        "title": Title of the article
                        "description": Description of the article
                        "news_source": News source of the article
                        "date": Date the article was posted
                        "link": Link to the article
                    },
                    ...
                ]
            }
            ```
            Results whose markup lacks one of the expected elements are
            skipped rather than failing the whole call.
        """
        try:
            # Passing the query through `params` lets requests URL-encode the
            # topic, so spaces, `&`, `#` etc. do not corrupt the query string.
            res = requests.get(
                "https://www.google.com/search",
                params={"q": self.topic, "tbm": "nws"},
                timeout=timeout,
            )
            res.raise_for_status()
        except requests.RequestException:
            return None

        soup = BeautifulSoup(res.text, "html.parser")
        parsed = (
            self._parse_article(anchor)
            for anchor in soup.find_all("a", jsname="ACyKwe")
        )
        return {"articles": [item for item in parsed if item is not None]}
0 commit comments