# GoogleNews

In [1]:
from GoogleNews import GoogleNews as g
import pandas as pd
import newspaper

In [9]:
def findArticles(query: str = None, language: str = "en") -> pd.DataFrame:
    """ Search Google News and return the first page of results, enriched with article text.

    Only the first result page is fetched, using a GoogleNews client created
    with ``period='1d'``. Every result link is then downloaded with
    ``newspaper`` so the article text and top image can be attached.

    :param query:           Search query. Must be a non-blank string.
    :param language:        The language of the articles.
    :return:                A dataframe of the articles, with the columns:
                            ['title', 'desc', 'content', 'media', 'link', 'img', 'date', 'datetime'].
                            'img' is the top image reported by newspaper and replaces
                            the thumbnail delivered by Google News. 'content' and 'img'
                            are None for articles that could not be downloaded/parsed.
                            The frame is empty (columns only) when nothing was found.
    :raises ValueError:     If ``query`` is None or blank.
    """

    columns = ['title', 'desc', 'content', 'media', 'link', 'img', 'date', 'datetime']

    # Fail early with a clear message instead of an opaque error from inside
    # the GoogleNews client (the declared default of None is not searchable).
    if query is None or not query.strip():
        raise ValueError("query must be a non-empty search string")

    def getContent(link):
        """ Given a link to a specific article, return its text and top image.

        :param link:        The link for the article.
        :return:            ``[text, top_image]``; ``[None, None]`` when the
                            article cannot be downloaded or parsed.
        """

        article = newspaper.Article(link)
        try:
            article.download()
            article.parse()
        except newspaper.ArticleException:
            # One unreachable article must not abort the whole batch.
            return [None, None]

        return [article.text, article.top_img]

    # Find Articles
    gn = g(lang=language, period='1d')
    try:
        gn.search(query)
        df = pd.DataFrame(gn.page_at(1))
    finally:
        # Clear GoogleNews engine, even when the search itself failed.
        gn.clear()

    # No results: an empty frame has no 'link' column, so return early with
    # the documented schema instead of raising KeyError below.
    if df.empty:
        return pd.DataFrame(columns=columns)

    # Merge content ('img' from Google News is overwritten by the article's top image)
    fetched = [getContent(link) for link in df['link']]
    df['content'] = [text for text, _ in fetched]
    df['img'] = [image for _, image in fetched]

    return df[columns]

In [10]:
# Run the helper for the query "Sports" with the default language ("en").
df = findArticles("Sports")

In [11]:
# Show the (rows, columns) dimensions of the returned dataframe.
df.shape

(10, 8)

In [25]:
# Create a standalone GoogleNews client (English, period="1d") and run the
# "Sports" search on it directly, bypassing findArticles.
# NOTE(review): period="1d" presumably restricts results to the past day - confirm against the GoogleNews docs.
news = g(lang="en", period="1d")
news.search("Sports")

In [37]:
# Fetch result pages 1 through 13 and build one dataframe per page.
dfs = [pd.DataFrame(news.page_at(i)) for i in range(1, 14)]

In [40]:
# Stack the per-page frames into one. reset_index() (without drop=True) keeps
# each page's original row labels as an extra 'index' column.
articles = pd.concat(dfs).reset_index()

In [42]:
# Show the (rows, columns) dimensions of the combined dataframe.
articles.shape

(130, 8)

In [43]:
# Display the combined dataframe.
articles

Unnamed: 0,index,title,media,date,datetime,desc,link,img
0,0,Forest Green Rovers rank top of English Footba...,Sky Sports,51 mins ago,NaT,Sport Positive Leagues has collated a large cr...,https://www.skysports.com/football/news/11095/...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
1,1,Jason Smyth: Paralympic sport's fastest man re...,BBC,59 mins ago,NaT,"Ireland's Jason Smyth, Paralympic sport's fast...",https://www.bbc.com/sport/disability-sport/651...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
2,2,Carla Ward: WSL title race will go right to th...,Sky Sports,1 hour ago,2023-03-29 13:00:58.642336,Aston Villa women's manager Carla Ward expects...,https://www.skysports.com/watch/video/sports/f...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
3,3,Dave Kindred among Kentucky Journalism Hall of...,Courier-Journal,1 hour ago,2023-03-29 13:00:58.642336,"Renown sportswriter Dave Kindred, who will be ...",https://www.courier-journal.com/story/sports/2...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
4,4,Gunshots fired during shootout between sports ...,FOX 5 Atlanta,1 hour ago,2023-03-29 13:00:58.642336,Police are searching for multiple gunmen conne...,https://www.fox5atlanta.com/news/gunshots-fire...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
...,...,...,...,...,...,...,...,...
125,5,"Amid Lamar Jackson trade request, Colts owner ...",CBS Sports,21 hours ago,2023-03-28 17:01:16.619419,"We may be in the middle of the offseason, but ...",https://www.cbssports.com/nfl/news/amid-lamar-...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
126,6,"Canada vs. Honduras odds, picks, how to watch,...",CBS Sports,21 hours ago,2023-03-28 17:01:16.619419,Soccer best bets for Tuesday's Honduras vs. Ca...,https://www.cbssports.com/soccer/news/canada-v...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
127,7,Scotland v Spain: Ryan Christie and Scott McTo...,BBC,21 hours ago,2023-03-28 17:01:16.619419,Ryan Christie and Scott McTominay are backed i...,https://www.bbc.com/sport/football/65101210,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
128,8,Young Sports Volunteers Inspire at Belfast Cit...,Ulster Rugby,22 hours ago,2023-03-28 16:01:16.619419,Young sports volunteers from across Ulster too...,https://ulster.rugby/content/siawards,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
