In [1]:
# !pip install beautifulsoup4
# !pip install selenium
# !pip install requests
# !pip install pandas

### Importing libraries

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os


### Establishing Reuters Home page as the url to scrape:

In [3]:
url = "https://www.reuters.com"

In [4]:
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")

In [5]:
def getCategories(url):
    categories = soup.find_all("a",{
        "class":"text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__default__UPMUu nav-bar__link__3mja8"
    })
    categoriesLinks =  []
    for category in categories:
        categoriesLinks.append({
            "category":category.text.strip(),
            "link":category['href']
        })
    return categoriesLinks

In [6]:
categories = getCategories(url)
categories

[{'category': 'World', 'link': '/world/'},
 {'category': 'Business', 'link': '/business/'},
 {'category': 'Legal', 'link': '/legal/'},
 {'category': 'Markets', 'link': '/markets/'},
 {'category': 'Technology', 'link': '/technology/'}]

In [7]:
def getTopicsAndMenus(url,categoriesLinks, df):
    for category in categoriesLinks:
        category["link"] = url+category["link"]
        request = requests.get(category["link"])
        categorySoup= BeautifulSoup(request.content, "html.parser")
        buttons = categorySoup.find_all(
            "button",
            {"class":"button__button__2Ecqi button__secondary__18moI button__pill__2LA8V text-button__container__3q3zX"}
            )
        category["topics"] = []
        for button in buttons: 
            topic = button.find(
                "span",
                {"class":"text__text__1FZLe text__inherit-color__3208F text__bold__2-8Kc text__default__UPMUu text-button__medium__113uZ"}
                )
            df = pd.concat([df,pd.DataFrame([{"Category": category["category"],"CategoryLink": category["link"], "Menus": button["data-id"], "Topics": topic.text.strip()}])],ignore_index=True)
    df["topicLink"] = url+df["Menus"]
    return df

In [8]:
df = pd.DataFrame(columns=["Category","CategoryLink", "Menus", "Topics"])
df = getTopicsAndMenus(url,categories, df)
df

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink
0,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/
1,World,https://www.reuters.com/world/,/world/americas/,Americas,https://www.reuters.com/world/americas/
2,World,https://www.reuters.com/world/,/world/asia-pacific/,Asia Pacific,https://www.reuters.com/world/asia-pacific/
3,World,https://www.reuters.com/world/,/world/china/,China,https://www.reuters.com/world/china/
4,World,https://www.reuters.com/world/,/world/europe/,Europe,https://www.reuters.com/world/europe/
5,World,https://www.reuters.com/world/,/world/india/,India,https://www.reuters.com/world/india/
6,World,https://www.reuters.com/world/,/world/middle-east/,Middle East,https://www.reuters.com/world/middle-east/
7,World,https://www.reuters.com/world/,/world/uk/,United Kingdom,https://www.reuters.com/world/uk/
8,World,https://www.reuters.com/world/,/world/us/,United States,https://www.reuters.com/world/us/
9,World,https://www.reuters.com/world/,/world/reuters-next/,Reuters Next,https://www.reuters.com/world/reuters-next/


In [9]:
def getArtciles(df,url):
    data = []
    for topicLink in df["topicLink"]:
        request = requests.get(topicLink)
        topicSoup= BeautifulSoup(request.content, "html.parser")
        topicCards = topicSoup.find_all(
            "div",
            {"class":"media-story-card__hub__3mHOR story-card"}
        )
        for card in topicCards:
            categories = card.find("a",{
                "class" : "text__text__1FZLe text__inherit-color__3208F text__inherit-font__1Y8w3 text__inherit-size__1DZJi link__underline_on_hover__2zGL4"
            })
            linksOfTitles = card.find("a",{
                "class":"text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__heading_5_and_half__3YluN heading__base__2T28j heading_5_half media-story-card__heading__eqhp9"
            })
            time = card.find("time",{"class":"text__text__1FZLe text__inherit-color__3208F text__regular__2N1Xr text__extra_small__1Mw6v label__label__f9Hew label__small__274ei media-story-card__time__2i9EK"})
            data.append({
                "Title":linksOfTitles.find("span").text.strip(),
                "ArticleLink":(url+linksOfTitles["href"]) if linksOfTitles["href"] is not None else None,
                # "Date":time.text.strip(),
                "Topics":next(categories.stripped_strings) if categories is not None else None
            })
    data = pd.DataFrame(data)
    df = pd.merge(df, data, on=['Topics'], how='outer')
    return df

In [10]:
df = getArtciles(df,url)
df

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title,ArticleLink
0,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...
1,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,"As Russia looms, US seeks influence in West Af...",https://www.reuters.com/world/africa/russia-lo...
2,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,"Blinken praises Ethiopia on Tigray peace, no r...",https://www.reuters.com/world/africa/blinken-m...
3,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Blinken brings aid and praise to Niger as it b...,https://www.reuters.com/world/africa/blinken-b...
4,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Senegal rocked by more unrest as police clash ...,https://www.reuters.com/world/africa/more-unre...
...,...,...,...,...,...,...,...
582,,,,Clean Energy,,Energy crisis seen posing 'existential threat'...,https://www.reuters.com/business/energy/reuter...
583,,,,Clean Energy,,Reducing energy waste key to meeting climate g...,https://www.reuters.com/business/energy/reuter...
584,,,,Governance,,"Buying Uniper was 'a mistake', Finnish finance...",https://www.reuters.com/business/energy/buying...
585,,,,Employee Benefits & Executive Compensation,,Calpers' CEO Frost hopes COP26 yields more gre...,https://www.reuters.com/business/sustainable-b...


In [11]:
tempDf = pd.DataFrame()
tempDf[['website', 'category', 'topic']] = df['ArticleLink'].str.split('/', expand=True)[[2,3,4]]
mask = df[['Category', 'CategoryLink', 'Menus', 'topicLink']].isna().all(axis=1)
df.loc[mask, 'Menus'] = '/' + tempDf['category'] + '/'+ tempDf['topic'] +'/'
df.loc[mask, 'Category'] = tempDf['category']
df.loc[mask, 'CategoryLink'] = 'https://'+tempDf["website"]+"/"+tempDf["category"]+"/"
df.loc[mask, 'topicLink'] = 'https://'+tempDf["website"]+df[mask]['Menus']
df

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title,ArticleLink
0,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...
1,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,"As Russia looms, US seeks influence in West Af...",https://www.reuters.com/world/africa/russia-lo...
2,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,"Blinken praises Ethiopia on Tigray peace, no r...",https://www.reuters.com/world/africa/blinken-m...
3,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Blinken brings aid and praise to Niger as it b...,https://www.reuters.com/world/africa/blinken-b...
4,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Senegal rocked by more unrest as police clash ...,https://www.reuters.com/world/africa/more-unre...
...,...,...,...,...,...,...,...
582,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Energy crisis seen posing 'existential threat'...,https://www.reuters.com/business/energy/reuter...
583,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Reducing energy waste key to meeting climate g...,https://www.reuters.com/business/energy/reuter...
584,business,https://www.reuters.com/business/,/business/energy/,Governance,https://www.reuters.com/business/energy/,"Buying Uniper was 'a mistake', Finnish finance...",https://www.reuters.com/business/energy/buying...
585,business,https://www.reuters.com/business/,/business/sustainable-business/,Employee Benefits & Executive Compensation,https://www.reuters.com/business/sustainable-b...,Calpers' CEO Frost hopes COP26 yields more gre...,https://www.reuters.com/business/sustainable-b...


In [12]:
df[df['ArticleLink'].isnull()]

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title,ArticleLink
92,World,https://www.reuters.com/world/,/world/reuters-next/,Reuters Next,https://www.reuters.com/world/reuters-next/,,
108,Business,https://www.reuters.com/business/,/business/cop/,COP27,https://www.reuters.com/business/cop/,,
197,Business,https://www.reuters.com/business/,/business/reuters-impact/,Reuters Impact,https://www.reuters.com/business/reuters-impact/,,
228,Markets,https://www.reuters.com/markets/,/markets/emerging/,Emerging Markets,https://www.reuters.com/markets/emerging/,,
239,Markets,https://www.reuters.com/markets/,/markets/funds/,Funds,https://www.reuters.com/markets/funds/,,
240,Markets,https://www.reuters.com/markets/,/markets/global-market-data/,Global Market Data,https://www.reuters.com/markets/global-market-...,,
243,Markets,https://www.reuters.com/markets/,/markets/stocks/,Stocks,https://www.reuters.com/markets/stocks/,,
247,Markets,https://www.reuters.com/markets/,/markets/wealth/,Wealth,https://www.reuters.com/markets/wealth/,,
287,Technology,https://www.reuters.com/technology/,/technology/reuters-momentum/,Reuters Momentum,https://www.reuters.com/technology/reuters-mom...,,


In [13]:
df.dropna(subset=['ArticleLink'], inplace=True)

In [14]:
df.isnull().sum()

Category        0
CategoryLink    0
Menus           0
Topics          9
topicLink       0
Title           0
ArticleLink     0
dtype: int64

In [15]:
def getArticleDetails(df):
    data = []
    for articleLink in df[df['ArticleLink'].notnull()]['ArticleLink']:
        if articleLink is not None:
            request = requests.get(articleLink)
            articleSoup= BeautifulSoup(request.content, "html.parser")
            image = articleSoup.find("div",{
                "class":"styles__image-container__skIG1 styles__fill__3xCr1 styles__center_center__1AaPV styles__apply-ratio__1_FYQ"
            })
            timeContainer = articleSoup.find("time",{
                "class":"text__text__1FZLe text__dark-grey__3Ml43 text__regular__2N1Xr text__extra_small__1Mw6v article-header__dateline__4jE04"
            })
            title = articleSoup.find("div",{
                "class":"article-header__heading__15OpQ"
            })
            author = articleSoup.find("a",{
                "class":"author-name__author__1gx5k"
            })
            articleBodyContainer = articleSoup.find("div",{
                "class":"article-body__content__17Yit paywall-article"
            })
            
            artcileBody  = articleBodyContainer.find_all("p") if articleBodyContainer is not None else None
            wholeBody = ""
            if artcileBody is not None:
                for body in artcileBody:
                    wholeBody+=body.text+"\n"
            if timeContainer is not None:
                date = timeContainer.find_all("span")[1].text
                time = timeContainer.find_all("span")[2].text
            else:
                date = None
                time = None
            image = image.find("img")["src"] if image is not None else None
            data.append({
                "ArticleLink":articleLink,
                "Title":title.find("h1").text if title is not None else None,
                "Author":author.text if author is not None else None,
                "Date":date,
                "Time":time,
                "Image":image,
                "Article":wholeBody
                })
    data = pd.DataFrame(data)
    df = pd.merge(df, data, on=['ArticleLink'], how='outer')
    return df

In [16]:
df = getArticleDetails(df)
df

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title_x,ArticleLink,Title_y,Author,Date,Time,Image,Article
0,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
1,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
2,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
3,world,https://www.reuters.com/world/,/world/africa/,World,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
4,world,https://www.reuters.com/world/,/world/africa/,World,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Energy crisis seen posing 'existential threat'...,https://www.reuters.com/business/energy/reuter...,Energy crisis seen posing 'existential threat'...,Reuters,"October 3, 2022",5:45 PM UTC,,"LONDON, Oct 3 (Reuters) - Europe's energy cris..."
932,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Reducing energy waste key to meeting climate g...,https://www.reuters.com/business/energy/reuter...,Reducing energy waste key to meeting climate g...,Gloria Dickie,"October 3, 2022",7:15 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"LONDON, Oct 3 (Reuters) - Embracing decentrali..."
933,business,https://www.reuters.com/business/,/business/energy/,Governance,https://www.reuters.com/business/energy/,"Buying Uniper was 'a mistake', Finnish finance...",https://www.reuters.com/business/energy/buying...,"Buying Uniper was 'a mistake', Finnish finance...",Anne Kauranen,"September 28, 2022",9:42 AM UTC,,"HELSINKI, Sept 28 (Reuters) - Buying Germany's..."
934,business,https://www.reuters.com/business/,/business/sustainable-business/,Employee Benefits & Executive Compensation,https://www.reuters.com/business/sustainable-b...,Calpers' CEO Frost hopes COP26 yields more gre...,https://www.reuters.com/business/sustainable-b...,Calpers' CEO Frost hopes COP26 yields more gre...,Ross Kerber,"October 4, 2021",9:56 PM UTC,,"BOSTON, Oct 4 (Reuters) - An upcoming United N..."


In [17]:
df.isnull().sum()

Category          0
CategoryLink      0
Menus             0
Topics           11
topicLink         0
Title_x           0
ArticleLink       0
Title_y           1
Author            1
Date              1
Time              1
Image           288
Article           0
dtype: int64

In [18]:
df[df['Topics'].isnull()]

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title_x,ArticleLink,Title_y,Author,Date,Time,Image,Article
860,world,https://www.reuters.com/world/,/world/europe/,,https://www.reuters.com/world/europe/,Explainer: Can China broker peace between Russ...,https://www.reuters.com/world/europe/can-china...,Explainer: Can China broker peace between Russ...,Yew Lun Tian,"March 16, 2023",6:24 AM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BEIJING, March 16 (Reuters) - Chinese Presiden..."
861,technology,https://www.reuters.com/technology/,/technology/xiaomis-slow-shift-india-premium-s...,,https://www.reuters.com/technology/xiaomis-slo...,Xiaomi's slow shift in India to premium smartp...,https://www.reuters.com/technology/xiaomis-slo...,Xiaomi's slow shift in India to premium smartp...,Munsif Vengattil,"March 16, 2023",5:46 AM UTC,,"NEW DELHI/LUCKNOW, March 16 (Reuters) - Xiaomi..."
862,business,https://www.reuters.com/business/,/business/retail-consumer/,,https://www.reuters.com/business/retail-consumer/,"Food makers, feeling squeezed, pull the plug o...",https://www.reuters.com/business/retail-consum...,"Food makers, feeling squeezed, pull the plug o...",Jessica DiNapoli,"February 27, 2023",4:59 AM UTC,,"BOCA RATON, Florida/LONDON, Feb 24 (Reuters) -..."
863,world,https://www.reuters.com/world/,/world/europe/,,https://www.reuters.com/world/europe/,"North of Bakhmut, another key battle tests Ukr...",https://www.reuters.com/world/europe/north-bak...,"North of Bakhmut, another key battle tests Ukr...",Mike Collett-White,"March 15, 2023",5:06 PM UTC,,"NEAR KREMINNA, March 15 (Reuters) - From a sma..."
864,world,https://www.reuters.com/world/,/world/asia-pacific/,,https://www.reuters.com/world/asia-pacific/,Japan battles to persuade its big brands to jo...,https://www.reuters.com/world/asia-pacific/jap...,Japan battles to persuade its big brands to jo...,Tim Kelly,"March 15, 2023",11:23 PM UTC,,March 16 (Reuters) - As Tokyo spins up its def...
865,business,https://www.reuters.com/business/,/business/finance/,,https://www.reuters.com/business/finance/,Explainer: Credit Suisse: How did it get to cr...,https://www.reuters.com/business/finance/credi...,Explainer: Credit Suisse: How did it get to cr...,Anshuman Daga,"March 16, 2023",11:29 AM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 16 (Reuters) - Credit Suisse (CSGN.S) sa...
866,business,https://www.reuters.com/business/,/business/finance/,,https://www.reuters.com/business/finance/,Explainer: Credit Suisse: How did it get to cr...,https://www.reuters.com/business/finance/credi...,Explainer: Credit Suisse: How did it get to cr...,Anshuman Daga,"March 16, 2023",11:29 AM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 16 (Reuters) - Credit Suisse (CSGN.S) sa...
867,business,https://www.reuters.com/business/,/business/finance/,,https://www.reuters.com/business/finance/,Explainer: Credit Suisse: How did it get to cr...,https://www.reuters.com/business/finance/credi...,Explainer: Credit Suisse: How did it get to cr...,Anshuman Daga,"March 16, 2023",11:29 AM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 16 (Reuters) - Credit Suisse (CSGN.S) sa...
868,business,https://www.reuters.com/business/,/business/finance/,,https://www.reuters.com/business/finance/,Explainer: Credit Suisse: How did it get to cr...,https://www.reuters.com/business/finance/credi...,Explainer: Credit Suisse: How did it get to cr...,Anshuman Daga,"March 16, 2023",11:29 AM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 16 (Reuters) - Credit Suisse (CSGN.S) sa...
869,world,https://www.reuters.com/world/,/world/uk/,,https://www.reuters.com/world/uk/,Britain's tax take risks blowing green energy ...,https://www.reuters.com/world/uk/britains-tax-...,Britain's tax take risks blowing green energy ...,Susanna Twidale,"March 13, 2023",4:20 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"LONDON, March 13 (Reuters) - A cap on revenue ..."


In [19]:
tempDf = pd.DataFrame()
tempDf['topic'] = df['topicLink'].str.split('/', expand=True)[4]
mask = df[['Topics']].isna().all(axis=1)
df.loc[mask, 'Topics'] = tempDf['topic']
df

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title_x,ArticleLink,Title_y,Author,Date,Time,Image,Article
0,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
1,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
2,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
3,world,https://www.reuters.com/world/,/world/africa/,World,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
4,world,https://www.reuters.com/world/,/world/africa/,World,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Energy crisis seen posing 'existential threat'...,https://www.reuters.com/business/energy/reuter...,Energy crisis seen posing 'existential threat'...,Reuters,"October 3, 2022",5:45 PM UTC,,"LONDON, Oct 3 (Reuters) - Europe's energy cris..."
932,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Reducing energy waste key to meeting climate g...,https://www.reuters.com/business/energy/reuter...,Reducing energy waste key to meeting climate g...,Gloria Dickie,"October 3, 2022",7:15 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"LONDON, Oct 3 (Reuters) - Embracing decentrali..."
933,business,https://www.reuters.com/business/,/business/energy/,Governance,https://www.reuters.com/business/energy/,"Buying Uniper was 'a mistake', Finnish finance...",https://www.reuters.com/business/energy/buying...,"Buying Uniper was 'a mistake', Finnish finance...",Anne Kauranen,"September 28, 2022",9:42 AM UTC,,"HELSINKI, Sept 28 (Reuters) - Buying Germany's..."
934,business,https://www.reuters.com/business/,/business/sustainable-business/,Employee Benefits & Executive Compensation,https://www.reuters.com/business/sustainable-b...,Calpers' CEO Frost hopes COP26 yields more gre...,https://www.reuters.com/business/sustainable-b...,Calpers' CEO Frost hopes COP26 yields more gre...,Ross Kerber,"October 4, 2021",9:56 PM UTC,,"BOSTON, Oct 4 (Reuters) - An upcoming United N..."


In [20]:
data = pd.read_csv("./reutersData.csv")
data

Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title_x,ArticleLink,Title_y,Author,Date,Time,Image,Article
0,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,IMF sees Senegal 2023 growth lower than expect...,https://www.reuters.com/world/africa/imf-lower...,IMF sees Senegal 2023 growth lower than expect...,Reuters,"March 14, 2023",12:28 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"DAKAR, March 14 (Reuters) - Growth in Senegal'..."
1,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,IMF sees Senegal 2023 growth lower than expect...,https://www.reuters.com/world/africa/imf-lower...,IMF sees Senegal 2023 growth lower than expect...,Reuters,"March 14, 2023",12:28 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"DAKAR, March 14 (Reuters) - Growth in Senegal'..."
2,world,https://www.reuters.com/world/,/world/africa/,World,https://www.reuters.com/world/africa/,"IMF lowers Senegal 2022 GDP estimate, sees 202...",https://www.reuters.com/world/africa/imf-lower...,IMF sees Senegal 2023 growth lower than expect...,Reuters,"March 14, 2023",12:28 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"DAKAR, March 14 (Reuters) - Growth in Senegal'..."
3,world,https://www.reuters.com/world/,/world/africa/,World,https://www.reuters.com/world/africa/,"IMF lowers Senegal 2022 GDP estimate, sees 202...",https://www.reuters.com/world/africa/imf-lower...,IMF sees Senegal 2023 growth lower than expect...,Reuters,"March 14, 2023",12:28 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"DAKAR, March 14 (Reuters) - Growth in Senegal'..."
4,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Nigeria central bank says old naira notes stil...,https://www.reuters.com/world/africa/nigeria-c...,Nigeria central bank says old naira notes stil...,Reuters,"March 14, 2023",8:47 AM UTC,https://cloudfront-us-east-2.images.arcpublish...,"ABUJA, March 13 (Reuters) - Nigeria's central ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Energy crisis seen posing 'existential threat'...,https://www.reuters.com/business/energy/reuter...,Energy crisis seen posing 'existential threat'...,Reuters,"October 3, 2022",5:45 PM UTC,,"LONDON, Oct 3 (Reuters) - Europe's energy cris..."
1006,business,https://www.reuters.com/business/,/business/energy/,Clean Energy,https://www.reuters.com/business/energy/,Reducing energy waste key to meeting climate g...,https://www.reuters.com/business/energy/reuter...,Reducing energy waste key to meeting climate g...,Gloria Dickie,"October 3, 2022",7:15 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"LONDON, Oct 3 (Reuters) - Embracing decentrali..."
1007,business,https://www.reuters.com/business/,/business/energy/,Governance,https://www.reuters.com/business/energy/,"Buying Uniper was 'a mistake', Finnish finance...",https://www.reuters.com/business/energy/buying...,"Buying Uniper was 'a mistake', Finnish finance...",Anne Kauranen,"September 28, 2022",9:42 AM UTC,,"HELSINKI, Sept 28 (Reuters) - Buying Germany's..."
1008,business,https://www.reuters.com/business/,/business/healthcare-pharmaceuticals/,COVID-19,https://www.reuters.com/business/healthcare-ph...,"Moderna to hire around 2,000 employees amid mR...",https://www.reuters.com/business/healthcare-ph...,"Moderna to hire around 2,000 employees amid mR...",Reuters,"March 10, 2023",7:38 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 10 (Reuters) - Moderna Inc (MRNA.O) said...


In [21]:
df = df_concat = pd.concat([df, data]).drop_duplicates()

In [30]:
df = df.reset_index(drop=True)
print(df.shape)
df

(875, 13)


Unnamed: 0,Category,CategoryLink,Menus,Topics,topicLink,Title_x,ArticleLink,Title_y,Author,Date,Time,Image,Article
0,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
1,world,https://www.reuters.com/world/,/world/africa/,World,https://www.reuters.com/world/africa/,Survivors in shock as Cyclone Freddy toll pass...,https://www.reuters.com/world/africa/cyclone-f...,Survivors in shock as Cyclone Freddy toll pass...,Tom Gibb,"March 16, 2023",1:44 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,"BLANTYRE, March 16 (Reuters) - The last thing ..."
2,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,"As Russia looms, US seeks influence in West Af...",https://www.reuters.com/world/africa/russia-lo...,"As Russia looms, US seeks influence in West Af...",Cooper Inveen,"March 15, 2023",9:13 PM UTC,,"DABOYA, Ghana, March 15 (Reuters) - U.S. comma..."
3,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,"Blinken praises Ethiopia on Tigray peace, no r...",https://www.reuters.com/world/africa/blinken-m...,"Blinken praises Ethiopia on Tigray peace, no r...",Daphne Psaledakis,"March 15, 2023",9:04 PM UTC,,"ADDIS ABABA, March 15 (Reuters) - U.S. Secreta..."
4,World,https://www.reuters.com/world/,/world/africa/,Africa,https://www.reuters.com/world/africa/,Blinken brings aid and praise to Niger as it b...,https://www.reuters.com/world/africa/blinken-b...,Blinken brings aid and praise to Niger as it b...,Reuters,"March 16, 2023",12:20 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 16 (Reuters) - U.S. Secretary of State A...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
870,business,https://www.reuters.com/business/,/business/energy/,Renewable Fuels,https://www.reuters.com/business/energy/,Exclusive: Hydrogen firm Thyssenkrupp Nucera s...,https://www.reuters.com/business/energy/hydrog...,Exclusive: Hydrogen firm Thyssenkrupp Nucera s...,Christoph Steitz,"March 9, 2023",12:34 PM UTC,,"FRANKFURT, March 9 (Reuters) - German hydrogen..."
871,business,https://www.reuters.com/business/,/business/healthcare-pharmaceuticals/,Shareholder Activism,https://www.reuters.com/business/healthcare-ph...,Carl Icahn prepares for proxy fight at Illumin...,https://www.reuters.com/business/healthcare-ph...,Carl Icahn prepares for proxy fight at Illumin...,Reuters,"March 13, 2023",2:22 AM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 12 (Reuters) - Activist investor Carl Ic...
872,business,https://www.reuters.com/business/,/business/autos-transportation/,Employee Benefits & Executive Compensation,https://www.reuters.com/business/autos-transpo...,Caterpillar union workers vote in favor of six...,https://www.reuters.com/business/autos-transpo...,Caterpillar union workers vote in favor of six...,Bianca Flowers,"March 13, 2023",8:50 AM UTC,,March 12 (Reuters) - The United Auto Workers (...
873,business,https://www.reuters.com/business/,/business/healthcare-pharmaceuticals/,COVID-19,https://www.reuters.com/business/healthcare-ph...,"Moderna to hire around 2,000 employees amid mR...",https://www.reuters.com/business/healthcare-ph...,"Moderna to hire around 2,000 employees amid mR...",Reuters,"March 10, 2023",7:38 PM UTC,https://cloudfront-us-east-2.images.arcpublish...,March 10 (Reuters) - Moderna Inc (MRNA.O) said...


In [200]:
df.to_csv('./reutersData.csv', index=False)