## Data Collection

In [8]:
from selenium import webdriver # Importing the webdriver
import time # For implicit wait
import pandas as pd
from selenium.webdriver.common.by import By # To find elements

In [9]:
# Work around the browser closing automatically: build Edge options with the
# "detach" experimental option enabled.
options = webdriver.EdgeOptions()
options.add_experimental_option("detach", True)

# Create the Edge driver with those options. It is bound to the module-level
# name `driver`, which the `scrape` function below reads directly.
driver = webdriver.Edge(options = options)

# Maximize the browser window
driver.maximize_window()

In [10]:
def scrape(url, section, pages, wait_seconds=5, close_driver=True):
    """Scrape card headlines from a listing page and label them with a section.

    The page at ``url`` is opened in the module-level ``driver``. The
    "load more" button (class ``LoadMoreButton-loadMore``) is clicked up to
    ``pages`` times, then the text of every element with class ``Card-title``
    is collected.

    Parameters
    ----------
    url : str
        Address of the listing page to open.
    section : str
        Label written to the ``Section`` column of every row.
    pages : int
        Maximum number of "load more" clicks to perform.
    wait_seconds : float, default 5
        Pause after each click (and before closing the window) so newly
        requested cards have time to render.
    close_driver : bool, default True
        Close the driver window when done, as the function always did.
        Pass ``False`` to keep the window open so the same ``driver`` can be
        used for another ``scrape`` call; after ``driver.close()`` the shared
        driver generally cannot be reused without being recreated.

    Returns
    -------
    pandas.DataFrame
        One row per headline with the columns ``Header`` and ``Section``.
    """
    # Fetch the url using the driver
    driver.get(url)

    for clicks_done in range(pages):
        # Look the button up again on every pass: a handle obtained before a
        # click can go stale if the page re-renders it. `find_elements`
        # returns an empty list (instead of raising) when the button is
        # absent, e.g. because there is nothing more to load.
        buttons = driver.find_elements(By.CLASS_NAME, 'LoadMoreButton-loadMore')
        if not buttons:
            print('Load-more button not found after', clicks_done,
                  'click(s); stopping early.')
            break

        buttons[0].click()
        time.sleep(wait_seconds)

    headline_cards = driver.find_elements(By.CLASS_NAME, 'Card-title')

    news_data = [
        {'Header': card.text, 'Section': section}
        for card in headline_cards
    ]

    news_df = pd.DataFrame(data=news_data, columns=['Header', 'Section'])
    print(news_df)

    print('The number of News Scraped:', len(news_data))

    if close_driver:
        time.sleep(wait_seconds)
        driver.close()

    return news_df

In [7]:
# Technology: address of the CNBC section page to scrape
url = 'https://www.cnbc.com/technology/'

# Collect the headlines, asking scrape for 7 "load more" expansions
tech_df = scrape(url=url, section='Technology', pages=7)

# Write the headlines to disk without the DataFrame index
tech_df.to_csv(path_or_buf='tech.csv', index=False)

                                                Header     Section
0    Xpeng plans to hire 4,000 people, invest in AI...  Technology
1    Nintendo shares fall nearly 6% after report th...  Technology
2    Singapore’s AI ambitions get a boost with $740...  Technology
3    EU reportedly set to fine Apple 500 million eu...  Technology
4    Sony plunged $10 billion after its PS5 sales c...  Technology
..                                                 ...         ...
196  Founders of Wise and Skype raise $436 million ...  Technology
197  India’s Zee Entertainment dives 31% after Sony...  Technology
198  Tencent’s Riot Games division cuts 11% of staf...  Technology
199  YouTube star MrBeast makes more than $263,000 ...  Technology
200  New details emerge about SEC’s X account hack,...  Technology

[201 rows x 2 columns]
The number of News Scraped: 201


In [12]:
# Politics: address of the CNBC section page to scrape
url = 'https://www.cnbc.com/politics/'

# Collect the headlines, asking scrape for 7 "load more" expansions
politics_df = scrape(url=url, section='Politics', pages=7)

# Write the headlines to disk without the DataFrame index
politics_df.to_csv(path_or_buf='politics.csv', index=False)

                                                Header   Section
0    Wisconsin’s new legislative maps signed into l...  Politics
1    ‘China has a lot more to lose’: U.S. consideri...  Politics
2    Russia says an investigation is underway into ...  Politics
3    Houthis claim ‘catastrophic’ attack in Red Sea...  Politics
4    Trump breaks silence on Navalny; Yulia Navalna...  Politics
..                                                 ...       ...
195  Jury rules Trump must pay E. Jean Carroll $83....  Politics
196  Saudi Arabia’s first alcohol store aims to tac...  Politics
197  Nikki Haley touts $1.2 million in donations af...  Politics
198  Alabama execution using nitrogen gas puts U.S....  Politics
199  Top UN court rules Israel must prevent genocid...  Politics

[200 rows x 2 columns]
The number of News Scraped: 200


In [3]:
# Investing: address of the CNBC section page to scrape
url = 'https://www.cnbc.com/investing/'

# Collect the headlines, asking scrape for 7 "load more" expansions
invest_df = scrape(url=url, section='Investing', pages=7)

# Write the headlines to disk without the DataFrame index
invest_df.to_csv(path_or_buf='invest.csv', index=False)

                                                Header    Section
0    Top Wall Street analysts like these 3 stocks f...  Investing
1    These options plays can help investors profit ...  Investing
2    Goldman Sachs likes these five tech stocks com...  Investing
3    Cocoa hit a record high. Here’s what’s driving...  Investing
4    These are the stocks investors should bet agai...  Investing
..                                                 ...        ...
196  A lower-cost way to get long this ‘Magnificent...  Investing
197  Apple analysts are starting to get a bit worri...  Investing
198  Goldman Sachs says the soft landing for the ec...  Investing
199  Monday’s analyst calls: Dollar store stock get...  Investing
200  Warren Buffett’s Berkshire Hathaway keeps buyi...  Investing

[201 rows x 2 columns]
The number of News Scraped: 201


In [3]:
# Health and Science: address of the CNBC section page to scrape
url = 'https://www.cnbc.com/health-and-science/'

# Collect the headlines, asking scrape for 7 "load more" expansions
health_science_df = scrape(url=url, section='Health_and_Science', pages=7)

# Write the headlines to disk without the DataFrame index
health_science_df.to_csv(path_or_buf='healthandscience.csv', index=False)

                                                Header             Section
0    Most employer health plans don’t cover new blo...  Health_and_Science
1    In this fast-paced bull market, we’re increasi...  Health_and_Science
2    What Eli Lilly investors can learn from the sl...  Health_and_Science
3    FTC, HHS administration examining cause of gen...  Health_and_Science
4    Jim Cramer praises GE following U.S. regulator...  Health_and_Science
..                                                 ...                 ...
196  15 stocks with a history of dividend growth ar...  Health_and_Science
197  As Covid-testing business drops, Abbott search...  Health_and_Science
198  Microsoft announces new AI tools to help docto...  Health_and_Science
199  This animal health stock could gain nearly 30%...  Health_and_Science
200  How Ozempic and Wegovy turned Novo Nordisk int...  Health_and_Science

[201 rows x 2 columns]
The number of News Scraped: 201


In [3]:
# Automobile: address of the CNBC section page to scrape
url = 'https://www.cnbc.com/autos/'

# Collect the headlines, asking scrape for 7 "load more" expansions
auto_df = scrape(url=url, section='Automobile', pages=7)

# Write the headlines to disk without the DataFrame index
auto_df.to_csv(path_or_buf='auto.csv', index=False)

                                                Header     Section
0    Companies — profitable or not — make 2024 the ...  Automobile
1    What the U.S. can learn from Norway when it co...  Automobile
2    UAW threatens to strike Ford’s Kentucky Truck ...  Automobile
3    2 takeaways from Ford CEO Jim Farley that boos...  Automobile
4    Ford CEO says forget Tesla, ‘Pro’ unit is auto...  Automobile
..                                                 ...         ...
195  GM reaches tentative deal with UAW, ending str...  Automobile
196  Canadian auto union reaches deal with Stellant...  Automobile
197  UAW deal with Ford includes $8.1 billion in in...  Automobile
198  UAW in tentative deal to end labor strike with...  Automobile
199  GM, UAW may be nearing a labor deal after mara...  Automobile

[200 rows x 2 columns]
The number of News Scraped: 200


##### Load dataset

In [13]:
# Read the five per-section CSV files back in, one DataFrame per section
auto, healthandscience, invest, politics, tech = (
    pd.read_csv(csv_name)
    for csv_name in (
        'auto.csv',
        'healthandscience.csv',
        'invest.csv',
        'politics.csv',
        'tech.csv',
    )
)

In [14]:
# Preview the first five rows of the automobile headlines
auto.head(n=5)

Unnamed: 0,Header,Section
0,Companies — profitable or not — make 2024 the ...,Automobile
1,What the U.S. can learn from Norway when it co...,Automobile
2,UAW threatens to strike Ford’s Kentucky Truck ...,Automobile
3,2 takeaways from Ford CEO Jim Farley that boos...,Automobile
4,"Ford CEO says forget Tesla, ‘Pro’ unit is auto...",Automobile


In [15]:
# Stack the five section frames row-wise, then replace the repeated
# per-file row labels with a fresh 0..n-1 index
news_classifier_df = pd.concat(
    [auto, healthandscience, invest, politics, tech],
    axis=0,
).reset_index(drop=True)

news_classifier_df

Unnamed: 0,Header,Section
0,Companies — profitable or not — make 2024 the ...,Automobile
1,What the U.S. can learn from Norway when it co...,Automobile
2,UAW threatens to strike Ford’s Kentucky Truck ...,Automobile
3,2 takeaways from Ford CEO Jim Farley that boos...,Automobile
4,"Ford CEO says forget Tesla, ‘Pro’ unit is auto...",Automobile
...,...,...
998,Founders of Wise and Skype raise $436 million ...,Technology
999,India’s Zee Entertainment dives 31% after Sony...,Technology
1000,Tencent’s Riot Games division cuts 11% of staf...,Technology
1001,"YouTube star MrBeast makes more than $263,000 ...",Technology


In [16]:
# Export the combined dataset to a single CSV, leaving out the index column
news_classifier_df.to_csv(
    path_or_buf='news_classifier_dataset.csv',
    index=False,
)