<a href="https://colab.research.google.com/github/Asterisk514/Web-Scraping-and-Sentiment-Classification/blob/main/Web_Scraping_and_Sentiment_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing TextBlob for Classification ###

In [13]:
from textblob import TextBlob

### Importing Required Modules ###

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Fetching HTML content using the requests library ###

In [2]:
# Download the first listing page. The timeout keeps a stalled connection from
# hanging the notebook, and raise_for_status() stops an HTTP error page from
# being parsed as if it were the listing.
response = requests.get('https://www.livemint.com/market/stock-market-news', timeout=30)
response.raise_for_status()
webpage = response.text

In [3]:
# Parse the downloaded HTML into a searchable tree using the lxml parser.
soup = BeautifulSoup(markup=webpage, features='lxml')

In [4]:
# Spot-check the parse: show the stripped text of the twelfth <h2> on the page.
soup.find_all('h2')[11].get_text().strip()

'Nifty 50, Sensex hit fresh highs; auto stocks Maruti, M&M steal the show'

In [5]:
# Parallel accumulators filled by the extraction loops: index i in each list
# refers to the same article (headline, update timestamp, reading time, link).
names, times, reading_time, urls = [], [], [], []

In [6]:
from urllib.parse import urljoin

### Extracting information from the first listing page ###

In [7]:
# Address of the first listing page; article links are resolved against it.
base_url = 'https://www.livemint.com/market/stock-market-news'

In [8]:
# Collect the headline, update timestamp, reading time and article URL for
# every story card on the first listing page.
all_news = soup.find_all('div', class_ = "listingNew clearfix impression-candidate ga-tracking")
for news in all_news:
    the_news = news.find('div', class_ = "listtostory clearfix")
    headline_section = the_news.find('div', class_ = "headlineSec")

    # Read every field before touching the result lists, so a card with an
    # unexpected layout raises without leaving the lists at different lengths.
    headline = headline_section.find('h2').text.strip()
    updated_time = headline_section.find('span', attrs = {'data-updatedtime' : True}).get('data-updatedtime').strip()
    time_read = headline_section.find('span', class_ = "fl date").find('em').text
    target_url = headline_section.find('h2', class_='headline').find('a')['href']

    # Resolve the href as-is. Slicing "/market/stock-market-news" off first
    # left a root-relative path such as "/story.html", which urljoin resolves
    # against the site root, dropping the section from the final link.
    final_link = urljoin(base_url, target_url)

    names.append(headline)
    times.append(updated_time)
    reading_time.append(time_read)
    urls.append(final_link)

### Extracting the same information from listing pages 2 to 9 ###

In [10]:
# Repeat the extraction for listing pages 2 through 9, appending to the same
# result lists.
for idx in range(2, 10):
    base_url = f"https://www.livemint.com/market/stock-market-news/page-{idx}"

    # The timeout keeps a stalled connection from hanging the loop, and
    # raise_for_status() stops an HTTP error page from being parsed as a listing.
    response = requests.get(base_url, timeout=30)
    response.raise_for_status()
    webpage = response.text

    soup = BeautifulSoup(webpage, 'lxml')
    all_news = soup.find_all('div', class_ = "listingNew clearfix impression-candidate ga-tracking")
    for news in all_news:
        the_news = news.find('div', class_ = "listtostory clearfix")
        headline_section = the_news.find('div', class_ = "headlineSec")

        # Read every field before touching the result lists, so a card with an
        # unexpected layout raises without leaving the lists at different lengths.
        headline = headline_section.find('h2').text.strip()
        updated_time = headline_section.find('span', attrs = {'data-updatedtime' : True}).get('data-updatedtime').strip()
        time_read = headline_section.find('span', class_ = "fl date").find('em').text
        target_url = headline_section.find('h2', class_='headline').find('a')['href']

        # Resolve the href as-is. Slicing "/market/stock-market-news" off first
        # left a root-relative path such as "/story.html", which urljoin resolves
        # against the site root, dropping the section from the final link.
        final_link = urljoin(base_url, target_url)

        names.append(headline)
        times.append(updated_time)
        reading_time.append(time_read)
        urls.append(final_link)

### Extracted news headlines ###

In [11]:
# Echo every scraped headline, one per line, as a quick visual check.
for scraped_headline in names:
    print(scraped_headline)

Jerome Powell says more good data would strengthen case for rate cuts by Fed
Budget 2024 | D-Street experts see heightened volatility ahead, suggest caution
Stocks to buy: Heritage Foods, ONGC, among top four stock picks by SMC Global
Voltas, Blue Star, Havells, others: Strong summer season sales to drive Q1 show
Budget 2024 to focus on capex, Sensex eyes 90K by year-end: Subhash Aggarwal
Kalpataru Projects rallied 265% in 25 months; should investors bet on the stock?
Q1 earnings preview: These 4 companies may post over 40% jump in net profit
Jupiter Wagon shares fall over 4% after board approves QIP launch
Multibagger: Stylam Industries stock gained 110% in 16 months; what lies ahead?
Sobha: After over 250% jump in the stock in a year, Geojit recommends ‘sell’
Top Gainers and Losers today on 9 July, 2024: Maruti Suzuki India, Mahindra & Mahindra, Tata Consumer, Reliance Industries among most active stocks; Check full list here
Nifty 50, Sensex hit fresh highs; auto stocks Maruti, M&M 

### Performing sentiment analysis with TextBlob and storing the results in a DataFrame ###

In [15]:
# Score each headline with TextBlob and label it by the sign of the polarity:
# above zero is 'positive', below zero is 'negative', anything else 'neutral'.
data = []
for title in names:
    polarity = TextBlob(title).sentiment.polarity
    label = 'positive' if polarity > 0 else 'negative' if polarity < 0 else 'neutral'
    data.append({'headline': title, 'sentiment': label})

# One row per headline; the bare expression below renders the table.
df = pd.DataFrame(data)
df

Unnamed: 0,headline,sentiment
0,Jerome Powell says more good data would streng...,positive
1,Budget 2024 | D-Street experts see heightened ...,neutral
2,"Stocks to buy: Heritage Foods, ONGC, among top...",positive
3,"Voltas, Blue Star, Havells, others: Strong sum...",positive
4,"Budget 2024 to focus on capex, Sensex eyes 90K...",neutral
...,...,...
159,"Sensex, Nifty 50 end with mild gains on profit...",positive
160,Multibagger Shilchar Technologies soars 11550%...,neutral
161,Power sector's meteoric rise faces valuation c...,neutral
162,Bandhan Bank share price down almost 2% post Q...,negative
