# ***Sentiment Analysis***

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
def get_article_from_url(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the specific div
    content_div = soup.find('div', id='therace-post-content')

    # Extract all <p> tags within this div
    paragraphs = content_div.find_all('p')

    article = ' '.join([p.get_text() for p in paragraphs])
    return article

In [3]:
# Use the function
url = 'https://the-race.com/formula-1/ferrari-red-bull-long-run-bahrain-f1-testing-mark-hughes/'
article = get_article_from_url(url)

In [4]:
print(article)

Day two of Formula 1 2024 pre-season testing in Bahrain gave us a slightly more detailed data set to work from than the opening day - and although it was Carlos Sainz’s Ferrari at the top of the headline times, what do we find when we delve beneath the surface? This was a day on which reigning champion Max Verstappen did not get to drive the Red Bull, his scheduled afternoon appearance was cancelled to give Sergio Perez more cockpit time after his morning running was curtailed by the red flag and the loss of over an hour of running. A pace comparison, of course, requires some assumptions - in our case, there's the pre-supposition that Red Bull and Ferrari are using the same base weight for their low-fuel laps. This, admittedly, may not be the case.  But recent history suggests that the two teams actually do run quite a similar fuel load – and we derive this from any variation in how they have qualified in the opening races compared to their calculated pre-season testing performance. La

In [5]:
import string
from collections import Counter
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
text = article

In [6]:
lower_case = text.lower()

In [7]:
clean_text = lower_case.translate(str.maketrans('','',string.punctuation))

In [8]:
tokenized_words = clean_text.split()

In [9]:
stop_words = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself",
              "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself",
              "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these",
              "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do",
              "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while",
              "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before",
              "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again",
              "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each",
              "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than",
              "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"]
final_words = []
for word in tokenized_words :
    if word not in stop_words :
        final_words.append(word)

using nltk library to perform sentiment analysis on the given dataset.

In [10]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to C:\Users\Yatharth
[nltk_data]     Jain\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [11]:
def sentiment_analyze(senti_text) :
    score = SentimentIntensityAnalyzer().polarity_scores(senti_text)
    neg = score['neg']
    pos = score['pos']
    print(score)
    if pos > neg :
        print("Positive Sentiment")
    elif pos < neg :
        print("Negative Sentiment")
    else :
        print("Neutral Sentiment")

In [12]:
sentiment_analyze(clean_text)

{'neg': 0.02, 'neu': 0.897, 'pos': 0.083, 'compound': 0.9975}
Positive Sentiment


In [13]:
! pip install -U accelerate
! pip install -U transformers

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [14]:
page= requests.get('https://timesofindia.indiatimes.com/business/budget/interim-budget-2024-a-trailer-of-the-visionary-roadmap-for-viksit-bharat-key-tax-proposals-to-know/articleshow/107449222.cms').text

In [15]:
content = BeautifulSoup(page, 'html.parser')

In [16]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finbert')
model= AutoModelForSequenceClassification.from_pretrained('ProsusAI/finbert')

In [17]:
from transformers import pipeline
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

In [18]:
l=[]
for line in content.find_all('li'):
  try:
    l.append(str(line.contents[0].contents[0].contents[0]))
    print(l[-1])
  except:
    pass

News
Business News
Budget News


In [19]:
import re
text= []
for item in l:
  text.append(re.sub(r'<.*?>',"",item))
  print(text[-1])

News
Business News
Budget News


In [20]:
results = classifier(text)
avg= 0.0
pos= 0
for result in results:
  avg+= result['score']
  print(result)
avg/=len(results)

{'label': 'neutral', 'score': 0.8944812417030334}
{'label': 'neutral', 'score': 0.901756227016449}
{'label': 'neutral', 'score': 0.9311054944992065}


In [21]:
print("Average Score:",avg)
print("Sentiment:","Positive" if avg>0.5 else "Negative")

Average Score: 0.9091143210728964
Sentiment: Positive
