# ***Sentiment Analysis***

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
def get_article_from_url(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the specific div
    content_div = soup.find('div', id='therace-post-content')

    # Extract all <p> tags within this div
    paragraphs = content_div.find_all('p')

    article = ' '.join([p.get_text() for p in paragraphs])
    return article

In [3]:
# Use the function
url = 'https://the-race.com/formula-1/ferrari-red-bull-long-run-bahrain-f1-testing-mark-hughes/'
article = get_article_from_url(url)

In [4]:
print(article)

Day two of Formula 1 2024 pre-season testing in Bahrain gave us a slightly more detailed data set to work from than the opening day - and although it was Carlos Sainz’s Ferrari at the top of the headline times, what do we find when we delve beneath the surface? This was a day on which reigning champion Max Verstappen did not get to drive the Red Bull, his scheduled afternoon appearance was cancelled to give Sergio Perez more cockpit time after his morning running was curtailed by the red flag and the loss of over an hour of running. A pace comparison, of course, requires some assumptions - in our case, there's the pre-supposition that Red Bull and Ferrari are using the same base weight for their low-fuel laps. This, admittedly, may not be the case.  But recent history suggests that the two teams actually do run quite a similar fuel load – and we derive this from any variation in how they have qualified in the opening races compared to their calculated pre-season testing performance. La

In [5]:
import string
from collections import Counter
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
text = article

In [6]:
lower_case = text.lower()
lower_case

"day two of formula 1 2024 pre-season testing in bahrain gave us a slightly more detailed data set to work from than the opening day - and although it was carlos sainz’s ferrari at the top of the headline times, what do we find when we delve beneath the surface? this was a day on which reigning champion max verstappen did not get to drive the red bull, his scheduled afternoon appearance was cancelled to give sergio perez more cockpit time after his morning running was curtailed by the red flag and the loss of over an hour of running. a pace comparison, of course, requires some assumptions - in our case, there's the pre-supposition that red bull and ferrari are using the same base weight for their low-fuel laps. this, admittedly, may not be the case.  but recent history suggests that the two teams actually do run quite a similar fuel load – and we derive this from any variation in how they have qualified in the opening races compared to their calculated pre-season testing performance. l

In [7]:
clean_text = lower_case.translate(str.maketrans('','',string.punctuation))
clean_text

'day two of formula 1 2024 preseason testing in bahrain gave us a slightly more detailed data set to work from than the opening day  and although it was carlos sainz’s ferrari at the top of the headline times what do we find when we delve beneath the surface this was a day on which reigning champion max verstappen did not get to drive the red bull his scheduled afternoon appearance was cancelled to give sergio perez more cockpit time after his morning running was curtailed by the red flag and the loss of over an hour of running a pace comparison of course requires some assumptions  in our case theres the presupposition that red bull and ferrari are using the same base weight for their lowfuel laps this admittedly may not be the case  but recent history suggests that the two teams actually do run quite a similar fuel load – and we derive this from any variation in how they have qualified in the opening races compared to their calculated preseason testing performance last year in bahrain

In [8]:
tokenized_words = clean_text.split()
tokenized_words

['day',
 'two',
 'of',
 'formula',
 '1',
 '2024',
 'preseason',
 'testing',
 'in',
 'bahrain',
 'gave',
 'us',
 'a',
 'slightly',
 'more',
 'detailed',
 'data',
 'set',
 'to',
 'work',
 'from',
 'than',
 'the',
 'opening',
 'day',
 'and',
 'although',
 'it',
 'was',
 'carlos',
 'sainz’s',
 'ferrari',
 'at',
 'the',
 'top',
 'of',
 'the',
 'headline',
 'times',
 'what',
 'do',
 'we',
 'find',
 'when',
 'we',
 'delve',
 'beneath',
 'the',
 'surface',
 'this',
 'was',
 'a',
 'day',
 'on',
 'which',
 'reigning',
 'champion',
 'max',
 'verstappen',
 'did',
 'not',
 'get',
 'to',
 'drive',
 'the',
 'red',
 'bull',
 'his',
 'scheduled',
 'afternoon',
 'appearance',
 'was',
 'cancelled',
 'to',
 'give',
 'sergio',
 'perez',
 'more',
 'cockpit',
 'time',
 'after',
 'his',
 'morning',
 'running',
 'was',
 'curtailed',
 'by',
 'the',
 'red',
 'flag',
 'and',
 'the',
 'loss',
 'of',
 'over',
 'an',
 'hour',
 'of',
 'running',
 'a',
 'pace',
 'comparison',
 'of',
 'course',
 'requires',
 'some',
 '

In [9]:
stop_words = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself",
              "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself",
              "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these",
              "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do",
              "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while",
              "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before",
              "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again",
              "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each",
              "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than",
              "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"]
final_words = []
for word in tokenized_words :
    if word not in stop_words :
        final_words.append(word)
        
final_words

['day',
 'two',
 'formula',
 '1',
 '2024',
 'preseason',
 'testing',
 'bahrain',
 'gave',
 'us',
 'slightly',
 'detailed',
 'data',
 'set',
 'work',
 'opening',
 'day',
 'although',
 'carlos',
 'sainz’s',
 'ferrari',
 'top',
 'headline',
 'times',
 'find',
 'delve',
 'beneath',
 'surface',
 'day',
 'reigning',
 'champion',
 'max',
 'verstappen',
 'get',
 'drive',
 'red',
 'bull',
 'scheduled',
 'afternoon',
 'appearance',
 'cancelled',
 'give',
 'sergio',
 'perez',
 'cockpit',
 'time',
 'morning',
 'running',
 'curtailed',
 'red',
 'flag',
 'loss',
 'hour',
 'running',
 'pace',
 'comparison',
 'course',
 'requires',
 'assumptions',
 'case',
 'theres',
 'presupposition',
 'red',
 'bull',
 'ferrari',
 'using',
 'base',
 'weight',
 'lowfuel',
 'laps',
 'admittedly',
 'may',
 'case',
 'recent',
 'history',
 'suggests',
 'two',
 'teams',
 'actually',
 'run',
 'quite',
 'similar',
 'fuel',
 'load',
 '–',
 'derive',
 'variation',
 'qualified',
 'opening',
 'races',
 'compared',
 'calculated',

using nltk library to perform sentiment analysis on the given dataset.

In [10]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to C:\Users\Yatharth
[nltk_data]     Jain\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [11]:
def sentiment_analyze(senti_text) :
    score = SentimentIntensityAnalyzer().polarity_scores(senti_text)
    neg = score['neg']
    pos = score['pos']
    print(score)
    if pos > neg :
        print("Positive Sentiment")
    elif pos < neg :
        print("Negative Sentiment")
    else :
        print("Neutral Sentiment")

In [12]:
sentiment_analyze(clean_text)

{'neg': 0.02, 'neu': 0.897, 'pos': 0.083, 'compound': 0.9975}
Positive Sentiment
