In [4]:
!pip3 install textblob
!pip3 install spacy



In [5]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [6]:
import requests
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline
from nltk import download
from nltk.tokenize import sent_tokenize
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import pandas as pd
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation


In [7]:
download('vader_lexicon')
download('punkt')

nlp = spacy.load("en_core_web_sm")

urls = [
    'https://www.robomarkets.com/blog/stock-market/boeing-stock-analysis-and-forecast/',
    'https://www.robomarkets.com/blog/stock-market/general-motors-stock-analysis-and-outlook/'
]

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [8]:
def scrape_content(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    paragraphs = soup.find_all('p')
    content = ' '.join([para.get_text() for para in paragraphs if para.get_text()])
    return content

In [9]:
contents = [scrape_content(url) for url in urls]

vader_analyzer = SentimentIntensityAnalyzer()
bert_analyzer = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [10]:
def analyze_sentiment(content):
    sentences = sent_tokenize(content)
    sentiment_scores = []
    for sentence in sentences:
        vader_score = vader_analyzer.polarity_scores(sentence)
        textblob_score = TextBlob(sentence).sentiment.polarity
        bert_score = bert_analyzer(sentence)[0]
        sentiment_scores.append((sentence, vader_score, textblob_score, bert_score))
    return sentiment_scores


all_sentiments = []
for content in contents:
    sentiments = analyze_sentiment(content)
    all_sentiments.append(sentiments)

In [11]:
def extract_entities_and_sentiment(sentiments):
    entity_sentiments = []
    for sentence, vader_score, textblob_score, bert_score in sentiments:
        doc = nlp(sentence)
        for ent in doc.ents:
            entity_sentiments.append((ent.text, ent.label_, vader_score['compound'], textblob_score, bert_score['score']))
    return entity_sentiments

all_entity_sentiments = []
for sentiments in all_sentiments:
    entity_sentiments = extract_entities_and_sentiment(sentiments)
    all_entity_sentiments.append(entity_sentiments)

In [12]:
def perform_topic_modeling(contents, n_topics=5):
    vectorizer = CountVectorizer(stop_words='english')
    dtm = vectorizer.fit_transform(contents)
    lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
    lda.fit(dtm)
    return lda, vectorizer

lda, vectorizer = perform_topic_modeling(contents)
topics = lda.transform(vectorizer.transform(contents))

In [13]:
def perform_topic_modeling(contents, n_topics=5):
    vectorizer = CountVectorizer(stop_words='english')
    dtm = vectorizer.fit_transform(contents)
    lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
    lda.fit(dtm)
    return lda, vectorizer

lda, vectorizer = perform_topic_modeling(contents)
topics = lda.transform(vectorizer.transform(contents))

In [15]:
for i, sentiments in enumerate(all_sentiments):
    print(f'Detailed sentiment scores for Blog {i+1}:')
    for sentence, vader_score, textblob_score, bert_score in sentiments:
        print(f'Sentence: {sentence}')
        print(f'VADER scores: {vader_score}')
        print(f'TextBlob polarity: {textblob_score}')
        print(f'BERT score: {bert_score}')
        print('-' * 80)

for i, entity_sentiments in enumerate(all_entity_sentiments):
    print(f'Entity sentiments for Blog {i+1}:')
    for ent_text, ent_label, vader_score, textblob_score, bert_score in entity_sentiments:
        print(f'Entity: {ent_text} ({ent_label})')
        print(f'VADER score: {vader_score}')
        print(f'TextBlob polarity: {textblob_score}')
        print(f'BERT score: {bert_score}')
        print('-' * 80)

Detailed sentiment scores for Blog 1:
Sentence: On 2 January 2024, Boeing Company’s stock surged to 267 USD per unit, reflecting a 42.5% return in just two months.
VADER scores: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
TextBlob polarity: 0.0
BERT score: {'label': 'NEGATIVE', 'score': 0.8579011559486389}
--------------------------------------------------------------------------------
Sentence: However, by 22 January 2024, the share price had plummeted by over 19%, reaching 215 USD.
VADER scores: {'neg': 0.0, 'neu': 0.778, 'pos': 0.222, 'compound': 0.4588}
TextBlob polarity: 0.0
BERT score: {'label': 'NEGATIVE', 'score': 0.9970587491989136}
--------------------------------------------------------------------------------
Sentence: In this article, we aim to explain the reasons behind the decline in the stock value of one of the world’s largest aircraft, space, and military machinery manufacturers.
VADER scores: {'neg': 0.0, 'neu': 0.918, 'pos': 0.082, 'compound': 0.34}
TextBl