In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Financial news article pages to scrape; each entry is fetched in turn by
# scrape_articles_bs4 below.
# NOTE(review): both paths embed the date 2023-10-20, so these look like
# dated market-update pages that may no longer serve the original content —
# confirm they still resolve before trusting downstream results.
urls = [
    'https://finance.yahoo.com/news/stock-market-news-live-updates-2023-10-20',
    'https://www.cnbc.com/2023/10/20/stock-market-today.html',
    # Add more URLs as needed
]

# Function to scrape articles using BeautifulSoup
def scrape_articles_bs4(urls, timeout=10):
    """Fetch each URL and return the joined text of its <p> elements.

    Parameters
    ----------
    urls : iterable of str
        Pages to download.
    timeout : float, optional
        Seconds to wait on each HTTP request before giving up (default 10).
        requests applies no timeout unless one is passed, so without this a
        stalled server would block the notebook indefinitely.

    Returns
    -------
    list of str
        One string per URL that was scraped successfully, in input order.
        URLs that fail (network error, timeout, HTTP 4xx/5xx, parse error)
        are reported with print and skipped, so the result can be shorter
        than `urls` and positions need not line up with the input.
    """
    articles = []
    for url in urls:
        try:
            response = requests.get(url, timeout=timeout)
            # Treat HTTP error statuses as failures instead of parsing the
            # error page as if it were the article.
            # NOTE(review): the saved output for the first URL is a
            # "Thank you for your patience" placeholder; if the site serves
            # that with a 200 status this check will not catch it — confirm.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            article_text = ' '.join(para.get_text() for para in soup.find_all('p'))
            articles.append(article_text)
        except Exception as e:
            # Best-effort scraping: report the failure and move on.
            print(f"Error processing {url}: {e}")
    return articles

# Scrape the articles
articles_bs4 = scrape_articles_bs4(urls)

# Display the first article. The scraper skips URLs that fail, so the list
# can be empty; guard the index instead of dying with a bare IndexError.
if articles_bs4:
    print(articles_bs4[0])
else:
    print("No articles were scraped; check the URLs and network access.")


Thank you for your patience. Our engineers are working quickly to resolve the issue.


In [10]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK data packages this notebook relies on
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

# Function to preprocess text
def preprocess_text(text):
    """Tokenize `text` and drop English stopwords.

    Parameters
    ----------
    text : str
        Raw article text.

    Returns
    -------
    str
        The remaining tokens joined by single spaces. Only stopwords are
        removed (compared case-insensitively); every other token produced by
        word_tokenize, including punctuation, is kept with its original case.
    """
    # Build the stopword set once per call. Calling stopwords.words() inside
    # the filter re-evaluates it for every token and tests membership against
    # a list; a set gives the same result with constant-time lookups.
    english_stopwords = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    return ' '.join(tok for tok in tokens if tok.lower() not in english_stopwords)

# Run the preprocessing step over every scraped article
preprocessed_articles = list(map(preprocess_text, articles_bs4))

# Show the first cleaned article as a quick sanity check
first_preprocessed = preprocessed_articles[0]
print(first_preprocessed)


[nltk_data] Downloading package punkt to C:\Users\GURUDEEP
[nltk_data]     SINGH\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\GURUDEEP
[nltk_data]     SINGH\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Thank patience . engineers working quickly resolve issue .


In [11]:
from textblob import TextBlob

# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the TextBlob sentiment polarity computed for `text`."""
    return TextBlob(text).sentiment.polarity

# Score each preprocessed article, keeping the same order as the input list
sentiments = list(map(analyze_sentiment, preprocessed_articles))

# Show the polarity scores
print(sentiments)


[0.3333333333333333, 0.5527157001414426]


In [12]:
import yfinance as yf

# Download historical stock price data for a specific stock (e.g., MSFT).
# yfinance treats `end` as exclusive, so 2023-10-20 itself is not downloaded.
ticker = 'MSFT'
stock_data = yf.download(ticker, start='2023-10-01', end='2023-10-20')

# Ensure the lengths match.
# NOTE(review): rows are paired purely by position — the i-th sentiment score
# is attached to the i-th trading day of the download window, not to the day
# the article was published. Article dates are not available in `sentiments`,
# so a real date-based join needs them carried through from the scraper.
min_length = min(len(stock_data), len(sentiments))
stock_data = stock_data[:min_length]
sentiments = sentiments[:min_length]

# Newer yfinance releases return MultiIndex columns even for a single ticker,
# in which case stock_data['Close'] is a one-column DataFrame rather than a
# Series and cannot be used as a dict value below; reduce it to a Series.
close_prices = stock_data['Close']
if isinstance(close_prices, pd.DataFrame):
    close_prices = close_prices.iloc[:, 0]

# Create a DataFrame with the sentiment scores and stock prices
df = pd.DataFrame({
    'Date': stock_data.index,
    'Close': close_prices,
    'Sentiment': sentiments
})

# Display the DataFrame
print(df)

# Calculate the correlation between sentiment and stock price.
# Two points always lie on a straight line, so the coefficient carries no
# information until there are at least three paired observations.
if len(df) < 3:
    print(
        f"Warning: only {len(df)} paired observation(s); with fewer than 3 "
        "points the correlation is undefined or trivially +/-1."
    )
correlation = df['Sentiment'].corr(df['Close'])
print(f'Correlation between sentiment and stock price: {correlation}')


[*********************100%***********************]  1 of 1 completed

                 Date       Close  Sentiment
Date                                        
2023-10-02 2023-10-02  321.799988   0.333333
2023-10-03 2023-10-03  313.390015   0.552716
Correlation between sentiment and stock price: -0.9999999999999999



