Importing the Necessary Packages

In [30]:
import pandas as pd
from datetime import datetime
import textblob

Normalize Dates For the Sentiment Dataset and Only Keeping Data about the Stocks that we have the Financial Data Of

In [24]:
# Load sentiment data
sentiment_df = pd.read_csv(r'C:\Users\Blen\OneDrive\Documents\10Academy\Week1\Data\raw_analyst_ratings.csv')

# Drop the URL column
sentiment_df = sentiment_df.drop(columns=["url"])
# Convert date 
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'], format='mixed', dayfirst=True, utc=True)
# Filter for relevant stocks
relevant_stocks = ['AAPL', 'AMZN', 'GOOG', 'NVDA']
sentiment_df = sentiment_df[sentiment_df['stock'].isin(relevant_stocks)]

In [27]:
sentiment_df.head(10)

Unnamed: 0.1,Unnamed: 0,headline,publisher,date,stock
6680,7120,Tech Stocks And FAANGS Strong Again To Start D...,JJ Kinahan,2020-06-10 15:33:26+00:00,AAPL
6681,7121,10 Biggest Price Target Changes For Wednesday,Lisa Levin,2020-06-10 12:14:08+00:00,AAPL
6682,7122,"Benzinga Pro's Top 5 Stocks To Watch For Wed.,...",Benzinga Newsdesk,2020-06-10 11:53:47+00:00,AAPL
6683,7123,"Deutsche Bank Maintains Buy on Apple, Raises P...",Benzinga Newsdesk,2020-06-10 11:19:25+00:00,AAPL
6684,7124,Apple To Let Users Trade In Their Mac Computer...,Neer Varshney,2020-06-10 10:27:11+00:00,AAPL
6685,7125,Big Tech Reaches New Record Heights At The Sto...,Neer Varshney,2020-06-10 04:52:01+00:00,AAPL
6686,7126,Why Apple's Stock Is Trading Higher Today,Tanzeel Akhtar,2020-06-09 19:14:07+00:00,AAPL
6687,7127,Apple Could Announce In-House Chips For Macs A...,Shanthi Rexaline,2020-06-09 17:58:46+00:00,AAPL
6688,7128,Apple shares are trading higher despite market...,Benzinga Newsdesk,2020-06-09 16:41:02+00:00,AAPL
6689,7129,"Sonos Shares Spike To Session High, Now Up 9.5...",Benzinga Newsdesk,2020-06-09 15:11:34+00:00,AAPL


In [38]:
sentiment_df.shape

(5064, 6)

Calculate Sentiment Scores and Aggregate the Sentiments Scores that are on the Same Day by Taking their Average

In [31]:
from textblob import TextBlob

# Function to calculate sentiment score
def calculate_sentiment(text):
    analysis = TextBlob(text)
    return analysis.sentiment.polarity

# Apply sentiment analysis
sentiment_df['sentiment_score'] = sentiment_df['headline'].apply(calculate_sentiment)

# Aggregate sentiment scores by date and stock
aggregated_sentiment = sentiment_df.groupby(['date', 'stock'])['sentiment_score'].mean().reset_index()


In [35]:
aggregated_sentiment.head()

Unnamed: 0,date,stock,sentiment_score
0,2011-01-06 00:00:00+00:00,NVDA,0.0
1,2011-01-08 00:00:00+00:00,NVDA,0.1
2,2011-01-11 00:00:00+00:00,NVDA,0.0
3,2011-01-12 00:00:00+00:00,NVDA,0.079167
4,2011-02-06 00:00:00+00:00,NVDA,0.0


In [37]:
aggregated_sentiment.shape

(1717, 3)

Processing Each Stock Data Individually 

In [44]:
# Load stock data for AAPL, AMZN, GOOG, NVDA separately
stock_files = {
    'AAPL': r'C:\Users\Blen\OneDrive\Documents\10Academy\Week1\Data\yfinance_data\AAPL_historical_data.csv',
    'AMZN': r'C:\Users\Blen\OneDrive\Documents\10Academy\Week1\Data\yfinance_data\AMZN_historical_data.csv',
    'GOOG': r'C:\Users\Blen\OneDrive\Documents\10Academy\Week1\Data\yfinance_data\GOOG_historical_data.csv',
    'NVDA': r'C:\Users\Blen\OneDrive\Documents\10Academy\Week1\Data\yfinance_data\NVDA_historical_data.csv'
}

stock_data = {}

for stock, file_path in stock_files.items():
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'], utc=True)
    df = df.drop(['Dividends', 'Stock Splits','Volume'], axis=1)
    stock_data[stock] = df



Computing Daily Returns for the Stocks

In [45]:
# Function to calculate daily returns
def calculate_daily_return(df):
    df['Daily Return'] = (df['Close'] - df['Open']) / df['Open']
    return df

# Apply the function to calculate daily returns for each stock
for stock, df in stock_data.items():
    stock_data[stock] = calculate_daily_return(df)


In [46]:
stock_data['AAPL']

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Daily Return
0,1980-12-12 00:00:00+00:00,0.128348,0.128906,0.128348,0.128348,0.098943,0.000000
1,1980-12-15 00:00:00+00:00,0.122210,0.122210,0.121652,0.121652,0.093781,-0.004566
2,1980-12-16 00:00:00+00:00,0.113281,0.113281,0.112723,0.112723,0.086898,-0.004926
3,1980-12-17 00:00:00+00:00,0.115513,0.116071,0.115513,0.115513,0.089049,0.000000
4,1980-12-18 00:00:00+00:00,0.118862,0.119420,0.118862,0.118862,0.091630,0.000000
...,...,...,...,...,...,...,...
10993,2024-07-24 00:00:00+00:00,224.000000,224.800003,217.130005,218.539993,218.287323,-0.024375
10994,2024-07-25 00:00:00+00:00,218.929993,220.850006,214.619995,217.490005,217.238556,-0.006577
10995,2024-07-26 00:00:00+00:00,218.699997,219.490005,216.009995,217.960007,217.708008,-0.003384
10996,2024-07-29 00:00:00+00:00,216.960007,219.300003,215.750000,218.240005,217.987686,0.005900


Merging the Data for the Correlation Analysis

In [47]:
merged_data = {}

for stock, df in stock_data.items():
    # Filter the aggregated sentiment data for the specific stock
    sentiment_filtered = aggregated_sentiment[aggregated_sentiment['stock'] == stock]
    
    # Merge sentiment data with stock data
    merged_df = pd.merge(sentiment_filtered, df, how='inner', left_on='date', right_on='Date')
    
    # Keep only the necessary columns
    merged_df = merged_df[['date', 'stock', 'sentiment_score', 'Daily Return']]
    
    merged_data[stock] = merged_df


In [49]:
merged_data['AAPL'].head()

Unnamed: 0,date,stock,sentiment_score,Daily Return
0,2020-01-06 00:00:00+00:00,AAPL,0.036616,0.020457
1,2020-02-04 00:00:00+00:00,AAPL,0.175,0.011227
2,2020-02-05 00:00:00+00:00,AAPL,0.5,-0.006398
3,2020-02-06 00:00:00+00:00,AAPL,-0.070833,0.008184
4,2020-03-04 00:00:00+00:00,AAPL,0.0,0.021252


Calculate Correlation for each Stock

In [50]:
correlations = {}

for stock, df in merged_data.items():
    # Calculate Pearson correlation
    correlation = df['sentiment_score'].corr(df['Daily Return'])
    correlations[stock] = correlation

# Output the correlation results
for stock, correlation in correlations.items():
    print(f"Correlation between sentiment and daily returns for {stock}: {correlation}")


Correlation between sentiment and daily returns for AAPL: 0.06408955754510415
Correlation between sentiment and daily returns for AMZN: -0.24477046972977715
Correlation between sentiment and daily returns for GOOG: 0.05313472524561042
Correlation between sentiment and daily returns for NVDA: 0.04727935141118188
