## **Sentiment Analysis**

----
----

### **Setup & Load the Data**
---

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import re
import pytz
from IPython.display import display
import matplotlib.pyplot as plt
import sys
import os
import seaborn as sns

In [2]:
# Load the cleaned news headlines and the historical price data.
#
# The data directory is configurable so the notebook runs on machines other
# than the author's: set the ANALYST_DATA_DIR environment variable to point at
# the folder holding both CSV files. When it is unset, the original location
# is used, so existing behaviour is unchanged.
DATA_DIR = os.environ.get(
    'ANALYST_DATA_DIR',
    'C:/Users/Admin/OneDrive/10 Academy/Week 1/Technical Content/Data',
)

df_news = pd.read_csv(os.path.join(DATA_DIR, 'raw_analyst_ratings_cleaned.csv'))

df_historical = pd.read_csv(os.path.join(DATA_DIR, 'historical_data_of_all_tickers_cleaned.csv'))

# NOTE(review): both files come back with an 'Unnamed: 0' column (a saved index);
# passing index_col=0 would drop it -- confirm no later cell relies on that column.
df_news.head()

Unnamed: 0,headline,url,publisher,date,stock
0,Stocks That Hit 52-Week Highs On Friday,https://www.benzinga.com/news/20/06/16190091/s...,Benzinga Insights,2020-06-05 14:30:54+00:00,A
1,Stocks That Hit 52-Week Highs On Wednesday,https://www.benzinga.com/news/20/06/16170189/s...,Benzinga Insights,2020-06-03 14:45:20+00:00,A
2,71 Biggest Movers From Friday,https://www.benzinga.com/news/20/05/16103463/7...,Lisa Levin,2020-05-26 08:30:07+00:00,A
3,46 Stocks Moving In Friday's Mid-Day Session,https://www.benzinga.com/news/20/05/16095921/4...,Lisa Levin,2020-05-22 16:45:06+00:00,A
4,B of A Securities Maintains Neutral on Agilent...,https://www.benzinga.com/news/20/05/16095304/b...,Vick Meyer,2020-05-22 15:38:59+00:00,A


In [3]:
# Preview the first five rows of the historical price data.
df_historical.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,Ticker
0,2020-01-02 00:00:00+00:00,74.059998,75.150002,73.797501,75.087502,72.876099,135480400,0.0,0.0,AAPL
1,2020-01-03 00:00:00+00:00,74.287498,75.144997,74.125,74.357498,72.167595,146322800,0.0,0.0,AAPL
2,2020-01-06 00:00:00+00:00,73.447502,74.989998,73.1875,74.949997,72.742661,118387200,0.0,0.0,AAPL
3,2020-01-07 00:00:00+00:00,74.959999,75.224998,74.370003,74.597504,72.400536,108872000,0.0,0.0,AAPL
4,2020-01-08 00:00:00+00:00,74.290001,76.110001,74.290001,75.797501,73.565208,132079200,0.0,0.0,AAPL


In [4]:
# Inclusive analysis window, named once instead of being repeated inline.
window_start = pd.to_datetime('2020-01-01').date()
window_end = pd.to_datetime('2020-06-11').date()

# Reduce 'Date' in df_historical to plain datetime.date values so it can be
# compared against (and later aligned with) the news dates.
df_historical['Date'] = pd.to_datetime(df_historical['Date']).dt.date

# Keep only the rows inside the window (both ends inclusive). The .copy()
# detaches the result from df_historical so that columns can be added to it
# later without triggering SettingWithCopyWarning.
df_historical_filtered = df_historical[
    df_historical['Date'].between(window_start, window_end)
].copy()

# A bare `.head()` in the middle of a cell renders nothing; display() is
# needed for the preview to actually appear.
display(df_historical_filtered.head())

# Convert 'date' in df_news to datetime.date (drop the time for alignment)
df_news['date'] = pd.to_datetime(df_news['date']).dt.date

# Display the min and max dates for both datasets to check alignment
print("df_news date range:", df_news['date'].min(), "to", df_news['date'].max())
print("df_historical_filtered date range:", df_historical_filtered['Date'].min(), "to", df_historical_filtered['Date'].max())

df_news date range: 2020-01-01 to 2020-06-11
df_historical_filtered date range: 2020-01-02 to 2020-06-11


In [5]:
# Tickers whose headlines are kept for the sentiment analysis.
# NOTE(review): 'MSF' may be a typo for 'MSFT' -- confirm against the ticker
# symbols actually present in df_news['stock'] before changing it.
selected_stocks = ['AAPL', 'AMZN', 'GOOG', 'FB', 'MSF', 'NVDA', 'TSLA']

# Boolean mask of the rows whose ticker is in the list above.
is_selected_ticker = df_news['stock'].isin(selected_stocks)

# Apply the mask and renumber the surviving rows from 0.
df_news_selected = df_news.loc[is_selected_ticker].reset_index(drop=True)

df_news_selected.head()

Unnamed: 0,headline,url,publisher,date,stock
0,Tech Stocks And FAANGS Strong Again To Start D...,https://www.benzinga.com/government/20/06/1622...,JJ Kinahan,2020-06-10,AAPL
1,10 Biggest Price Target Changes For Wednesday,https://www.benzinga.com/analyst-ratings/price...,Lisa Levin,2020-06-10,AAPL
2,"Benzinga Pro's Top 5 Stocks To Watch For Wed.,...",https://www.benzinga.com/short-sellers/20/06/1...,Benzinga Newsdesk,2020-06-10,AAPL
3,"Deutsche Bank Maintains Buy on Apple, Raises P...",https://www.benzinga.com/news/20/06/16219873/d...,Benzinga Newsdesk,2020-06-10,AAPL
4,Apple To Let Users Trade In Their Mac Computer...,https://www.benzinga.com/news/20/06/16218697/a...,Neer Varshney,2020-06-10,AAPL


In [7]:
from textblob import TextBlob

# Score every headline exactly once. TextBlob's `.sentiment` returns a
# (polarity, subjectivity) pair, so a single pass supplies both columns
# instead of parsing and scoring each headline twice.
headline_sentiments = [TextBlob(text).sentiment for text in df_news_selected['headline']]

# Split the pairs into two float columns aligned on the frame's own index.
df_news_selected['sentiment_polarity'] = pd.Series(
    [s.polarity for s in headline_sentiments],
    index=df_news_selected.index,
    dtype='float64',
)
df_news_selected['sentiment_subjectivity'] = pd.Series(
    [s.subjectivity for s in headline_sentiments],
    index=df_news_selected.index,
    dtype='float64',
)

# Display the dataframe with sentiment scores
df_news_selected[['headline', 'stock', 'sentiment_polarity', 'sentiment_subjectivity']].head()

Unnamed: 0,headline,stock,sentiment_polarity,sentiment_subjectivity
0,Tech Stocks And FAANGS Strong Again To Start D...,AAPL,0.433333,0.733333
1,10 Biggest Price Target Changes For Wednesday,AAPL,0.0,0.0
2,"Benzinga Pro's Top 5 Stocks To Watch For Wed.,...",AAPL,0.5,0.5
3,"Deutsche Bank Maintains Buy on Apple, Raises P...",AAPL,0.0,0.0
4,Apple To Let Users Trade In Their Mac Computer...,AAPL,0.0,0.0


In [8]:
# Collapse the per-headline scores to one row per (date, stock) pair by taking
# the mean polarity and the mean subjectivity of that day's headlines.
per_day_and_stock = df_news_selected.groupby(['date', 'stock'], as_index=False)

df_daily_sentiment = per_day_and_stock.agg(
    sentiment_polarity=('sentiment_polarity', 'mean'),
    sentiment_subjectivity=('sentiment_subjectivity', 'mean'),
)

df_daily_sentiment.head()

Unnamed: 0,date,stock,sentiment_polarity,sentiment_subjectivity
0,2020-05-31,NVDA,0.0,0.0
1,2020-06-02,NVDA,0.25,0.25
2,2020-06-04,GOOG,0.0,0.125
3,2020-06-05,GOOG,-0.051852,0.096296
4,2020-06-08,FB,0.0,0.0
