### Importing Necessary Packages

In [2]:
import pandas as pd
import numpy as np
from textblob import TextBlob
import matplotlib.pyplot as plt
%matplotlib inline

### Loading the data

In [3]:
NEWS_FILE = '../data/raw_analyst_ratings.csv'
BASE_PATH = '../data/yfinance_data/'
STOCK_FILES = {'AAPL': 'AAPL_historical_data.csv',
    'AMZN': 'AMZN_historical_data.csv',
    'GOOG': 'GOOG_historical_data.csv',
    'META': 'META_historical_data.csv',
    'MSFT': 'MSFT_historical_data.csv',
    'NVDA': 'NVDA_historical_data.csv',
    'TSLA': 'TSLA_historical_data.csv'
     }

data = pd.read_csv(NEWS_FILE)
print("News Data Sample:\n", data.head())

News Data Sample:
    Unnamed: 0                                           headline  \
0           0            Stocks That Hit 52-Week Highs On Friday   
1           1         Stocks That Hit 52-Week Highs On Wednesday   
2           2                      71 Biggest Movers From Friday   
3           3       46 Stocks Moving In Friday's Mid-Day Session   
4           4  B of A Securities Maintains Neutral on Agilent...   

                                                 url          publisher  \
0  https://www.benzinga.com/news/20/06/16190091/s...  Benzinga Insights   
1  https://www.benzinga.com/news/20/06/16170189/s...  Benzinga Insights   
2  https://www.benzinga.com/news/20/05/16103463/7...         Lisa Levin   
3  https://www.benzinga.com/news/20/05/16095921/4...         Lisa Levin   
4  https://www.benzinga.com/news/20/05/16095304/b...         Vick Meyer   

                        date stock  
0  2020-06-05 10:30:54-04:00     A  
1  2020-06-03 10:45:20-04:00     A  
2  2020-05

### Load and combine stock data

In [4]:
stock_dfs = []
for symbol, file in STOCK_FILES.items():
    stock_data = pd.read_csv(BASE_PATH + file)
    stock_data['Stock'] = symbol
    stock_dfs.append(stock_data)
stock_df = pd.concat(stock_dfs, ignore_index=True)
print("\nStock Data Sample:\n", stock_df.head())


Stock Data Sample:
          Date      Open      High       Low     Close  Adj Close     Volume  \
0  1980-12-12  0.128348  0.128906  0.128348  0.128348   0.098943  469033600   
1  1980-12-15  0.122210  0.122210  0.121652  0.121652   0.093781  175884800   
2  1980-12-16  0.113281  0.113281  0.112723  0.112723   0.086898  105728000   
3  1980-12-17  0.115513  0.116071  0.115513  0.115513   0.089049   86441600   
4  1980-12-18  0.118862  0.119420  0.118862  0.118862   0.091630   73449600   

   Dividends  Stock Splits Stock  
0        0.0           0.0  AAPL  
1        0.0           0.0  AAPL  
2        0.0           0.0  AAPL  
3        0.0           0.0  AAPL  
4        0.0           0.0  AAPL  


### Normalize dates

In [8]:
data['date'] = pd.to_datetime(data['date'], format='mixed', errors='coerce')
data = data.dropna(subset=['date'])
data['date'] = data['date'].dt.tz_localize(None).dt.tz_localize('UTC')
data['date_only'] = data['date'].dt.date
stock_df['Date'] = pd.to_datetime(stock_df['Date'], format='mixed', errors='coerce')
stock_df = stock_df.dropna(subset=['Date'])
stock_df['Date'] = stock_df['Date'].dt.tz_localize(None)
stock_df['Date'] = stock_df['Date'].dt.date

### Merge news and stock data

In [10]:
merged_df = pd.merge(data, stock_df, left_on=['stock', 'date_only'], right_on=['Stock', 'Date'], how='inner')
print("Merged Data Sample:\n", merged_df[['stock', 'date_only', 'headline', 'Close']].head())

Merged Data Sample:
 Empty DataFrame
Columns: [stock, date_only, headline, Close]
Index: []
