In [9]:
#import standard-library modules (dates, JSON handling, environment access)
import datetime as dt
import json
import os

#import usual packages for data manipulation
import numpy as np
import pandas as pd

#import usual packages for data visualization
import matplotlib.pyplot as plt
import seaborn as sns

#import statistical packages
import scipy.stats as scs
import statsmodels.api as sm

#import yfinance to get stock data
import yfinance as yf

#import quantstats to get some statistics on the stock data
import quantstats as qs

#import class AlphaIntelligence from the module alphaintelligence
from alpha_vantage.alphaintelligence import AlphaIntelligence

#apply the 'seaborn-v0_8-whitegrid' matplotlib style sheet to every plot drawn after this point
plt.style.use('seaborn-v0_8-whitegrid')

In [3]:
#read the Alpha Vantage API key from the environment instead of hard-coding it,
#so the secret is never saved or committed together with the notebook
#NOTE: a key that was previously stored in this cell should be considered leaked and be revoked
my_key = os.environ.get('ALPHAVANTAGE_API_KEY')
if not my_key:
    raise RuntimeError(
        'Set the ALPHAVANTAGE_API_KEY environment variable to your Alpha Vantage API key.'
    )

#initialize the class AlphaIntelligence; results are returned as pandas objects
ai = AlphaIntelligence(my_key, output_format='pandas')

In [4]:
#choose the ticker symbol; it is used for the news request and in the names of the CSV files written below
ticker = 'AMZN'

In [5]:
#query the news-sentiment endpoint for the chosen ticker
#(time_from and limit are passed through to the API unchanged)
news_data, news_meta_data = ai.get_news_sentiment(
    tickers=ticker,
    time_from='20220810T0130',
    limit=1000,
)

In [21]:
#save the news data exactly as returned by the API to ../data/<ticker>_news_data_raw.csv
news_data.to_csv(f'../data/{ticker}_news_data_raw.csv')

In [22]:
def _daily_ticker_sentiment(entries, symbol):
    """Aggregate one day's ticker-sentiment entries for a single ticker.

    Parameters
    ----------
    entries : list of dict
        Per-article sentiment records; the records for `symbol` must carry the
        keys 'ticker', 'relevance_score' and 'ticker_sentiment_score'.
    symbol : str
        Ticker whose records are kept; all other tickers are ignored.

    Returns
    -------
    (score, relevance) : tuple of float
        `score` is the relevance-weighted average of 'ticker_sentiment_score',
        `relevance` is the plain mean of 'relevance_score'. Both are NaN when
        no record matches `symbol`; `score` is NaN when the weights sum to zero.
    """
    records = pd.DataFrame(entries)
    #an empty list yields a frame without a 'ticker' column, which must not raise
    if 'ticker' not in records.columns:
        return np.nan, np.nan
    records = records[records['ticker'] == symbol]
    if records.empty:
        return np.nan, np.nan
    #the scores are converted to float explicitly before any arithmetic
    weights = records['relevance_score'].astype(float)
    raw_scores = records['ticker_sentiment_score'].astype(float)
    total_weight = weights.sum()
    #avoid a 0/0 division (and its RuntimeWarning) when every weight is zero
    score = np.dot(weights, raw_scores) / total_weight if total_weight != 0 else np.nan
    return score, weights.mean()


#one row per article: publication date and its list of per-ticker sentiment records
articles = news_data.reset_index(drop=True)
daily_entries = pd.DataFrame({'date': pd.to_datetime(articles['time_published']).dt.date,
                              'ticker_sentiment': articles['ticker_sentiment']})
#summing the lists concatenates them, giving all records published on each date
daily_entries = daily_entries.groupby('date').sum()

scores = [] #daily relevance-weighted sentiment scores for the chosen ticker
relevances = [] #daily mean relevance scores for the chosen ticker
for entries in daily_entries['ticker_sentiment']:
    day_score, day_relevance = _daily_ticker_sentiment(entries, ticker)
    scores.append(day_score)
    relevances.append(day_relevance)

#final frame: indexed by date, one sentiment and one relevance column
df = pd.DataFrame({f'{ticker}_sentiment_score': np.array(scores, dtype=float),
                   f'{ticker}_relevance_score': np.array(relevances, dtype=float)},
                  index=daily_entries.index)
df.to_csv(f'../data/{ticker}_news_data.csv')