In [1]:
import json
import os

import pandas as pd
import requests

In [2]:
# Alpha Vantage API key: taken from the text after the last '=' on the first
# line of ./env.
with open('./env', mode='r', encoding='utf-8') as f:
    # readline() keeps the trailing newline; strip it so it does not end up
    # inside the request URL.
    api_key = f.readline().split('=')[-1].strip()

# The responses are written below ./data, so make sure it exists first.
os.makedirs('./data', exist_ok=True)

# One request per day for 1-30 December 2022. The FREE tier only allows
# 25 requests/day, so only about 25 days of news can be collected per run.
for i in range(30):
    # ':02d' zero-pads the day number without nesting quotes inside the
    # f-string (nested same-type quotes are a syntax error before Python 3.12).
    time_from = f'202212{i + 1:02d}T0001'
    time_to = f'202212{i + 2:02d}T0000'

    print(f'{time_from} - {time_to}')
    url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&time_from={time_from}&time_to={time_to}&limit=1000&apikey={api_key}'
    r = requests.get(
        url,
        headers={
            'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
            # The quality-value parameter is spelled 'q', not 'h'.
            'Accept-Language': 'en-US,en;q=0.9'},
        timeout=30,  # without a timeout a stalled connection hangs the cell forever
    )
    r.raise_for_status()
    data = r.json()

    # A response without a 'feed' list carries no news (presumably an error or
    # rate-limit message -- confirm against the API docs). Do not save it, or
    # the parsing cell will fail on it later. Only the keys are printed because
    # the message body may echo the API key.
    if 'feed' not in data:
        print(f'\tno feed in response (keys: {list(data.keys())}), not saved')
        continue

    with open(f'./data/data_{time_from}_{time_to}.json', 'w', encoding='utf-8') as fp:
        json.dump(data, fp)


In [5]:
def read_json_files(dir: str, fname_filter=None) -> list:
    """Load the JSON files found directly inside ``dir``.

    Args:
        dir: Directory to scan. Sub-directories are not descended into. A
            directory that does not exist yields an empty list, because
            ``os.walk`` silently reports nothing for it.
        fname_filter: Optional file-name prefix. When given, only ``.json``
            files whose names start with it are read; when omitted (or empty),
            every ``.json`` file in the directory is read.

    Returns:
        A list with the parsed content of each matching file, ordered by file
        name so the result does not depend on the file system's listing order.

    Raises:
        json.JSONDecodeError: If a matching file does not contain valid JSON.
    """
    # The first entry produced by os.walk describes the top level only, which
    # keeps the scan non-recursive. The default covers the case where os.walk
    # yields nothing at all.
    _, _, fnames = next(os.walk(dir), (dir, [], []))

    # An empty prefix matches every name, so one predicate serves both the
    # filtered and the unfiltered case.
    prefix = fname_filter or ''
    json_fnames = sorted(
        name for name in fnames
        if name.endswith('.json') and name.startswith(prefix)
    )

    json_data = []
    for fname in json_fnames:
        with open(os.path.join(dir, fname), 'r', encoding='utf-8') as json_file:
            json_data.append(json.load(json_file))
    return json_data


# Load the top-level files in ./data whose names start with 'data' and end with '.json'.
json_data = read_json_files('./data', 'data')

In [6]:
# Print a one-level outline of the first loaded response:
#   * dict values   -> the key, then each nested attribute name on its own line
#   * list values   -> the key with '[0]', then the keys of the first element
#   * anything else -> just the key
first_response = json_data[0]
keys = first_response.keys()
for field_name in list(keys):
    field_value = first_response[field_name]
    field_type = type(field_value)

    if field_type is dict:
        print(f'{field_name}:')
        for nested_name in field_value:
            print(f'\t{nested_name}')
        continue

    if field_type is list:
        print(f'{field_name}[0]:')
        first_element_keys = list(field_value[0].keys())
        print(f'\t{first_element_keys}')
        continue

    print(f'{field_name}')


items
sentiment_score_definition
relevance_score_definition
feed[0]:
	['title', 'url', 'time_published', 'authors', 'summary', 'banner_image', 'source', 'category_within_source', 'source_domain', 'topics', 'overall_sentiment_score', 'overall_sentiment_label', 'ticker_sentiment']


In [7]:
def parse_vantage_api_data(json_data: list) -> pd.DataFrame:
    """Flatten loaded API responses into one DataFrame row per feed item.

    Args:
        json_data: List of response dicts. Each item under a response's
            ``'feed'`` key must contain every column listed below.

    Returns:
        A DataFrame with the columns ``title``, ``time_published``,
        ``summary``, ``overall_sentiment_label`` and ``ticker_sentiment``,
        in that order. The columns are present even when there are no rows.

    Raises:
        KeyError: If a feed item lacks one of the expected columns.
    """
    columns = ['title', 'time_published', 'summary',
               'overall_sentiment_label', 'ticker_sentiment']

    # A response that carries no 'feed' list (e.g. an API error or rate-limit
    # message that was saved to disk) contributes no rows instead of aborting
    # the whole parse with a KeyError.
    records = [
        {column: item[column] for column in columns}
        for json_dict in json_data
        for item in (json_dict.get('feed') or [])
    ]

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(records, columns=columns)


# Build a single DataFrame with one row per feed item across all loaded responses.
data_df = parse_vantage_api_data(json_data)

In [8]:
# Display the parsed frame; the output below shows only the first and last rows.
data_df

Unnamed: 0,title,time_published,summary,overall_sentiment_label,ticker_sentiment
0,Used Auto Loan Payments Have Topped a Key Level,20221202T000000,Car buyers still face shortages and rising pri...,Neutral,[]
1,Exclusive: Republicans introduce legislation t...,20221201T235959,Republicans introduce legislation to thwart ES...,Neutral,[]
2,Bitcoin mining revenues fell 20% in November,20221201T235942,Bitcoin mining revenues fell 19.9% in November...,Neutral,"[{'ticker': 'CRYPTO:BTC', 'relevance_score': '..."
3,Mercantile Appoints New Members to Bank Board ...,20221201T235900,"GRAND RAPIDS, Mich., Dec. 1, 2022 /PRNewswire/...",Neutral,"[{'ticker': 'META', 'relevance_score': '0.0493..."
4,Nicholas Truglia served with 18-month prison s...,20221201T235817,"Nicholas Truglia, the 25-year-old hacker who l...",Neutral,[]
...,...,...,...,...,...
20473,Auto Expo to show EV might | The Financial Exp...,20221225T005000,Auto Expo to show EV might The Financial Expre...,Neutral,"[{'ticker': 'TTM', 'relevance_score': '0.06292..."
20474,"Chanda Kochhar, husband in CBI custody till Mo...",20221225T004000,"Chanda Kochhar, husband in CBI custody till Mo...",Neutral,"[{'ticker': 'IBN', 'relevance_score': '0.26610..."
20475,"Important January 17, 2023 Deadline Reminder: ...",20221225T003400,"RADNOR, Pa., Dec. 24, 2022 ( GLOBE NEWSWIRE ) ...",Neutral,"[{'ticker': 'OLPX', 'relevance_score': '0.5854..."
20476,Numerous Strategies Have Failed To Get Adverti...,20221225T001515,With Elon Musk agreeing to resign as CEO after...,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.2653..."
