In [15]:
import requests
from pprint import pprint
import csv
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json
import time
from tqdm import tqdm
import calendar
import os
import string
import random
import boto3


def get_stockprice(company_symbol: str = 'MSFT', max_attempts: int = 100, timeout: float = 30.0):
    """Fetch the full daily adjusted price series for one symbol from Alpha Vantage.

    Each attempt sends a GET request with a freshly generated random
    15-character API key. An attempt succeeds when the server answers with
    HTTP 200 and a JSON body that has no 'Note' key (presumably the
    rate-limit notice -- NOTE(review): confirm against the API docs).
    Failed attempts are followed by a one second pause.

    Args:
        company_symbol: Ticker symbol sent as the ``symbol`` query parameter.
        max_attempts: Maximum number of requests before giving up.
        timeout: Seconds to wait for each HTTP request before treating the
            attempt as failed.

    Returns:
        The decoded JSON payload of the first successful attempt, or ``None``
        when every attempt failed.
    """
    endpoint = "https://www.alphavantage.co/query"
    parameters = {
        "function": "TIME_SERIES_DAILY_ADJUSTED",
        "symbol": company_symbol,
        "outputsize": 'full'
    }
    for _ in range(max_attempts):
        parameters['apikey'] = ''.join(random.choices(string.ascii_uppercase + string.digits, k=15))
        try:
            # Without a timeout a single stalled connection blocks the whole batch.
            response = requests.get(endpoint, params=parameters, timeout=timeout)
            # Decode the body once; it is needed for both the check and the return.
            payload = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError):
            # Network failure or a non-JSON body: treat as a failed attempt and retry.
            payload = None
        if payload is not None and 'Note' not in payload:
            return payload
        time.sleep(1)
    return None


# get stock price for all tickers
def get_stockprice_all(stocks_to_watch: list):
    """Download price data for every ticker that is not cached under ``prices/``.

    A ticker counts as cached when ``prices/<ticker>.json`` exists. For each
    remaining ticker the result of ``get_stockprice`` is written to that path
    as indented JSON; tickers for which ``get_stockprice`` returns ``None``
    are reported with a printed message and skipped.

    Args:
        stocks_to_watch: Ticker symbols to fetch, processed in the given order.
    """
    os.makedirs('prices', exist_ok=True)
    # Strip only the final '.json' so tickers containing a dot (e.g. 'BRK.B')
    # are recognised; a set keeps the membership test cheap for long lists.
    seen_stocks = {
        os.path.splitext(f)[0]
        for f in os.listdir('prices')
        if f.endswith('.json') and os.path.isfile(os.path.join('prices', f))
    }
    # make tqdm show the current ticker being processed
    progress = tqdm([t for t in stocks_to_watch if t not in seen_stocks], desc='Fetching stock prices')
    for ticker in progress:
        progress.set_postfix_str(ticker)
        data = get_stockprice(ticker)
        if data is None:
            print(f'Unnable to fetch data for {ticker}')
            continue
        final_path = f'prices/{ticker}.json'
        partial_path = final_path + '.tmp'
        try:
            # Write to a temporary name first so an interrupted run never
            # leaves a truncated file that a later run would treat as cached.
            with open(partial_path, 'w') as outfile:
                json.dump(data, outfile, indent=4)
            os.replace(partial_path, final_path)
        except BaseException:
            # Includes KeyboardInterrupt: drop the partial file, then re-raise.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

In [2]:
# define a boto resource in the ohio region
dynamodb = boto3.resource('dynamodb', region_name='us-east-2')
table = dynamodb.Table('StockSentiment')

# A single scan() call returns only one page of results; keep requesting
# pages while the response carries a LastEvaluatedKey so no items are dropped.
scan_page = table.scan()
sentiment_items = list(scan_page['Items'])
while 'LastEvaluatedKey' in scan_page:
    scan_page = table.scan(ExclusiveStartKey=scan_page['LastEvaluatedKey'])
    sentiment_items.extend(scan_page['Items'])

# Build the frame in one chained expression (no in-place mutation, so the cell
# can be re-run safely): parse Date, index by it, newest rows first.
sentiment_ticker_list = (
    pd.DataFrame(sentiment_items)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .sort_index(ascending=False)
)
sentiment_ticker_list

Unnamed: 0_level_0,ticker_sentiment_score,ticker_sentiment_label,Stock,source,url,relevance_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-08 08:00:00,0.177269,Somewhat-Bullish,GOOG,Benzinga,https://www.benzinga.com/pressreleases/23/03/g...,0.064104
2023-03-08 08:00:00,0.087061,Neutral,OSCUF,Benzinga,https://www.benzinga.com/pressreleases/23/03/g...,0.064104
2023-03-08 08:00:00,0.040016,Neutral,NBIX,Benzinga,https://www.benzinga.com/pressreleases/23/03/g...,0.064104
2023-03-08 08:00:00,0.040016,Neutral,PFE,Benzinga,https://www.benzinga.com/pressreleases/23/03/g...,0.064104
2023-03-08 08:00:00,0.040016,Neutral,AZN,Benzinga,https://www.benzinga.com/pressreleases/23/03/g...,0.064104
...,...,...,...,...,...,...
2023-02-21 07:46:01,0.013812,Neutral,CRYPTO:BTC,The Financial Express,https://www.financialexpress.com/blockchain/cr...,0.406745
2023-02-21 07:42:00,0.003875,Neutral,NWG,Reuters,https://www.reuters.com/markets/europe/hsbc-cu...,0.140076
2023-02-21 07:31:30,0.086742,Neutral,TSLA,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.472338
2023-02-21 07:31:30,0.327663,Somewhat-Bullish,NIO,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.730954


In [7]:
# Rank the tickers in the Stock column from most to least frequent, dropping
# every symbol whose name contains 'crypto' or 'forex' (case-insensitive).
sorted_tickers = [
    symbol
    for symbol in sentiment_ticker_list['Stock'].value_counts().index.tolist()
    if not any(marker in symbol.lower() for marker in ('crypto', 'forex'))
]
print(f'Number of tickers: {len(sorted_tickers)}, tickers: {sorted_tickers}')

Number of tickers: 1321, tickers: ['META', 'TSLA', 'GS', 'GOOG', 'JPM', 'MS', 'C', 'MSFT', 'BRK-A', 'NVDA', 'BCS', 'BAC', 'WFC', 'COIN', 'WMT', 'AAPL', 'BABA', 'AWON', 'CURN', 'SZIHF', 'BLK', 'AMZN', 'BYND', 'EVR', 'NFLX', 'HD', 'RCKY', 'KO', 'PFE', 'APG', 'WBD', 'TOL', 'MCO', 'COST', 'STT', 'SPXCF', 'RIVN', 'ETSY', 'LI', 'MRNA', 'FSLR', 'ZS', 'AXP', 'TCTZF', 'BIDU', 'GM', 'ROKU', 'M', 'PYPL', 'LDNXF', 'NIO', 'JNJ', 'XPEV', 'PANW', 'AVGO', 'AMD', 'CCORF', 'SQ', 'AMAT', 'SSNLF', 'OXY', 'RYDAF', 'TGT', 'BBY', 'SBUX', 'SHOP', 'ABBV', 'TTD', 'DKS', 'NTES', 'BKNG', 'O', 'QCOM', 'XOM', 'PARA', 'ON', 'AYX', 'SI-P-A', 'Z', 'MA', 'MORN', 'ATVI', 'WING', 'ENVX', 'SCHW', 'MELI', 'PDD', 'CSCO', 'MGM', 'INTC', 'UPST', 'GLP', 'NKE', 'NVAX', 'BR', 'MNST', 'GOLD', 'MCQEF', 'IVZ', 'CMSQF', 'ALB', 'CRWD', 'PSHG', 'ZM', 'RBCPF', 'GE', 'EBAY', 'BA', 'NKLA', 'SNEJF', 'EYE', 'DE', 'STM', 'BACHY', 'LAC', 'GIS', 'AZO', 'FATE', 'TTE', 'BBBY', 'SSTK', 'PM', 'BBWI', 'ARBK', 'MDT', 'SHC', 'HPQ', 'CVS', 'NWG', 'JW

In [16]:
# Fetch and store price data for the ranked tickers, most frequent first.
# Tickers that already have a file under prices/ are skipped, so re-running
# this cell only requests the ones that are still missing.
get_stockprice_all(sorted_tickers)

Fetching stock prices:   6%|▌         | 12/196 [01:21<20:52,  6.81s/it, HRB]


KeyboardInterrupt: 