In [31]:
#//*************************************************************************
#//*** Downloads stock data, converts the responses to a dataframe,
#//*** and saves it as a compressed CSV file in the stocks folder
#//*************************************************************************
import csv
import io
import json
import platform
import time

import pandas as pd
import requests

#//*********************************************************************************
#//*** Read the API key from a JSON encoded file
#//*** Located in the ignore_folder sub directory
#//*** This Folder is added to the .gitignore file and does not show up on Github
#//*** This is Authentication Best Practices for Github
#//*********************************************************************************
#//*** The file is parsed as JSON and the value stored under 'apikey' is kept as-is.
#//*** The context manager closes the handle even if parsing or the lookup fails.
with open("./ignore_folder/alpha_vantage_api.json", "r") as f:
    av_apikey = json.load(f)['apikey']

#//*** Load the Stock Tickers
#//*** Forward slashes are used so the same path works on Windows, macOS and Linux
with open("./data/stock_tickers.json", "r") as f:
    symbols = json.load(f)['symbols']

#//*** Just get gme for testing (overrides the list loaded from the file above)
symbols = [ "gme" ]


In [82]:
"""
#//*** Alpha Vantage API Docs:
#//*** https://www.alphavantage.co/documentation/

#//*** Intra day Query
#symbol = "amc"
#url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={symbol}&interval=5min&apikey={av_apikey}'

#//*** get prices throughout today
#action = "TIME_SERIES_INTRADAY"
#action = "TIME_SERIES_DAILY"
#//*** Intraday prices going back two years
#action = "TIME_SERIES_INTRADAY_EXTENDED"
#url = f'https://www.alphavantage.co/query?function={action}&symbol={symbol}&interval=60min&slice=year1month1&apikey={av_apikey}'
#url = f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol=IBM&interval=60min&slice=year1month3&adjusted=false&apikey={av_apikey}"
"""

#//******************************************************************************
#//*** Builds the URL request based on the symbol and type of data requested.
#//*** Initially, this does the daily numbers.
#//*** Can easily be scaled up to add many different URL request types
#//******************************************************************************
def build_url(input_action,input_symbol,m=1,y=1,apikey=None):
    """Build the Alpha Vantage query URL for one symbol.

    Parameters:
        input_action: 'daily' builds a TIME_SERIES_DAILY request; '1min' builds a
            TIME_SERIES_INTRADAY_EXTENDED request with a 1min interval.
        input_symbol: ticker symbol placed in the `symbol` query parameter.
        m: month number of the slice (only used by '1min').
        y: year number of the slice (only used by '1min').
        apikey: API key to embed in the URL. When None, the module-level
            `av_apikey` is used.

    Returns:
        The URL as a string, or None (after printing a message) when
        input_action is not one of the supported actions.
    """
    #//*** Query parameters per action, in the order they appear in the URL.
    #//*** The apikey is appended last for every action.
    if input_action == 'daily':
        params = [
            ('function', 'TIME_SERIES_DAILY'),
            ('symbol', input_symbol),
            ('outputsize', 'full'),
        ]
    elif input_action == '1min':
        params = [
            ('function', 'TIME_SERIES_INTRADAY_EXTENDED'),
            ('symbol', input_symbol),
            ('outputsize', 'full'),
            ('slice', f'year{y}month{m}'),
            ('interval', '1min'),
        ]
    else:
        print(f"Invalid Action: {input_action}")
        print(f"No URL Returned, PLease try again")
        return None

    #//*** Resolve the key only once a URL is actually being built
    if apikey is None:
        apikey = av_apikey
    params.append(('apikey', apikey))

    #//*** The symbol comes from the input_symbol argument, not from whatever
    #//*** global named `symbol` happens to exist in the kernel
    query = '&'.join(f'{key}={value}' for key, value in params)
    return f'https://www.alphavantage.co/query?{query}'
    


#//*** Type of download to run. build_url() also understands 'daily', but only
#//*** the '1min' branch below has live download code.
action = '1min'

for symbol in symbols:
    if action == '1min':

        #//*** One dataframe per monthly slice; concatenated once after the loops
        #//*** instead of re-copying the growing frame on every request
        monthly_frames = []
        rows_collected = 0

        #//*** Loop the year
        for year in [1,2]:

            #//*** Loop each month
            for month in range(1,13):
                print(f"Length out_df: {rows_collected}")
                print(f"Building URL: {symbol} - Month {month} Year {year}")
                url = build_url(action,symbol,month,year)

                #//*** build_url returns None for an action it does not recognise
                if url is None:
                    print("No URL built, skipping this slice")
                    continue

                print("Downloading")
                #//*** Mask the API key so it never lands in the saved notebook output
                print(url.replace(av_apikey, "<apikey>"))
                r = requests.get(url)
                print(r.text[:1000])

                if r.ok:
                    try:
                        #//*** Parse the CSV body in memory; no scratch file needed
                        slice_df = pd.read_csv(io.StringIO(r.text))
                    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
                        print(f"Could not parse response as CSV, skipping this slice: {err}")
                    else:
                        #//*** Price slices carry a 'time' column; anything else
                        #//*** (e.g. an error message body) is not appended
                        if 'time' in slice_df.columns:
                            monthly_frames.append(slice_df)
                            rows_collected += len(slice_df)
                        else:
                            print("Response has no 'time' column, skipping this slice")
                else:
                    print(f"Request failed with HTTP status {r.status_code}, skipping this slice")

                #//*** Pause between calls so we don't hammer the API
                print("Waiting 20 Seconds")
                time.sleep(20)

        #//*** Build the output dataframe in a single concat
        out_df = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

        print("df Built")
        print(out_df.head(10))

        #//*** Forward slashes work on every platform, so no path conversion is needed
        output_filename = f"./stocks/{symbol}_1min.csv.zip"

        print(f"Writing dataframe to File: {output_filename}")
        out_df.to_csv(output_filename,compression="zip",index=False)

                        
                    #print(year,month)
            #print("Waiting 20 Seconds")
            #time.sleep(20)

            #//*** Build the Url Request for each symbol
    #//*** Verify we built a proper url
#    if url != None:
        
#        print("Downloading....")
        #//*** Download the data for each Symbol
#        r = requests.get(url)

"""
        #//*** Convert raw string to dictionary for processing 
        data = r.json()

        #//*** Output Dictionary
        out_dict = {}
        print("Processing....")
        #//*** Process Data into the out_dict
        for date in data[data_key]:
            #//*** Build out_dict (output_dictionary) keys 
            if len(out_dict.keys()) == 0:
                out_dict['date'] = []
                out_dict['symbol'] = []

                #//*** Get this dictionary for the first row. Use the key values, but strip the first 3 characters which are numeric
                for key in data[data_key][date].keys():
                    out_dict[key[3:]] = []

            #//*** Add Date to out_dict
            out_dict['date'].append(date)

            #//*** Add Symbol to out_dict
            out_dict['symbol'].append(symbol)

            #//*** Loop through the daily values and append to the out_dict
            for key,value in data[data_key][date].items():

                #//*** Trim first 3 characters off key and append to the appropriate dictionary list
                out_dict[key[3:]].append(value)

        print("Building Dataframe")
        out_df = pd.DataFrame()
        #//*** Convert the Dictionary to a Dataframe
        #//*** Each Key is a column, the data is the list
        for key,value in out_dict.items():
            out_df[key] = value

        #//*** Generic Filename - Placeholder
        output_filename = f".\\stocks\\{symbol}_need_a_better_name.csv.zip"
        
        
        #//*** Build filename based on action type
        if action == 'daily':
            output_filename = f".\\stocks\\{symbol}_daily.csv.zip"
        
        if action == '1min':
            output_filename = f".\\stocks\\{symbol}_1min.csv.zip"

        #//*** Convert Path to Mac formatting if needed
        if platform.system() == 'Darwin':
            output_filename = output_filename.replace("\\","/")
        
        print(f"Writing dataframe to File: {output_filename}")
        out_df.to_csv(output_filename,compression="zip",index=False)    

    #else:
    #    print("We've got an url problem Skipping")
    
    #//*** Wait 20 seconds so we don't hammer the API
    #//*** Max is 5 calls / minute & 500 /day
    
    #print("Waiting 20 Seconds")
    #time.sleep(20)
"""
print("done")




    

Length out_df: 0
Building URL: gme - Month 1 Year 1
Downloading
https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol=gme&outputsize=full&slice=year1month1&interval=1min&apikey=S4DI5C1JRKCLIM74
time,open,high,low,close,volume
2021-07-12 20:00:00,189.5,189.5,189.5,189.5,417
2021-07-12 19:53:00,189.44,189.44,189.44,189.44,118
2021-07-12 19:22:00,189.01,189.01,189.01,189.01,102
2021-07-12 18:52:00,189.0,189.0,189.0,189.0,250
2021-07-12 17:53:00,189.5,189.5,189.5,189.5,163
2021-07-12 17:10:00,189.26,189.26,189.26,189.26,445
2021-07-12 17:06:00,189.5,189.5,189.5,189.5,166
2021-07-12 16:57:00,189.2,189.2,189.2,189.2,755
2021-07-12 16:55:00,189.22,189.22,189.22,189.22,546
2021-07-12 16:54:00,189.3,189.3,189.3,189.3,768
2021-07-12 16:53:00,189.5,189.5,189.5,189.5,343
2021-07-12 16:27:00,189.5,189.5,189.5,189.5,505
2021-07-12 16:26:00,189.2,189.2,189.2,189.2,994
2021-07-12 16:23:00,189.22,189.22,189.22,189.22,229
2021-07-12 16:20:00,189.5,189.5,189.5,189.5,384
2021-07-

Length out_df: 92268
Building URL: gme - Month 8 Year 1
Downloading
https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol=gme&outputsize=full&slice=year1month8&interval=1min&apikey=S4DI5C1JRKCLIM74
time,open,high,low,close,volume
2020-12-14 20:00:00,12.73,12.73,12.73,12.73,114
2020-12-14 19:40:00,12.75,12.75,12.75,12.75,250
2020-12-14 19:39:00,12.72,12.72,12.72,12.72,214
2020-12-14 19:36:00,12.7199,12.7199,12.7199,12.7199,600
2020-12-14 19:27:00,12.72,12.72,12.72,12.72,150
2020-12-14 19:25:00,12.72,12.72,12.72,12.72,100
2020-12-14 19:10:00,12.72,12.72,12.72,12.72,100
2020-12-14 19:08:00,12.71,12.71,12.71,12.71,100
2020-12-14 18:38:00,12.73,12.73,12.73,12.73,1000
2020-12-14 18:27:00,12.74,12.74,12.74,12.74,250
2020-12-14 18:26:00,12.71,12.71,12.71,12.71,500
2020-12-14 18:23:00,12.72,12.72,12.72,12.72,404
2020-12-14 18:04:00,12.71,12.71,12.71,12.71,500
2020-12-14 18:00:00,12.72,12.72,12.72,12.72,416
2020-12-14 17:45:00,12.7199,12.72,12.7199,12.72,895
2020-12-14 

Length out_df: 161039
Building URL: gme - Month 3 Year 2
Downloading
https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol=gme&outputsize=full&slice=year2month3&interval=1min&apikey=S4DI5C1JRKCLIM74
time,open,high,low,close,volume
2020-05-18 19:47:00,4.6899,4.6899,4.6899,4.6899,788
2020-05-18 19:38:00,4.69,4.69,4.69,4.69,100
2020-05-18 19:34:00,4.69,4.69,4.69,4.69,1000
2020-05-18 19:27:00,4.6,4.6,4.6,4.6,132
2020-05-18 18:21:00,4.65,4.65,4.65,4.65,700
2020-05-18 18:20:00,4.6501,4.6501,4.6501,4.6501,1000
2020-05-18 18:09:00,4.65,4.65,4.65,4.65,710
2020-05-18 18:05:00,4.65,4.65,4.65,4.65,2838
2020-05-18 18:04:00,4.7,4.7,4.7,4.7,250
2020-05-18 17:47:00,4.65,4.65,4.65,4.65,3582
2020-05-18 17:46:00,4.65,4.65,4.65,4.65,708
2020-05-18 17:37:00,4.62,4.62,4.62,4.62,4709
2020-05-18 17:35:00,4.66,4.66,4.6,4.6,599
2020-05-18 17:34:00,4.66,4.7,4.66,4.66,2112
2020-05-18 17:33:00,4.81,4.81,4.75,4.75,1244
2020-05-18 17:32:00,4.88,4.9,4.85,4.85,768
2020-05-18 17:31:00,4.61,4.9

Length out_df: 219576
Building URL: gme - Month 10 Year 2
Downloading
https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol=gme&outputsize=full&slice=year2month10&interval=1min&apikey=S4DI5C1JRKCLIM74
time,open,high,low,close,volume
2019-10-21 16:21:00,5.93,5.93,5.93,5.93,300
2019-10-21 16:03:00,5.93,5.93,5.93,5.93,4331
2019-10-21 16:00:00,5.92,5.93,5.92,5.925,71335
2019-10-21 15:59:00,5.925,5.93,5.92,5.92,66878
2019-10-21 15:58:00,5.92,5.925,5.9117,5.925,35602
2019-10-21 15:57:00,5.92,5.925,5.9134,5.92,47520
2019-10-21 15:56:00,5.92,5.92,5.91,5.915,45092
2019-10-21 15:55:00,5.92,5.92,5.91,5.915,20712
2019-10-21 15:54:00,5.915,5.92,5.91,5.915,10694
2019-10-21 15:53:00,5.92,5.92,5.91,5.915,19202
2019-10-21 15:52:00,5.915,5.92,5.91,5.915,4085
2019-10-21 15:51:00,5.915,5.92,5.91,5.915,19325
2019-10-21 15:50:00,5.92,5.92,5.91,5.91,28721
2019-10-21 15:49:00,5.92,5.925,5.915,5.92,30960
2019-10-21 15:48:00,5.92,5.92,5.915,5.92,18892
2019-10-21 15:47:00,5.92,5.92,5.91

In [67]:
#//*** Re-save the combined 1min dataframe.
#//*** Relies on `symbol` and `out_df` left in the kernel by the download cell.
#//*** The path uses forward slashes only, so no per-platform rewrite is required.
output_filename = f"./stocks/{symbol}_1min.csv.zip"
print(f"Writing dataframe to File: {output_filename}")

#//*** Zip-compressed CSV, written without the dataframe index
out_df.to_csv(
    output_filename,
    index=False,
    compression="zip",
)

Writing dataframe to File: ./stocks/gme_1min.csv.zip


In [87]:
#//*** Read the saved archive back and compare the number of distinct
#//*** timestamps with the total number of rows
tdf = pd.read_csv(output_filename)

#//*** dropna=False keeps a missing value counted as one distinct entry
print(tdf['time'].nunique(dropna=False))
print(len(tdf.index))

245627
245627


In [49]:
#//*** Save the most recent response body to a scratch CSV and display it as a dataframe.
#//*** Relies on `r` left in the kernel by the download cell.
#//*** The context manager closes the file even if the write fails.
with open("t.csv", "w") as f:
    f.write(r.text)

pd.read_csv("t.csv")


#decoded_content = r.decode('utf-8')
#cr = csv.reader(r.text.splitlines(), delimiter=',')
#cr

Unnamed: 0,time,open,high,low,close,volume
0,2021-07-12 20:00:00,189.50,189.50,189.50,189.50,417
1,2021-07-12 19:53:00,189.44,189.44,189.44,189.44,118
2,2021-07-12 19:22:00,189.01,189.01,189.01,189.01,102
3,2021-07-12 18:52:00,189.00,189.00,189.00,189.00,250
4,2021-07-12 17:53:00,189.50,189.50,189.50,189.50,163
...,...,...,...,...,...,...
10041,2021-06-14 04:38:00,236.39,236.40,236.39,236.40,975
10042,2021-06-14 04:36:00,236.03,236.03,236.03,236.03,358
10043,2021-06-14 04:21:00,237.50,237.50,237.50,237.50,489
10044,2021-06-14 04:15:00,235.95,235.95,235.95,235.95,418


In [71]:
#//*** NOTE: this cell holds a disabled experiment as a bare string literal; nothing in it
#//*** runs, and the string itself is what the cell displays as output.
#//*** NOTE(review): inside the text, CSV_URL is built but the request is made with `url`,
#//*** so re-enabling it as written would not fetch CSV_URL - confirm before reuse.
"""
#//**** INTRA day EXTENDED provides stock data at intervals of 1min, 5min, 15min, 30min, 60min,
#//**** Each query provides one month at a time

import csv
CSV_URL = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol={symbol}&interval=15min&slice=year1month1&apikey={av_apikey}'

with requests.Session() as s:
    download = s.get(url)
    decoded_content = download.content.decode('utf-8')
    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    my_list = list(cr)
    for row in my_list:
        print(row)
"""

"\n#//**** INTRA day EXTENDED provides stock data at intervals of 1min, 5min, 15min, 30min, 60min,\n#//**** Each query provides one month at a time\n\nimport csv\nCSV_URL = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol={symbol}&interval=15min&slice=year1month1&apikey={av_apikey}'\n\nwith requests.Session() as s:\n    download = s.get(url)\n    decoded_content = download.content.decode('utf-8')\n    cr = csv.reader(decoded_content.splitlines(), delimiter=',')\n    my_list = list(cr)\n    for row in my_list:\n        print(row)\n"