In [1]:
# Initial Library imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import numpy as np
import datetime as dt


In [2]:
# Load .env environment variables
load_dotenv()

True

In [3]:
# Set adjustable variables
no_tickers = 30
start_date = "2022-05-5" # format yyyy-mm-dd
end_date   = "2022-05-12" # format yyyy-mm-dd

#Set timeframe of Tickers 
timeframe = "4Hour"

In [4]:
# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

In [5]:
# Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

In [6]:
# import csv list of all nasdaq tickers and clean them for use

#import csv file of all tickers to a dataframe
df_full_ticker_list = pd.read_csv ('Data/nasdaq_screener_assets.csv')
#drop unwanted columns
df_full_ticker_list = df_full_ticker_list.drop(['Last Sale', 'Net Change', '% Change', 'Market Cap',
       'Country', 'IPO Year', 'Volume', 'Sector', 'Industry'], axis = 'columns')

# Remove Tickers with '^', '/', & spaces as Alpaca does not accept them
df_full_ticker_list = df_full_ticker_list[~df_full_ticker_list.Symbol.str.contains('^', regex = False)
                                    & ~df_full_ticker_list.Symbol.str.contains('/', regex = False)
                                    & ~df_full_ticker_list.Symbol.str.contains(' ', regex = False)
                                   ]

# convert df to list for use with Alpaca API
alpaca_tickers = list(df_full_ticker_list['Symbol'])


In [7]:
#set up for Alpaca API call

# Format current date as ISO format
start_date = pd.Timestamp(start_date, tz="America/New_York").isoformat()
end_date = pd.Timestamp(end_date, tz="America/New_York").isoformat()


In [8]:
# make API call to Alpaca to receive a data frame of all stock data

df_stock_all = alpaca.get_bars(
    alpaca_tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

#check output of df_stock_all
df_stock_all.head(10)

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-05-05 08:00:00+00:00,67.21,67.21,67.21,67.21,300,6,67.2034,AA
2022-05-05 12:00:00+00:00,67.49,68.17,62.1,62.35,2482875,29060,64.369244,AA
2022-05-05 16:00:00+00:00,62.29,64.2,61.94,63.46,2797675,33917,63.063929,AA
2022-05-05 20:00:00+00:00,63.46,63.52,63.3,63.36,232823,46,63.45979,AA
2022-05-06 08:00:00+00:00,62.82,62.82,62.82,62.82,121,5,62.814711,AA
2022-05-06 12:00:00+00:00,63.1,63.31,60.0,61.39,2751928,26827,61.568203,AA
2022-05-06 16:00:00+00:00,61.39,62.59,60.6,61.09,2360305,26037,61.313043,AA
2022-05-06 20:00:00+00:00,61.04,61.35,60.8,60.8,447653,67,61.037853,AA
2022-05-09 08:00:00+00:00,59.8,59.87,58.29,58.4,25099,177,58.689947,AA
2022-05-09 12:00:00+00:00,58.13,58.5731,53.88,54.91,5707182,53124,56.277603,AA


In [9]:
# calculate the change in price accross the selected date range to determine a suitable dataset for analysis

# Set up df_stock_price_change varialbe as data frame to capture all start and end values
df_stock_price_change = pd.DataFrame(columns = ['Ticker', 'Start Price', 'End Price','Price Change'])
# capture all Ticker symbols in df_stock_price_change form df_full_ticker_list
df_stock_price_change['Ticker'] = df_full_ticker_list['Symbol']

# loop through each ticker and capture the first and last pice for each
for index in df_stock_price_change.index:
    ticker = df_stock_price_change['Ticker'][index]
    ticker_data_temp = df_stock_all[df_stock_all.symbol == ticker]
    
    # ignore ticker if no symbol was found from Alpaca, only process if the size of ticker_data_temp is greater than 0
    if ticker_data_temp.size != 0:
    
        df_stock_price_change['Start Price'][index] = ticker_data_temp.at[ticker_data_temp.index.max(), 'close']
        df_stock_price_change['End Price'][index] = ticker_data_temp.at[ticker_data_temp.index.min(), 'close']

# calculate price change
df_stock_price_change['Price Change'] = df_stock_price_change['End Price'] - df_stock_price_change['Start Price']
df_stock_price_change['Price Change %'] = (df_stock_price_change['Price Change']/df_stock_price_change['Start Price'])*100

# check output of df_stock_price_change
df_stock_price_change.head(10)

Unnamed: 0,Ticker,Start Price,End Price,Price Change,Price Change %
0,A,113.99,121.57,7.58,6.649706
1,AA,56.66,67.21,10.55,18.619838
2,AAC,9.79,9.83,0.04,0.40858
3,AACG,1.0,1.17,0.17,17.0
4,AACI,9.86,9.86,0.0,0.0
5,AACIW,0.26,0.2709,0.0109,4.192308
6,AADI,11.57,15.74,4.17,36.041487
7,AAIC,3.02,3.2035,0.1835,6.076159
10,AAIN,23.78,24.42,0.64,2.691337
11,AAL,15.98,19.43,3.45,21.589487


In [10]:
# Clean df_stock_price_change, drop NaNs and sort in order of % change
df_stock_price_change.sort_values(by = 'Price Change %',ascending = False, inplace = True)
df_stock_price_change.dropna(axis = 'index', how = 'any' ,inplace = True)
df_stock_price_change.reset_index(drop = True, inplace = True)

# check output of df_stock_price_change
df_stock_price_change.head(10)

Unnamed: 0,Ticker,Start Price,End Price,Price Change,Price Change %
0,AKAN,1.21,8.85,7.64,631.404959
1,DAVEW,0.1622,0.5725,0.4103,252.959309
2,UPST,28.22,92.5,64.28,227.781715
3,VIEW,0.48,1.465,0.985,205.208333
4,AUST,1.9,5.68,3.78,198.947368
5,DM,1.39,4.11,2.72,195.683453
6,RDBXW,0.1749,0.49,0.3151,180.160091
7,VIEWW,0.062,0.173,0.111,179.032258
8,ITP,0.1834,0.4899,0.3065,167.121047
9,CELUW,0.62,1.65,1.03,166.129032


In [11]:
# Select Tickers for analysis distributed through Data Set equally
# this is to ensure that the tickers selected for analysis are a well represented set across a wide market range 

#set number of rows
rows_count = df_stock_price_change.shape[0]
# calculate step size for selection less one to avoid the ends
increment = round(rows_count/(no_tickers))-1
# set indexes of tickers to capture for analysis
list_select_tickers = range(round(no_tickers/2), rows_count, increment)

# capture the ticker symbols to use for anlysis
list_analysis_tickers = list(df_stock_price_change['Ticker'][list_select_tickers].values)

# check output of list_analysis_tickers
list_analysis_tickers

['PRTY',
 'AHT',
 'SKLZ',
 'EDBL',
 'ADSEW',
 'GLBS',
 'HCAT',
 'PETV',
 'VIAO',
 'AQUA',
 'VIRI',
 'STOK',
 'GHACW',
 'CIEN',
 'VRSK',
 'EMR',
 'GPRE',
 'ITRN',
 'IDE',
 'BCOW',
 'ECAT',
 'EGBN',
 'DSACU',
 'BFAC',
 'GNTY',
 'WAVS',
 'GENQU',
 'EJFA',
 'PNW',
 'VOC',
 'KYCHR']

In [18]:
# capture stock price data of the selected tickers and clean ready for analysis

# capture the data for only the chosen tickers from df_stock_all
df_stock_price_data = df_stock_all[df_stock_all['symbol'].isin(list_analysis_tickers)]
# reset index
df_stock_price_data.reset_index(inplace = True)

# Change 'timestamp' values to date only
#df_stock_price_data.loc[:,'timestamp'] = df_stock_price_data.loc[:,'timestamp'].dt.date

# drop un-needed columns
df_stock_price_data.drop(['open','high','low','volume','trade_count','vwap'], axis='columns',inplace = True)
# rename remaining columns to suitable names
df_stock_price_data.columns = ['Date','Close','Ticker']

# check output of df_stock_price_data
df_stock_price_data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Date,Close,Ticker
0,2022-05-05 16:00:00+00:00,0.76,ADSEW
1,2022-05-09 12:00:00+00:00,0.75,ADSEW
2,2022-05-09 16:00:00+00:00,0.7,ADSEW
3,2022-05-10 16:00:00+00:00,0.61,ADSEW
4,2022-05-11 12:00:00+00:00,0.61,ADSEW
5,2022-05-11 16:00:00+00:00,0.5863,ADSEW
6,2022-05-05 08:00:00+00:00,7.75,AHT
7,2022-05-05 12:00:00+00:00,7.1992,AHT
8,2022-05-05 16:00:00+00:00,7.22,AHT
9,2022-05-05 20:00:00+00:00,7.21,AHT


In [19]:
# save CSV of df_stock_price_data as 'Data/StockPriceData.csv'
df_stock_price_data.to_csv('Data/StockPriceData.csv')

In [16]:
#save ticker list in .csv for use in other program
#convert list to DF
df_alpaca_tickers = pd.DataFrame(alpaca_tickers)
# save to CSV
df_alpaca_tickers.to_csv('Data/TickerList.csv')

In [13]:
df_stock_price_data.head(10)

Unnamed: 0,Date,Close,Ticker
0,2022-05-05,0.76,ADSEW
1,2022-05-09,0.75,ADSEW
2,2022-05-09,0.7,ADSEW
3,2022-05-10,0.61,ADSEW
4,2022-05-11,0.61,ADSEW
5,2022-05-11,0.5863,ADSEW
6,2022-05-05,7.75,AHT
7,2022-05-05,7.1992,AHT
8,2022-05-05,7.22,AHT
9,2022-05-05,7.21,AHT


In [17]:
df_stock_all.to_csv('Data/AllStockData.csv')