In [1]:
# Initial Library imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import numpy as np
import datetime as dt


In [2]:
# Load .env environment variables
load_dotenv()

True

In [3]:
# Set adjustable variables
no_tickers = 30
start_date = "2022-05-10" # format yyyy-mm-dd
end_date   = "2022-05-18" # format yyyy-mm-dd *** note with 4Hour timeframe, data will capture up to end of previous day from end_date

#Set timeframe of Tickers 
timeframe = "4Hour"

In [4]:
# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

In [5]:
# Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

In [6]:
# import csv list of all nasdaq tickers and clean them for use

#import csv file of all tickers to a dataframe
df_full_ticker_list = pd.read_csv ('Data/nasdaq_screener_assets.csv')
#drop unwanted columns
df_full_ticker_list = df_full_ticker_list.drop(['Last Sale', 'Net Change', '% Change', 'Market Cap',
       'Country', 'IPO Year', 'Volume', 'Sector', 'Industry'], axis = 'columns')

# Remove Tickers with '^', '/', & spaces as Alpaca does not accept them
df_full_ticker_list = df_full_ticker_list[~df_full_ticker_list.Symbol.str.contains('^', regex = False)
                                    & ~df_full_ticker_list.Symbol.str.contains('/', regex = False)
                                    & ~df_full_ticker_list.Symbol.str.contains(' ', regex = False)
                                   ]

# convert df to list for use with Alpaca API
alpaca_tickers = list(df_full_ticker_list['Symbol'])


In [7]:
#set up for Alpaca API call

# Format current date as ISO format
start_date = pd.Timestamp(start_date, tz="America/New_York").isoformat()
end_date = pd.Timestamp(end_date, tz="America/New_York").isoformat()


In [8]:
# make API call to Alpaca to receive a data frame of all stock data

df_stock_all = alpaca.get_bars(
    alpaca_tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

#check output of df_stock_all
df_stock_all.head(10)

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-05-10 08:00:00+00:00,55.6,55.6,54.6,55.03,34898,288,55.157951,AA
2022-05-10 12:00:00+00:00,54.9,56.88,54.07,54.52,4036281,38858,55.486529,AA
2022-05-10 16:00:00+00:00,54.58,56.91,53.61,55.835,4177742,50011,55.568406,AA
2022-05-10 20:00:00+00:00,55.84,56.1,55.61,55.7,203721,27,55.83954,AA
2022-05-11 08:00:00+00:00,56.89,56.93,56.5,56.93,955,17,56.761602,AA
2022-05-11 12:00:00+00:00,57.0,59.36,55.19,58.37,2748562,30562,58.218161,AA
2022-05-11 16:00:00+00:00,58.405,58.405,56.27,56.62,3124526,32199,57.113865,AA
2022-05-11 20:00:00+00:00,56.64,56.79,56.0,56.66,229478,105,56.648243,AA
2022-05-12 08:00:00+00:00,56.0,56.0,54.31,55.15,4834,50,54.991585,AA
2022-05-12 12:00:00+00:00,54.61,56.36,53.34,55.03,3098872,30907,54.867158,AA


In [9]:
# calculate the change in price accross the selected date range to determine a suitable dataset for analysis

# Set up df_stock_price_change varialbe as data frame to capture all start and end values
df_stock_price_change = pd.DataFrame(columns = ['Ticker', 'Start Price', 'End Price','Price Change'])
# capture all Ticker symbols in df_stock_price_change form df_full_ticker_list
df_stock_price_change['Ticker'] = df_full_ticker_list['Symbol']

# loop through each ticker and capture the first and last pice for each
for index in df_stock_price_change.index:
    ticker = df_stock_price_change['Ticker'][index]
    ticker_data_temp = df_stock_all[df_stock_all.symbol == ticker]
    
    # ignore ticker if no symbol was found from Alpaca, only process if the size of ticker_data_temp is greater than 0
    if ticker_data_temp.size != 0:
    
        df_stock_price_change['Start Price'][index] = ticker_data_temp.at[ticker_data_temp.index.min(), 'close']
        df_stock_price_change['End Price'][index] = ticker_data_temp.at[ticker_data_temp.index.max(), 'close']

# calculate price change
df_stock_price_change['Price Change'] = df_stock_price_change['End Price'] - df_stock_price_change['Start Price']
df_stock_price_change['Price Change %'] = (df_stock_price_change['Price Change']/df_stock_price_change['Start Price'])*100

# check output of df_stock_price_change
df_stock_price_change.head(10)

Unnamed: 0,Ticker,Start Price,End Price,Price Change,Price Change %
0,A,114.46,123.08,8.62,7.531015
1,AA,55.03,63.59,8.56,15.555152
2,AAC,9.815,9.82,0.005,0.050942
3,AACG,1.09,1.09,0.0,0.0
4,AACI,9.84,9.84,0.0,0.0
5,AACIW,0.2698,0.26,-0.0098,-3.63232
6,AADI,12.67,15.05,2.38,18.78453
7,AAIC,3.0482,3.58,0.5318,17.446362
10,AAIN,23.245,24.16,0.915,3.93633
11,AAL,16.53,17.85,1.32,7.985481


In [10]:
# Clean df_stock_price_change, drop NaNs and sort in order of % change
df_stock_price_change.sort_values(by = 'Price Change %',ascending = False, inplace = True)
df_stock_price_change.dropna(axis = 'index', how = 'any' ,inplace = True)
df_stock_price_change.reset_index(drop = True, inplace = True)

# check output of df_stock_price_change
df_stock_price_change.head(10)

Unnamed: 0,Ticker,Start Price,End Price,Price Change,Price Change %
0,SRGA,0.17645,5.25,5.07355,2875.347124
1,PTE,0.16,2.96,2.8,1750.0
2,TNXP,0.1362,2.3,2.1638,1588.693098
3,RMTI,0.2793,1.97,1.6907,605.334765
4,PXS,0.6261,2.76,2.1339,340.824149
5,EDTXW,0.0501,0.2145,0.1644,328.143713
6,PT,0.44,1.7,1.26,286.363636
7,NLSPW,0.109,0.37,0.261,239.449541
8,PIXY,0.198,0.428,0.23,116.161616
9,FFIEW,0.21,0.4427,0.2327,110.809524


In [11]:
# Select Tickers for analysis distributed through Data Set equally
# this is to ensure that the tickers selected for analysis are a well represented set across a wide market range 

#set number of rows
rows_count = df_stock_price_change.shape[0]
# calculate step size for selection less one to avoid the ends
increment = round(rows_count/(no_tickers))-1
# set indexes of tickers to capture for analysis
list_select_tickers = range(round(no_tickers/2), rows_count, increment)

# capture the ticker symbols to use for anlysis
list_analysis_tickers = list(df_stock_price_change['Ticker'][list_select_tickers].values)

# check output of list_analysis_tickers
list_analysis_tickers

['AGRI',
 'TKNO',
 'GNE',
 'ENERR',
 'KOP',
 'PVL',
 'GBX',
 'BRX',
 'IAS',
 'PRTC',
 'CFSB',
 'COWN',
 'SCD',
 'MGRC',
 'ENO',
 'HUSN',
 'MMX',
 'GRTS',
 'FRLA',
 'ARBG',
 'DECAU',
 'TETCU',
 'NVSA',
 'PYN',
 'XOMAO',
 'JACK',
 'NPCT',
 'IGACW',
 'DCRDW',
 'TBLA',
 'GGGVR']

In [12]:
# capture stock price data of the selected tickers and clean ready for analysis

# capture the data for only the chosen tickers from df_stock_all
df_stock_price_data = df_stock_all[df_stock_all['symbol'].isin(list_analysis_tickers)]
# reset index
df_stock_price_data.reset_index(inplace = True)

# Change 'timestamp' values to date only
##df_stock_data.loc[:,'timestamp'] = df_stock_data.loc[:,'timestamp'].dt.date
# df_stock_price_data.timestamp = pd.to_datetime(df_stock_price_data.timestamp)
# df_stock_price_data['timestamp'] = df_stock_price_data['timestamp'].to_pydatetime()

# drop un-needed columns
df_stock_price_data.drop(['open','high','low','volume','trade_count','vwap'], axis='columns',inplace = True)
# rename remaining columns to suitable names
df_stock_price_data.columns = ['Date','Close','Ticker']

# check output of df_stock_price_data
df_stock_price_data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Date,Close,Ticker
0,2022-05-10 08:00:00+00:00,1.3,AGRI
1,2022-05-10 12:00:00+00:00,1.445,AGRI
2,2022-05-10 16:00:00+00:00,1.52,AGRI
3,2022-05-10 20:00:00+00:00,1.57,AGRI
4,2022-05-11 12:00:00+00:00,1.515,AGRI
5,2022-05-11 16:00:00+00:00,1.45,AGRI
6,2022-05-11 20:00:00+00:00,1.42,AGRI
7,2022-05-12 12:00:00+00:00,1.4,AGRI
8,2022-05-12 16:00:00+00:00,1.5299,AGRI
9,2022-05-12 20:00:00+00:00,1.55,AGRI


In [13]:
# save CSV of df_stock_price_data as 'Data/StockPriceData.csv'
df_stock_price_data.to_csv('Data/StockPriceData.csv')

In [14]:
#save df_stock_price_change in .csv for use in other program
#convert list to DF
df_stock_price_change.to_csv('Data/StockPriceChange.csv')