In [1]:
# Initial imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import numpy as np
import datetime


In [2]:
# Load variables from the local .env file into the process environment
# (the Alpaca credentials are read from the environment with os.getenv further down)
load_dotenv()

True

In [None]:
# Adjustable parameters for the whole notebook
no_tickers = 30              # how many tickers to select for the analysis
# Date window for the price download; both dates use the zero-padded yyyy-mm-dd format
start_date = "2022-05-05"    # format yyyy-mm-dd
end_date   = "2022-05-12"    # format yyyy-mm-dd

In [3]:
# Read the Alpaca credentials from the environment (None when a variable is not set)
alpaca_api_key = os.environ.get("ALPACA_API_KEY")
alpaca_secret_key = os.environ.get("ALPACA_SECRET_KEY")

In [4]:
# Build the Alpaca REST client (v2 API) from the credentials loaded above
alpaca = tradeapi.REST(
    key_id=alpaca_api_key,
    secret_key=alpaca_secret_key,
    api_version="v2",
)

In [50]:
# Load the screener CSV of all tickers into a DataFrame
full_ticker_list = pd.read_csv('Data/nasdaq_screener_assets.csv')

# Drop the columns that are not needed for ticker selection
full_ticker_list = full_ticker_list.drop(
    columns=['Last Sale', 'Net Change', '% Change', 'Market Cap',
             'Country', 'IPO Year', 'Volume', 'Sector', 'Industry']
)

# Rows without a Symbol cannot be requested from Alpaca, and a missing value would
# make the string match below return NaN (which breaks the boolean mask), so drop them first.
# NOTE(review): pandas parses a literal "NA" cell as missing by default - confirm whether
# such a ticker exists in the screener file and should be kept.
full_ticker_list = full_ticker_list.dropna(subset=['Symbol'])

# Remove Tickers with '^', '/', & spaces as Alpaca does not accept them
has_unsupported_char = full_ticker_list['Symbol'].str.contains(r'[\^/ ]', regex=True)
full_ticker_list = full_ticker_list[~has_unsupported_char]

In [7]:
# Convert the start and end dates to ISO-8601 strings localized to New York time
start_date, end_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in (start_date, end_date)
)

# Bar size passed to the price download
timeframe = "1Day"

In [8]:
# Plain Python list of the remaining symbols, as passed to the price download
alpaca_tickers = full_ticker_list['Symbol'].tolist()

In [9]:
# Download the bars for every ticker over the configured window and keep them as one DataFrame
df_stock_all = alpaca.get_bars(
    symbol=alpaca_tickers,
    timeframe=timeframe,
    start=start_date,
    end=end_date,
).df

In [10]:

# Build stock_price_change: one row per ticker with its first and last closing price
# in the downloaded window, plus the absolute and percentage change between them.

# Sort the bars chronologically (the frame is indexed by bar timestamp) so that, per symbol,
# first() is the close of the earliest bar and last() is the close of the latest bar.
closes_by_symbol = df_stock_all.sort_index().groupby('symbol')['close']
first_close = closes_by_symbol.first()
last_close = closes_by_symbol.last()

# Keep the index of full_ticker_list so rows stay aligned with the ticker table
stock_price_change = pd.DataFrame({'Ticker': full_ticker_list['Symbol']})

# Tickers for which Alpaca returned no bars are left as NaN and dropped in the clean-up step
stock_price_change['Start Price'] = stock_price_change['Ticker'].map(first_close)
stock_price_change['End Price'] = stock_price_change['Ticker'].map(last_close)

# calculate price change (positive when the price rose over the window)
stock_price_change['Price Change'] = stock_price_change['End Price'] - stock_price_change['Start Price']
stock_price_change['Price Change %'] = (stock_price_change['Price Change'] / stock_price_change['Start Price']) * 100


In [13]:
# Order tickers from largest to smallest % change, discard rows with any missing value,
# and renumber the rows 0..n-1
stock_price_change = (
    stock_price_change
    .sort_values(by='Price Change %', ascending=False)
    .dropna(axis='index', how='any')
    .reset_index(drop=True)
)


In [14]:
# Select Tickers for analysis distributed through Data Set
rows_count = stock_price_change.shape[0]
# Step between picks: the even spacing less one, to avoid the ends.
# Clamped to 1 so a small data set cannot yield a zero step (range() would raise)
# or a negative step (the range would run backwards past the table).
increment = max(1, round(rows_count / no_tickers) - 1)
# Start half a block in, and cap the selection at no_tickers positions
# (the shortened step can otherwise fit one extra pick before the end).
list_select_tickers = range(round(no_tickers / 2), rows_count, increment)[:no_tickers]


In [73]:
# Ticker symbols found at the selected row labels
list_analysis_tickers = stock_price_change.loc[list_select_tickers, 'Ticker'].tolist()

In [114]:
# capture stock price data of the selected tickers for analysis
is_selected = df_stock_all['symbol'].isin(list_analysis_tickers)

# Pick the needed columns by name and rename them explicitly, so the result does not
# depend on the column order of the downloaded frame. Building a new frame (instead of
# mutating a filtered slice in place) also avoids pandas' SettingWithCopy warnings.
df_stock_price_data = (
    df_stock_all.loc[is_selected, ['close', 'symbol']]
    .reset_index()  # moves the 'timestamp' index into a regular column
    .rename(columns={'timestamp': 'Date', 'close': 'Close', 'symbol': 'Ticker'})
)

# Keep only the calendar date of each bar.
# NOTE(review): plain column assignment is used because `.loc[:, col] = ...` may try to
# keep the original datetime dtype on newer pandas versions - confirm against the pinned version.
df_stock_price_data['Date'] = df_stock_price_data['Date'].dt.date


In [122]:
# Write the selected tickers' price data to 'Data/StockPriceData.csv' (row index included)
df_stock_price_data.to_csv(path_or_buf='Data/StockPriceData.csv')