In [1]:
# Initial imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import numpy as np
import datetime


In [2]:
# Load .env environment variables
load_dotenv()

True

In [3]:
# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

In [4]:
# Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

In [50]:
#import csv file of all tickers to a list
#######full_ticker_list = pd.read_csv ('Data/nasdaq-listed-symbols.csv')

full_ticker_list = pd.read_csv ('Data/nasdaq_screener_assets.csv')
#drop unwanted columns
full_ticker_list = full_ticker_list.drop(['Last Sale', 'Net Change', '% Change', 'Market Cap',
       'Country', 'IPO Year', 'Volume', 'Sector', 'Industry'], axis = 'columns')

# Remove Tickers with '^', '/', & spaces as Alpaca does not accept them
full_ticker_list = full_ticker_list[~full_ticker_list.Symbol.str.contains('^', regex = False)
                                    & ~full_ticker_list.Symbol.str.contains('/', regex = False)
                                    & ~full_ticker_list.Symbol.str.contains(' ', regex = False)
                                   ]

In [51]:
#full_ticker_list.set_index('Symbol',  inplace = True)

full_ticker_list.head(10)

Unnamed: 0,Symbol,Name
0,A,Agilent Technologies Inc. Common Stock
1,AA,Alcoa Corporation Common Stock
2,AAC,Ares Acquisition Corporation Class A Ordinary ...
3,AACG,ATA Creativity Global American Depositary Shares
4,AACI,Armada Acquisition Corp. I Common Stock
5,AACIW,Armada Acquisition Corp. I Warrant
6,AADI,Aadi Bioscience Inc. Common Stock
7,AAIC,Arlington Asset Investment Corp Class A (new)
10,AAIN,Arlington Asset Investment Corp 6.000% Senior ...
11,AAL,American Airlines Group Inc. Common Stock


In [7]:
# Format current date as ISO format
start_date = pd.Timestamp("2022-04-9", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2022-05-8", tz="America/New_York").isoformat()

timeframe = "1Day"

In [8]:
alpaca_tickers = list(full_ticker_list['Symbol'])

In [9]:
df_stock_all = alpaca.get_bars(
    alpaca_tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

In [10]:

# Set up stock_price_change varialbe as data frame to capture all start and end values
stock_price_change = pd.DataFrame(columns = ['Ticker', 'Start Price', 'End Price','Price Change'])
stock_price_change['Ticker'] = full_ticker_list['Symbol']

# loop through the 
for index in stock_price_change.index:
    ticker = stock_price_change['Ticker'][index]
    ticker_data_temp = df_stock_all[df_stock_all.symbol == ticker]
    
    # ignore ticker if no symbol was found from Alpaca
    if ticker_data_temp.size != 0:
    
        stock_price_change['Start Price'][index] = ticker_data_temp.at[ticker_data_temp.index.max(), 'close']
        stock_price_change['End Price'][index] = ticker_data_temp.at[ticker_data_temp.index.min(), 'close']

# calculate price change
stock_price_change['Price Change'] = stock_price_change['End Price'] - stock_price_change['Start Price']





In [11]:
stock_price_change['Price Change %'] = (stock_price_change['Price Change']/stock_price_change['Start Price'])*100

In [12]:
stock_price_change

Unnamed: 0,Ticker,Start Price,End Price,Price Change,Price Change %
0,A,121.08,131.26,10.18,8.407664
1,AA,61.04,82.53,21.49,35.206422
2,AAC,9.81,9.82,0.01,0.101937
3,AACG,1.14,1.225,0.085,7.45614
4,AACI,9.86,9.86,0.0,0.0
...,...,...,...,...,...
8523,ZWS,28.86,35.34,6.48,22.453222
8524,ZY,1.62,2.23,0.61,37.654321
8525,ZYME,5.98,6.08,0.1,1.672241
8526,ZYNE,1.25,1.8,0.55,44.0


In [13]:
# Clean stock_price_change, drop NaNs and sort in order of % change

stock_price_change.sort_values(by = 'Price Change %',ascending = False, inplace = True)
stock_price_change.dropna(axis = 'index', how = 'any' ,inplace = True)
stock_price_change.reset_index(drop = True, inplace = True)

In [14]:
# Select Tickers for analysis distributed through Data Set
no_tickers = 30

rows_count = stock_price_change.shape[0]
# calculate step size for selection less one to avoid the ends
increment = round(rows_count/(no_tickers))-1
list_select_tickers = range(round(no_tickers/2), rows_count, increment)

In [73]:
list_analysis_tickers = list(stock_price_change['Ticker'][list_select_tickers].values)

In [74]:
list_analysis_tickers

['MOLN',
 'TALKW',
 'ALGN',
 'ATHX',
 'SMSI',
 'URGN',
 'JNCE',
 'FREEW',
 'PZZA',
 'BROS',
 'GRVY',
 'BEP',
 'LMAT',
 'SECO',
 'CTBB',
 'FTF',
 'OGE',
 'SYNL',
 'SYRS',
 'SRL',
 'AGO',
 'EVOJU',
 'HSAQ',
 'MTAC',
 'GBRG',
 'TAP',
 'MEI',
 'ERF',
 'BLDR',
 'EPAM',
 'PPYAW']

In [105]:
# capture stock price data of the selected tickers for analysis 
df_stock_price_data = df_stock_all[df_stock_all['symbol'].isin(list_analysis_tickers)]
df_stock_price_data.reset_index(inplace = True)
df_stock_price_data['timestamp'] = df_stock_price_data['timestamp'].dt.date







A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [103]:
df_stock_price_data['timestamp'].dt.date

0      2022-04-11
1      2022-04-12
2      2022-04-13
3      2022-04-14
4      2022-04-18
          ...    
584    2022-05-02
585    2022-05-03
586    2022-05-04
587    2022-05-05
588    2022-05-06
Name: timestamp, Length: 589, dtype: object

In [108]:
df_stock_price_data

Unnamed: 0,timestamp,open,high,low,close,volume,trade_count,vwap,symbol
0,2022-04-11,59.06,60.335,58.6500,58.72,315665,5340,59.230175,AGO
1,2022-04-12,58.91,59.930,58.5400,58.87,289886,5709,59.070095,AGO
2,2022-04-13,58.50,60.480,58.5000,60.42,348366,6174,60.036452,AGO
3,2022-04-14,60.12,61.220,60.1200,60.40,434227,5802,60.534480,AGO
4,2022-04-18,60.12,60.970,59.8600,60.33,583988,6664,60.377456,AGO
...,...,...,...,...,...,...,...,...,...
584,2022-05-02,7.06,7.080,6.1600,6.66,242513,4136,6.601377,URGN
585,2022-05-03,6.63,6.840,6.3998,6.75,236584,4173,6.586335,URGN
586,2022-05-04,6.75,7.010,6.3400,6.91,119559,2500,6.702285,URGN
587,2022-05-05,6.84,6.900,6.2200,6.32,84668,1858,6.503618,URGN


In [109]:
df_stock_price_data

Unnamed: 0,timestamp,open,high,low,close,volume,trade_count,vwap,symbol
0,2022-04-11,59.06,60.335,58.6500,58.72,315665,5340,59.230175,AGO
1,2022-04-12,58.91,59.930,58.5400,58.87,289886,5709,59.070095,AGO
2,2022-04-13,58.50,60.480,58.5000,60.42,348366,6174,60.036452,AGO
3,2022-04-14,60.12,61.220,60.1200,60.40,434227,5802,60.534480,AGO
4,2022-04-18,60.12,60.970,59.8600,60.33,583988,6664,60.377456,AGO
...,...,...,...,...,...,...,...,...,...
584,2022-05-02,7.06,7.080,6.1600,6.66,242513,4136,6.601377,URGN
585,2022-05-03,6.63,6.840,6.3998,6.75,236584,4173,6.586335,URGN
586,2022-05-04,6.75,7.010,6.3400,6.91,119559,2500,6.702285,URGN
587,2022-05-05,6.84,6.900,6.2200,6.32,84668,1858,6.503618,URGN


In [24]:
stock_price_data.keys

15       MOLN
282     TALKW
549      ALGN
816      ATHX
1083     SMSI
1350     URGN
1617     JNCE
1884    FREEW
2151     PZZA
2418     BROS
2685     GRVY
2952      BEP
3219     LMAT
3486     SECO
3753     CTBB
4020      FTF
4287      OGE
4554     SYNL
4821     SYRS
5088      SRL
5355      AGO
5622    EVOJU
5889     HSAQ
6156     MTAC
6423     GBRG
6690      TAP
6957      MEI
7224      ERF
7491     BLDR
7758     EPAM
8025    PPYAW
Name: Ticker, dtype: object

In [None]:
stock_tweet_sentiment_df[“date”] = pd.to_datetime(stock_tweet_sentiment_df[“date”].dt.date)

In [None]:
df_stock_all

In [107]:
# save CSV of df_stock_price_data as 'Data/StockPriceData.csv'
df_stock_price_data.to_csv('Data/StockPriceData.csv')

In [110]:
set(df_stock_price_data['symbol'])

{'AGO',
 'ALGN',
 'ATHX',
 'BEP',
 'BLDR',
 'BROS',
 'CTBB',
 'EPAM',
 'ERF',
 'EVOJU',
 'FREEW',
 'FTF',
 'GBRG',
 'GRVY',
 'HSAQ',
 'JNCE',
 'LMAT',
 'MEI',
 'MOLN',
 'MTAC',
 'OGE',
 'PPYAW',
 'PZZA',
 'SECO',
 'SMSI',
 'SRL',
 'SYNL',
 'SYRS',
 'TALKW',
 'TAP',
 'URGN'}