In [81]:
from datetime import datetime, timedelta
import pandas_datareader.data as web
import yfinance as yf
import pandas as pd
import numpy as np
import time

pd.options.display.float_format = '{:,.2f}'.format

In [3]:
def get_tickers(file=r"Stock names.txt"):
    with open(file) as f:
        tickers = f.read().split("\n")
    return [ticker for ticker in tickers if "." not in ticker]

In [76]:
def get_all_tickers(etf=False):
    # Get a list of all available tickers from Yahoo Finance
    all_tickers = web.get_nasdaq_symbols()
    all_tickers = all_tickers[all_tickers["Test Issue"]]  # Filter out stocks that we can't fetch their data for some reason
    
    if not etf:
        all_tickers = all_tickers[~all_tickers["ETF"]]  # Filter out ETFs
    tickers = all_tickers.index.tolist()
    return [str(ticker) for ticker in tickers if str(ticker).isalpha()]


In [72]:
def get_info(df):
    high = df['High'].max()
    close = df['Close'].iloc[-1]
    pct_to_52 = ((high - close) / high) * 100
    volume = df["Volume"].iloc[-1]
    date_of_high = df['High'].idxmax()
    
    
    new_df = pd.DataFrame({'Close': close, '52wk High': high, "Percent from 52": pct_to_52, "Volume": volume, "doh": date_of_high, "Volume$": volume * close})

    return new_df

In [7]:
def get_data(tickers_list):
    return yf.download(tickers_list, period="1y")

In [8]:
s = time.time()
data = get_data(get_all_tickers())
data_cop = data.copy()
print(time.time() - s)

[*********************100%***********************]  7471 of 7471 completed

76 Failed downloads:
- ZXIET: No data found, symbol may be delisted
- BLEUR: Period '1y' is invalid, must be one of ['1d', '5d']
- HMACR: Period '1y' is invalid, must be one of ['1d', '5d']
- TMTCR: Period '1y' is invalid, must be one of ['1d', '5d']
- IGZ: Period '1y' is invalid, must be one of ['1d', '5d']
- ZCZZT: Period '1y' is invalid, must be one of ['1d', '5d']
- KYCHR: Period '1y' is invalid, must be one of ['1d', '5d']
- EMP: No data found for this date range, symbol may be delisted
- ATMCR: Period '1y' is invalid, must be one of ['1d', '5d']
- GLSTR: Period '1y' is invalid, must be one of ['1d', '5d']
- CSLMR: Period '1y' is invalid, must be one of ['1d', '5d']
- MCACR: Period '1y' is invalid, must be one of ['1d', '5d']
- IBO: Period '1y' is invalid, must be one of ['1d', '5d']
- TENKR: Period '1y' is invalid, must be one of ['1d', '5d']
- ZBZX: Period '1y' is invalid, must be one of ['1d', '5d']
- C

In [73]:
df = get_info(data)
cop = df.copy()
df

Unnamed: 0,Close,52wk High,Percent from 52,Volume,doh,Volume$
A,127.720001,160.259995,20.304502,2001200.0,2022-12-13,2.555933e+08
AA,33.750000,58.235001,42.045162,6035400.0,2022-08-26,2.036948e+08
AAC,10.560000,10.600000,0.377358,221500.0,2023-07-18,2.339040e+06
AACG,1.380000,2.640000,47.727275,11800.0,2022-08-18,1.628400e+04
AACI,10.560000,12.200000,13.442618,40300.0,2023-01-30,4.255680e+05
...,...,...,...,...,...,...
ZXIET,,,,,NaT,
ZXZZT,,,,,NaT,
ZYME,7.390000,10.800000,31.574077,605600.0,2023-01-19,4.475384e+06
ZYNE,0.360000,1.390000,74.100718,97700.0,2022-08-08,3.517200e+04


In [82]:
df = cop.copy()
df = df.dropna(subset=['Close'])
vol_thresh = 10000
df = df[df['Volume'] >= vol_thresh]
time_thresh1 = datetime.now() - timedelta(days=90)  # Hit the high between 120 and 200 days ago (3 - 7 months)
time_thresh2 = datetime.now() - timedelta(days=200)  
df = df[(df["doh"] < time_thresh1) & (df["doh"] > time_thresh2)]
df.sort_values(by='Percent from 52', ascending=True).head(20)

Unnamed: 0,Close,52wk High,Percent from 52,Volume,doh,Volume$
TRTL,10.39,10.4,0.1,34700.0,2023-01-12,360533.01
ORI,26.64,26.72,0.3,1612400.0,2023-02-13,42954335.02
AGNCN,25.51,25.69,0.7,24000.0,2023-03-06,612240.01
MDU,22.15,22.33,0.79,1418800.0,2023-02-28,31426419.46
BG,106.08,106.99,0.85,699800.0,2023-03-14,74234785.28
SEIC,64.08,64.69,0.94,437900.0,2023-02-03,28060632.8
AJRD,56.22,56.8,1.02,747600.0,2023-04-20,42030072.91
PHYT,10.73,10.85,1.11,30300.0,2023-04-17,325118.99
JBHT,198.34,200.64,1.15,883200.0,2023-02-02,175173884.77
VVX,50.4,51.0,1.18,63300.0,2023-03-03,3190320.1


In [78]:
def get_info_test(df):        
    days_back = 12
    
    _highs = df['High'][:-days_back].max()
    _closes = df['Close'].iloc[-days_back]
    _pct_to_52 = ((_highs - _closes) / _highs) * 100
    # _volume = np.nanmean(df["Volume"].iloc[-days_back])
    _date_of_high = df['High'][:-days_back].idxmax()
    
    highs = df['High'].max()
    date_of_high = df['High'].idxmax()
    closes = df['Close'].iloc[-1]
    volume = df["Volume"].iloc[-3:].mean()  # Calculates the average volume in the last 3 days
    pct_to_52 = ((highs - closes) / highs) * 100
    
    new_df = pd.DataFrame({'Old Close': _closes, 'Old 52wk High': _highs, "Old Percent from 52": _pct_to_52, "Volume": volume, "doh": _date_of_high, "Close": closes, "Prct252": pct_to_52, "high52": highs, "date of break": date_of_high})

    return new_df


In [None]:
s = time.time()
data = get_data(get_all_tickers())
data_cop = data.copy()
print(time.time() - s)

In [79]:
df = get_info_test(data)
cop = df.copy()
cop

Unnamed: 0,Old Close,Old 52wk High,Old Percent from 52,Volume,doh,Close,Prct252,high52,date of break
A,117.00,160.26,26.99,3098500.00,2022-12-13,,,160.26,2022-12-13
AA,34.09,58.24,41.46,8763950.00,2022-08-26,,,58.24,2022-08-26
AAA,24.58,24.73,0.58,3550.00,2023-02-22,,,24.82,2023-07-10
AAAU,19.08,20.43,6.61,893750.00,2023-05-04,,,20.43,2023-05-04
AAC,10.57,10.58,0.09,309650.00,2023-06-30,,,10.60,2023-07-18
...,...,...,...,...,...,...,...,...,...
ZXIET,,,,,NaT,,,,NaT
ZXZZT,,,,,NaT,,,,NaT
ZYME,8.26,10.80,23.52,484550.00,2023-01-19,,,10.80,2023-01-19
ZYNE,0.35,1.39,74.82,225900.00,2022-08-08,,,1.39,2022-08-08


In [81]:
# Filtering and sorting

df = cop.copy()
time_thresh = datetime.now() - timedelta(days=120)
break_time_thresh = datetime.now() - timedelta(days=9)  # How many days ago it broke
vol_thresh = 10000

df = df[df["Old 52wk High"] != df["high52"]]
# df = df[df['Old Volume'] >= vol_thresh]
df = df[df["doh"] < time_thresh]
df = df[df["date of break"] < break_time_thresh]
df["diff"] = datetime.now() - df["date of break"]
pd.options.display.float_format = '{:.2f}'.format
df.sort_values(by='Volume', ascending=False).head(20)

Unnamed: 0,Old Close,Old 52wk High,Old Percent from 52,Volume,doh,Close,Prct252,high52,date of break,diff
MARA,15.67,18.88,17.0,29030000.0,2022-08-11,,,19.88,2023-07-14,10 days 12:38:52.331717
SNAP,11.68,12.94,9.74,20912000.0,2022-09-12,,,13.89,2023-07-13,11 days 12:38:52.331717
AUR,2.8,3.27,14.37,14424950.0,2022-08-11,,,3.54,2023-07-12,12 days 12:38:52.331717
BWA,44.95,45.02,0.15,1709850.0,2023-03-03,,,47.05,2023-07-13,11 days 12:38:52.331717
ROVR,4.84,5.26,7.98,1527550.0,2022-11-08,,,5.29,2023-07-12,12 days 12:38:52.331717
DFAE,23.42,24.36,3.86,1458350.0,2023-01-26,,,24.41,2023-07-13,11 days 12:38:52.331717
HLT,148.32,152.89,2.99,1438900.0,2023-02-15,,,155.02,2023-07-11,13 days 12:38:52.331717
INMD,38.07,41.84,9.01,1278750.0,2023-02-16,,,46.46,2023-07-13,11 days 12:38:52.331717
VBLT,0.26,0.31,15.87,1103500.0,2022-07-25,,,0.32,2023-07-13,11 days 12:38:52.331717
MGNI,13.3,14.21,6.4,971050.0,2023-02-22,,,15.73,2023-07-13,11 days 12:38:52.331717


In [21]:
data_cop["Close"]["ZVZZT"]

Date
2022-07-25   NaN
2022-07-26   NaN
2022-07-27   NaN
2022-07-28   NaN
2022-07-29   NaN
              ..
2023-07-18   NaN
2023-07-19   NaN
2023-07-20   NaN
2023-07-21   NaN
2023-07-24   NaN
Name: ZVZZT, Length: 251, dtype: float64

In [50]:
all_tickers = web.get_nasdaq_symbols()
all_tickers = all_tickers[~all_tickers["ETF"] & ~all_tickers["Test Issue"]]
# all_tickers.loc["O"]
all_tickers

Unnamed: 0_level_0,Nasdaq Traded,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A,True,"Agilent Technologies, Inc. Common Stock",N,,False,100.0,False,,A,A,False
AA,True,Alcoa Corporation Common Stock,N,,False,100.0,False,,AA,AA,False
AAC,True,Ares Acquisition Corporation Class A Ordinary ...,N,,False,100.0,False,,AAC,AAC,False
AAC.U,True,"Ares Acquisition Corporation Units, each consi...",N,,False,100.0,False,,AAC.U,AAC=,False
AAC.W,True,Ares Acquisition Corporation Redeemable Warran...,N,,False,100.0,False,,AAC.WS,AAC+,False
...,...,...,...,...,...,...,...,...,...,...,...
ZVSA,True,"ZyVersa Therapeutics, Inc. - Common Stock",Q,G,False,100.0,False,D,,ZVSA,False
ZWS,True,Zurn Elkay Water Solutions Corporation Common ...,N,,False,100.0,False,,ZWS,ZWS,False
ZYME,True,Zymeworks Inc. - Common Stock,Q,Q,False,100.0,False,N,,ZYME,False
ZYNE,True,"Zynerba Pharmaceuticals, Inc. - Common Stock",Q,S,False,100.0,False,D,,ZYNE,False
