In [1]:
from datetime import datetime, timedelta
from IPython.display import clear_output
import pandas_datareader.data as web
import yfinance as yf
import pandas as pd
import numpy as np
import pyautogui
import time

pd.options.display.float_format = '{:,.2f}'.format

# Downloads tickers and formatting data

### Fetching tickers from nasdaq and filtering out ETFs are stocks that we have a problem getting their prices

In [2]:
def get_all_tickers(etf=False):
    # Get a list of all available tickers from Yahoo Finance
    all_tickers = web.get_nasdaq_symbols()
    all_tickers = all_tickers[~all_tickers["Test Issue"]]  # Filter out stocks that we can't fetch their data for some reason
    
    if not etf:
        all_tickers = all_tickers[~all_tickers["ETF"]]  # Filter out ETFs
    tickers = all_tickers.index.tolist()
    return [str(ticker) for ticker in tickers if str(ticker).isalpha()]

### Formatting DataFrame

In [3]:
def get_info(df):
    high = df['High'].max()
    close = df['Close'].iloc[-1]
    pct_to_52 = ((high - close) / high) * 100
    volume = df["Volume"].iloc[-1]
    date_of_high = df['High'].idxmax()
    
    new_df = pd.DataFrame({'Close': close, '52wk High': high, "Percent from 52": pct_to_52, "Volume": volume, "doh": date_of_high, "Volume$": volume * close})

    return new_df

### Downloads data

In [4]:
def get_data(tickers_list):
    return yf.download(tickers_list, period="1y")

In [20]:
s = time.time()
data = get_data(get_all_tickers())
data_cop = data.copy()
print(time.time() - s)

[*********************100%***********************]  7400 of 7400 completed

54 Failed downloads:
- PLTNR: Period '1y' is invalid, must be one of ['1d', '5d']
- WINVR: Period '1y' is invalid, must be one of ['1d', '5d']
- TENKR: Period '1y' is invalid, must be one of ['1d', '5d']
- ASCBR: Period '1y' is invalid, must be one of ['1d', '5d']
- ESHAR: Period '1y' is invalid, must be one of ['1d', '5d']
- ENERR: Period '1y' is invalid, must be one of ['1d', '5d']
- DMAQR: Period '1y' is invalid, must be one of ['1d', '5d']
- EMP: No data found for this date range, symbol may be delisted
- MSSAR: Period '1y' is invalid, must be one of ['1d', '5d']
- ATMCR: Period '1y' is invalid, must be one of ['1d', '5d']
- RWODR: Period '1y' is invalid, must be one of ['1d', '5d']
- KYCHR: Period '1y' is invalid, must be one of ['1d', '5d']
- DISTR: Period '1y' is invalid, must be one of ['1d', '5d']
- NOVVR: Period '1y' is invalid, must be one of ['1d', '5d']
- FEXDR: Period '1y' is invalid, must be one 

In [24]:
df = get_info(data)
cop = df.copy()
df

Unnamed: 0,Close,52wk High,Percent from 52,Volume,doh,Volume$
A,119.75,160.26,25.28,296244.00,2022-12-13,35475219.00
AA,28.02,58.24,51.89,2084941.00,2022-08-26,58418170.77
AAC,10.68,10.69,0.09,1550.00,2023-08-24,16554.00
AACG,,2.64,,,2022-08-25,
AACI,,12.20,,,2023-01-30,
...,...,...,...,...,...,...
ZVSA,0.15,25.00,99.40,80598.00,2022-12-12,12170.30
ZWS,28.79,30.87,6.74,258308.00,2023-07-26,7436687.56
ZYME,7.52,10.80,30.37,67833.00,2023-01-19,510104.16
ZYNE,1.33,1.38,3.62,524834.00,2023-08-14,698029.24


### Filtering out stocks that don't match 

In [26]:
df = cop.copy()
df = df.dropna(subset=['Close'])
vol_thresh = 10000
vold_thresh = 500000
df = df[df['Volume'] >= vol_thresh]
df = df[df['Volume$'] >= vold_thresh]
time_thresh1 = datetime.now() - timedelta(days=75)  # Hit the high between 120 and 200 days ago (3 - 7 months)
time_thresh2 = datetime.now() - timedelta(days=200)  
df = df[(df["doh"] < time_thresh1) & (df["doh"] > time_thresh2)]
df.sort_values(by='Percent from 52', ascending=True).head(15)

Unnamed: 0,Close,52wk High,Percent from 52,Volume,doh,Volume$
ARGO,29.77,30.13,1.19,21177.0,2023-04-06,630439.3
NEWR,84.89,86.0,1.29,287800.0,2023-05-17,24431341.82
CMCA,10.7,10.85,1.38,50780.0,2023-02-17,543345.99
CHKP,133.84,135.93,1.54,102250.0,2023-04-12,13685139.63
ALCC,10.38,10.77,3.67,71535.0,2023-05-31,742175.62
SGEN,199.42,207.17,3.74,287606.0,2023-04-13,57354387.99
INVA,13.22,13.9,4.89,66739.0,2023-05-22,882289.6
VVR,3.93,4.14,4.95,228137.0,2023-03-08,897719.08
CAE,23.11,24.32,4.97,32371.0,2023-04-12,748126.18
ENSG,97.13,102.26,5.02,13659.0,2023-04-25,1326698.63


# Testing out (manually backtesting)

In [32]:
def get_info_test(df):        
    days_back = 12
    
    _highs = df['High'][:-days_back].max()
    _closes = df['Close'].iloc[-days_back]
    _pct_to_52 = ((_highs - _closes) / _highs) * 100
    # _volume = np.nanmean(df["Volume"].iloc[-days_back])
    _date_of_high = df['High'][:-days_back].idxmax()
    
    highs = df['High'].max()
    date_of_high = df['High'].idxmax()
    closes = df['Close'].iloc[-1]
    volume = df["Volume"].iloc[-3:].mean()  # Calculates the average volume in the last 3 days
    pct_to_52 = ((highs - closes) / highs) * 100
    
    new_df = pd.DataFrame({'Old Close': _closes, 'Old 52wk High': _highs, "Old Percent from 52": _pct_to_52, "Volume": volume, "doh": _date_of_high, "Close": closes, "Prct252": pct_to_52, "high52": highs, "date of break": date_of_high})

    return new_df

In [33]:
df_test = get_info_test(data)
cop_test = df_test.copy()
cop_test

Unnamed: 0,Old Close,Old 52wk High,Old Percent from 52,Volume,doh,Close,Prct252,high52,date of break
A,127.62,160.26,20.37,1333326.33,2022-12-13,126.87,20.83,160.26,2022-12-13
AA,33.38,58.24,42.68,3132267.33,2022-08-26,33.80,41.96,58.24,2022-08-26
AAC,10.56,10.60,0.38,21163.33,2023-07-18,10.65,0.05,10.65,2023-07-31
AACG,1.39,2.64,47.35,2835.00,2022-08-18,1.32,50.00,2.64,2022-08-18
AACI,10.51,12.20,13.85,8276.50,2023-01-30,,,12.20,2023-01-30
...,...,...,...,...,...,...,...,...,...
ZVSA,0.20,25.00,99.21,1000905.00,2022-12-12,0.17,99.33,25.00,2022-12-12
ZWS,27.33,31.77,13.98,590255.00,2022-08-18,30.21,4.91,31.77,2022-08-18
ZYME,7.87,10.80,27.13,923709.67,2023-01-19,7.16,33.66,10.80,2023-01-19
ZYNE,0.36,1.39,74.10,96968.00,2022-08-08,0.35,74.97,1.39,2022-08-08


### Filtering and sorting

In [7]:
df_test = cop_test.copy()
time_thresh = datetime.now() - timedelta(days=120)
break_time_thresh = datetime.now() - timedelta(days=2)  # How many days ago it broke
vol_thresh = 10000
# print(break_time_thresh)
df_test = df_test[df_test["Old 52wk High"] != df_test["high52"]]
df_test = df_test[df_test["doh"] < time_thresh]
df_test = df_test[df_test["date of break"] > break_time_thresh]
df_test["diff"] = datetime.now() - df_test["date of break"]
pd.options.display.float_format = '{:.2f}'.format
df_test.sort_values(by='Volume', ascending=False).head(20)

NameError: name 'cop_test' is not defined

# Random shit

In [9]:
all_tickers = web.get_nasdaq_symbols()
all_tickers = all_tickers[~all_tickers["ETF"] & ~all_tickers["Test Issue"]]
# all_tickers.loc["O"]
all_tickers

Unnamed: 0_level_0,Nasdaq Traded,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A,True,"Agilent Technologies, Inc. Common Stock",N,,False,100.00,False,,A,A,False
AA,True,Alcoa Corporation Common Stock,N,,False,100.00,False,,AA,AA,False
AAC,True,Ares Acquisition Corporation Class A Ordinary ...,N,,False,100.00,False,,AAC,AAC,False
AAC.U,True,"Ares Acquisition Corporation Units, each consi...",N,,False,100.00,False,,AAC.U,AAC=,False
AAC.W,True,Ares Acquisition Corporation Redeemable Warran...,N,,False,100.00,False,,AAC.WS,AAC+,False
...,...,...,...,...,...,...,...,...,...,...,...
ZVSA,True,"ZyVersa Therapeutics, Inc. - Common Stock",Q,G,False,100.00,False,D,,ZVSA,False
ZWS,True,Zurn Elkay Water Solutions Corporation Common ...,N,,False,100.00,False,,ZWS,ZWS,False
ZYME,True,Zymeworks Inc. - Common Stock,Q,Q,False,100.00,False,N,,ZYME,False
ZYNE,True,"Zynerba Pharmaceuticals, Inc. - Common Stock",Q,S,False,100.00,False,D,,ZYNE,False


# Import to TradingView using pyautgui

In [6]:
def get_fit_tickers(df, num):
    return df.index[:num].tolist()


def add_to_tv(df):
    # Click plus (1773, 164)
    tickers = get_fit_tickers(df, 100)
    print(tickers)
    pyautogui.click(1773, 164)
    for ticker in tickers:
        pyautogui.write(ticker)
        pyautogui.press("enter")

# Finding stocks that broke 52wk high yesterday

### First DataFrame that contains necessary information

In [7]:
def parse_df(df, old_high=2, high_by="High"):
    # old_high- How many days ago it broke high (x or less)
    # high_by- by which OHLC to determine d
    high = df['High'].max()  # 52 wk high
    close = df['Close'].iloc[-1]  # Close of last candle
    pct_to_52 = ((high - close) / high) * 100  # Percent change to 52wk high
    volume = df["Volume"].iloc[-1]  # Volume of last candle
    old_doh = df['High'][:-old_high].idxmax()  # 52wk high before 2 days
    doh = df['High'].idxmax()  # 52wk high today
    new_df = pd.DataFrame({'Close': close, '52wk High': high, "% to 52wk": pct_to_52, "Volume": volume, "Date of high": doh, "Old date of high": old_doh, "Volume$": volume * close})

    return new_df

### Filtering and sorting DataFrame

In [22]:
def filter_sort_df(df):
    df = df.dropna(subset=['Close'])

    vol_thresh = 10000  # Volume threshold
    vold_thresh = 500000  # Volume in dollar threshold, helps filtering out stocks that don't really have a high volume but due to low price they have a high volume
    time_thresh1 = datetime.now() - timedelta(days=75)  # Hit the high between 120 and 200 days ago (3 - 7 months)
    time_thresh2 = datetime.now() - timedelta(days=200)  

    df = df[df['Volume'] >= vol_thresh]
    df = df[df['Volume$'] >= vold_thresh]
    df = df[(df["Old date of high"] < time_thresh1) & (df["Old date of high"] > time_thresh2)]  # Old doh (last support) is between 90 to 200 days'
    df = df[df["Old date of high"] != df["Date of high"]]
    
    return df.sort_values(by='% to 52wk', ascending=True)

In [23]:
full_df = parse_df(data, 3)
cop = full_df.copy()

df = filter_sort_df(cop)
df

Unnamed: 0,Close,52wk High,% to 52wk,Volume,Date of high,Old date of high,Volume$
OCS,14.28,14.35,0.49,39062.0,2023-08-25,2023-03-01,557805.35
MREO,1.49,1.54,3.32,409813.0,2023-08-24,2023-06-15,610129.61
SPLK,112.06,116.18,3.55,1593521.0,2023-08-25,2023-06-15,178565983.83
OSIS,134.57,139.9,3.81,38586.0,2023-08-24,2023-06-12,5192518.3


In [None]:
# add_to_tv(df)

In [14]:
cop.loc["URI"]

Close                            449.41
52wk High                        492.22
% to 52wk                          8.70
Volume                        24,350.00
Date of high        2023-08-07 00:00:00
Old date of high    2023-08-07 00:00:00
Volume$                   10,943,133.59
Name: URI, dtype: object

# Finding low 52

In [25]:
def parse_df_low(df, old_low=2, low_by="Low"):
    # old_low- How many days ago it broke low (x or less)
    # low_by- by which OHLC to determine d
    low = df['Low'].min()  # 52 wk low
    close = df['Close'].iloc[-1]  # Close of last candle
    pct_to_52 = ((low - close) / low) * 100  # Percent change to 52wk low
    volume = df["Volume"].iloc[-1]  # Volume of last candle
    old_doh = df['Low'][:-old_low].idxmin()  # 52wk low before 2 days
    doh = df['Low'].idxmin()  # 52wk low today
    new_df = pd.DataFrame({'Close': close, '52wk Low': low, "% to 52wk": pct_to_52, "Volume": volume, "Date of low": doh, "Old date of low": old_doh, "Volume$": volume * close})

    return new_df

def filter_sort_df_low(df):
    df = df.dropna(subset=['Close'])

    vol_thresh = 10000  # Volume threshold
    vold_thresh = 500000  # Volume in dollar threshold, helps filtering out stocks that don't really have a low volume but due to low price they have a low volume
    time_thresh1 = datetime.now() - timedelta(days=75)  # Hit the low between 120 and 200 days ago (3 - 7 months)
    time_thresh2 = datetime.now() - timedelta(days=200)  

    df = df[df['Volume'] >= vol_thresh]
    df = df[df['Volume$'] >= vold_thresh]
    df = df[(df["Old date of low"] < time_thresh1) & (df["Old date of low"] > time_thresh2)]  # Old doh (last support) is between 90 to 200 days'
    df = df[df["Old date of low"] != df["Date of low"]]
    
    # return df.sort_values(by='% to 52wk', ascending=True)
    return df.sort_values(by='Date of low', ascending=True)

In [26]:
full_df = parse_df_low(data, 12)
cop = full_df.copy()

df = filter_sort_df_low(cop)
df

Unnamed: 0,Close,52wk Low,% to 52wk,Volume,Date of low,Old date of low,Volume$
ADTN,8.17,7.00,-16.68,309340.00,2023-08-07,2023-05-09,2526534.31
BNTX,123.35,95.50,-29.16,1124301.00,2023-08-07,2023-05-11,138682526.63
CRON,1.78,1.64,-8.54,957938.00,2023-08-08,2023-06-06,1705129.61
AGEN,1.47,1.18,-24.58,1743553.00,2023-08-08,2023-04-12,2563022.96
G,36.17,35.31,-2.44,662809.00,2023-08-08,2023-05-11,23973800.32
...,...,...,...,...,...,...,...
OCGN,0.40,0.40,-1.44,7020635.00,2023-08-22,2023-05-26,2827209.76
CUTR,11.06,10.54,-4.93,258079.00,2023-08-22,2023-05-12,2854353.85
ONTF,6.80,6.60,-3.11,240340.00,2023-08-22,2023-05-22,1635513.66
CTVA,49.28,49.24,-0.10,1799940.00,2023-08-22,2023-06-01,88710042.63
