In [47]:
from datetime import datetime, timedelta
import pandas_datareader.data as web
import yfinance as yf
import pandas as pd
import numpy as np
import time

In [4]:
def get_tickers(file=r"Stock names.txt"):
    with open(file) as f:
        tickers = f.read().split("\n")
    return [ticker for ticker in tickers if "." not in ticker]

In [5]:
def get_all_tickers():
    # Get a list of all available tickers from Yahoo Finance
    all_tickers = web.get_nasdaq_symbols()
    tickers = all_tickers.index.tolist()
    return [str(ticker) for ticker in tickers if str(ticker).isalpha()]
    
    # with open('tickers.txt', 'w') as file:
    #     for ticker in tickers:
    #         file.write(ticker + '\n')
# get_all_tickers()[:10]

In [70]:
def get_info(tickers_list):
    df = yf.download(tickers_list, period="1y")

    highs = df['High'].max()
    closes = df['Close'].iloc[-1]
    pct_to_52 = ((highs - closes) / highs) * 100
    volume = df["Volume"].iloc[-1]
    date_of_high = df['High'].idxmax()
    
    new_df = pd.DataFrame({'Close': closes, '52wk High': highs, "Percent from 52": pct_to_52, "Volume": volume, "doh": date_of_high})

    return new_df

In [68]:
s = time.time()
df = get_info(get_all_tickers()[:400])
print(time.time() - s)
cop = df.copy()

[*********************100%***********************]  400 of 400 completed
62.71847081184387


In [69]:
df = df.dropna(subset=['Close'])
vol_thresh = 10000
df = df[df['Volume'] >= vol_thresh]
time_thresh = datetime.now() - timedelta(days=60)
df = df[df["doh"] < time_thresh]
df.sort_values(by='Percent from 52', ascending=True)

Unnamed: 0,Close,52wk High,Percent from 52,Volume,doh
AGNCN,25.549999,25.690001,0.544964,15800.0,2023-03-06
AIO,18.900000,19.110001,1.098906,110500.0,2022-08-15
ALIT,10.050000,10.185000,1.325481,3521800.0,2023-02-22
ACWV,98.709999,100.410004,1.693063,559300.0,2022-08-16
ADME,37.000999,37.700001,1.854115,101400.0,2022-08-16
...,...,...,...,...,...
AKAN,0.690000,15.500000,95.548387,98600.0,2022-08-09
AKLI,1.160000,37.581001,96.913334,41000.0,2022-08-22
ADTX,0.429000,28.490000,98.494209,71700.0,2022-09-14
AGFY,2.740000,416.000000,99.341346,53500.0,2022-10-17


In [93]:
def get_info_test(tickers_list):
    df = yf.download(tickers_list, period="1y")

    days_back = 12
    
    _highs = df['High'][:-days_back].max()
    _closes = df['Close'].iloc[-days_back]
    _pct_to_52 = ((_highs - _closes) / _highs) * 100
    _volume = df["Volume"].iloc[-days_back]
    _date_of_high = df['High'][:-days_back].idxmax()
    
    highs = df['High'].max()
    date_of_high = df['High'].idxmax()
    closes = df['Close'].iloc[-1]
    volume = df["Volume"].iloc[-1]
    pct_to_52 = ((highs - closes) / highs) * 100
    
    new_df = pd.DataFrame({'Old Close': _closes, 'Old 52wk High': _highs, "Old Percent from 52": _pct_to_52, "Old Volume": volume, "doh": _date_of_high, "Close": closes, "Prct252": pct_to_52, "high52": highs, "date of break": date_of_high})

    return new_df


In [116]:
s = time.time()
df = get_info_test(get_all_tickers()[:3000])
print(time.time() - s)
cop = df.copy()

[*********************100%***********************]  3000 of 3000 completed

24 Failed downloads:
- CSLMR: Period '1y' is invalid, must be one of ['1d', '5d']
- BLACR: Period '1y' is invalid, must be one of ['1d', '5d']
- BREZR: Period '1y' is invalid, must be one of ['1d', '5d']
- CBX: Period '1y' is invalid, must be one of ['1d', '5d']
- ASCBR: Period '1y' is invalid, must be one of ['1d', '5d']
- BRLIR: Period '1y' is invalid, must be one of ['1d', '5d']
- ADOCR: Period '1y' is invalid, must be one of ['1d', '5d']
- CLRCR: Period '1y' is invalid, must be one of ['1d', '5d']
- DMAQR: Period '1y' is invalid, must be one of ['1d', '5d']
- ARIZR: Period '1y' is invalid, must be one of ['1d', '5d']
- ATEST: Period '1y' is invalid, must be one of ['1d', '5d']
- ASCAR: Period '1y' is invalid, must be one of ['1d', '5d']
- BLEUR: Period '1y' is invalid, must be one of ['1d', '5d']
- AQUNR: Period '1y' is invalid, must be one of ['1d', '5d']
- AIBBR: Period '1y' is invalid, must be one of ['1

In [119]:
df = cop.copy()
time_thresh = datetime.now() - timedelta(days=120)
break_time_thresh = datetime.now() - timedelta(days=7)  # How many days ago it broke
vol_thresh = 10000

df = df[df["Old 52wk High"] != df["high52"]]
df = df[df['Old Volume'] >= vol_thresh]
df = df[df["doh"] < time_thresh]
df = df[df["date of break"] < break_time_thresh]
df["diff"] = datetime.now() - df["date of break"]
df

Unnamed: 0,Old Close,Old 52wk High,Old Percent from 52,Old Volume,doh,Close,Prct252,high52,date of break,diff
ABG,236.630005,253.669998,6.717386,286100.0,2023-02-02,232.369995,9.368547,256.390015,2023-07-11,13 days 00:12:13.435096
ABOS,5.05,10.97,53.965359,779500.0,2022-09-30,7.4,34.571178,11.31,2023-07-17,7 days 00:12:13.435096
AGNCM,22.450001,22.84,1.707528,20700.0,2023-02-08,22.4,1.969369,22.85,2023-07-17,7 days 00:12:13.435096
AIT,142.350006,149.419998,4.731624,129300.0,2023-02-02,143.339996,4.484581,150.070007,2023-07-12,12 days 00:12:13.435096
ALGN,330.920013,368.869995,10.288173,1568900.0,2023-02-02,337.369995,9.306701,371.98999,2023-07-17,7 days 00:12:13.435096
ALKT,15.74,17.709999,11.123656,215800.0,2023-02-02,16.33,7.844241,17.719999,2023-07-13,11 days 00:12:13.435096
AMAO,10.3,10.74,4.09683,301900.0,2023-03-22,10.925,2.715934,11.23,2023-07-14,10 days 00:12:13.435096
ARKF,20.59,21.73,5.246201,412100.0,2022-08-11,23.07,4.708796,24.209999,2023-07-14,10 days 00:12:13.435096
AUR,2.765,3.27,15.443421,12907200.0,2022-08-11,2.72,23.16384,3.54,2023-07-12,12 days 00:12:13.435096
BETZ,16.999001,17.73,4.12295,19900.0,2022-08-16,18.535,1.983078,18.91,2023-07-14,10 days 00:12:13.435096
