In [1]:
import pandas as pd
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from sharadar.pipeline.factors import Exchange, Sector, IsDomesticCommonStock, MarketCap, Fundamentals, EV
from sharadar.pipeline.engine import symbol, symbols, make_pipeline_engine
from zipline.pipeline.filters import StaticAssets
import time
import datetime
from zipline.pipeline.factors import AverageDollarVolume
import os
from sharadar.util.output_dir import get_cache_dir as cache_dir
from sharadar.util.output_dir import get_data_dir as output_dir
from sharadar.pipeline.universes import UniverseReader
from sharadar.pipeline.engine import symbol, symbols, load_sharadar_bundle
import numpy as np
from scipy import stats
from zipline.pipeline.hooks.progress import ProgressHooks

In [2]:
spe = make_pipeline_engine()
pipe_date = pd.to_datetime('2024-01-05', utc=False)
pipe_end = pd.to_datetime('2024-07-08', utc=False)

In [3]:
class NullProgressPublisher(object):

    def publish(self, model):
        pass
    
no_hooks = [ProgressHooks.with_static_publisher(NullProgressPublisher())]

In [4]:
pipe_columns = {
    'Close': USEquityPricing.close.latest,
    'sector': Sector(),
    'adv': AverageDollarVolume(window_length=200),
    'mkt_cap': MarketCap()
}

In [5]:
def StocksUS():
    return (
        (USEquityPricing.close.latest > 3) &
        Exchange().element_of(['NYSE', 'NASDAQ', 'NYSEMKT']) &
        (Sector().notnull()) &
        (~Sector().element_of(['Financial Services', 'Real Estate'])) &
        (IsDomesticCommonStock().eq(1)) &
        (Fundamentals(field='revenue_arq') > 0) &
        (Fundamentals(field='assets_arq') > 0) &
        (Fundamentals(field='equity_arq') > 0) &
        (EV() > 0)
    )

In [6]:
pipe = Pipeline(columns=pipe_columns, screen = StocksUS())

stocks_us = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_us.shape

[2024-07-15 11:24:13] INFO: Save root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-15 11:25:22] INFO: save term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy to cache
[2024-07-15 11:25:22] INFO: save term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_sector.npy to cache
[2024-07-15 11:25:22] INFO: save term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_adv.npy to cache
[2024-07-15 11:25:22] INFO: save term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_mkt_cap.npy to cache
[2024-07-15 11:25:22] INFO: save term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_screen_03cf8d8510cb40908d8f92e9883518af.npy to cache
[2024-07-15 11:25:22] INFO: Save root mask file: root-2024-06-28_2024-07-08_XNYS_US_199.pkl
[2024-07-15 11:25:28] INFO: save term-2023-09-13_2024-07-08_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy to cache
[2024-07-15 11:25:28] INFO: save term-2023-09-13_2024-07-08_screen_

(273197, 4)

In [7]:
min_percentile_mc = stats.percentileofscore(stocks_us['mkt_cap'], 350e6)
min_percentile_adv = stats.percentileofscore(stocks_us['adv'], 2.5e6)

min_percentile_mc, min_percentile_adv

(18.51429554497304, 20.45483661972862)

In [8]:
def TradableStocksUS_Var():
    return (
        (StocksUS()) &
        (AverageDollarVolume(window_length=200).percentile_between(min_percentile_adv, 100.0, mask=StocksUS())) &
        (MarketCap().percentile_between(min_percentile_mc, 100.0, mask=StocksUS()))
    )

pipe = Pipeline(columns=pipe_columns, screen = TradableStocksUS_Var())

stocks_tradable_var = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_tradable_var.shape

[2024-07-15 11:25:28] INFO: Load root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_sector.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_adv.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_mkt_cap.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_screen_03cf8d8510cb40908d8f92e9883518af.npy from cache
[2024-07-15 11:25:28] INFO: Load root mask file: root-2024-06-28_2024-07-08_XNYS_US_199.pkl
[2024-07-15 11:25:28] INFO: load term-2023-09-13_2024-07-08_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-09-13_2024-0

(273197, 4)

In [9]:
def TradableStocksUS_Fix():
    return (
        (StocksUS()) &
        (AverageDollarVolume(window_length=200) > 2.5e6) &
        (MarketCap() > 350e6)
    )

pipe = Pipeline(columns=pipe_columns, screen = TradableStocksUS_Fix())

stocks_tradable_fix = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_tradable_fix.shape

[2024-07-15 11:25:28] INFO: Load root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_sector.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_adv.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_mkt_cap.npy from cache
[2024-07-15 11:25:28] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_screen_03cf8d8510cb40908d8f92e9883518af.npy from cache
[2024-07-15 11:25:28] INFO: Load root mask file: root-2024-06-28_2024-07-08_XNYS_US_199.pkl
[2024-07-15 11:25:29] INFO: load term-2023-09-13_2024-07-08_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy from cache
[2024-07-15 11:25:29] INFO: load term-2023-09-13_2024-0

(273197, 4)

In [10]:
min_percentile = 20
def TradableStocksUS_Var_20():
    return (
        (StocksUS()) &
        (AverageDollarVolume(window_length=200).percentile_between(min_percentile, 100, mask=StocksUS())) &
        (MarketCap().percentile_between(min_percentile, 100, mask=StocksUS()))
    )

pipe = Pipeline(columns=pipe_columns, screen = TradableStocksUS_Var_20())

stocks_tradable_var20 = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_tradable_var20.shape

[2024-07-15 11:25:29] INFO: Load root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-15 11:25:29] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy from cache
[2024-07-15 11:25:29] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_sector.npy from cache
[2024-07-15 11:25:29] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_adv.npy from cache
[2024-07-15 11:25:29] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_mkt_cap.npy from cache
[2024-07-15 11:25:29] INFO: load term-2023-03-22_2024-06-27_screen_03cf8d8510cb40908d8f92e9883518af_screen_03cf8d8510cb40908d8f92e9883518af.npy from cache
[2024-07-15 11:25:29] INFO: Load root mask file: root-2024-06-28_2024-07-08_XNYS_US_199.pkl
[2024-07-15 11:25:29] INFO: load term-2023-09-13_2024-07-08_screen_03cf8d8510cb40908d8f92e9883518af_Close.npy from cache
[2024-07-15 11:25:29] INFO: load term-2023-09-13_2024-0

(273197, 4)

In [11]:
stocks_tradable_var20

Unnamed: 0,Unnamed: 1,Close,sector,adv,mkt_cap
2024-01-05,Equity(105149 [FNGR]),3.66,Communication Services,3.302633e+06,1.955000e+08
2024-01-05,Equity(108216 [AUID]),10.16,Technology,1.059212e+05,7.640000e+07
2024-01-05,Equity(108271 [MAMA]),4.70,Consumer Defensive,1.060988e+06,1.770000e+08
2024-01-05,Equity(108504 [NE]),47.39,Energy,5.865292e+07,6.738200e+09
2024-01-05,Equity(108805 [OPXS]),6.52,Industrials,7.532210e+04,4.290000e+07
...,...,...,...,...,...
2024-07-08,Equity(641912 [WAY]),21.52,Healthcare,4.491500e+06,3.536500e+09
2024-07-08,Equity(641966 [GRAL]),15.20,Healthcare,2.664991e+06,4.580000e+08
2024-07-08,Equity(641986 [WBTN]),19.65,Technology,1.460816e+06,2.639200e+09
2024-07-08,Equity(641989 [LSH]),3.90,Industrials,2.541876e+04,2.680000e+07


In [12]:
stocks_tradable_var20.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 273197 entries, (Timestamp('2024-01-05 00:00:00'), Equity(105149 [FNGR])) to (Timestamp('2024-07-08 00:00:00'), Equity(641994 [LB]))
Data columns (total 4 columns):
 #   Column   Non-Null Count   Dtype   
---  ------   --------------   -----   
 0   Close    273197 non-null  float64 
 1   sector   273197 non-null  category
 2   adv      273197 non-null  float64 
 3   mkt_cap  273197 non-null  float64 
dtypes: category(1), float64(3)
memory usage: 7.9+ MB
