# Universe for Sharadar Bundle

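How to create a universe with the Sharadar bundle: define pipeline screens, compare fixed and percentile-based liquidity filters, and write the resulting universes to the universes database.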


In [1]:
import pandas as pd
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from sharadar.pipeline.factors import Exchange, Sector, IsDomesticCommonStock, MarketCap, Fundamentals, EV
from sharadar.pipeline.engine import symbol, symbols, make_pipeline_engine
from zipline.pipeline.filters import StaticAssets
import time
import datetime
from zipline.pipeline.factors import AverageDollarVolume
import os
from sharadar.util.output_dir import get_cache_dir as cache_dir
from sharadar.util.output_dir import get_data_dir as output_dir
from sharadar.pipeline.universes import UniverseWriter, UniverseReader, NamedUniverse, TRADABLE_STOCKS_US
from sharadar.pipeline.engine import symbol, symbols, load_sharadar_bundle
import numpy as np
from scipy import stats
from zipline.pipeline.hooks.progress import ProgressHooks

# Universe for Sharadar Bundle

In [2]:
class NullProgressPublisher(object):
    """Progress publisher that ignores every update it is handed."""

    def publish(self, model):
        """Discard ``model`` without doing anything; always returns ``None``."""
        return None
# Hook list handed to run_pipeline: progress updates go to the no-op
# publisher defined in this cell, so nothing is rendered for them.
no_hooks = [
    ProgressHooks.with_static_publisher(NullProgressPublisher()),
]

In [8]:
# Pipeline engine plus the timezone-naive start/end dates of the evaluation
# window used by the screen comparison runs.
spe = make_pipeline_engine()
pipe_date, pipe_end = (
    pd.to_datetime(day, utc=False) for day in ('2024-01-05', '2024-07-10')
)

In [9]:
def StocksUS():
    """Build the base screen for US stocks.

    A security passes when all of the following hold:

    * the latest close price is above 3
    * its exchange is one of NYSE, NASDAQ or NYSEMKT
    * its sector is set and is neither 'Financial Services' nor 'Real Estate'
    * ``IsDomesticCommonStock`` equals 1
    * the ``revenue_arq``, ``assets_arq`` and ``equity_arq`` fundamentals are
      all greater than zero
    * ``EV`` is greater than zero

    Returns:
        The combined filter expression (all conditions joined with ``&``).
    """
    price_ok = USEquityPricing.close.latest > 3
    exchange_ok = Exchange().element_of(['NYSE', 'NASDAQ', 'NYSEMKT'])
    sector_known = Sector().notnull()
    sector_allowed = ~Sector().element_of(['Financial Services', 'Real Estate'])
    domestic_common = IsDomesticCommonStock().eq(1)
    positive_fundamentals = [
        Fundamentals(field=field_name) > 0
        for field_name in ('revenue_arq', 'assets_arq', 'equity_arq')
    ]
    positive_ev = EV() > 0

    conditions = [
        price_ok,
        exchange_ok,
        sector_known,
        sector_allowed,
        domestic_common,
        *positive_fundamentals,
        positive_ev,
    ]

    # Fold left to right so the conditions are and-ed in the listed order.
    combined = conditions[0]
    for condition in conditions[1:]:
        combined = combined & condition
    return combined

In [10]:
# Display the universe-name constant imported from sharadar.pipeline.universes.
TRADABLE_STOCKS_US

'tradable_stocks_us'

In [11]:
# Output columns shared by every comparison pipeline in this notebook:
# latest close, sector, 200-bar average dollar volume and market cap.
pipe_columns = dict(
    Close=USEquityPricing.close.latest,
    sector=Sector(),
    adv=AverageDollarVolume(window_length=200),
    mkt_cap=MarketCap(),
)

In [12]:
# Run the base StocksUS screen over the evaluation window (progress output
# silenced through no_hooks) and show the shape of the resulting frame.
pipe = Pipeline(
    screen=StocksUS(),
    columns=pipe_columns,
)

stocks_us = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_us.shape

[2024-07-28 20:58:33] INFO: Save root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-28 21:00:07] INFO: save term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy to cache
[2024-07-28 21:00:07] INFO: save term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_sector.npy to cache
[2024-07-28 21:00:07] INFO: save term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_adv.npy to cache
[2024-07-28 21:00:07] INFO: save term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_mkt_cap.npy to cache
[2024-07-28 21:00:07] INFO: save term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:00:08] INFO: Save root mask file: root-2024-06-28_2024-07-10_XNYS_US_199.pkl
[2024-07-28 21:00:16] INFO: save term-2023-09-13_2024-07-10_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy to cache
[2024-07-28 21:00:16] INFO: save term-2023-09-13_2024-07-10_screen_

(277475, 4)

In [13]:
# Percentile rank of a 350e6 market cap and of a 2.5e6 average dollar volume
# within the pooled (all dates together) stocks_us sample. These ranks are the
# lower bounds used by TradableStocksUS_Var.
min_percentile_mc, min_percentile_adv = (
    stats.percentileofscore(stocks_us[column], threshold)
    for column, threshold in (('mkt_cap', 350e6), ('adv', 2.5e6))
)

(min_percentile_mc, min_percentile_adv)


(18.513920172988556, 20.430309036850165)

In [14]:
def TradableStocksUS_Var():
    """Build the percentile-based tradable screen.

    Combines the base ``StocksUS`` screen with two ``percentile_between``
    filters, each given the base screen as its ``mask``:

    * 200-bar average dollar volume between ``min_percentile_adv`` and 100.0
    * market cap between ``min_percentile_mc`` and 100.0

    ``min_percentile_adv`` and ``min_percentile_mc`` are module-level values
    read when this function is called.

    Returns:
        The combined filter expression.
    """
    base = StocksUS()
    liquid_enough = AverageDollarVolume(window_length=200).percentile_between(
        min_percentile_adv, 100.0, mask=base
    )
    large_enough = MarketCap().percentile_between(
        min_percentile_mc, 100.0, mask=base
    )
    return base & liquid_enough & large_enough

# Run the percentile-based screen over the same window and show the shape of
# the resulting frame.
# NOTE(review): the recorded output shape is identical to the one recorded for
# the plain StocksUS run, and the log shows the terms being loaded from cache.
# Confirm the screen is actually applied and the cached result is not stale.
pipe = Pipeline(
    screen=TradableStocksUS_Var(),
    columns=pipe_columns,
)

stocks_tradable_var = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_tradable_var.shape

[2024-07-28 21:00:16] INFO: Load root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_sector.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_adv.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_mkt_cap.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy from cache
[2024-07-28 21:00:16] INFO: Load root mask file: root-2024-06-28_2024-07-10_XNYS_US_199.pkl
[2024-07-28 21:00:16] INFO: load term-2023-09-13_2024-07-10_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-09-13_2024-0

(277475, 4)

In [15]:
def TradableStocksUS_Fix():
    """Build the fixed-threshold tradable screen.

    Combines the base ``StocksUS`` screen with two absolute cut-offs:

    * 200-bar average dollar volume above 2.5e6
    * market cap above 350e6

    Returns:
        The combined filter expression.
    """
    base = StocksUS()
    liquid_enough = AverageDollarVolume(window_length=200) > 2.5e6
    large_enough = MarketCap() > 350e6
    return base & liquid_enough & large_enough

# Run the fixed-threshold screen over the same window and show the shape of
# the resulting frame.
# NOTE(review): the recorded output shape is identical to the one recorded for
# the plain StocksUS run, and the log shows the terms being loaded from cache.
# Confirm the screen is actually applied and the cached result is not stale.
pipe = Pipeline(
    screen=TradableStocksUS_Fix(),
    columns=pipe_columns,
)

stocks_tradable_fix = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_tradable_fix.shape

[2024-07-28 21:00:16] INFO: Load root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_sector.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_adv.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_mkt_cap.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy from cache
[2024-07-28 21:00:16] INFO: Load root mask file: root-2024-06-28_2024-07-10_XNYS_US_199.pkl
[2024-07-28 21:00:16] INFO: load term-2023-09-13_2024-07-10_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-09-13_2024-0

(277475, 4)

In [16]:
# Number of distinct assets (second index level) in the fixed-threshold result.
stocks_tradable_fix.index.get_level_values(1).nunique()

2455

In [17]:
# Lower percentile bound shared by both filters of TradableStocksUS_Var_20.
min_percentile = 20


def TradableStocksUS_Var_20():
    """Build the percentile-based tradable screen with one common lower bound.

    Combines the base ``StocksUS`` screen with two ``percentile_between``
    filters, each given the base screen as its ``mask`` and each spanning
    ``min_percentile`` to 100:

    * 200-bar average dollar volume
    * market cap

    ``min_percentile`` is the module-level value, read when this function is
    called.

    Returns:
        The combined filter expression.
    """
    base = StocksUS()
    liquid_enough = AverageDollarVolume(window_length=200).percentile_between(
        min_percentile, 100, mask=base
    )
    large_enough = MarketCap().percentile_between(
        min_percentile, 100, mask=base
    )
    return base & liquid_enough & large_enough

# Run the 20th-percentile screen over the same window and show the shape of
# the resulting frame.
# NOTE(review): the recorded output shape is identical to the one recorded for
# the plain StocksUS run, and the log shows the terms being loaded from cache.
# Confirm the screen is actually applied and the cached result is not stale.
pipe = Pipeline(
    screen=TradableStocksUS_Var_20(),
    columns=pipe_columns,
)

stocks_tradable_var20 = spe.run_pipeline(pipe, pipe_date, pipe_end, hooks=no_hooks)
stocks_tradable_var20.shape

[2024-07-28 21:00:16] INFO: Load root mask file: root-2024-01-05_2024-06-27_XNYS_US_199.pkl
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_sector.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_adv.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_mkt_cap.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-03-22_2024-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy from cache
[2024-07-28 21:00:16] INFO: Load root mask file: root-2024-06-28_2024-07-10_XNYS_US_199.pkl
[2024-07-28 21:00:16] INFO: load term-2023-09-13_2024-07-10_screen_90550a3cdade4aae99d6d6f4aab4ed77_Close.npy from cache
[2024-07-28 21:00:16] INFO: load term-2023-09-13_2024-0

(277475, 4)

In [18]:
# Display the result of the 20th-percentile screen run.
# NOTE(review): the recorded rows include market caps well below the 350e6
# level that the earlier percentile cell placed near the 18.5th percentile —
# verify that the screen was really applied to this frame.
stocks_tradable_var20

Unnamed: 0,Unnamed: 1,Close,sector,adv,mkt_cap
2024-01-05,Equity(105149 [FNGR]),3.660,Communication Services,3.302633e+06,1.955000e+08
2024-01-05,Equity(108216 [AUID]),10.160,Technology,1.059212e+05,7.640000e+07
2024-01-05,Equity(108271 [MAMA]),4.700,Consumer Defensive,1.060988e+06,1.770000e+08
2024-01-05,Equity(108504 [NE]),47.390,Energy,5.865292e+07,6.738200e+09
2024-01-05,Equity(108805 [OPXS]),6.520,Industrials,7.532210e+04,4.290000e+07
...,...,...,...,...,...
2024-07-10,Equity(641966 [GRAL]),14.015,Healthcare,2.952446e+06,4.316000e+08
2024-07-10,Equity(641986 [WBTN]),20.340,Technology,1.573965e+06,2.542700e+09
2024-07-10,Equity(641989 [LSH]),3.760,Industrials,2.989175e+04,2.720000e+07
2024-07-10,Equity(641994 [LB]),27.320,Energy,8.223840e+05,4.053000e+08


In [19]:
# Summarize index range, column dtypes, null counts and memory use of the frame.
stocks_tradable_var20.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 277475 entries, (Timestamp('2024-01-05 00:00:00'), Equity(105149 [FNGR])) to (Timestamp('2024-07-10 00:00:00'), Equity(642026 [SW]))
Data columns (total 4 columns):
 #   Column   Non-Null Count   Dtype   
---  ------   --------------   -----   
 0   Close    277475 non-null  float64 
 1   sector   277475 non-null  category
 2   adv      277475 non-null  float64 
 3   mkt_cap  277475 non-null  float64 
dtypes: category(1), float64(3)
memory usage: 8.3+ MB


### Write Universe


In [20]:
# Timezone-naive start/end dates of the window for which the universes are
# written. This rebinds the pipe_date / pipe_end names used by earlier cells.
pipe_date, pipe_end = (
    pd.to_datetime(day, utc=False) for day in ('2019-01-07', '2024-07-18')
)

In [21]:
# Write the base StocksUS screen as universe TRADABLE_STOCKS_US, then read it
# back: first directly by date, then through a NamedUniverse pipeline screen.

universe_name = TRADABLE_STOCKS_US
screen = StocksUS()
universes_db_path = os.path.join(output_dir(), "universes.sqlite")
UniverseWriter(universes_db_path).write(universe_name, screen, pipe_date, pipe_end)

# Print the stored sids for two lookup dates.
# NOTE(review): '2002-02-07' is earlier than every pipe_date assigned in this
# notebook — presumably a deliberate out-of-range probe; confirm.
for lookup_date in ('2024-02-07', '2002-02-07'):
    sids = UniverseReader(universes_db_path).get_sid(universe_name, lookup_date)
    print(sids)

# Fresh engine, then a pipeline whose screen is the universe just written.
spe = make_pipeline_engine()
pipe = Pipeline(
    columns={'Close': USEquityPricing.close.latest},
    screen=NamedUniverse(TRADABLE_STOCKS_US),
)
stocks = spe.run_pipeline(pipe, pipe_date, pipe_end)
print(stocks)

[2024-07-28 21:00:16] INFO: Computing pipeline from 2019-01-07 00:00:00 to 2024-07-18 00:00:00...
[2024-07-28 21:00:16] INFO: Save root mask file: root-2019-01-07_2019-06-27_XNYS_US_0.pkl
Pipeline from 2019-01-07 to 2019-06-27
[2024-07-28 21:00:18] INFO: save term-2019-01-07_2019-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:00:18] INFO: Save root mask file: root-2019-06-28_2019-12-17_XNYS_US_0.pkl
Pipeline from 2019-06-28 to 2019-12-17
[2024-07-28 21:00:20] INFO: save term-2019-06-28_2019-12-17_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:00:20] INFO: Save root mask file: root-2019-12-18_2020-06-10_XNYS_US_0.pkl
Pipeline from 2019-12-18 to 2020-06-10
[2024-07-28 21:01:00] INFO: save term-2019-12-18_2020-06-10_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:01:00] INFO: Save root mask file: root-20

In [22]:
# Name under which the fixed-threshold universe is stored.
f"{TRADABLE_STOCKS_US}_fix"

'tradable_stocks_us_fix'

In [23]:
# Write the fixed-threshold screen as universe TRADABLE_STOCKS_US + '_fix',
# then read it back: first directly by date, then through a NamedUniverse
# pipeline screen.

universe_name = TRADABLE_STOCKS_US + '_fix'
screen = TradableStocksUS_Fix()
universes_db_path = os.path.join(output_dir(), "universes.sqlite")
UniverseWriter(universes_db_path).write(universe_name, screen, pipe_date, pipe_end)

# Print the stored sids for two lookup dates.
# NOTE(review): '2002-02-07' is earlier than every pipe_date assigned in this
# notebook — presumably a deliberate out-of-range probe; confirm.
for lookup_date in ('2024-02-07', '2002-02-07'):
    sids = UniverseReader(universes_db_path).get_sid(universe_name, lookup_date)
    print(sids)

# Fresh engine, then a pipeline whose screen is the universe just written.
spe = make_pipeline_engine()
pipe = Pipeline(
    columns={'Close': USEquityPricing.close.latest},
    screen=NamedUniverse(universe_name),
)
stocks = spe.run_pipeline(pipe, pipe_date, pipe_end)
print(stocks)

[2024-07-28 21:09:07] INFO: Computing pipeline from 2019-01-07 00:00:00 to 2024-07-18 00:00:00...
[2024-07-28 21:09:07] INFO: Save root mask file: root-2019-01-07_2019-06-27_XNYS_US_199.pkl
Pipeline from 2019-01-07 to 2019-06-27
[2024-07-28 21:09:13] INFO: save term-2018-03-22_2019-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:09:13] INFO: Save root mask file: root-2019-06-28_2019-12-17_XNYS_US_199.pkl
Pipeline from 2019-06-28 to 2019-12-17
[2024-07-28 21:09:18] INFO: save term-2018-09-12_2019-12-17_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:09:18] INFO: Save root mask file: root-2019-12-18_2020-06-10_XNYS_US_199.pkl
Pipeline from 2019-12-18 to 2020-06-10
[2024-07-28 21:09:23] INFO: save term-2019-03-07_2020-06-10_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:09:23] INFO: Save root mask file: r

In [24]:
# Name under which the 20th-percentile universe is stored.
f"{TRADABLE_STOCKS_US}_var20"

'tradable_stocks_us_var20'

In [25]:
# Write the 20th-percentile screen as universe TRADABLE_STOCKS_US + '_var20',
# then read it back: first directly by date, then through a NamedUniverse
# pipeline screen.
universe_name = TRADABLE_STOCKS_US + '_var20'
screen = TradableStocksUS_Var_20()
universes_db_path = os.path.join(output_dir(), "universes.sqlite")
UniverseWriter(universes_db_path).write(universe_name, screen, pipe_date, pipe_end)

# Print the stored sids for two lookup dates.
# NOTE(review): '2002-02-07' is earlier than every pipe_date assigned in this
# notebook — presumably a deliberate out-of-range probe; confirm.
for lookup_date in ('2024-02-07', '2002-02-07'):
    sids = UniverseReader(universes_db_path).get_sid(universe_name, lookup_date)
    print(sids)

# Fresh engine, then a pipeline whose screen is the universe just written
# (universe_name holds the same '_var20' name built above).
spe = make_pipeline_engine()
pipe = Pipeline(
    columns={'Close': USEquityPricing.close.latest},
    screen=NamedUniverse(universe_name),
)
stocks = spe.run_pipeline(pipe, pipe_date, pipe_end)
print(stocks)

[2024-07-28 21:11:34] INFO: Computing pipeline from 2019-01-07 00:00:00 to 2024-07-18 00:00:00...
[2024-07-28 21:11:34] INFO: Save root mask file: root-2019-01-07_2019-06-27_XNYS_US_199.pkl
Pipeline from 2019-01-07 to 2019-06-27
[2024-07-28 21:11:39] INFO: save term-2018-03-22_2019-06-27_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:11:39] INFO: Save root mask file: root-2019-06-28_2019-12-17_XNYS_US_199.pkl
Pipeline from 2019-06-28 to 2019-12-17
[2024-07-28 21:11:45] INFO: save term-2018-09-12_2019-12-17_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:11:45] INFO: Save root mask file: root-2019-12-18_2020-06-10_XNYS_US_199.pkl
Pipeline from 2019-12-18 to 2020-06-10
[2024-07-28 21:11:50] INFO: save term-2019-03-07_2020-06-10_screen_90550a3cdade4aae99d6d6f4aab4ed77_screen_90550a3cdade4aae99d6d6f4aab4ed77.npy to cache
[2024-07-28 21:11:50] INFO: Save root mask file: r

In [26]:
# Number of distinct assets (second index level) in the most recent `stocks` result.
stocks.index.get_level_values(1).nunique()

2870