In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

from sklearn.feature_selection import mutual_info_regression
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
nasdaq_100_tickers = ["AAPL", "MSFT", "AMZN", "GOOGL", "GOOG", "TSLA", "META", "NVDA", "PYPL", "NFLX",
    "ASML", "ADBE", "INTC", "CMCSA", "CSCO", "PEP", "AVGO", "TMUS", "COST",
    "TXN", "QCOM", "AMAT", "MU", "AMGN", "INTU", "ISRG", "CSX", "VRTX",
    "JD", "GILD", "BIDU", "MRVL", "REGN", "MDLZ", "ADSK", "ATVI", "BIIB", "ILMN",
    "LRCX", "ADP", "BKNG", "MELI", "KLAC", "NXPI", "MNST", "WDAY", "ROST",
    "KDP", "EA", "ALGN", "ADI", "IDXX", "DXCM", "XEL", "CTAS", "EXC", "MAR",
    "SNPS", "CDNS", "CPRT", "SGEN", "SPLK", "ORLY", "DLTR", "MTCH",
    "MCHP", "INCY", "PCAR", "CTSH", "FAST", "VRSK", "CHKP", "ANSS",
    "SWKS", "CDW", "TEAM", "WBA", "LULU", "PAYX",
    "VRSN", "AEP", "ZBRA", "TCOM", "NTES", "BMRN", "ULTA", "EXPE",
    "CSGP", "SIRI", "EBAY", "WDC"
    ]

end_date = '2022-12-31'
start_date = '2021-1-1'

nasdaq_20_tickers=nasdaq_100_tickers[:20]

## cross section

In [3]:
def rank(df):
    """Return the cross-sectional percentile rank

     Args:
         :param df: tickers in columns, sorted dates in rows.

     Returns:
         pd.DataFrame: the ranked values
     """
    return df.rank(axis=1, pct=True)

In [4]:
def scale(df):
    """
    Scaling time serie.
    :param df: a pandas DataFrame.
    :param k: scaling factor.
    :return: a pandas DataFrame rescaled df such that sum(abs(df)) = k
    """
    return df.div(df.abs().sum(axis=1), axis=0)

## operators

In [5]:
def log(df):
    return np.log1p(df)

In [6]:
def sign(df):
    return np.sign(df)

In [7]:
def power(df, exp):
    return df.pow(exp)

In [8]:
def WMA(x, timeperiod=7):
    weights = np.arange(1, timeperiod + 1)
    return np.dot(x, weights) / weights.sum()
    
    

## pandas implementation

In [9]:
def ts_lag(df: pd.DataFrame, t: int = 1) -> pd.DataFrame:
    """Return the lagged values t periods ago.

    Args:
        :param df: tickers in columns, sorted dates in rows.
        :param t: lag

    Returns:
        pd.DataFrame: the lagged values
    """
    return df.shift(t)

In [10]:
def ts_delta(df, period=1):
    """
    Wrapper function to estimate difference.
    :param df: a pandas DataFrame.
    :param period: the difference grade.
    :return: a pandas DataFrame with today’s value minus the value 'period' days ago.
    """
    return df.diff(period)

In [11]:
def ts_corr(x, y, window=10):
    """
    Wrapper function to estimate rolling correlations.
    :param x, y: pandas DataFrames.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).corr(y)

In [12]:
def ts_sum(df: pd.DataFrame, window: int = 10) -> pd.DataFrame:
    """Computes the rolling ts_sum for the given window size.

    Args:
        df (pd.DataFrame): tickers in columns, dates in rows.
        window      (int): size of rolling window.

    Returns:
        pd.DataFrame: the ts_sum over the last 'window' days.
    """
    return df.rolling(window).sum()

In [13]:
def ts_mean(df, window=10):
    """Computes the rolling mean for the given window size.

    Args:
        df (pd.DataFrame): tickers in columns, dates in rows.
        window      (int): size of rolling window.

    Returns:
        pd.DataFrame: the mean over the last 'window' days.
    """
    return df.rolling(window).mean()

In [14]:
def ts_weighted_mean(df, period=10):
    """
    Linear weighted moving average implementation.
    :param df: a pandas DataFrame.
    :param period: the LWMA period
    :return: a pandas DataFrame with the LWMA.
    """
    return (df.apply(lambda x: WMA(x, timeperiod=period)))

In [15]:
def ts_std(df, window=10):
    """
    Wrapper function to estimate rolling standard deviation.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return (df
            .rolling(window)
            .std())

In [16]:
def ts_rank(df, window=10):
    """
    Wrapper function to estimate rolling rank.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series rank over the past window days.
    """
    return (df
            .rolling(window)
            .apply(lambda x: x.rank().iloc[-1]))

In [17]:
def ts_product(df, window=10):
    """
    Wrapper function to estimate rolling ts_product.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series ts_product over the past 'window' days.
    """
    return (df
            .rolling(window)
            .apply(np.prod))

In [18]:
def ts_min(df, window=10):
    """
    Wrapper function to estimate rolling min.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).min()

In [19]:
def ts_max(df, window=10):
    """
    Wrapper function to estimate rolling min.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series max over the past 'window' days.
    """
    return df.rolling(window).max()

In [20]:
def ts_argmax(df, window=10):
    """
    Wrapper function to estimate which day ts_max(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that :)
    """
    return df.rolling(window).apply(np.argmax).add(1)

In [21]:
def ts_argmin(df, window=10):
    """
    Wrapper function to estimate which day ts_min(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that :)
    """
    return (df.rolling(window)
            .apply(np.argmin)
            .add(1))

In [22]:
def ts_cov(x, y, window=10):
    """
    Wrapper function to estimate rolling covariance.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).cov(y)

# evaluation

In [23]:
mi={}
ic={}

In [24]:
def get_mutual_info_score(returns, alpha, n=1000):
    df = pd.DataFrame({'y': returns, 'alpha': alpha}).dropna().sample(n=n)
    return mutual_info_regression(y=df.y, X=df[['alpha']])[0]

# data loading

In [25]:
## "AAPL"
ticker = "AAPL"
AAPL_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
AAPL_data.index = AAPL_data.index.strftime('%Y/%m/%d')
AAPL_data['returns']=AAPL_data['Close']-AAPL_data['Close'].shift(1)
AAPL_data['ret_fwd']=AAPL_data['Close'].pct_change().dropna()
#AAPL_data['returns'] = (AAPL_data['Close']-AAPL_data['Close'].shift(1)) / AAPL_data['Close'].shift(1)
AAPL_data=AAPL_data.dropna()
AAPL_data=pd.concat({'AAPL': AAPL_data}, names=['ticker'])

In [26]:
AAPL_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume,returns,ret_fwd
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,2021/01/05,128.889999,131.740005,128.429993,131.009995,128.736374,97664900,1.599991,0.012364
AAPL,2021/01/06,127.720001,131.050003,126.379997,126.599998,124.402931,155088000,-4.409996,-0.033662
AAPL,2021/01/07,128.360001,131.630005,127.860001,130.919998,128.647949,109578200,4.320000,0.034123
AAPL,2021/01/08,132.429993,132.630005,130.229996,132.050003,129.758362,105158200,1.130005,0.008631
AAPL,2021/01/11,129.190002,130.169998,128.500000,128.979996,126.741631,100384500,-3.070007,-0.023249
AAPL,...,...,...,...,...,...,...,...,...
AAPL,2022/12/23,130.919998,132.419998,129.639999,131.860001,131.127060,63814900,-0.369995,-0.002798
AAPL,2022/12/27,131.380005,131.410004,128.720001,130.029999,129.307236,69007800,-1.830002,-0.013878
AAPL,2022/12/28,129.669998,131.029999,125.870003,126.040001,125.339409,85438400,-3.989998,-0.030685
AAPL,2022/12/29,127.989998,130.479996,127.730003,129.610001,128.889572,75703700,3.570000,0.028324


In [27]:
#data=pd.DataFrame()
data=AAPL_data

In [28]:
for ticker in nasdaq_100_tickers[1:]:
    df = yf.download(ticker, start=start_date, end=end_date, progress=False)
    df.index = df.index.strftime('%Y/%m/%d')
    df['ret_fwd'] = df['Close'].pct_change().dropna()
    df['returns'] = df['Close']-df['Close'].shift(1)
    df=df.dropna()
    df=pd.concat({ticker: df}, names=['ticker'])
    data=pd.concat([data, df], axis=0)

In [29]:
data.returns

ticker  Date      
AAPL    2021/01/05    1.599991
        2021/01/06   -4.409996
        2021/01/07    4.320000
        2021/01/08    1.130005
        2021/01/11   -3.070007
                        ...   
WDC     2022/12/23   -0.049999
        2022/12/27    0.580000
        2022/12/28   -0.960001
        2022/12/29    1.360001
        2022/12/30   -0.020000
Name: returns, Length: 45682, dtype: float64

In [30]:
o = data.Open.unstack('ticker')
h = data.High.unstack('ticker')
l = data.Low.unstack('ticker')
c = data.Close.unstack('ticker')
v = data.Volume.unstack('ticker')
vwap = o.add(h).add(l).add(c).div(4)
adv20 = v.rolling(20).mean()
r = data.returns.unstack('ticker')

In [31]:
ret_fwd=data.ret_fwd
ret_fwd

ticker  Date      
AAPL    2021/01/05    0.012364
        2021/01/06   -0.033662
        2021/01/07    0.034123
        2021/01/08    0.008631
        2021/01/11   -0.023249
                        ...   
WDC     2022/12/23   -0.001632
        2022/12/27    0.018960
        2022/12/28   -0.030799
        2022/12/29    0.045018
        2022/12/30   -0.000634
Name: ret_fwd, Length: 45682, dtype: float64

In [32]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume,returns,ret_fwd
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,2021/01/05,128.889999,131.740005,128.429993,131.009995,128.736374,97664900,1.599991,0.012364
AAPL,2021/01/06,127.720001,131.050003,126.379997,126.599998,124.402931,155088000,-4.409996,-0.033662
AAPL,2021/01/07,128.360001,131.630005,127.860001,130.919998,128.647949,109578200,4.320000,0.034123
AAPL,2021/01/08,132.429993,132.630005,130.229996,132.050003,129.758362,105158200,1.130005,0.008631
AAPL,2021/01/11,129.190002,130.169998,128.500000,128.979996,126.741631,100384500,-3.070007,-0.023249
...,...,...,...,...,...,...,...,...,...
WDC,2022/12/23,30.629999,30.790001,30.230000,30.590000,30.590000,2558200,-0.049999,-0.001632
WDC,2022/12/27,30.410000,31.280001,29.790001,31.170000,31.170000,3411400,0.580000,0.018960
WDC,2022/12/28,31.120001,31.350000,30.170000,30.209999,30.209999,3000300,-0.960001,-0.030799
WDC,2022/12/29,30.559999,31.629999,30.430000,31.570000,31.570000,4329300,1.360001,0.045018


# alpha calculation

alpha 5

In [33]:
def alpha005(o, vwap, c):
    """(rank((open - ts_mean(vwap, 10))) * (-1 * abs(rank((close - vwap)))))"""
    return (rank(o.sub(ts_mean(vwap, 10)))
            .mul(rank(c.sub(vwap)).mul(-1).abs())
            .stack('ticker')
            .swaplevel())

In [34]:
a5=alpha005(o, vwap, c)
a5

ticker  Date      
AAPL    2021/01/19    0.103611
ADBE    2021/01/19    0.003260
ADI     2021/01/19    0.510083
ADP     2021/01/19    0.048303
ADSK    2021/01/19    0.066659
                        ...   
WBA     2022/12/30    0.086946
WDAY    2022/12/30    0.126796
WDC     2022/12/30    0.178964
XEL     2022/12/30    0.025722
ZBRA    2022/12/30    0.787948
Length: 44863, dtype: float64

In [35]:
mi[5] = get_mutual_info_score(ret_fwd, a5)
mi[5]

0.03470599599417845

In [36]:
def alpha007(c, v, adv20):
    """(adv20 < volume) 
        ? ((-ts_rank(abs(ts_delta(close, 7)), 60)) * sign(ts_delta(close, 7))) 
        : -1
    """
    
    delta7 = ts_delta(c, 7)
    return (-ts_rank(abs(delta7), 60)
            .mul(sign(delta7))
            .where(adv20<v, -1)
            .stack('ticker')
            .swaplevel())

In [37]:
a7=alpha007(c, v, adv20)
a7

ticker  Date      
AAPL    2021/01/05     1.0
ADBE    2021/01/05     1.0
ADI     2021/01/05     1.0
ADP     2021/01/05     1.0
ADSK    2021/01/05     1.0
                      ... 
WBA     2022/12/30    44.0
WDAY    2022/12/30     1.0
WDC     2022/12/30     1.0
XEL     2022/12/30     1.0
ZBRA    2022/12/30     1.0
Length: 44099, dtype: float64

In [38]:
mi[7] = get_mutual_info_score(ret_fwd, a7)
mi[7]

0.0639019429983474

alpha 8

In [39]:
def alpha008(o, r):
    """-rank(((ts_sum(open, 5) * ts_sum(returns, 5)) - 
        ts_lag((ts_sum(open, 5) * ts_sum(returns, 5)),10)))
    """
    return (-(rank(((ts_sum(o, 5) * ts_sum(r, 5)) -
                       ts_lag((ts_sum(o, 5) * ts_sum(r, 5)), 10))))
           .stack('ticker')
            .swaplevel())

In [40]:
a8=alpha008(o, r)
a8

ticker  Date      
AAPL    2021/01/26   -0.901099
ADBE    2021/01/26   -0.989011
ADI     2021/01/26   -0.241758
ADP     2021/01/26   -0.879121
ADSK    2021/01/26   -0.076923
                        ...   
WBA     2022/12/30   -0.175824
WDAY    2022/12/30   -0.032967
WDC     2022/12/30   -0.351648
XEL     2022/12/30   -0.098901
ZBRA    2022/12/30   -0.824176
Length: 44408, dtype: float64

In [41]:
mi[8] = get_mutual_info_score(ret_fwd, a8)
mi[8]

0.009167486035333017

alpha 10

In [42]:
def alpha010(c):
    """rank(((0 < ts_min(ts_delta(close, 1), 4)) 
        ? ts_delta(close, 1) 
        : ((ts_max(ts_delta(close, 1), 4) < 0)
            ? ts_delta(close, 1) 
            : (-1 * ts_delta(close, 1)))))
    """
    close_diff = ts_delta(c, 1)
    alpha = close_diff.where(ts_min(close_diff, 4) > 0,
                             close_diff.where(ts_min(close_diff, 4) > 0,
                                              -close_diff))

    return (rank(alpha)
            .stack('ticker')
            .swaplevel())

In [43]:
a10=alpha010(c)
mi[10] = get_mutual_info_score(ret_fwd, a10)
mi[10]

0.2584408043659434

alpha 11

In [44]:
def alpha011(c, vwap, v):
    """(rank(ts_max((vwap - close), 3)) + 
        rank(ts_min(vwap - close), 3)) * 
        rank(ts_delta(volume, 3))
        """
    return (rank(ts_max(vwap.sub(c), 3))
            .add(rank(ts_min(vwap.sub(c), 3)))
            .mul(rank(ts_delta(v, 3)))
            .stack('ticker')
            .swaplevel())

In [45]:
a11=alpha011(c, vwap, v)
mi[11] = get_mutual_info_score(ret_fwd, a11)
mi[11]

0.0031749410487362795

alpha 12

In [46]:
def alpha012(v, c):
    """(sign(ts_delta(volume, 1)) * 
            (-1 * ts_delta(close, 1)))
        """
    return (sign(ts_delta(v, 1)).mul(-ts_delta(c, 1))
            .stack('ticker')
            .swaplevel())

In [47]:
a12=alpha012(v, c)
mi[12] = get_mutual_info_score(ret_fwd, a12)
mi[12]

0.5240765798984515

alpha 13

In [48]:
def alpha013(c, v):
    """-rank(ts_cov(rank(close), rank(volume), 5))"""
    return (-rank(ts_cov(rank(c), rank(v), 5))
            .stack('ticker')
            .swaplevel())

In [49]:
a13=alpha013(c, v)
mi[13] = get_mutual_info_score(ret_fwd, a13)
mi[13]

0.012460904294289588

alpha 14

In [50]:
def alpha014(o, v, r):
    """
    (-rank(ts_delta(returns, 3))) * ts_corr(open, volume, 10))
    """

    alpha = -rank(ts_delta(r, 3)).mul(ts_corr(o, v, 10)
                                      .replace([-np.inf,
                                                np.inf],
                                               np.nan))
    return (alpha
            .stack('ticker')
            .swaplevel())

In [51]:
a14=alpha014(o, v, r)
mi[14] = get_mutual_info_score(ret_fwd, a14)
mi[14]

0.06410431588960464

alpha 16

In [52]:
def alpha016(h, v):
    """(-1 * rank(ts_cov(rank(high), rank(volume), 5)))"""
    return (-rank(ts_cov(rank(h), rank(v), 5))
            .stack('ticker')
            .swaplevel())

In [53]:
a16=alpha016(h,v)
mi[16] = get_mutual_info_score(ret_fwd, a16)
mi[16]

0

alpha 17

In [54]:
def alpha017(c, v):
    """(((-1 * rank(ts_rank(close, 10))) * rank(ts_delta(ts_delta(close, 1), 1))) *rank(ts_rank((volume / adv20), 5)))
        """
    adv20 = ts_mean(v, 20)
    return (-rank(ts_rank(c, 10))
            .mul(rank(ts_delta(ts_delta(c, 1), 1)))
            .mul(rank(ts_rank(v.div(adv20), 5)))
            .stack('ticker')
            .swaplevel())

In [55]:
a17=alpha017(c,v)
mi[17] = get_mutual_info_score(ret_fwd, a17)
mi[17]

0.1770630244602196

alpha 18

In [56]:
def alpha018(o, c):
    """-rank((ts_std(abs((close - open)), 5) + (close - open)) +
            ts_corr(close, open,10))
    """
    return (-rank(ts_std(c.sub(o).abs(), 5)
                  .add(c.sub(o))
                  .add(ts_corr(c, o, 10)
                       .replace([-np.inf,
                                 np.inf],
                                np.nan)))
            .stack('ticker')
            .swaplevel())

In [57]:
a18=alpha018(o, c)
mi[18] = get_mutual_info_score(ret_fwd, a18)
mi[18]

0.11760722508500843

alpha 20

In [58]:
def alpha020(o, h, l, c):
    """-rank(open - ts_lag(high, 1)) * 
        rank(open - ts_lag(close, 1)) * 
        rank(open -ts_lag(low, 1))"""
    return (rank(o - ts_lag(h, 1))
            .mul(rank(o - ts_lag(c, 1)))
            .mul(rank(o - ts_lag(l, 1)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [59]:
a20=alpha020(o, h, l, c)
mi[20] = get_mutual_info_score(ret_fwd, a20)
mi[20]

0.06035516875436597

alpha 28

In [60]:
def alpha028(h, l, c, v, adv20):
    """scale(((ts_corr(adv20, low, 5) + (high + low) / 2) - close))"""
    return (scale(ts_corr(adv20, l, 5)
                  .replace([-np.inf, np.inf], 0)
                  .add(h.add(l).div(2).sub(c)))
            .stack('ticker')
            .swaplevel())

In [61]:
a28=alpha028(h, l, c, v, adv20)
mi[28] = get_mutual_info_score(ret_fwd, a28)
mi[28]

0.20467719527084238

alpha 30

In [109]:
def alpha030(c, v):
    """(((1.0 - rank(((sign((close - ts_lag(close, 1))) +
            sign((ts_lag(close, 1) - ts_lag(close, 2)))) +
            sign((ts_lag(close, 2) - ts_lag(close, 3)))))) *
            ts_sum(volume, 5)) / ts_sum(volume, 20))"""
    close_diff = ts_delta(c, 1)
    return (rank(sign(close_diff)
                 .add(sign(ts_lag(close_diff, 1)))
                 .add(sign(ts_lag(close_diff, 2))))
            .mul(-1).add(1)
            .mul(ts_sum(v, 5))
            .div(ts_sum(v, 20))
            .stack('ticker')
            .swaplevel())

In [111]:
a30=alpha030(c, r)
a30.to_frame()


In [117]:
a30.to_frame().replace([np.inf, -np.inf], np.nan, inplace=True)

In [121]:
mi[30] = get_mutual_info_score(ret_fwd, a30)
mi[30]

0.02378694836705053

alpha 34

In [64]:
def alpha034(c, r):
    """rank(((1 - rank((ts_std(returns, 2) / ts_std(returns, 5)))) + (1 - rank(ts_delta(close, 1)))))"""

    return (rank(rank(ts_std(r, 2).div(ts_std(r, 5))
                      .replace([-np.inf, np.inf],
                               np.nan))
                 .mul(-1)
                 .sub(rank(ts_delta(c, 1)))
                 .add(2))
            .stack('ticker')
            .swaplevel())

In [65]:
a34=alpha034(c, r)
mi[34] = get_mutual_info_score(ret_fwd, a34)
mi[34]

0.11601847807418686

In [120]:
a34

ticker  Date      
AAPL    2021/01/11    0.846154
ADBE    2021/01/11    0.879121
ADI     2021/01/11    0.560440
ADP     2021/01/11    0.280220
ADSK    2021/01/11    0.659341
                        ...   
WBA     2022/12/30    0.967033
WDAY    2022/12/30    0.285714
WDC     2022/12/30    0.362637
XEL     2022/12/30    0.868132
ZBRA    2022/12/30    0.483516
Length: 45318, dtype: float64

alpha 38

In [66]:
def alpha038(o, c):
    """"-1 * rank(ts_rank(close, 10)) * rank(close / open)"""
    return (rank(ts_rank(o, 10))
            .mul(rank(c.div(o).replace([-np.inf, np.inf], np.nan)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [67]:
a38=alpha038(o, c)
mi[38] = get_mutual_info_score(ret_fwd, a38)
mi[38]

0.1650316619184431

alpha 42

In [68]:
def alpha042(c, vwap):
    """rank(vwap - close) / rank(vwap + close)"""
    return (rank(vwap.sub(c))
            .div(rank(vwap.add(c)))
            .stack('ticker')
            .swaplevel())

In [69]:
a42=alpha042(c, vwap)
mi[42] = get_mutual_info_score(ret_fwd, a42)
mi[42]

0.166279541534192

alpha 43

In [70]:
def alpha043(c, adv20):
    """(ts_rank((volume / adv20), 20) * ts_rank((-1 * ts_delta(close, 7)), 8))"""

    return (ts_rank(v.div(adv20), 20)
            .mul(ts_rank(ts_delta(c, 7).mul(-1), 8))
            .stack('ticker')
            .swaplevel())

In [71]:
a43=alpha043(c, adv20)
mi[43] = get_mutual_info_score(ret_fwd, a43)
mi[43]

0.09188695863941065

alpha 45

In [72]:
def alpha045(c, v):
    """-(rank((ts_mean(ts_lag(close, 5), 20)) * 
        ts_corr(close, volume, 2)) *
        rank(ts_corr(ts_sum(close, 5), ts_sum(close, 20), 2)))"""

    return (rank(ts_mean(ts_lag(c, 5), 20))
            .mul(ts_corr(c, v, 2)
                 .replace([-np.inf, np.inf], np.nan))
            .mul(rank(ts_corr(ts_sum(c, 5),
                              ts_sum(c, 20), 2)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [73]:
a45=alpha045(c, v)
mi[45] = get_mutual_info_score(ret_fwd, a45)
mi[45]

0.0325486111603972

alpha 46

In [74]:
def alpha046(c):
    """0.25 < ts_lag(ts_delta(close, 10), 10) / 10 - ts_delta(close, 10) / 10
            ? -1
            : ((ts_lag(ts_delta(close, 10), 10) / 10 - ts_delta(close, 10) / 10 < 0) 
                ? 1 
                : -ts_delta(close, 1))
    """

    cond = ts_lag(ts_delta(c, 10), 10).div(10).sub(ts_delta(c, 10).div(10))
    alpha = pd.DataFrame(-np.ones_like(cond),
                         index=c.index,
                         columns=c.columns)
    alpha[cond.isnull()] = np.nan
    return (cond.where(cond > 0.25,
                       -alpha.where(cond < 0,
                       -ts_delta(c, 1)))
            .stack('ticker')
            .swaplevel())

In [75]:
a46=alpha046(c)
mi[46] = get_mutual_info_score(ret_fwd, a46)
mi[46]

0.10024619866920492

alpha 47

In [76]:
def alpha047(h, c, v, vwap, adv20):
    """((((rank((1 / close)) * volume) / adv20) * ((high * rank((high - close))) / 
        (ts_sum(high, 5) /5))) - rank((vwap - ts_lag(vwap, 5))))"""

    return (rank(c.pow(-1)).mul(v).div(adv20)
            .mul(h.mul(rank(h.sub(c))
                       .div(ts_mean(h, 5)))
                 .sub(rank(ts_delta(vwap, 5))))
            .stack('ticker')
            .swaplevel())

In [77]:
a47=alpha047(h, c, v, vwap, adv20)
mi[47] = get_mutual_info_score(ret_fwd, a47)
mi[47]

0.05670923524581051

alpha 49

In [78]:
def alpha049(c):
    """ts_delta(ts_lag(close, 10), 10).div(10).sub(ts_delta(close, 10).div(10)) < -0.1 * c
        ? 1 
        : -ts_delta(close, 1)"""
    cond = (ts_delta(ts_lag(c, 10), 10).div(10)
            .sub(ts_delta(c, 10).div(10)) >= -0.1 * c)
    return (-ts_delta(c, 1)
            .where(cond, 1)
            .stack('ticker')
            .swaplevel())

In [79]:
a49=alpha049(c)
mi[49] = get_mutual_info_score(ret_fwd, a49)
mi[49]

1.1517052396845058

alpha 51

In [80]:
def alpha051(c):
    """ts_delta(ts_lag(close, 10), 10).div(10).sub(ts_delta(close, 10).div(10)) < -0.05 * c
        ? 1 
        : -ts_delta(close, 1)"""
    cond = (ts_delta(ts_lag(c, 10), 10).div(10)
            .sub(ts_delta(c, 10).div(10)) >= -0.05 * c)
    return (-ts_delta(c, 1)
            .where(cond, 1)
            .stack('ticker')
            .swaplevel())

In [81]:
a51=alpha051(c)
mi[51] = get_mutual_info_score(ret_fwd, a51)
mi[51]

1.1582372731217716

alpha 55

In [82]:
def alpha055(h, l, c):
    """(-1 * ts_corr(rank(((close - ts_min(low, 12)) / 
                            (ts_max(high, 12) - ts_min(low,12)))), 
                    rank(volume), 6))"""

    return (ts_corr(rank(c.sub(ts_min(l, 12))
                         .div(ts_max(h, 12).sub(ts_min(l, 12))
                              .replace(0, 1e-6))),
                    rank(v), 6)
            .replace([-np.inf, np.inf], np.nan)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [83]:
a55=alpha055(h, l, c)
mi[55] = get_mutual_info_score(ret_fwd, a55)
mi[55]

0

alpha 60

In [84]:
def alpha060(l, h, c, v):
    """-((2 * scale(rank(((((close - low) - (high - close)) / (high - low)) * volume)))) -scale(rank(ts_argmax(close, 10))))"""
    return (scale(rank(c.mul(2).sub(l).sub(h)
                       .div(h.sub(l).replace(0, 1e-5))
                       .mul(v))).mul(2)
            .sub(scale(rank(ts_argmax(c, 10)))).mul(-1)
            .stack('ticker')
            .swaplevel())

In [85]:
a60=alpha060(l, h, c, v)
mi[60] = get_mutual_info_score(ret_fwd, a60)
mi[60]

0.04841824447185328

alpha 83

In [86]:
def alpha083(h, l, c):
    """(rank(ts_lag((high - low) / ts_mean(close, 5), 2)) * rank(rank(volume)) / 
            (((high - low) / ts_mean(close, 5) / (vwap - close)))
    """
    s = h.sub(l).div(ts_mean(c, 5))

    return (rank(rank(ts_lag(s, 2))
                 .mul(rank(rank(v)))
                 .div(s).div(vwap.sub(c).add(1e-3)))
            .stack('ticker')
            .swaplevel()
            .replace((np.inf, -np.inf), np.nan))

In [87]:
a83=alpha083(h, l, c)
mi[83] = get_mutual_info_score(ret_fwd, a83)
mi[83]

0.12365479484626452

alpha 101

In [88]:
def alpha101(o, h, l, c):
    """((close - open) / ((high - low) + .001))"""
    return (c.sub(o).div(h.sub(l).add(1e-3))
            .stack('ticker')
            .swaplevel())

In [89]:
a101=alpha101(o, h, l, c)
mi[101] = get_mutual_info_score(ret_fwd, a101)
mi[101]

0.5282340491870343

# alpha selection

In [122]:
mi

{5: 0.03470599599417845,
 7: 0.0639019429983474,
 8: 0.009167486035333017,
 10: 0.2584408043659434,
 11: 0.0031749410487362795,
 12: 0.5240765798984515,
 13: 0.012460904294289588,
 14: 0.06410431588960464,
 16: 0,
 17: 0.1770630244602196,
 18: 0.11760722508500843,
 20: 0.06035516875436597,
 28: 0.20467719527084238,
 34: 0.11601847807418686,
 38: 0.1650316619184431,
 42: 0.166279541534192,
 43: 0.09188695863941065,
 45: 0.0325486111603972,
 46: 0.10024619866920492,
 47: 0.05670923524581051,
 49: 1.1517052396845058,
 51: 1.1582372731217716,
 55: 0,
 60: 0.04841824447185328,
 83: 0.12365479484626452,
 101: 0.5282340491870343,
 30: 0.02378694836705053}

In [123]:
sorted_mi = dict(sorted(mi.items(), reverse=True, key=lambda item: item[1]))
sorted_mi

{51: 1.1582372731217716,
 49: 1.1517052396845058,
 101: 0.5282340491870343,
 12: 0.5240765798984515,
 10: 0.2584408043659434,
 28: 0.20467719527084238,
 17: 0.1770630244602196,
 42: 0.166279541534192,
 38: 0.1650316619184431,
 83: 0.12365479484626452,
 18: 0.11760722508500843,
 34: 0.11601847807418686,
 46: 0.10024619866920492,
 43: 0.09188695863941065,
 14: 0.06410431588960464,
 7: 0.0639019429983474,
 20: 0.06035516875436597,
 47: 0.05670923524581051,
 60: 0.04841824447185328,
 5: 0.03470599599417845,
 45: 0.0325486111603972,
 30: 0.02378694836705053,
 13: 0.012460904294289588,
 8: 0.009167486035333017,
 11: 0.0031749410487362795,
 16: 0,
 55: 0}

In [124]:
lst=[]
df_lst=[]
for i in sorted_mi.keys():
    lst.append('a'+str(i))
    df_lst.append(locals()['a'+str(i)])

In [125]:
df_lst

[ticker  Date      
 AAPL    2021/01/05   -1.000000
 ADBE    2021/01/05   -1.000000
 ADI     2021/01/05   -1.000000
 ADP     2021/01/05   -1.000000
 ADSK    2021/01/05   -1.000000
                         ...   
 WBA     2022/12/30    0.110001
 WDAY    2022/12/30    0.459991
 WDC     2022/12/30    0.020000
 XEL     2022/12/30    0.959999
 ZBRA    2022/12/30    1.119995
 Length: 45682, dtype: float64,
 ticker  Date      
 AAPL    2021/01/05   -1.000000
 ADBE    2021/01/05   -1.000000
 ADI     2021/01/05   -1.000000
 ADP     2021/01/05   -1.000000
 ADSK    2021/01/05   -1.000000
                         ...   
 WBA     2022/12/30    0.110001
 WDAY    2022/12/30    0.459991
 WDC     2022/12/30    0.020000
 XEL     2022/12/30    0.959999
 ZBRA    2022/12/30    1.119995
 Length: 45682, dtype: float64,
 ticker  Date      
 AAPL    2021/01/05    0.640286
 ADBE    2021/01/05   -0.027613
 ADI     2021/01/05    0.809969
 ADP     2021/01/05   -0.067587
 ADSK    2021/01/05    0.646460
            

In [126]:
result = pd.concat(df_lst, axis=1, keys = lst)

In [127]:
result

Unnamed: 0_level_0,Unnamed: 1_level_0,a51,a49,a101,a12,a10,a28,a17,a42,a38,a83,...,a47,a60,a5,a45,a30,a13,a8,a11,a16,a55
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,2021/01/05,-1.000000,-1.000000,0.640286,,,,,0.875000,,,...,,,,,,,,,,
AAPL,2021/01/06,-1.000000,-1.000000,-0.239778,4.409996,0.758242,,,2.000000,,,...,,,,,,,,,,
AAPL,2021/01/07,-1.000000,-1.000000,0.678864,4.320000,0.373626,,,0.948718,,,...,,,,,,,,,,
AAPL,2021/01/08,-1.000000,-1.000000,-0.158262,1.130005,0.500000,,,1.358974,,,...,,,,,,,,1.188262,,
AAPL,2021/01/11,-1.000000,-1.000000,-0.125677,-3.070007,0.758242,,,1.421053,,,...,,,,,,-0.631868,,0.009419,-0.604396,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZBRA,2022/12/23,-0.710007,-0.710007,0.310431,0.710007,0.263736,-0.006871,-0.196186,0.414286,-0.289639,0.769231,...,0.014688,0.001672,0.220626,0.693153,0.009258,-0.824176,-0.912088,0.262046,-0.406593,-0.402229
ZBRA,2022/12/27,-2.779999,-2.779999,0.245864,-2.779999,0.065934,-0.007423,-0.359094,0.142857,-0.701727,0.362637,...,-0.019076,-0.016603,0.386185,-0.109890,-0.046731,-0.494505,-0.945055,0.113996,-0.412088,-0.396350
ZBRA,2022/12/28,4.160004,4.160004,-0.471441,-4.160004,0.857143,0.017860,-0.022559,1.157143,-0.218814,0.142857,...,-0.004370,-0.029742,0.099626,-0.752325,0.001790,-0.428571,-0.274725,0.912933,-0.351648,0.324595
ZBRA,2022/12/29,-10.690002,-10.690002,0.881671,-10.690002,0.109890,-0.034240,-0.776495,0.100000,-0.851950,0.835165,...,-0.085570,0.006211,0.831421,-0.059172,-0.098022,-0.340659,-0.406593,0.375317,-0.208791,-0.274630


In [128]:
result = result[result.index.get_level_values('Date') >='2021/02/01']

In [129]:
result

Unnamed: 0_level_0,Unnamed: 1_level_0,a51,a49,a101,a12,a10,a28,a17,a42,a38,a83,...,a47,a60,a5,a45,a30,a13,a8,a11,a16,a55
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,2021/02/01,-1.000000,-1.000000,0.087620,2.179993,0.593407,,,1.279070,-0.114479,0.153846,...,,-0.026039,0.192127,,,-0.296703,-0.208791,0.061345,-0.252747,0.466738
AAPL,2021/02/02,-1.000000,-1.000000,-0.435033,0.850006,0.670330,,,1.928571,-0.048243,0.967033,...,0.164961,0.012303,0.033209,,-0.635541,-0.626374,-0.186813,0.015699,-0.620879,-0.096671
AAPL,2021/02/03,1.050003,1.050003,-0.842198,1.050003,0.494505,,,1.225000,-0.084169,0.901099,...,0.136848,0.011108,0.051926,,-1.263845,-0.813187,-0.175824,0.013404,-0.840659,-0.271075
AAPL,2021/02/04,-3.449997,-3.449997,0.387761,3.449997,0.285714,,,0.809524,-0.197802,0.582418,...,-0.041331,-0.034042,0.147084,,0.012221,-0.813187,-0.208791,0.023910,-0.758242,-0.487398
AAPL,2021/02/05,0.630005,0.630005,-0.377971,-0.630005,0.681319,,,1.071429,-0.101497,0.934066,...,-0.041274,-0.036550,0.085135,,0.596115,-0.780220,-0.142857,0.070161,-0.681319,-0.318052
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZBRA,2022/12/23,-0.710007,-0.710007,0.310431,0.710007,0.263736,-0.006871,-0.196186,0.414286,-0.289639,0.769231,...,0.014688,0.001672,0.220626,0.693153,0.009258,-0.824176,-0.912088,0.262046,-0.406593,-0.402229
ZBRA,2022/12/27,-2.779999,-2.779999,0.245864,-2.779999,0.065934,-0.007423,-0.359094,0.142857,-0.701727,0.362637,...,-0.019076,-0.016603,0.386185,-0.109890,-0.046731,-0.494505,-0.945055,0.113996,-0.412088,-0.396350
ZBRA,2022/12/28,4.160004,4.160004,-0.471441,-4.160004,0.857143,0.017860,-0.022559,1.157143,-0.218814,0.142857,...,-0.004370,-0.029742,0.099626,-0.752325,0.001790,-0.428571,-0.274725,0.912933,-0.351648,0.324595
ZBRA,2022/12/29,-10.690002,-10.690002,0.881671,-10.690002,0.109890,-0.034240,-0.776495,0.100000,-0.851950,0.835165,...,-0.085570,0.006211,0.831421,-0.059172,-0.098022,-0.340659,-0.406593,0.375317,-0.208791,-0.274630


In [130]:
result['a28'].isna().sum()

455

In [131]:
alpha=result.groupby('ticker').transform(lambda x: x.fillna(x.mean()))

In [132]:
from sklearn.preprocessing import scale
alpha_nor=alpha.groupby('ticker').transform(lambda x: scale(x))
alpha_nor

Unnamed: 0_level_0,Unnamed: 1_level_0,a51,a49,a101,a12,a10,a28,a17,a42,a38,a83,...,a47,a60,a5,a45,a30,a13,a8,a11,a16,a55
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,2021/02/01,-0.350905,-0.350905,0.094158,0.652992,0.391199,-2.951353e-17,-5.856407e-16,0.689784,0.742978,-1.150895,...,2.520799e-16,-0.681790,-0.418277,-7.683592e-17,-2.200849e-17,1.247913,1.367771,-0.822079,1.223905,1.062330
AAPL,2021/02/02,-0.350905,-0.350905,-0.848727,0.186552,0.758392,-2.951353e-17,-5.856407e-16,2.295837,1.076694,1.554392,...,1.200280e+00,1.165857,-1.446931,-7.683592e-17,-1.824217e-01,-0.548785,1.470114,-0.914457,-0.744590,-0.196074
AAPL,2021/02/03,0.363932,0.363932,-1.583264,0.256693,-0.080906,-2.951353e-17,-5.856407e-16,0.556083,0.895690,1.335044,...,1.030061e+00,1.108298,-1.325775,-7.683592e-17,-3.484907e-01,-1.566913,1.521285,-0.919100,-1.919810,-0.585615
AAPL,2021/02/04,-1.205221,-1.205221,0.635622,1.098395,-1.077572,-2.951353e-17,-5.856407e-16,-0.471284,0.323171,0.274864,...,-4.876862e-02,-1.067436,-0.709833,-7.683592e-17,-1.120961e-02,-1.566913,1.367771,-0.897839,-1.479102,-1.068783
AAPL,2021/02/05,0.217479,0.217479,-0.745784,-0.332502,0.810848,-2.951353e-17,-5.856407e-16,0.176340,0.808383,1.444718,...,-4.842722e-02,-1.188310,-1.110820,-7.683592e-17,1.431212e-01,-1.387244,1.674798,-0.804239,-1.067775,-0.690540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZBRA,2022/12/23,-0.105258,-0.105258,0.574980,0.038514,-0.779260,-2.445667e-01,-4.517829e-01,-0.433624,-0.304386,1.013050,...,-4.449050e-01,1.004733,-0.091802,1.237499e+00,-1.159585e-02,-1.594089,-1.045035,-0.843067,0.334676,-1.094533
ZBRA,2022/12/27,-0.320287,-0.320287,0.458986,-0.323464,-1.321158,-2.682931e-01,-1.469818e+00,-1.064582,-2.419566,-0.559839,...,-8.429269e-01,-0.477891,0.450819,-2.267877e-01,-3.139533e-02,0.158878,-1.130043,-1.318860,0.306303,-1.081007
ZBRA,2022/12/28,0.400635,0.400635,-0.829647,-0.466596,0.846437,8.191404e-01,6.332360e-01,1.293208,0.059146,-1.410049,...,-6.695655e-01,-1.543829,-0.488378,-1.398217e+00,-1.423671e-02,0.509471,0.598451,1.248713,0.618409,0.577730
ZBRA,2022/12/29,-1.141971,-1.141971,1.601207,-1.143878,-1.200736,-1.421697e+00,-4.078221e+00,-1.164207,-3.190638,1.268113,...,-1.626787e+00,1.372967,1.910078,-1.343065e-01,-4.953364e-02,0.976929,0.258419,-0.479043,1.356115,-0.800956


In [133]:
alpha_nor.to_csv('backtesting alpha.csv')