In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

from sklearn.feature_selection import mutual_info_regression
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
nasdaq_100_tickers = ["AAPL", "MSFT", "AMZN", "GOOGL", "GOOG", "TSLA", "META", "NVDA", "PYPL", "NFLX",
    "ASML", "ADBE", "INTC", "CMCSA", "CSCO", "PEP", "AVGO", "TMUS", "COST", "PDD",
    "TXN", "QCOM", "AMAT", "MU", "AMGN", "INTU", "ISRG", "ZM", "CSX", "VRTX",
    "JD", "GILD", "BIDU", "MRVL", "REGN", "MDLZ", "ADSK", "ATVI", "BIIB", "ILMN",
    "LRCX", "ADP", "BKNG", "MELI", "KLAC", "DOCU", "NXPI", "MNST", "WDAY", "ROST",
    "KDP", "EA", "ALGN", "ADI", "IDXX", "DXCM", "XEL", "CTAS", "EXC", "MAR",
    "SNPS", "ASAN", "CDNS", "CPRT", "SGEN", "SPLK", "ORLY", "DLTR", "MTCH",
    "MCHP", "INCY", "PCAR", "CTSH", "FAST", "VRSK", "CHKP", "FOXA", "FOX", "ANSS",
    "SWKS", "OKTA", "TTD", "CDW", "TEAM", "WBA", "LULU", "PAYX",
    "VRSN", "AEP", "ZBRA", "PTON", "TCOM", "NTES", "BMRN", "ULTA", "EXPE",
    "CSGP", "SIRI", "EBAY", "WDC"
    ]

end_date = '2020-12-31'
start_date = '2016-1-1'

nasdaq_20_tickers=nasdaq_100_tickers[:20]

## cross section

In [3]:
def rank(df):
    """Return the cross-sectional percentile rank

     Args:
         :param df: tickers in columns, sorted dates in rows.

     Returns:
         pd.DataFrame: the ranked values
     """
    return df.rank(axis=1, pct=True)

In [4]:
def scale(df):
    """
    Scaling time serie.
    :param df: a pandas DataFrame.
    :param k: scaling factor.
    :return: a pandas DataFrame rescaled df such that sum(abs(df)) = k
    """
    return df.div(df.abs().sum(axis=1), axis=0)

## operators

In [5]:
def log(df):
    return np.log1p(df)

In [6]:
def sign(df):
    return np.sign(df)

In [7]:
def power(df, exp):
    return df.pow(exp)

In [8]:
def WMA(x, timeperiod=7):
    weights = np.arange(1, timeperiod + 1)
    return np.dot(x, weights) / weights.sum()
    
    

## pandas implementation

In [9]:
def ts_lag(df: pd.DataFrame, t: int = 1) -> pd.DataFrame:
    """Return the lagged values t periods ago.

    Args:
        :param df: tickers in columns, sorted dates in rows.
        :param t: lag

    Returns:
        pd.DataFrame: the lagged values
    """
    return df.shift(t)

In [10]:
def ts_delta(df, period=1):
    """
    Wrapper function to estimate difference.
    :param df: a pandas DataFrame.
    :param period: the difference grade.
    :return: a pandas DataFrame with today’s value minus the value 'period' days ago.
    """
    return df.diff(period)

In [11]:
def ts_corr(x, y, window=10):
    """
    Wrapper function to estimate rolling correlations.
    :param x, y: pandas DataFrames.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).corr(y)

In [12]:
def ts_sum(df: pd.DataFrame, window: int = 10) -> pd.DataFrame:
    """Computes the rolling ts_sum for the given window size.

    Args:
        df (pd.DataFrame): tickers in columns, dates in rows.
        window      (int): size of rolling window.

    Returns:
        pd.DataFrame: the ts_sum over the last 'window' days.
    """
    return df.rolling(window).sum()

In [13]:
def ts_mean(df, window=10):
    """Computes the rolling mean for the given window size.

    Args:
        df (pd.DataFrame): tickers in columns, dates in rows.
        window      (int): size of rolling window.

    Returns:
        pd.DataFrame: the mean over the last 'window' days.
    """
    return df.rolling(window).mean()

In [14]:
def ts_weighted_mean(df, period=10):
    """
    Linear weighted moving average implementation.
    :param df: a pandas DataFrame.
    :param period: the LWMA period
    :return: a pandas DataFrame with the LWMA.
    """
    return (df.apply(lambda x: WMA(x, timeperiod=period)))

In [15]:
def ts_std(df, window=10):
    """
    Wrapper function to estimate rolling standard deviation.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return (df
            .rolling(window)
            .std())

In [16]:
def ts_rank(df, window=10):
    """
    Wrapper function to estimate rolling rank.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series rank over the past window days.
    """
    return (df
            .rolling(window)
            .apply(lambda x: x.rank().iloc[-1]))

In [17]:
def ts_product(df, window=10):
    """
    Wrapper function to estimate rolling ts_product.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series ts_product over the past 'window' days.
    """
    return (df
            .rolling(window)
            .apply(np.prod))

In [18]:
def ts_min(df, window=10):
    """
    Wrapper function to estimate rolling min.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).min()

In [19]:
def ts_max(df, window=10):
    """
    Wrapper function to estimate rolling min.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series max over the past 'window' days.
    """
    return df.rolling(window).max()

In [20]:
def ts_argmax(df, window=10):
    """
    Wrapper function to estimate which day ts_max(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that :)
    """
    return df.rolling(window).apply(np.argmax).add(1)

In [21]:
def ts_argmin(df, window=10):
    """
    Wrapper function to estimate which day ts_min(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that :)
    """
    return (df.rolling(window)
            .apply(np.argmin)
            .add(1))

In [22]:
def ts_cov(x, y, window=10):
    """
    Wrapper function to estimate rolling covariance.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).cov(y)

# evaluation

In [23]:
mi={}
ic={}

In [24]:
def get_mutual_info_score(returns, alpha, n=1000):
    df = pd.DataFrame({'y': returns, 'alpha': alpha}).dropna().sample(n=n)
    return mutual_info_regression(y=df.y, X=df[['alpha']])[0]

# data loading

In [25]:
## "AAPL"
ticker = "AAPL"
AAPL_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
AAPL_data.index = AAPL_data.index.strftime('%Y/%m/%d')
AAPL_data['returns']=AAPL_data['Close']-AAPL_data['Close'].shift(1)
AAPL_data['ret_fwd']=AAPL_data['Close'].pct_change().dropna()
#AAPL_data['returns'] = (AAPL_data['Close']-AAPL_data['Close'].shift(1)) / AAPL_data['Close'].shift(1)
AAPL_data=AAPL_data.dropna()
AAPL_data=pd.concat({'AAPL': AAPL_data}, names=['ticker'])

In [26]:
AAPL_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume,returns,ret_fwd
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,2016/01/05,26.437500,26.462500,25.602501,25.677500,23.407412,223164000,-0.660000,-0.025059
AAPL,2016/01/06,25.139999,25.592501,24.967501,25.174999,22.949343,273829600,-0.502501,-0.019570
AAPL,2016/01/07,24.670000,25.032499,24.107500,24.112499,21.980768,324377600,-1.062500,-0.042205
AAPL,2016/01/08,24.637501,24.777500,24.190001,24.240000,22.096994,283192000,0.127501,0.005288
AAPL,2016/01/11,24.742500,24.764999,24.334999,24.632500,22.454800,198957600,0.392500,0.016192
AAPL,...,...,...,...,...,...,...,...,...
AAPL,2020/12/23,132.160004,132.429993,130.779999,130.960007,128.856766,88223700,-0.919998,-0.006976
AAPL,2020/12/24,131.320007,133.460007,131.100006,131.970001,129.850571,54930100,1.009995,0.007712
AAPL,2020/12/28,133.990005,137.339996,133.509995,136.690002,134.494797,124486200,4.720001,0.035766
AAPL,2020/12/29,138.050003,138.789993,134.339996,134.869995,132.704010,121047300,-1.820007,-0.013315


In [27]:
#data=pd.DataFrame()
data=AAPL_data

In [28]:
for ticker in nasdaq_100_tickers[1:]:
    df = yf.download(ticker, start=start_date, end=end_date, progress=False)
    df.index = df.index.strftime('%Y/%m/%d')
    df['ret_fwd'] = df['Close'].pct_change().dropna()
    df['returns'] = df['Close']-df['Close'].shift(1)
    df=df.dropna()
    df=pd.concat({ticker: df}, names=['ticker'])
    data=pd.concat([data, df], axis=0)

In [32]:
data.returns

ticker  Date      
AAPL    2016/01/05   -0.660000
        2016/01/06   -0.502501
        2016/01/07   -1.062500
        2016/01/08    0.127501
        2016/01/11    0.392500
                        ...   
WDC     2020/12/23    0.209999
        2020/12/24   -0.010002
        2020/12/28    0.140003
        2020/12/29   -1.770000
        2020/12/30    1.239998
Name: returns, Length: 119409, dtype: float64

In [33]:
o = data.Open.unstack('ticker')
h = data.High.unstack('ticker')
l = data.Low.unstack('ticker')
c = data.Close.unstack('ticker')
v = data.Volume.unstack('ticker')
vwap = o.add(h).add(l).add(c).div(4)
adv20 = v.rolling(20).mean()
r = data.returns.unstack('ticker')

In [34]:
ret_fwd=data.ret_fwd
ret_fwd

ticker  Date      
AAPL    2016/01/05   -0.025059
        2016/01/06   -0.019570
        2016/01/07   -0.042205
        2016/01/08    0.005288
        2016/01/11    0.016192
                        ...   
WDC     2020/12/23    0.004224
        2020/12/24   -0.000200
        2020/12/28    0.002805
        2020/12/29   -0.035358
        2020/12/30    0.025678
Name: ret_fwd, Length: 119409, dtype: float64

In [284]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume,returns,ret_fwd
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,2016/01/05,26.437500,26.462500,25.602501,25.677500,23.407412,223164000,-0.660000,-0.025059
AAPL,2016/01/06,25.139999,25.592501,24.967501,25.174999,22.949343,273829600,-0.502501,-0.019570
AAPL,2016/01/07,24.670000,25.032499,24.107500,24.112499,21.980768,324377600,-1.062500,-0.042205
AAPL,2016/01/08,24.637501,24.777500,24.190001,24.240000,22.096994,283192000,0.127501,0.005288
AAPL,2016/01/11,24.742500,24.764999,24.334999,24.632500,22.454800,198957600,0.392500,0.016192
...,...,...,...,...,...,...,...,...,...
WDC,2020/12/23,49.950001,50.599998,49.410000,49.930000,49.930000,2685500,0.209999,0.004224
WDC,2020/12/24,50.310001,50.380001,49.349998,49.919998,49.919998,1224500,-0.010002,-0.000200
WDC,2020/12/28,50.590000,50.970001,49.630001,50.060001,50.060001,2763400,0.140003,0.002805
WDC,2020/12/29,50.639999,50.639999,48.230000,48.290001,48.290001,4622400,-1.770000,-0.035358


# alpha calculation

alpha 2

In [35]:
def alpha002(o, c, v):
    """(-1 * ts_corr(rank(ts_delta(log(volume), 2)), rank(((close - open) / open)), 6))"""
    s1 = rank(ts_delta(log(v), 2))
    s2 = rank((c / o) - 1)
    alpha = -ts_corr(s1, s2, 6)
    return alpha.stack('ticker').swaplevel().replace([-np.inf, np.inf], np.nan)

In [36]:
a2=alpha002(o, c, v)
a2

ticker  Date      
AAPL    2016/01/14   -0.414025
ADBE    2016/01/14    0.493090
ADI     2016/01/14    0.150472
ADP     2016/01/14    0.421144
ADSK    2016/01/14    0.515867
                        ...   
WDAY    2020/12/30   -0.206533
WDC     2020/12/30   -0.131785
XEL     2020/12/30   -0.965703
ZBRA    2020/12/30   -0.633176
ZM      2020/12/30    0.361023
Length: 118709, dtype: float64

In [37]:
## evaluation

In [38]:
mi[2] = get_mutual_info_score(ret_fwd, a2)
mi[2]

0.01335516484827437

alpha 3

In [39]:
def alpha003(o, v):
    """(-1 * ts_corr(rank(open), rank(volume), 10))"""

    return (-ts_corr(rank(o), rank(v), 10)
            .stack('ticker')
            .swaplevel()
            .replace([-np.inf, np.inf], np.nan))

In [40]:
a3=alpha003(o, v)
a3

ticker  Date      
ADBE    2016/01/19   -8.629107e-01
ADI     2016/01/19   -8.895679e-03
ADP     2016/01/19    4.577782e-01
ADSK    2016/01/19    2.653267e-01
AEP     2016/01/19    5.865556e-01
                          ...     
WBA     2020/12/30   -1.337252e-08
WDAY    2020/12/30    1.563609e-01
WDC     2020/12/30    4.935153e-01
ZBRA    2020/12/30    2.863463e-01
ZM      2020/12/30    2.984373e-01
Length: 111210, dtype: float64

In [41]:
mi[3] = get_mutual_info_score(ret_fwd, a3)
mi[3]

0.026809590583944942

alpha 4

In [42]:
def alpha004(l):
    """(-1 * Ts_Rank(rank(low), 9))"""
    return (-ts_rank(rank(l), 9)
            .stack('ticker')
            .swaplevel())

In [43]:
a4=alpha004(l)
a4

ticker  Date      
AAPL    2016/01/15   -9.0
ADBE    2016/01/15   -8.0
ADI     2016/01/15   -7.0
ADP     2016/01/15   -8.0
ADSK    2016/01/15   -2.5
                     ... 
WDAY    2020/12/30   -6.5
WDC     2020/12/30   -1.0
XEL     2020/12/30   -5.0
ZBRA    2020/12/30   -9.0
ZM      2020/12/30   -1.0
Length: 118609, dtype: float64

In [44]:
mi[4] = get_mutual_info_score(ret_fwd, a4)
mi[4]

0.014025875163913515

alpha 5

In [45]:
def alpha005(o, vwap, c):
    """(rank((open - ts_mean(vwap, 10))) * (-1 * abs(rank((close - vwap)))))"""
    return (rank(o.sub(ts_mean(vwap, 10)))
            .mul(rank(c.sub(vwap)).mul(-1).abs())
            .stack('ticker')
            .swaplevel())

In [46]:
a5=alpha005(o, vwap, c)
a5

ticker  Date      
AAPL    2016/01/19    0.446806
ADBE    2016/01/19    0.218090
ADI     2016/01/19    0.208670
ADP     2016/01/19    0.621664
ADSK    2016/01/19    0.021737
                        ...   
WDAY    2020/12/30    0.013600
WDC     2020/12/30    0.097500
XEL     2020/12/30    0.231000
ZBRA    2020/12/30    0.682500
ZM      2020/12/30    0.002500
Length: 118509, dtype: float64

In [47]:
mi[5] = get_mutual_info_score(ret_fwd, a5)
mi[5]

0.1038152595983246

In [48]:
def alpha006(o, v):
    """(-ts_corr(open, volume, 10))"""
    return (-ts_corr(o, v, 10)
            .stack('ticker')
            .swaplevel())

In [49]:
a6=alpha006(o, v)
a6

ticker  Date      
AAPL    2016/01/19    0.431357
ADBE    2016/01/19    0.537988
ADI     2016/01/19    0.714824
ADP     2016/01/19    0.250451
ADSK    2016/01/19    0.327689
                        ...   
WDAY    2020/12/30    0.226448
WDC     2020/12/30   -0.329796
XEL     2020/12/30   -0.847294
ZBRA    2020/12/30    0.457955
ZM      2020/12/30    0.038767
Length: 118509, dtype: float64

In [50]:
mi[6] = get_mutual_info_score(ret_fwd, a6)
mi[6]

0

alpha 7

In [51]:
def alpha007(c, v, adv20):
    """(adv20 < volume) 
        ? ((-ts_rank(abs(ts_delta(close, 7)), 60)) * sign(ts_delta(close, 7))) 
        : -1
    """
    
    delta7 = ts_delta(c, 7)
    return (-ts_rank(abs(delta7), 60)
            .mul(sign(delta7))
            .where(adv20<v, -1)
            .stack('ticker')
            .swaplevel())

In [52]:
a7=alpha007(c, v, adv20)
a7

ticker  Date      
AAPL    2016/01/05    1.0
ADBE    2016/01/05    1.0
ADI     2016/01/05    1.0
ADP     2016/01/05    1.0
ADSK    2016/01/05    1.0
                     ... 
WDAY    2020/12/30    1.0
WDC     2020/12/30    1.0
XEL     2020/12/30    1.0
ZBRA    2020/12/30    1.0
ZM      2020/12/30    1.0
Length: 124248, dtype: float64

In [53]:
mi[7] = get_mutual_info_score(ret_fwd, a7)
mi[7]

0.043322385340433645

alpha 8

In [54]:
def alpha008(o, r):
    """-rank(((ts_sum(open, 5) * ts_sum(returns, 5)) - 
        ts_lag((ts_sum(open, 5) * ts_sum(returns, 5)),10)))
    """
    return (-(rank(((ts_sum(o, 5) * ts_sum(r, 5)) -
                       ts_lag((ts_sum(o, 5) * ts_sum(r, 5)), 10))))
           .stack('ticker')
            .swaplevel())

In [55]:
a8=alpha008(o, r)
a8

ticker  Date      
AAPL    2016/01/26   -0.340659
ADBE    2016/01/26   -0.241758
ADI     2016/01/26   -0.615385
ADP     2016/01/26   -0.736264
ADSK    2016/01/26   -0.659341
                        ...   
WDAY    2020/12/30   -0.120000
WDC     2020/12/30   -0.410000
XEL     2020/12/30   -0.560000
ZBRA    2020/12/30   -0.200000
ZM      2020/12/30   -0.020000
Length: 118009, dtype: float64

In [56]:
mi[8] = get_mutual_info_score(ret_fwd, a8)
mi[8]

0.06151734709144652

alpha 9

In [57]:
def alpha009(c):
    """(0 < ts_min(ts_delta(close, 1), 5)) ? ts_delta(close, 1) 
    : ((ts_max(ts_delta(close, 1), 5) < 0) 
    ? ts_delta(close, 1) : (-1 * ts_delta(close, 1)))
    """
    close_diff = ts_delta(c, 1)
    alpha = close_diff.where(ts_min(close_diff, 5) > 0,
                             close_diff.where(ts_max(close_diff, 5) < 0,
                                              -close_diff))
    return (alpha
            .stack('ticker')
            .swaplevel())

In [58]:
a9=alpha009(c)
a9

ticker  Date      
AAPL    2016/01/06    0.502501
ADBE    2016/01/06    1.320000
ADI     2016/01/06    2.299999
ADP     2016/01/06    1.009995
ADSK    2016/01/06    0.849998
                        ...   
WDAY    2020/12/30    0.790009
WDC     2020/12/30   -1.239998
XEL     2020/12/30   -0.230003
ZBRA    2020/12/30   -5.709991
ZM      2020/12/30    0.350006
Length: 119309, dtype: float64

In [59]:
mi[9] = get_mutual_info_score(ret_fwd, a9)
mi[9]

0.8289222360887112

alpha 10

In [60]:
def alpha010(c):
    """rank(((0 < ts_min(ts_delta(close, 1), 4)) 
        ? ts_delta(close, 1) 
        : ((ts_max(ts_delta(close, 1), 4) < 0)
            ? ts_delta(close, 1) 
            : (-1 * ts_delta(close, 1)))))
    """
    close_diff = ts_delta(c, 1)
    alpha = close_diff.where(ts_min(close_diff, 4) > 0,
                             close_diff.where(ts_min(close_diff, 4) > 0,
                                              -close_diff))

    return (rank(alpha)
            .stack('ticker')
            .swaplevel())

In [61]:
a10=alpha010(c)
mi[10] = get_mutual_info_score(ret_fwd, a10)
mi[10]

0.2536335703473922

alpha 11

In [62]:
def alpha011(c, vwap, v):
    """(rank(ts_max((vwap - close), 3)) + 
        rank(ts_min(vwap - close), 3)) * 
        rank(ts_delta(volume, 3))
        """
    return (rank(ts_max(vwap.sub(c), 3))
            .add(rank(ts_min(vwap.sub(c), 3)))
            .mul(rank(ts_delta(v, 3)))
            .stack('ticker')
            .swaplevel())

In [63]:
a11=alpha011(c, vwap, v)
mi[11] = get_mutual_info_score(ret_fwd, a11)
mi[11]

0.08660409405334013

alpha 12

In [64]:
def alpha012(v, c):
    """(sign(ts_delta(volume, 1)) * 
            (-1 * ts_delta(close, 1)))
        """
    return (sign(ts_delta(v, 1)).mul(-ts_delta(c, 1))
            .stack('ticker')
            .swaplevel())

In [65]:
a12=alpha012(v, c)
mi[12] = get_mutual_info_score(ret_fwd, a12)
mi[12]

0.6136162474996048

alpha 13

In [66]:
def alpha013(c, v):
    """-rank(ts_cov(rank(close), rank(volume), 5))"""
    return (-rank(ts_cov(rank(c), rank(v), 5))
            .stack('ticker')
            .swaplevel())

In [67]:
a13=alpha013(c, v)
mi[13] = get_mutual_info_score(ret_fwd, a13)
mi[13]

0.07995684992759156

alpha 14

In [68]:
def alpha014(o, v, r):
    """
    (-rank(ts_delta(returns, 3))) * ts_corr(open, volume, 10))
    """

    alpha = -rank(ts_delta(r, 3)).mul(ts_corr(o, v, 10)
                                      .replace([-np.inf,
                                                np.inf],
                                               np.nan))
    return (alpha
            .stack('ticker')
            .swaplevel())

In [69]:
a14=alpha014(o, v, r)
mi[14] = get_mutual_info_score(ret_fwd, a14)
mi[14]

0.03861108062034946

alpha 15

In [70]:
def alpha015(h, v):
    """(-1 * ts_sum(rank(ts_corr(rank(high), rank(volume), 3)), 3))"""
    alpha = (-ts_sum(rank(ts_corr(rank(h), rank(v), 3)
                          .replace([-np.inf, np.inf], np.nan)), 3))
    return (alpha
            .stack('ticker')
            .swaplevel())

In [71]:
a15=alpha015(h,v)
mi[15] = get_mutual_info_score(ret_fwd, a15)
mi[15]

0

In [72]:
a15

ticker  Date      
ADP     2016/01/11   -0.563675
ADSK    2016/01/11   -2.313269
AEP     2016/01/11   -0.464739
ATVI    2016/01/11   -0.900735
BIDU    2016/01/11   -1.364391
                        ...   
WBA     2020/12/30   -1.223368
WDAY    2020/12/30   -1.016074
WDC     2020/12/30   -0.189152
XEL     2020/12/30   -1.322648
ZM      2020/12/30   -1.881045
Length: 78239, dtype: float64

alpha 16

In [73]:
def alpha016(h, v):
    """(-1 * rank(ts_cov(rank(high), rank(volume), 5)))"""
    return (-rank(ts_cov(rank(h), rank(v), 5))
            .stack('ticker')
            .swaplevel())

In [74]:
a16=alpha016(h,v)
mi[16] = get_mutual_info_score(ret_fwd, a16)
mi[16]

0.05446966833778788

alpha 17

In [75]:
def alpha017(c, v):
    """(((-1 * rank(ts_rank(close, 10))) * rank(ts_delta(ts_delta(close, 1), 1))) *rank(ts_rank((volume / adv20), 5)))
        """
    adv20 = ts_mean(v, 20)
    return (-rank(ts_rank(c, 10))
            .mul(rank(ts_delta(ts_delta(c, 1), 1)))
            .mul(rank(ts_rank(v.div(adv20), 5)))
            .stack('ticker')
            .swaplevel())

In [76]:
a17=alpha017(c,v)
mi[17] = get_mutual_info_score(ret_fwd, a17)
mi[17]

0.1818362661529478

alpha 18

In [77]:
def alpha018(o, c):
    """-rank((ts_std(abs((close - open)), 5) + (close - open)) +
            ts_corr(close, open,10))
    """
    return (-rank(ts_std(c.sub(o).abs(), 5)
                  .add(c.sub(o))
                  .add(ts_corr(c, o, 10)
                       .replace([-np.inf,
                                 np.inf],
                                np.nan)))
            .stack('ticker')
            .swaplevel())

In [78]:
a18=alpha018(o, c)
mi[18] = get_mutual_info_score(ret_fwd, a18)
mi[18]

0.18411535211620356

alpha 19

In [79]:
def alpha019(c, r):
    """((-1 * sign(((close - ts_lag(close, 7)) + ts_delta(close, 7)))) * 
    (1 + rank((1 + ts_sum(returns,250)))))
    """
    return (-sign(ts_delta(c, 7) + ts_delta(c, 7))
            .mul(1 + rank(1 + ts_sum(r, 250)))
            .stack('ticker')
            .swaplevel())

In [80]:
a19=alpha016(c,r)
mi[19] = get_mutual_info_score(ret_fwd, a19)
mi[19]

0.008594933029275165

alpha 20

In [81]:
def alpha020(o, h, l, c):
    """-rank(open - ts_lag(high, 1)) * 
        rank(open - ts_lag(close, 1)) * 
        rank(open -ts_lag(low, 1))"""
    return (rank(o - ts_lag(h, 1))
            .mul(rank(o - ts_lag(c, 1)))
            .mul(rank(o - ts_lag(l, 1)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [82]:
a20=alpha020(o, h, l, c)
mi[20] = get_mutual_info_score(ret_fwd, a20)
mi[20]

0.07956835070628987

alpha 21

In [83]:
def alpha021(c, v):
    """ts_mean(close, 8) + ts_std(close, 8) < ts_mean(close, 2)
        ? -1
        : (ts_mean(close,2) < ts_mean(close, 8) - ts_std(close, 8)
            ? 1
            : (volume / adv20 < 1
                ? -1
                : 1))
    """
    sma2 = ts_mean(c, 2)
    sma8 = ts_mean(c, 8)
    std8 = ts_std(c, 8)

    cond_1 = sma8.add(std8) < sma2
    cond_2 = sma8.add(std8) > sma2
    cond_3 = v.div(ts_mean(v, 20)) < 1

    val = np.ones_like(c)
    alpha = pd.DataFrame(np.select(condlist=[cond_1, cond_2, cond_3],
                                   choicelist=[-1, 1, -1], default=1),
                         index=c.index,
                         columns=c.columns)

    return (alpha
            .stack('ticker')
            .swaplevel())

In [84]:
a21=alpha021(c, v)
mi[21] = get_mutual_info_score(ret_fwd, a21)
mi[21]

0.0168799973671252

alpha 22

In [85]:
def alpha022(h, c, v):
    """-(ts_delta(ts_corr(high, volume, 5), 5) * 
        rank(ts_std(close, 20)))
    """

    return (ts_delta(ts_corr(h, v, 5)
                     .replace([-np.inf,
                               np.inf],
                              np.nan), 5)
            .mul(rank(ts_std(c, 20)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [86]:
a22=alpha022(h, c, v)
mi[22] = get_mutual_info_score(ret_fwd, a22)
mi[22]

0.03033911164311931

alpha 23

In [87]:
def alpha023(h, c):
    """((ts_mean(high, 20) < high)
            ? (-1 * ts_delta(high, 2))
            : 0
        """

    return (ts_delta(h, 2)
            .mul(-1)
            .where(ts_mean(h, 20) < h, 0)
            .stack('ticker')
            .swaplevel())

In [88]:
a23=alpha023(h, c)
mi[23] = get_mutual_info_score(ret_fwd, a23)
mi[23]

0.07963522798882972

In [89]:
def alpha024(c):
    """((((ts_delta((ts_mean(close, 100)), 100) / ts_lag(close, 100)) <= 0.05)  
        ? (-1 * (close - ts_min(close, 100))) 
        : (-1 * ts_delta(close, 3)))
    """
    cond = ts_delta(ts_mean(c, 100), 100) / ts_lag(c, 100) <= 0.05

    return (c.sub(ts_min(c, 100)).mul(-1).where(cond, -ts_delta(c, 3))
            .stack('ticker')
            .swaplevel())

In [90]:
a24=alpha024(c)
mi[24] = get_mutual_info_score(ret_fwd, a24)
mi[24]

0.08831602508137326

alpha 25

In [91]:
def alpha025(h, c, r, vwap, adv20):
    """rank((-1 * returns) * adv20 * vwap * (high - close))"""
    return (rank(-r.mul(adv20)
                 .mul(vwap)
                 .mul(h.sub(c)))
            .stack('ticker')
            .swaplevel())

In [92]:
a25=alpha025(h, c, r, vwap, adv20)
mi[25] = get_mutual_info_score(ret_fwd, a25)
mi[25]

0.3170214861461784

alpha 26

In [93]:
def alpha026(h, v):
    """(-1 * ts_max(ts_corr(ts_rank(volume, 5), ts_rank(high, 5), 5), 3))"""
    return (ts_max(ts_corr(ts_rank(v, 5), 
                           ts_rank(h, 5), 5)
                   .replace([-np.inf, np.inf], np.nan), 3)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [94]:
a26=alpha026(h,v)
mi[26] = get_mutual_info_score(ret_fwd, a26)
mi[26]

0.0015861492835367308

alpha 27

In [95]:
def alpha027(v, vwap):
    """((0.5 < rank(ts_mean(ts_corr(rank(volume), rank(vwap), 6), 2))) 
            ? -1
            : 1)"""
    cond = rank(ts_mean(ts_corr(rank(v),
                                rank(vwap), 6), 2))
    alpha = cond.notnull().astype(float)
    return (alpha.where(cond <= 0.5, -alpha)
            .stack('ticker')
            .swaplevel())

In [96]:
a27=alpha027(h,v)
mi[27] = get_mutual_info_score(ret_fwd, a27)
mi[27]

0

alpha 28

In [97]:
def alpha028(h, l, c, v, adv20):
    """scale(((ts_corr(adv20, low, 5) + (high + low) / 2) - close))"""
    return (scale(ts_corr(adv20, l, 5)
                  .replace([-np.inf, np.inf], 0)
                  .add(h.add(l).div(2).sub(c)))
            .stack('ticker')
            .swaplevel())

In [98]:
a28=alpha028(h, l, c, v, adv20)
mi[28] = get_mutual_info_score(ret_fwd, a28)
mi[28]

0.10244980280178595

alpha 29

In [99]:
def alpha029(c, r):
    """(ts_min(ts_product(rank(rank(scale(log(ts_sum(ts_min(rank(rank((-1 * 
            rank(ts_delta((close - 1),5))))), 2), 1))))), 1), 5)
        + ts_rank(ts_lag((-1 * returns), 6), 5))
    """
    return (ts_min(rank(rank(scale(log(ts_sum(rank(rank(-rank(ts_delta((c - 1), 5)))), 2))))), 5)
            .add(ts_rank(ts_lag((-1 * r), 6), 5))
            .stack('ticker')
            .swaplevel())

In [100]:
a29=alpha029(c, r)
mi[29] = get_mutual_info_score(ret_fwd, a29)
mi[29]

0.0004984801595782073

alpha 30

In [101]:
def alpha030(c, v):
    """(((1.0 - rank(((sign((close - ts_lag(close, 1))) +
            sign((ts_lag(close, 1) - ts_lag(close, 2)))) +
            sign((ts_lag(close, 2) - ts_lag(close, 3)))))) *
            ts_sum(volume, 5)) / ts_sum(volume, 20))"""
    close_diff = ts_delta(c, 1)
    return (rank(sign(close_diff)
                 .add(sign(ts_lag(close_diff, 1)))
                 .add(sign(ts_lag(close_diff, 2))))
            .mul(-1).add(1)
            .mul(ts_sum(v, 5))
            .div(ts_sum(v, 20))
            .stack('ticker')
            .swaplevel())

In [102]:
a30=alpha030(c, r)
mi[30] = get_mutual_info_score(ret_fwd, a30)
mi[30]

0.04939338017397432

alpha 31

In [103]:
def alpha031(l, c, adv20):
    """((rank(rank(rank(ts_weighted_mean((-1 * rank(rank(ts_delta(close, 10)))), 10)))) +
        rank((-1 * ts_delta(close, 3)))) + sign(scale(ts_corr(adv20, low, 12))))
    """
    return (rank(rank(rank(ts_weighted_mean(rank(rank(ts_delta(c, 10))).mul(-1), 10))))
            .add(rank(ts_delta(c, 3).mul(-1)))
            .add(sign(scale(ts_corr(adv20, l, 12)
                            .replace([-np.inf, np.inf],
                                     np.nan))))
            .stack('ticker')
            .swaplevel())

a31=alpha031(l, c, adv20)
mi[31] = get_mutual_info_score(ret_fwd, a31)
mi[31]

alpha 32

In [104]:
def alpha032(c, vwap):
    """scale(ts_mean(close, 7) - close) + 
        (20 * scale(ts_corr(vwap, ts_lag(close, 5),230)))"""
    return (scale(ts_mean(c, 7).sub(c))
            .add(20 * scale(ts_corr(vwap,
                                    ts_lag(c, 5), 230)))
            .stack('ticker')
            .swaplevel())

In [105]:
a32=alpha032(c, vwap)
mi[32] = get_mutual_info_score(ret_fwd, a32)
mi[32]

0.044041437224579205

alpha 33

In [106]:
def alpha033(o, c):
    """rank(-(1 - (open / close)))"""
    return (rank(o.div(c).mul(-1).add(1).mul(-1))
            .stack('ticker')
            .swaplevel())

In [107]:
a33=alpha032(o, c)
mi[33] = get_mutual_info_score(ret_fwd, a33)
mi[33]

0.03642180000709683

alpha 34

In [108]:
def alpha034(c, r):
    """rank(((1 - rank((ts_std(returns, 2) / ts_std(returns, 5)))) + (1 - rank(ts_delta(close, 1)))))"""

    return (rank(rank(ts_std(r, 2).div(ts_std(r, 5))
                      .replace([-np.inf, np.inf],
                               np.nan))
                 .mul(-1)
                 .sub(rank(ts_delta(c, 1)))
                 .add(2))
            .stack('ticker')
            .swaplevel())

In [109]:
a34=alpha034(c, r)
mi[34] = get_mutual_info_score(ret_fwd, a34)
mi[34]

0.11932989463638943

alpha 35

In [110]:
def alpha035(h, l, c, v, r):
    """((ts_Rank(volume, 32) *
        (1 - ts_Rank(((close + high) - low), 16))) *
        (1 -ts_Rank(returns, 32)))
    """
    return (ts_rank(v, 32)
            .mul(1 - ts_rank(c.add(h).sub(l), 16))
            .mul(1 - ts_rank(r, 32))
            .stack('ticker')
            .swaplevel())

In [111]:
a35=alpha035(h, l, c, v, r)
mi[35] = get_mutual_info_score(ret_fwd, a35)
mi[35]

0.2992729865668804

alpha 36

In [114]:
def alpha036(o, c, v, r, adv20):
    """2.21 * rank(ts_corr((close - open), ts_lag(volume, 1), 15)) +
        0.7 * rank((open- close)) +
        0.73 * rank(ts_Rank(ts_lag(-1 * returns, 6), 5)) +
        rank(abs(ts_corr(vwap,adv20, 6))) +
        0.6 * rank(((ts_mean(close, 200) - open) * (close - open)))
    """

    return (rank(ts_corr(c.sub(o), ts_lag(v, 1), 15)).mul(2.21)
            .add(rank(o.sub(c)).mul(.7))
            .add(rank(ts_rank(ts_lag(-r, 6), 5)).mul(0.73))
            .add(rank(abs(ts_corr(vwap, adv20, 6))))
            .add(rank(ts_mean(c, 200).sub(o).mul(c.sub(o))).mul(0.6))
            .stack('ticker')
            .swaplevel())

In [115]:
a36=alpha036(o, c, v, r, adv20)
mi[36] = get_mutual_info_score(ret_fwd, a36)
mi[36]

0.023011087056008073

alpha 37

In [116]:
def alpha037(o, c):
    """(rank(ts_corr(ts_lag((open - close), 1), close, 200)) + rank((open - close)))"""
    return (rank(ts_corr(ts_lag(o.sub(c), 1), c, 200))
            .add(rank(o.sub(c)))
            .stack('ticker')
            .swaplevel())

In [117]:
a37=alpha037(o, c)
mi[37] = get_mutual_info_score(ret_fwd, a37)
mi[37]

0.09316995473424416

alpha 38

In [118]:
def alpha038(o, c):
    """"-1 * rank(ts_rank(close, 10)) * rank(close / open)"""
    return (rank(ts_rank(o, 10))
            .mul(rank(c.div(o).replace([-np.inf, np.inf], np.nan)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [119]:
a38=alpha038(o, c)
mi[38] = get_mutual_info_score(ret_fwd, a38)
mi[38]

0.15426331980773567

alpha 39

In [120]:
def alpha039(c, v, r, adv20):
    """-rank(ts_delta(close, 7) * (1 - rank(ts_weighted_mean(volume / adv20, 9)))) * 
            (1 + rank(ts_sum(returns, 250)))"""
    return (rank(ts_delta(c, 7).mul(rank(ts_weighted_mean(v.div(adv20), 9)).mul(-1).add(1))).mul(-1)
            .mul(rank(ts_mean(r, 250).add(1)))
            .stack('ticker')
            .swaplevel())

a39=alpha039(c, v, r, adv20)
mi[39] = get_mutual_info_score(ret_fwd, a39)
mi[39]

alpha 40

In [121]:
def alpha040(h, v):
    """((-1 * rank(ts_std(high, 10))) * ts_corr(high, volume, 10))
    """
    return (rank(ts_std(h, 10))
            .mul(ts_corr(h, v, 10))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [122]:
a40=alpha040(h, v)
mi[40] = get_mutual_info_score(ret_fwd, a40)
mi[40]

0

alpha 41

In [123]:
def alpha041(h, l, vwap):
    """power(high * low, 0.5 - vwap"""
    return (power(h.mul(l), 0.5)
            .sub(vwap)
            .stack('ticker')
            .swaplevel())

In [124]:
a41=alpha041(h, l, vwap)
mi[41] = get_mutual_info_score(ret_fwd, a41)
mi[41]

0.014310421439685861

alpha 42

In [125]:
def alpha042(c, vwap):
    """rank(vwap - close) / rank(vwap + close)"""
    return (rank(vwap.sub(c))
            .div(rank(vwap.add(c)))
            .stack('ticker')
            .swaplevel())

In [126]:
a42=alpha042(c, vwap)
mi[42] = get_mutual_info_score(ret_fwd, a42)
mi[42]

0.08717629454619802

alpha 43

In [127]:
def alpha043(c, adv20):
    """(ts_rank((volume / adv20), 20) * ts_rank((-1 * ts_delta(close, 7)), 8))"""

    return (ts_rank(v.div(adv20), 20)
            .mul(ts_rank(ts_delta(c, 7).mul(-1), 8))
            .stack('ticker')
            .swaplevel())

In [128]:
a43=alpha043(c, adv20)
mi[43] = get_mutual_info_score(ret_fwd, a43)
mi[43]

0.09617986838086656

alpha 44

In [129]:
def alpha044(h, v):
    """-ts_corr(high, rank(volume), 5)"""

    return (ts_corr(h, rank(v), 5)
            .replace([-np.inf, np.inf], np.nan)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [130]:
a44=alpha044(h, v)
mi[44] = get_mutual_info_score(ret_fwd, a44)
mi[44]

0

alpha 45

In [131]:
def alpha045(c, v):
    """-(rank((ts_mean(ts_lag(close, 5), 20)) * 
        ts_corr(close, volume, 2)) *
        rank(ts_corr(ts_sum(close, 5), ts_sum(close, 20), 2)))"""

    return (rank(ts_mean(ts_lag(c, 5), 20))
            .mul(ts_corr(c, v, 2)
                 .replace([-np.inf, np.inf], np.nan))
            .mul(rank(ts_corr(ts_sum(c, 5),
                              ts_sum(c, 20), 2)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [132]:
a45=alpha045(c, v)
mi[45] = get_mutual_info_score(ret_fwd, a45)
mi[45]

0.04415608862691922

alpha 46

In [133]:
def alpha046(c):
    """0.25 < ts_lag(ts_delta(close, 10), 10) / 10 - ts_delta(close, 10) / 10
            ? -1
            : ((ts_lag(ts_delta(close, 10), 10) / 10 - ts_delta(close, 10) / 10 < 0) 
                ? 1 
                : -ts_delta(close, 1))
    """

    cond = ts_lag(ts_delta(c, 10), 10).div(10).sub(ts_delta(c, 10).div(10))
    alpha = pd.DataFrame(-np.ones_like(cond),
                         index=c.index,
                         columns=c.columns)
    alpha[cond.isnull()] = np.nan
    return (cond.where(cond > 0.25,
                       -alpha.where(cond < 0,
                       -ts_delta(c, 1)))
            .stack('ticker')
            .swaplevel())

In [134]:
a46=alpha046(c)
mi[46] = get_mutual_info_score(ret_fwd, a46)
mi[46]

0.15757703714642535

alpha 47

In [135]:
def alpha047(h, c, v, vwap, adv20):
    """((((rank((1 / close)) * volume) / adv20) * ((high * rank((high - close))) / 
        (ts_sum(high, 5) /5))) - rank((vwap - ts_lag(vwap, 5))))"""

    return (rank(c.pow(-1)).mul(v).div(adv20)
            .mul(h.mul(rank(h.sub(c))
                       .div(ts_mean(h, 5)))
                 .sub(rank(ts_delta(vwap, 5))))
            .stack('ticker')
            .swaplevel())

In [136]:
a47=alpha047(h, c, v, vwap, adv20)
mi[47] = get_mutual_info_score(ret_fwd, a47)
mi[47]

0.06775902141782586

alpha 49

In [137]:
def alpha049(c):
    """ts_delta(ts_lag(close, 10), 10).div(10).sub(ts_delta(close, 10).div(10)) < -0.1 * c
        ? 1 
        : -ts_delta(close, 1)"""
    cond = (ts_delta(ts_lag(c, 10), 10).div(10)
            .sub(ts_delta(c, 10).div(10)) >= -0.1 * c)
    return (-ts_delta(c, 1)
            .where(cond, 1)
            .stack('ticker')
            .swaplevel())

In [138]:
a49=alpha049(c)
mi[49] = get_mutual_info_score(ret_fwd, a49)
mi[49]

1.1829999722567193

alpha 50

In [139]:
def alpha050(v, vwap):
    """-ts_max(rank(ts_corr(rank(volume), rank(vwap), 5)), 5)"""
    return (ts_max(rank(ts_corr(rank(v),
                                rank(vwap), 5)), 5)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [140]:
a50=alpha050(v, vwap)
mi[50] = get_mutual_info_score(ret_fwd, a50)
mi[50]

0

alpha 51

In [141]:
def alpha051(c):
    """ts_delta(ts_lag(close, 10), 10).div(10).sub(ts_delta(close, 10).div(10)) < -0.05 * c
        ? 1 
        : -ts_delta(close, 1)"""
    cond = (ts_delta(ts_lag(c, 10), 10).div(10)
            .sub(ts_delta(c, 10).div(10)) >= -0.05 * c)
    return (-ts_delta(c, 1)
            .where(cond, 1)
            .stack('ticker')
            .swaplevel())

In [142]:
a51=alpha051(c)
mi[51] = get_mutual_info_score(ret_fwd, a51)
mi[51]

1.1505085594795275

alpha 52

In [143]:
def alpha052(l, v, r):
    """(ts_lag(ts_min(low, 5), 5) - ts_min(low, 5)) * 
        rank((ts_sum(returns, 240) - ts_sum(returns, 20)) / 220) * 
        ts_rank(volume, 5)
    """
    return (ts_delta(ts_min(l, 5), 5)
            .mul(rank(ts_sum(r, 240)
                      .sub(ts_sum(r, 20))
                      .div(220)))
            .mul(ts_rank(v, 5))
            .stack('ticker')
            .swaplevel())

In [144]:
a52=alpha052(l, v, r)
mi[52] = get_mutual_info_score(ret_fwd, a52)
mi[52]

0.017014302015681437

alpha 53

In [145]:
def alpha053(h, l, c):
    """-1 * ts_delta(1 - (high - close) / (close - low), 9)"""
    inner = (c.sub(l)).add(1e-6)
    return (ts_delta(h.sub(c)
                     .mul(-1).add(1)
                     .div(c.sub(l)
                          .add(1e-6)), 9)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [146]:
a53=alpha053(h, l, c)
mi[53] = get_mutual_info_score(ret_fwd, a53)
mi[53]

0

alpha 54

In [147]:
def alpha054(o, h, l, c):
    """-(low - close) * power(open, 5) / ((low - high) * power(close, 5))"""
    return (l.sub(c).mul(o.pow(5)).mul(-1)
            .div(l.sub(h).replace(0, -0.0001).mul(c ** 5))
            .stack('ticker')
            .swaplevel())

In [148]:
a54=alpha054(o, h, l, c)
mi[54] = get_mutual_info_score(ret_fwd, a54)
mi[54]

0.2453871581356677

alpha 55

In [149]:
def alpha055(h, l, c):
    """(-1 * ts_corr(rank(((close - ts_min(low, 12)) / 
                            (ts_max(high, 12) - ts_min(low,12)))), 
                    rank(volume), 6))"""

    return (ts_corr(rank(c.sub(ts_min(l, 12))
                         .div(ts_max(h, 12).sub(ts_min(l, 12))
                              .replace(0, 1e-6))),
                    rank(v), 6)
            .replace([-np.inf, np.inf], np.nan)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [150]:
a55=alpha055(h, l, c)
mi[55] = get_mutual_info_score(ret_fwd, a55)
mi[55]

0.03809947897622479

alpha 57

In [151]:
def alpha057(c, vwap):
    """-(close - vwap) / ts_weighted_mean(rank(ts_argmax(close, 30)), 2)"""
    return (c.sub(vwap.add(1e-5))
            .div(ts_weighted_mean(rank(ts_argmax(c, 30)))).mul(-1)
            .stack('ticker')
            .swaplevel())

a57=alpha057(c, vwap)
mi[57] = get_mutual_info_score(ret_fwd, a57)
mi[57]

alpha 60

In [152]:
def alpha060(l, h, c, v):
    """-((2 * scale(rank(((((close - low) - (high - close)) / (high - low)) * volume)))) -scale(rank(ts_argmax(close, 10))))"""
    return (scale(rank(c.mul(2).sub(l).sub(h)
                       .div(h.sub(l).replace(0, 1e-5))
                       .mul(v))).mul(2)
            .sub(scale(rank(ts_argmax(c, 10)))).mul(-1)
            .stack('ticker')
            .swaplevel())

In [153]:
a60=alpha060(l, h, c, v)
mi[60] = get_mutual_info_score(ret_fwd, a60)
mi[60]

0.037003056918062605

alpha 61

In [154]:
def alpha061(v, vwap):
    """rank((vwap - ts_min(vwap, 16))) < rank(ts_corr(vwap, adv180, 17))"""

    return (rank(vwap.sub(ts_min(vwap, 16)))
            .lt(rank(ts_corr(vwap, ts_mean(v, 180), 18)))
            .astype(int)
            .stack('ticker')
            .swaplevel())

In [155]:
a61=alpha061(v, vwap)
mi[61] = get_mutual_info_score(ret_fwd, a61)
mi[61]

0

alpha 62

In [156]:
def alpha062(o, h, l, vwap, adv20):
    """((rank(ts_corr(vwap, ts_sum(adv20, 22.4101), 9.91009)) < 
    rank(((rank(open) + rank(open)) < (rank(((high + low) / 2)) + rank(high))))) * -1)"""
    return (rank(ts_corr(vwap, ts_sum(adv20, 22), 9))
            .lt(rank(
                rank(o).mul(2))
                .lt(rank(h.add(l).div(2))
                    .add(rank(h))))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [157]:
a62=alpha062(o, h, l, vwap, adv20)
mi[62] = get_mutual_info_score(ret_fwd, a62)
mi[62]

0

alpha 64

In [158]:
def alpha064(o, h, l, v, vwap):
    """((rank(ts_corr(ts_sum(((open * 0.178404) + (low * (1 - 0.178404))), 12.7054),ts_sum(adv120, 12.7054), 16.6208)) <
        rank(ts_delta(((((high + low) / 2) * 0.178404) + (vwap * (1 -0.178404))), 3.69741))) * -1)"""
    w = 0.178404
    return (rank(ts_corr(ts_sum(o.mul(w).add(l.mul(1 - w)), 12),
                         ts_sum(ts_mean(v, 120), 12), 16))
            .lt(rank(ts_delta(h.add(l).div(2).mul(w)
                               .add(vwap.mul(1 - w)), 3)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [159]:
a64=alpha062(o, h, l, v, vwap)
mi[64] = get_mutual_info_score(ret_fwd, a64)
mi[64]

0.009081108297391172

alpha 65

In [160]:
def alpha065(o, v, vwap):
    """((rank(ts_corr(((open * 0.00817205) + (vwap * (1 - 0.00817205))), 
                        ts_sum(adv60,8.6911), 6.40374)) < 
        rank((open - ts_min(open, 13.635)))) * -1)
    """
    w = 0.00817205
    return (rank(ts_corr(o.mul(w).add(vwap.mul(1 - w)),
                         ts_mean(ts_mean(v, 60), 9), 6))
            .lt(rank(o.sub(ts_min(o, 13))))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [161]:
a65=alpha065(o, v, vwap)
mi[65] = get_mutual_info_score(ret_fwd, a65)
mi[65]

0.0036273620339530854

alpha 66

def alpha066(l, h, vwap):
    """((rank(ts_weighted_mean(ts_delta(vwap, 3.51013), 7.23052)) +
        ts_rank(ts_weighted_mean(((((low* 0.96633) + (low *
                                    (1 - 0.96633))) - vwap) /
                                    (open - ((high + low) / 2))), 11.4157), 6.72611)) * -1)
    """
    w = 0.96633
    return (rank(ts_weighted_mean(ts_delta(vwap, 4), 7))
            .add(ts_rank(ts_weighted_mean(l.mul(w).add(l.mul(1 - w))
                                           .sub(vwap)
                                           .div(o.sub(h.add(l).div(2)).add(1e-3)), 11), 7))
            .mul(-1)
            .stack('ticker')
            .swaplevel())



a66=alpha066(l, h, vwap)
mi[66] = get_mutual_info_score(ret_fwd, a66)
mi[66]


alpha 68

In [162]:
def alpha068(h, c, v):
    """((ts_rank(ts_corr(rank(high), rank(adv15), 8.91644), 13.9333) <
        rank(ts_delta(((close * 0.518371) + (low * (1 - 0.518371))), 1.06157))) * -1)
    """
    w = 0.518371
    return (ts_rank(ts_corr(rank(h), rank(ts_mean(v, 15)), 9), 14)
            .lt(rank(ts_delta(c.mul(w).add(l.mul(1 - w)), 1)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [163]:
a68=alpha068(h, c, v)
mi[68] = get_mutual_info_score(ret_fwd, a68)
mi[68]

0.0001909834256347276

alpha 71

def alpha071(o, c, v, vwap):
    """max(ts_rank(ts_weighted_mean(ts_corr(ts_rank(close, 3.43976), ts_rank(adv180,12.0647), 18.0175), 4.20501), 15.6948), 
            ts_rank(ts_weighted_mean((rank(((low + open) - (vwap +vwap)))^2), 16.4662), 4.4388))"""

    s1 = (ts_rank(ts_weighted_mean(ts_corr(ts_rank(c, 3),
                                           ts_rank(ts_mean(v, 180), 12), 18), 4), 16))
    s2 = (ts_rank(ts_weighted_mean(rank(l.add(o).
                                        sub(vwap.mul(2)))
                                   .pow(2), 16), 4))
    return (s1.where(s1 > s2, s2)
            .stack('ticker')
            .swaplevel())

a71=alpha071(o, c, v, vwap)
mi[71] = get_mutual_info_score(ret_fwd, a71)
mi[71]

alpha 72

def alpha072(h, l, v, vwap):
    """(rank(ts_weighted_mean(ts_corr(((high + low) / 2), adv40, 8.93345), 10.1519)) /
        rank(ts_weighted_mean(ts_corr(ts_rank(vwap, 3.72469), ts_rank(volume, 18.5188), 6.86671), 2.95011)))
    """
    return (rank(ts_weighted_mean(ts_corr(h.add(l).div(2), ts_mean(v, 40), 9), 10))
            .div(rank(ts_weighted_mean(ts_corr(ts_rank(vwap, 3), ts_rank(v, 18), 6), 2)))
            .stack('ticker')
            .swaplevel())

a72=alpha072(h, l, v, vwap)
mi[72] = get_mutual_info_score(ret_fwd, a72)
mi[72]

alpha 73

def alpha073(l, vwap):
    """(max(rank(ts_weighted_mean(ts_delta(vwap, 4.72775), 2.91864)),
        ts_rank(ts_weighted_mean(((ts_delta(((open * 0.147155) + 
            (low * (1 - 0.147155))), 2.03608) / 
            ((open *0.147155) + (low * (1 - 0.147155)))) * -1), 3.33829), 16.7411)) * -1)
        """
    w = 0.147155
    s1 = rank(ts_weighted_mean(ts_delta(vwap, 5), 3))
    s2 = (ts_rank(ts_weighted_mean(ts_delta(o.mul(w).add(l.mul(1 - w)), 2)
                                   .div(o.mul(w).add(l.mul(1 - w)).mul(-1)), 3), 16))

    print(s2)
    return (s1.where(s1 > s2, s2)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

a73=alpha073(l,vwap)
mi[73] = get_mutual_info_score(ret_fwd, a73)
mi[73]

alpha 74

In [164]:
def alpha074(v, vwap):
    """((rank(ts_corr(close, ts_sum(adv30, 37.4843), 15.1365)) <
        rank(ts_corr(rank(((high * 0.0261661) + (vwap * (1 - 0.0261661)))), rank(volume), 11.4791)))* -1)"""

    w = 0.0261661
    return (rank(ts_corr(c, ts_mean(ts_mean(v, 30), 37), 15))
            .lt(rank(ts_corr(rank(h.mul(w).add(vwap.mul(1 - w))), rank(v), 11)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [165]:
a74=alpha074(v,vwap)
mi[74] = get_mutual_info_score(ret_fwd, a74)
mi[74]

0.0027278740897338505

alpha 75

In [166]:
def alpha075(l, v, vwap):
    """(rank(ts_corr(vwap, volume, 4.24304)) < 
        rank(ts_corr(rank(low), rank(adv50),12.4413)))
    """

    return (rank(ts_corr(vwap, v, 4))
            .lt(rank(ts_corr(rank(l), rank(ts_mean(v, 50)), 12)))
            .astype(int)
            .stack('ticker')
            .swaplevel())

In [167]:
a75=alpha075(l, v,vwap)
mi[75] = get_mutual_info_score(ret_fwd, a75)
mi[75]

0.005654881043354321

alpha 77

def alpha077(l, h, vwap):
    """min(rank(ts_weighted_mean(((((high + low) / 2) + high) - (vwap + high)), 20.0451)),
            rank(ts_weighted_mean(ts_corr(((high + low) / 2), adv40, 3.1614), 5.64125)))
    """

    s1 = rank(ts_weighted_mean(h.add(l).div(2).sub(vwap), 20))
    s2 = rank(ts_weighted_mean(ts_corr(h.add(l).div(2), ts_mean(v, 40), 3), 5))
    return (s1.where(s1 < s2, s2)
            .stack('ticker')
            .swaplevel())

a77=alpha077(l, h, vwap)
mi[77] = get_mutual_info_score(ret_fwd, a77)
mi[77]

alpha 78

In [168]:
def alpha078(l, v, vwap):
    """(rank(ts_corr(ts_sum(((low * 0.352233) + (vwap * (1 - 0.352233))), 19.7428),
        ts_sum(adv40, 19.7428), 6.83313))^rank(ts_corr(rank(vwap), rank(volume), 5.77492)))"""

    w = 0.352233
    return (rank(ts_corr(ts_sum((l.mul(w).add(vwap.mul(1 - w))), 19),
                         ts_sum(ts_mean(v, 40), 19), 6))
            .pow(rank(ts_corr(rank(vwap), rank(v), 5)))
            .stack('ticker')
            .swaplevel())

In [169]:
a78=alpha078(l, v, vwap)
mi[78] = get_mutual_info_score(ret_fwd, a78)
mi[78]

0.0249474961052365

alpha 81

In [170]:
def alpha081(v, vwap):
    """-(rank(log(ts_product(rank((rank(ts_corr(vwap, ts_sum(adv10, 49.6054),8.47743))^4)), 14.9655))) <
        rank(ts_corr(rank(vwap), rank(volume), 5.07914)))"""

    return (rank(log(ts_product(rank(rank(ts_corr(vwap,
                                                  ts_sum(ts_mean(v, 10), 50), 8))
                                     .pow(4)), 15)))
            .lt(rank(ts_corr(rank(vwap), rank(v), 5)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [171]:
a81=alpha081(v, vwap)
mi[81] = get_mutual_info_score(ret_fwd, a81)
mi[81]

0

alpha 83

In [172]:
def alpha083(h, l, c):
    """(rank(ts_lag((high - low) / ts_mean(close, 5), 2)) * rank(rank(volume)) / 
            (((high - low) / ts_mean(close, 5) / (vwap - close)))
    """
    s = h.sub(l).div(ts_mean(c, 5))

    return (rank(rank(ts_lag(s, 2))
                 .mul(rank(rank(v)))
                 .div(s).div(vwap.sub(c).add(1e-3)))
            .stack('ticker')
            .swaplevel()
            .replace((np.inf, -np.inf), np.nan))

In [173]:
a83=alpha083(h, l, c)
mi[83] = get_mutual_info_score(ret_fwd, a83)
mi[83]

0.06874178504593953

alpha 84

In [174]:
def alpha084(c, vwap):
    """power(ts_rank((vwap - ts_max(vwap, 15.3217)), 20.7127), 
        ts_delta(close,4.96796))"""
    return (rank(power(ts_rank(vwap.sub(ts_max(vwap, 15)), 20),
                       ts_delta(c, 6)))
            .stack('ticker')
            .swaplevel())

In [175]:
a84=alpha084(c, vwap)
mi[84] = get_mutual_info_score(ret_fwd, a84)
mi[84]

0.006621244551926253

alpha 85

In [176]:
def alpha085(l, v):
    """power(rank(ts_corr(((high * 0.876703) + (close * (1 - 0.876703))), adv30,9.61331)),
        rank(ts_corr(ts_rank(((high + low) / 2), 3.70596), 
                     ts_rank(volume, 10.1595),7.11408)))
                     """
    w = 0.876703
    return (rank(ts_corr(h.mul(w).add(c.mul(1 - w)), ts_mean(v, 30), 10))
            .pow(rank(ts_corr(ts_rank(h.add(l).div(2), 4),
                              ts_rank(v, 10), 7)))
            .stack('ticker')
            .swaplevel())

In [177]:
a85=alpha085(l, v)
mi[85] = get_mutual_info_score(ret_fwd, a85)
mi[85]

0.018306044249821074

alpha 86

In [178]:
def alpha086(c, v, vwap):
    """((ts_rank(ts_corr(close, ts_sum(adv20, 14.7444), 6.00049), 20.4195) < 
        rank(((open + close) - (vwap + open)))) * -1)
    """
    return (ts_rank(ts_corr(c, ts_mean(ts_mean(v, 20), 15), 6), 20)
            .lt(rank(c.sub(vwap)))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [179]:
a86=alpha086(c, v, vwap)
mi[86] = get_mutual_info_score(ret_fwd, a86)
mi[86]

0.0009775110370372486

alpha 88

def alpha088(o, h, l, c, v):
    """min(rank(ts_weighted_mean(((rank(open) + rank(low)) - (rank(high) + rank(close))),8.06882)), 
        ts_rank(ts_weighted_mean(ts_corr(ts_rank(close, 8.44728), 
                ts_rank(adv60,20.6966), 8.01266), 6.65053), 2.61957))"""

    s1 = (rank(ts_weighted_mean(rank(o)
                                .add(rank(l))
                                .sub(rank(h))
                                .add(rank(c)), 8)))
    s2 = ts_rank(ts_weighted_mean(ts_corr(ts_rank(c, 8),
                                          ts_rank(ts_mean(v, 60), 20), 8), 6), 2)

    return (s1.where(s1 < s2, s2)
            .stack('ticker')
            .swaplevel())

a88=alpha088(o, h, l, c, v)
mi[88] = get_mutual_info_score(ret_fwd, a88)
mi[88]

alpha  92

In [180]:
def alpha092(o, l, c, v):
    """min(ts_rank(ts_weighted_mean(((((high + low) / 2) + close) < (low + open)), 14.7221),18.8683), 
            ts_rank(ts_weighted_mean(ts_corr(rank(low), rank(adv30), 7.58555), 6.94024),6.80584))
    """
    p1 = ts_rank(ts_weighted_mean(h.add(l).div(2).add(c).lt(l.add(o)), 15), 18)
    p2 = ts_rank(ts_weighted_mean(ts_corr(rank(l), rank(ts_mean(v, 30)), 7), 6), 6)

    return (p1.where(p1<p2, p2)
            .stack('ticker')
            .swaplevel())

a92=alpha092(o, l, c, v)
mi[92] = get_mutual_info_score(ret_fwd, a92)
mi[92]

alpha 94

In [181]:
def alpha094(v, vwap):
    """((rank((vwap - ts_min(vwap, 11.5783)))^ts_rank(ts_corr(ts_rank(vwap,19.6462), 
        ts_rank(adv60, 4.02992), 18.0926), 2.70756)) * -1)
    """

    return (rank(vwap.sub(ts_min(vwap, 11)))
            .pow(ts_rank(ts_corr(ts_rank(vwap, 20),
                                 ts_rank(ts_mean(v, 60), 4), 18), 2))
            .mul(-1)
            .stack('ticker')
            .swaplevel())

In [182]:
a94=alpha094(v, vwap)
mi[94] = get_mutual_info_score(ret_fwd, a94)
mi[94]

0

alpha 95

In [183]:
def alpha095(o, l, v):
    """(rank((open - ts_min(open, 12.4105))) < 
        ts_rank((rank(ts_corr(ts_sum(((high + low)/ 2), 19.1351), ts_sum(adv40, 19.1351), 12.8742))^5), 11.7584))
    """
    
    return (rank(o.sub(ts_min(o, 12)))
            .lt(ts_rank(rank(ts_corr(ts_mean(h.add(l).div(2), 19),
                                     ts_sum(ts_mean(v, 40), 19), 13).pow(5)), 12))
            .astype(int)
            .stack('ticker')
            .swaplevel())

In [184]:
a95=alpha095(o, l, v)
mi[95] = get_mutual_info_score(ret_fwd, a95)
mi[95]

0.0008732835999019084

alpha 96

In [185]:
def alpha096(c, v, vwap):
    """(max(ts_rank(ts_weighted_mean(ts_corr(rank(vwap), rank(volume), 5.83878),4.16783), 8.38151), 
        ts_rank(ts_weighted_mean(ts_argmax(ts_corr(ts_rank(close, 7.45404), ts_rank(adv60, 4.13242), 3.65459), 12.6556), 14.0365), 13.4143)) * -1)"""
    
    s1 = ts_rank(ts_weighted_mean(ts_corr(rank(vwap), rank(v), 10), 4), 8)
    s2 = ts_rank(ts_weighted_mean(ts_argmax(ts_corr(ts_rank(c, 7),
                                                    ts_rank(ts_mean(v, 60), 10), 10), 12), 14), 13)
    return (s1.where(s1 > s2, s2)
            .mul(-1)
            .stack('ticker')
            .swaplevel())

a96=alpha096(c, v, vwap)
mi[96] = get_mutual_info_score(ret_fwd, a96)
mi[96]

alpha 98

In [186]:
def alpha098(o, v, vwap):
    """(rank(ts_weighted_mean(ts_corr(vwap, ts_sum(adv5, 26.4719), 4.58418), 7.18088)) -
        rank(ts_weighted_mean(ts_tank(ts_argmin(ts_corr(rank(open), 
        rank(adv15), 20.8187), 8.62571),6.95668), 8.07206)))
    """
    adv5 = ts_mean(v, 5)
    adv15 = ts_mean(v, 15)
    return (rank(ts_weighted_mean(ts_corr(vwap, ts_mean(adv5, 26), 4), 7))
            .sub(rank(ts_weighted_mean(ts_rank(ts_argmin(ts_corr(rank(o),
                                                                 rank(adv15), 20), 8), 6))))
            .stack('ticker')
            .swaplevel())

a98=alpha098(o, v, vwap)
mi[98] = get_mutual_info_score(ret_fwd, a98)
mi[98]

alpha 99

In [187]:
def alpha099(l, v):
    """((rank(ts_corr(ts_sum(((high + low) / 2), 19.8975), 
                    ts_sum(adv60, 19.8975), 8.8136)) <
                    rank(ts_corr(low, volume, 6.28259))) * -1)"""

    return ((rank(ts_corr(ts_sum((h.add(l).div(2)), 19),
                          ts_sum(ts_mean(v, 60), 19), 8))
             .lt(rank(ts_corr(l, v, 6)))
             .mul(-1))
            .stack('ticker')
            .swaplevel())

In [188]:
a99=alpha099(l, v)
mi[99] = get_mutual_info_score(ret_fwd, a99)
mi[99]

0

alpha 101

In [189]:
def alpha101(o, h, l, c):
    """((close - open) / ((high - low) + .001))"""
    return (c.sub(o).div(h.sub(l).add(1e-3))
            .stack('ticker')
            .swaplevel())

In [190]:
a101=alpha101(o, h, l, c)
mi[101] = get_mutual_info_score(ret_fwd, a101)
mi[101]

0.5071795102045429

# alpha selection

In [191]:
mi

{2: 0.01335516484827437,
 3: 0.026809590583944942,
 4: 0.014025875163913515,
 5: 0.1038152595983246,
 6: 0,
 7: 0.043322385340433645,
 8: 0.06151734709144652,
 9: 0.8289222360887112,
 10: 0.2536335703473922,
 11: 0.08660409405334013,
 12: 0.6136162474996048,
 13: 0.07995684992759156,
 14: 0.03861108062034946,
 15: 0,
 16: 0.05446966833778788,
 17: 0.1818362661529478,
 18: 0.18411535211620356,
 19: 0.008594933029275165,
 20: 0.07956835070628987,
 21: 0.0168799973671252,
 22: 0.03033911164311931,
 23: 0.07963522798882972,
 24: 0.08831602508137326,
 25: 0.3170214861461784,
 26: 0.0015861492835367308,
 27: 0,
 28: 0.10244980280178595,
 29: 0.0004984801595782073,
 30: 0.04939338017397432,
 32: 0.044041437224579205,
 33: 0.03642180000709683,
 34: 0.11932989463638943,
 35: 0.2992729865668804,
 36: 0.023011087056008073,
 37: 0.09316995473424416,
 38: 0.15426331980773567,
 40: 0,
 41: 0.014310421439685861,
 42: 0.08717629454619802,
 43: 0.09617986838086656,
 44: 0,
 45: 0.04415608862691922,
 46

In [221]:
sorted_mi = dict(sorted(mi.items(), reverse=True, key=lambda item: item[1]))
sorted_mi

{49: 1.1829999722567193,
 51: 1.1505085594795275,
 9: 0.8289222360887112,
 12: 0.6136162474996048,
 101: 0.5071795102045429,
 25: 0.3170214861461784,
 35: 0.2992729865668804,
 10: 0.2536335703473922,
 54: 0.2453871581356677,
 18: 0.18411535211620356,
 17: 0.1818362661529478,
 46: 0.15757703714642535,
 38: 0.15426331980773567,
 34: 0.11932989463638943,
 5: 0.1038152595983246,
 28: 0.10244980280178595,
 43: 0.09617986838086656,
 37: 0.09316995473424416,
 24: 0.08831602508137326,
 42: 0.08717629454619802,
 11: 0.08660409405334013,
 13: 0.07995684992759156,
 23: 0.07963522798882972,
 20: 0.07956835070628987,
 83: 0.06874178504593953,
 47: 0.06775902141782586,
 8: 0.06151734709144652,
 16: 0.05446966833778788,
 30: 0.04939338017397432,
 45: 0.04415608862691922,
 32: 0.044041437224579205,
 7: 0.043322385340433645,
 14: 0.03861108062034946,
 55: 0.03809947897622479,
 60: 0.037003056918062605,
 33: 0.03642180000709683,
 22: 0.03033911164311931,
 3: 0.026809590583944942,
 78: 0.0249474961052365

In [193]:
a2

ticker  Date      
AAPL    2016/01/14   -0.414025
ADBE    2016/01/14    0.493090
ADI     2016/01/14    0.150472
ADP     2016/01/14    0.421144
ADSK    2016/01/14    0.515867
                        ...   
WDAY    2020/12/30   -0.206533
WDC     2020/12/30   -0.131785
XEL     2020/12/30   -0.965703
ZBRA    2020/12/30   -0.633176
ZM      2020/12/30    0.361023
Length: 118709, dtype: float64

In [237]:
lst=[]
df_lst=[]
for i in sorted_mi.keys():
    lst.append('a'+str(i))
    df_lst.append(locals()['a'+str(i)])

In [242]:
result = pd.concat(df_lst, axis=1, keys = lst)

In [301]:
result = result[result.index.get_level_values('Date') >='2016/02/01']

In [302]:
result

Unnamed: 0_level_0,Unnamed: 1_level_0,a49,a51,a9,a12,a101,a25,a35,a10,a54,a18,...,a27,a40,a44,a50,a53,a61,a62,a81,a94,a99
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,2016/03/29,-0.622499,-0.622499,-0.622499,-0.622499,0.957446,0.274725,3915.0,0.549451,-0.843840,-0.208791,...,1.0,0.010058,-0.000000,-0.543210,3.268207,0,-1,0,,0
AAPL,2016/03/30,-0.469999,-0.469999,-0.469999,-0.469999,0.498901,0.054945,11745.0,0.241758,-0.505927,-0.648352,...,1.0,-0.106466,-0.000000,-0.532051,-0.618663,0,-1,0,,0
AAPL,2016/03/31,0.142500,0.142500,0.142500,-0.142500,-0.712891,0.868132,260.0,0.593407,-0.111504,-0.318681,...,-1.0,-0.091357,0.503659,-0.532051,-24.125448,0,-1,0,,0
AAPL,2016/04/01,-0.250000,-0.250000,-0.250000,0.250000,0.670730,0.593407,1320.0,0.472527,-0.940934,-0.340659,...,-1.0,-0.066172,0.550072,-0.518987,2.455888,0,-1,0,,0
AAPL,2016/04/04,-0.282501,-0.282501,-0.282501,-0.282501,0.363827,0.065934,9000.0,0.142857,-0.428940,-0.802198,...,-1.0,-0.195386,0.555592,-0.518987,-0.511982,0,-1,0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZM,2020/12/23,25.119995,25.119995,25.119995,25.119995,-0.754339,0.990000,-0.0,0.990000,-0.277880,-0.020000,...,-0.0,-0.174357,-0.288964,-0.516129,3.341123,1,-1,0,-0.1444,0
ZM,2020/12/24,8.709991,8.709991,8.709991,-8.709991,-0.881488,0.990000,-0.0,0.990000,-0.019300,-0.040000,...,-0.0,-0.294808,0.133738,-0.543478,51.285028,1,-1,0,-0.0036,0
ZM,2020/12/28,23.779999,23.779999,23.779999,23.779999,-0.783561,1.000000,-0.0,0.990000,-0.127136,-0.020000,...,-0.0,0.156586,0.407325,-0.543478,8.826669,1,-1,0,-0.0350,0
ZM,2020/12/29,-2.359985,-2.359985,-2.359985,2.359985,0.316015,0.060000,-0.0,0.100000,-0.354365,-0.960000,...,1.0,0.105578,0.442800,-0.543478,-0.848972,1,-1,0,-0.0650,0


In [303]:
#result.to_csv('initial alpha result.csv')

In [304]:
selected_alpha = lst[:40]

In [305]:
selected_result = result[selected_alpha]

In [306]:
selected_result

Unnamed: 0_level_0,Unnamed: 1_level_0,a49,a51,a9,a12,a101,a25,a35,a10,a54,a18,...,a32,a7,a14,a55,a60,a33,a22,a3,a78,a36
ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,2016/03/29,-0.622499,-0.622499,-0.622499,-0.622499,0.957446,0.274725,3915.0,0.549451,-0.843840,-0.208791,...,,1.0,0.131729,-0.000000,-0.026278,,-0.251499,,,
AAPL,2016/03/30,-0.469999,-0.469999,-0.469999,-0.469999,0.498901,0.054945,11745.0,0.241758,-0.505927,-0.648352,...,,,-0.270974,-0.000000,-0.025084,,-0.373234,,,
AAPL,2016/03/31,0.142500,0.142500,0.142500,-0.142500,-0.712891,0.868132,260.0,0.593407,-0.111504,-0.318681,...,,1.0,-0.059218,-0.627054,0.011467,,-0.280233,0.408248,,
AAPL,2016/04/01,-0.250000,-0.250000,-0.250000,0.250000,0.670730,0.593407,1320.0,0.472527,-0.940934,-0.340659,...,,1.0,-0.025932,-0.604207,-0.027473,,-0.149490,0.500000,,
AAPL,2016/04/04,-0.282501,-0.282501,-0.282501,-0.282501,0.363827,0.065934,9000.0,0.142857,-0.428940,-0.802198,...,,,-0.177212,-0.557475,0.015886,,0.145123,0.534522,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZM,2020/12/23,25.119995,25.119995,25.119995,25.119995,-0.754339,0.990000,-0.0,0.990000,-0.277880,-0.020000,...,0.272353,16.0,-0.004774,0.280261,0.011485,0.225419,-0.461377,,0.772236,3.513479
ZM,2020/12/24,8.709991,8.709991,8.709991,-8.709991,-0.881488,0.990000,-0.0,0.990000,-0.019300,-0.040000,...,0.311284,1.0,-0.002961,0.483085,0.012970,0.278640,-1.033254,,0.761732,3.253739
ZM,2020/12/28,23.779999,23.779999,23.779999,23.779999,-0.783561,1.000000,-0.0,0.990000,-0.127136,-0.020000,...,0.321974,46.0,0.005598,0.604366,0.009604,0.302727,-0.607281,0.396203,0.897553,3.896547
ZM,2020/12/29,-2.359985,-2.359985,-2.359985,2.359985,0.316015,0.060000,-0.0,0.100000,-0.354365,-0.960000,...,0.296275,1.0,0.107946,0.549857,0.003861,0.316374,0.987516,0.335449,0.928339,2.441653


In [307]:
alpha_corr = selected_result.corr()

In [308]:
for i in range(len(alpha_corr.values)):
    for j in range(1, i):
        if alpha_corr.values[i][j]>0.5:
            print(f"{lst[i+1]} and {lst[j+1]} have correlation {alpha_corr.values[i][j]}")

a12 and a9 have correlation 0.8327990614587417
a10 and a25 have correlation 0.5090230865520972
a54 and a35 have correlation 0.7657444285694435
a17 and a35 have correlation 0.6383829674900698
a17 and a54 have correlation 0.5800946641259345
a46 and a35 have correlation 0.5007784584389477
a34 and a46 have correlation 0.5262278775233394
a5 and a35 have correlation 0.5979655314918241
a5 and a54 have correlation 0.5760092016837156
a43 and a9 have correlation 0.5571699029250493
a24 and a35 have correlation 0.5093446081417549
a24 and a17 have correlation 0.5793938180100685
a30 and a23 have correlation 0.6287479830871998
a33 and a18 have correlation 0.5462450623539378
a33 and a47 have correlation 0.5828929404088304
a22 and a7 have correlation 0.8985846771670122


In [311]:
# drop columns that are highly correlated
# 9, 22, 23, 24, 25, 35, 54
selected_result = selected_result.drop(columns=['a9', 'a22', 'a23', 'a24', 'a24', 'a25', 'a35', 'a54'])

In [312]:
selected_result.columns

Index(['a49', 'a51', 'a12', 'a101', 'a10', 'a18', 'a17', 'a46', 'a38', 'a34',
       'a5', 'a28', 'a43', 'a37', 'a42', 'a11', 'a13', 'a20', 'a83', 'a47',
       'a8', 'a16', 'a30', 'a45', 'a32', 'a7', 'a14', 'a55', 'a60', 'a33',
       'a3', 'a78', 'a36'],
      dtype='object')

In [330]:
selected_result.isna().sum()/len(selected_result)

a49     0.000000
a51     0.000000
a12     0.048225
a101    0.048150
a10     0.048225
a18     0.048825
a17     0.049875
a46     0.048225
a38     0.048825
a34     0.048450
a5      0.048825
a28     0.049875
a43     0.051000
a37     0.170450
a42     0.048150
a11     0.048375
a13     0.048450
a20     0.048225
a83     0.048600
a47     0.049575
a8      0.049200
a16     0.048450
a30     0.049575
a45     0.056342
a32     0.198500
a7      0.003633
a14     0.048825
a55     0.051892
a60     0.048825
a33     0.198500
a3      0.110533
a78     0.209733
a36     0.169625
dtype: float64

In [331]:
# drop a37, a32, a33, a3, a78, a36
selected_result = selected_result.drop(columns=['a37', 'a32', 'a33', 'a3', 'a78', 'a36'])

In [None]:
selected_result.to_csv('first round alpha.csv')