In [1]:
# importing the libraries for my functions  
import numpy as np
import pandas as pd
from numpy import abs
from numpy import log
from numpy import sign
from scipy.stats import rankdata
import yfinance as yf
import matplotlib.pyplot as plt

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# making some functions before the alpha to use for calculation purpose
def ts_sum(df, window=10):
    """
    Wrapper function to estimate rolling sum.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    
    return df.rolling(window).sum()

In [4]:
def sma(df, window=10):
    """
    Wrapper function to estimate Simple Moving Average.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).mean()

In [5]:
def stddev(df, window=10):
    """
    Wrapper function to estimate rolling standard deviation.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).std()

In [6]:
def correlation(x, y, window=10):
    """
    Wrapper function to estimate rolling correlations.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).corr(y)

In [7]:
def covariance(x, y, window=10):
    """
    Wrapper function to estimate rolling covariance.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).cov(y)

In [8]:
def rolling_rank(na):
    """
    Auxiliary function to be used in pd.rolling_apply
    :param na: numpy array.
    :return: The rank of the last value in the array.
    """
    return rankdata(na)[-1]

In [9]:
def ts_rank(df, window=10):
    """
    Wrapper function to estimate rolling rank.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series rank over the past window days.
    """
    return df.rolling(window).apply(rolling_rank)

In [10]:
def rolling_prod(na):
    """
    Auxiliary function to be used in pd.rolling_apply
    :param na: numpy array.
    :return: The product of the values in the array.
    """
    return np.prod(na)

In [11]:
def product(df, window=10):
    """
    Wrapper function to estimate rolling product.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series product over the past 'window' days.
    """
    return df.rolling(window).apply(rolling_prod)

In [12]:
def ts_min(df, window=10):
    """
    Wrapper function to estimate rolling min.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).min()

In [13]:
def ts_max(df, window=10):
    """
    Wrapper function to estimate rolling max.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series max over the past 'window' days.
    """
    return df.rolling(window).max()

In [14]:
def delta(df, period=1):
    """
    Wrapper function to estimate difference.
    :param df: a pandas DataFrame.
    :param period: the difference grade.
    :return: a pandas DataFrame with today’s value minus the value 'period' days ago.
    """
    return df.diff(period)

In [15]:
def delay(df, period=1):
    """
    Wrapper function to estimate lag.
    :param df: a pandas DataFrame.
    :param period: the lag grade.
    :return: a pandas DataFrame with lagged time series
    """
    return df.shift(period)

In [16]:
#def rank(df):
   # """
    #Cross sectional rank
    #:param df: a pandas DataFrame.
    #:return: a pandas DataFrame with rank along columns.
    #"""
    #return df.rank(axis=1, pct=True)
    #return df.rank(axis=1, pct=True)

In [17]:
def scale(df, k=1):
    """
    Scaling time serie.
    :param df: a pandas DataFrame.
    :param k: scaling factor.
    :return: a pandas DataFrame rescaled df such that sum(abs(df)) = k
    """
    return df.mul(k).div(np.abs(df).sum())


In [18]:
def ts_argmax(df, window=10):
    """
    Wrapper function to estimate which day ts_max(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that 
    """
    return df.rolling(window).apply(np.argmax) + 1

In [19]:
def ts_argmin(df, window=10):
    """
    Wrapper function to estimate which day ts_min(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that 
    """
    return df.rolling(window).apply(np.argmin) + 1

In [20]:
def decay_linear(df, period=10):
    """
    Linear weighted moving average implementation.
    :param df: a pandas DataFrame.
    :param period: the LWMA period
    :return: a pandas DataFrame with the LWMA.
    """
    # Clean data
    if df.isnull().values.any():
        df.fillna(method='ffill', inplace=True)
        df.fillna(method='bfill', inplace=True)
        df.fillna(value=0, inplace=True)
    na_lwma = np.zeros_like(df)
    na_lwma[:period, :] = df.iloc[:period, :] 
    na_series = df.to_numpy()

    divisor = period * (period + 1) / 2
    y = (np.arange(period) + 1) * 1.0 / divisor
    # Estimate the actual lwma with the actual close.
    # The backtest engine should assure to be bias free.
    for row in range(period - 1, df.shape[0]):
        x = na_series[row - period + 1: row + 1, :]
        na_lwma[row, :] = (np.dot(x.T, y))
    return pd.DataFrame(na_lwma, index=df.index, columns=['CLOSE'])  
# endregion

In [21]:
class Alphas(object):
    def __init__(self, df_data):
    
        self.open = df_data['Open'] 
        self.high = df_data['High'] 
        self.low = df_data['Low']   
        self.close = df_data['Close'] 
        self.volume = df_data['Volume']*100 
        self.returns = np.log(df_data['Open']/df_data['Open'].shift(1))
        self.vwap = (df_data['Volume']*((df_data['High']+df_data['Low']+df_data['Close']) / 3)).cumsum() / (df_data['Volume'].cumsum())
        
    # Alpha#1 (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) -0.5)
    #def alpha001(self):
        #inner = self.close
        #inner[self.returns < 0] = stddev(self.returns, 20)
        #return rank(ts_argmax(inner ** 2, 5))
    
    def arushi01(self):
        inner = self.close
        inner[self.returns < 0] = stddev(self.returns, 20)
        return  ts_rank(ts_argmax(inner ** 2, 5))
    
     # Alpha#2 (-1 * correlation(rank(delta(log(volume), 2)), rank(((close - open) / open)), 6))
    #def alpha002(self):
        #df = -1 * correlation(rank(delta(log(self.volume), 2)), rank((self.close - self.open) / self.open), 6)
        #return df.replace([-np.inf, np.inf], 0).fillna(value=0)
    
    def arushi02(self):
        df = -1 * correlation( ts_rank(delta(log(self.volume), 2)),  ts_rank((self.close - self.open) / self.open), 6)
        return df.replace([-np.inf, np.inf], 0).fillna(value=0)
        
    # Alpha#3 (-1 * correlation(rank(open), rank(volume), 10))
    #def alpha003(self):
        #df = -1 * correlation(rank(self.open), rank(self.volume), 10)
        #return df.replace([-np.inf, np.inf], 0).fillna(value=0)
    
    def arushi03(self):
        df = -1 * correlation(ts_rank(self.open), ts_rank(self.volume), 10)
        return df.replace([-np.inf, np.inf], 0).fillna(value=0)
        
    # Alpha#4 (-1 * Ts_Rank(rank(low), 9))
    #def alpha004(self):
        #return -1 * ts_rank(rank(self.low), 9)
    
    def arushi04(self):
        return -1 * ts_rank(ts_rank(self.low), 9)
    
    
    # Alpha#5 (rank((open - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))
    #def alpha005(self):
        #return  (rank((self.open - (sum(self.vwap, 10) / 10))) * (-1 * abs(rank((self.close - self.vwap)))))
    
    def arushi05(self):
        return  (ts_rank((self.open - (sum(self.vwap, 10) / 10))) * (-1 * abs(ts_rank((self.close - self.vwap)))))
        
    # Alpha#6 (-1 * correlation(open, volume, 10))
    def alpha006(self):
        df = -1 * correlation(self.open, self.volume, 10)
        return df.replace([-np.inf, np.inf], 0).fillna(value=0)
    
    # Alpha#7 ((adv20 < volume) ? ((-1 * ts_rank(abs(delta(close, 7)), 60)) * sign(delta(close, 7))) : (-1* 1))
    def alpha007(self):
        adv20 = sma(self.volume, 20)
        alpha = -1 * ts_rank(abs(delta(self.close, 7)), 60) * sign(delta(self.close, 7))
        alpha[adv20 >= self.volume] = -1
        return alpha
    
    # Alpha#8 (-1 * rank(((sum(open, 5) * sum(returns, 5)) - delay((sum(open, 5) * sum(returns, 5)),10))))
    #def alpha008(self):
        #return -1 * (rank(((ts_sum(self.open, 5) * ts_sum(self.returns, 5)) -
                           #delay((ts_sum(self.open, 5) * ts_sum(self.returns, 5)), 10))))
    def arushi08(self):
        return -1 * (ts_rank(((ts_sum(self.open, 5) * ts_sum(self.returns, 5)) -
                           delay((ts_sum(self.open, 5) * ts_sum(self.returns, 5)), 10))))
        
    
    # Alpha#9 ((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ?delta(close, 1) : (-1 * delta(close, 1))))
    def alpha009(self):
        delta_close = delta(self.close, 1)
        cond_1 = ts_min(delta_close, 5) > 0
        cond_2 = ts_max(delta_close, 5) < 0
        alpha = -1 * delta_close
        alpha[cond_1 | cond_2] = delta_close
        return alpha
    
    # Alpha#10 rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0)? delta(close, 1) : (-1 * delta(close, 1)))))
    #def alpha010(self):
        #delta_close = delta(self.close, 1)
        #cond_1 = ts_min(delta_close, 4) > 0
        #cond_2 = ts_max(delta_close, 4) < 0
        #alpha = rank(-1 * delta_close)
        #alpha[cond_1 | cond_2] = delta_close
        #return alpha
    
    def arushi10(self):
        delta_close = delta(self.close, 1)
        cond_1 = ts_min(delta_close, 4) > 0
        cond_2 = ts_max(delta_close, 4) < 0
        alpha = ts_rank(-1 * delta_close)
        alpha[cond_1 | cond_2] = delta_close
        return alpha
        
    
    
    # Alpha#11 ((rank(ts_max((vwap - close), 3)) + rank(ts_min((vwap - close), 3))) *rank(delta(volume, 3)))
    #def alpha011(self):
        #return ((rank(ts_max((self.vwap - self.close), 3)) + rank(ts_min((self.vwap - self.close), 3))) *rank(delta(self.volume, 3)))
    
    def arushi11(self):
        return ((ts_rank(ts_max((self.vwap - self.close), 3)) + ts_rank(ts_min((self.vwap - self.close), 3))) *ts_rank(delta(self.volume, 3)))
        
    
    # Alpha#12 (sign(delta(volume, 1)) * (-1 * delta(close, 1)))
    def alpha012(self):
        return sign(delta(self.volume, 1)) * (-1 * delta(self.close, 1))

    # Alpha#13 (-1 * rank(covariance(rank(close), rank(volume), 5)))
    #def alpha013(self):
        #return -1 * rank(covariance(rank(self.close), rank(self.volume), 5))
    
    def arushi13(self):
        return -1 * ts_rank(covariance(ts_rank(self.close), ts_rank(self.volume), 5))
        
    
    # Alpha#14 ((-1 * rank(delta(returns, 3))) * correlation(open, volume, 10))
    #def alpha014(self):
        #df = correlation(self.open, self.volume, 10)
        #df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        #return -1 * rank(delta(self.returns, 3)) * df
    
    def arushi14(self):
        df = correlation(self.open, self.volume, 10)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * ts_rank(delta(self.returns, 3)) * df
        
    
    # Alpha#15 (-1 * sum(rank(correlation(rank(high), rank(volume), 3)), 3))
    #def alpha015(self):
        #df = correlation(rank(self.high), rank(self.volume), 3)
        #df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        #return -1 * ts_sum(rank(df), 3)
    
    def arushi15(self):
        df = correlation(ts_rank(self.high), ts_rank(self.volume), 3)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * ts_sum(ts_rank(df), 3)
        
    
     # Alpha#16 (-1 * rank(covariance(rank(high), rank(volume), 5)))
    #def alpha016(self):
        #return -1 * rank(covariance(rank(self.high), rank(self.volume), 5))
    
    def arushi16(self):
        return -1 * ts_rank(covariance(ts_rank(self.high), ts_rank(self.volume), 5))
        
    
    # Alpha#17 (((-1 * rank(ts_rank(close, 10))) * rank(delta(delta(close, 1), 1))) *rank(ts_rank((volume / adv20), 5)))
    #def alpha017(self):
        #adv20 = sma(self.volume, 20)
        #return -1 * (rank(ts_rank(self.close, 10)) *
                     #rank(delta(delta(self.close, 1), 1)) *
                     #rank(ts_rank((self.volume / adv20), 5)))
    
    def arushi17(self):
        adv20 = sma(self.volume, 20)
        return -1 * (ts_rank(ts_rank(self.close, 10)) *
                     ts_rank(delta(delta(self.close, 1), 1)) *
                     ts_rank(ts_rank((self.volume / adv20), 5)))
    
        
         
        
    # Alpha#18 (-1 * rank(((stddev(abs((close - open)), 5) + (close - open)) + correlation(close, open,10))))
    #def alpha018(self):
        #df = correlation(self.close, self.open, 10)
        #df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        #return -1 * (rank((stddev(abs((self.close - self.open)), 5) + (self.close - self.open)) +
                          #df))
    
    def arushi18(self):
        df = correlation(self.close, self.open, 10)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * (ts_rank((stddev(abs((self.close - self.open)), 5) + (self.close - self.open)) +
                          df))
        
    
    # Alpha#19 ((-1 * sign(((close - delay(close, 7)) + delta(close, 7)))) * (1 + rank((1 + sum(returns,250)))))
    #def alpha019(self):
        #return ((-1 * sign((self.close - delay(self.close, 7)) + delta(self.close, 7))) *
                #(1 + rank(1 + ts_sum(self.returns, 250))))
    
    def arushi19(self):
        return ((-1 * sign((self.close - delay(self.close, 7)) + delta(self.close, 7))) *
                (1 + ts_rank(1 + ts_sum(self.returns, 250))))
        
    
    # Alpha#20 (((-1 * rank((open - delay(high, 1)))) * rank((open - delay(close, 1)))) * rank((open -delay(low, 1))))
    #def alpha020(self):
        #return -1 * (rank(self.open - delay(self.high, 1)) *
                     #rank(self.open - delay(self.close, 1)) *
                     #rank(self.open - delay(self.low, 1)))
    
    def arushi20(self):
        return -1 * (ts_rank(self.open - delay(self.high, 1)) *
                     ts_rank(self.open - delay(self.close, 1)) *
                     ts_rank(self.open - delay(self.low, 1)))
    
        

    # Alpha#21 ((((sum(close, 8) / 8) + stddev(close, 8)) < (sum(close, 2) / 2)) ? (-1 * 1) : (((sum(close,2) / 2) < ((sum(close, 8) / 8) - stddev(close, 8))) ? 1 : (((1 < (volume / adv20)) || ((volume /adv20) == 1)) ? 1 : (-1 * 1))))
    def alpha021(self):
        cond_1 = sma(self.close, 8) + stddev(self.close, 8) < sma(self.close, 2)
        cond_2 = sma(self.volume, 20) / self.volume < 1
        alpha = pd.DataFrame(np.ones_like(self.close), index=self.close.index
                             )
    # Remaining part of Alpha#21 = pd.DataFrame(np.ones_like(self.close), index=self.close.index, columns=self.close.columns)
        alpha[cond_1 | cond_2] = -1
        return alpha
    
    # Alpha#22 (-1 * (delta(correlation(high, volume, 5), 5) * rank(stddev(close, 20))))
    #def alpha022(self):
        #df = correlation(self.high, self.volume, 5)
        #df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        #return -1 * delta(df, 5) * rank(stddev(self.close, 20))
    
    def arushi22(self):
        df = correlation(self.high, self.volume, 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * delta(df, 5) * ts_rank(stddev(self.close, 20))
    
        

    # Alpha#23 (((sum(high, 20) / 20) < high) ? (-1 * delta(high, 2)) : 0)
    def alpha023(self):
        cond = sma(self.high, 20) < self.high
        alpha = pd.DataFrame(np.zeros_like(self.close),index=self.close.index,columns=['close'])
        alpha.at[cond,'close'] = -1 * delta(self.high, 2).fillna(value=0)
        return alpha
    
    # Alpha#24 ((((delta((sum(close, 100) / 100), 100) / delay(close, 100)) < 0.05) ||((delta((sum(close, 100) / 100), 100) / delay(close, 100)) == 0.05)) ? (-1 * (close - ts_min(close,100))) : (-1 * delta(close, 3)))
    def alpha024(self):
        cond = delta(sma(self.close, 100), 100) / delay(self.close, 100) <= 0.05
        alpha = -1 * delta(self.close, 3)
        alpha[cond] = -1 * (self.close - ts_min(self.close, 100))
        return alpha
    
    # Alpha#25 rank(((((-1 * returns) * adv20) * vwap) * (high - close)))
    #def alpha025(self):
        #adv20 = sma(self.volume, 20)
        #return rank(((((-1 * self.returns) * adv20) * self.vwap) * (self.high - self.close)))
    
    def arushi25(self):
        adv20 = sma(self.volume, 20)
        return ts_rank(((((-1 * self.returns) * adv20) * self.vwap) * (self.high - self.close)))
        
    
     # Alpha#26 (-1 * ts_max(correlation(ts_rank(volume, 5), ts_rank(high, 5), 5), 3))
    def alpha026(self):
        df = correlation(ts_rank(self.volume, 5), ts_rank(self.high, 5), 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * ts_max(df, 3)
    
    # Alpha#27 ((0.5 < rank((sum(correlation(rank(volume), rank(vwap), 6), 2) / 2.0))) ? (-1 * 1) : 1)
    #def alpha027(self):
        #alpha = rank((sma(correlation(rank(self.volume), rank(self.vwap), 6), 2) / 2.0))
        #alpha[alpha > 0.5] = -1
        #alpha[alpha <= 0.5]=1
        #return alpha
    
    def arushi27(self):
        alpha = ts_rank((sma(correlation(ts_rank(self.volume), ts_rank(self.vwap), 6), 2) / 2.0))
        alpha[alpha > 0.5] = -1
        alpha[alpha <= 0.5]=1
        return alpha
        
    
    
    # Alpha#28 scale(((correlation(adv20, low, 5) + ((high + low) / 2)) - close))
    def alpha028(self):
        adv20 = sma(self.volume, 20)
        df = correlation(adv20, self.low, 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return scale(((df + ((self.high + self.low) / 2)) - self.close))
    

    # Alpha#29 (min(product(rank(rank(scale(log(sum(ts_min(rank(rank((-1 * rank(delta((close - 1),5))))), 2), 1))))), 1), 5) + ts_rank(delay((-1 * returns), 6), 5))
    #def alpha029(self):
        #return (ts_min(rank(rank(scale(log(ts_sum(rank(rank(-1 * rank(delta((self.close - 1), 5)))), 2))))), 5) +
                #ts_rank(delay((-1 * self.returns), 6), 5))
    
    def arushi29(self):
        return (ts_min(ts_rank(ts_rank(scale(log(ts_sum(ts_rank(ts_rank(-1 * ts_rank(delta((self.close - 1), 5)))), 2))))), 5) +
                ts_rank(delay((-1 * self.returns), 6), 5))
        
    

    # Alpha#30 (((1.0 - rank(((sign((close - delay(close, 1))) + sign((delay(close, 1) - delay(close, 2)))) +sign((delay(close, 2) - delay(close, 3)))))) * sum(volume, 5)) / sum(volume, 20))
    #def alpha030(self):
        #delta_close = delta(self.close, 1)
        #inner = sign(delta_close) + sign(delay(delta_close, 1)) + sign(delay(delta_close, 2))
        #return ((1.0 - rank(inner)) * ts_sum(self.volume, 5)) / ts_sum(self.volume, 20)
    
    def arushi30(self):
        delta_close = delta(self.close, 1)
        inner = sign(delta_close) + sign(delay(delta_close, 1)) + sign(delay(delta_close, 2))
        return ((1.0 - ts_rank(inner)) * ts_sum(self.volume, 5)) / ts_sum(self.volume, 20)
    
    
     # Alpha#31 ((rank(rank(rank(decay_linear((-1 * rank(rank(delta(close, 10)))), 10)))) + rank((-1 *delta(close, 3)))) + sign(scale(correlation(adv20, low, 12))))
    #def alpha031(self):
        #adv20 = sma(self.volume, 20)
        #df = correlation(adv20, self.low, 12).replace([-np.inf, np.inf], 0).fillna(value=0)         
        #p1=rank(rank(rank(decay_linear((-1 * rank(rank(delta(self.close, 10)))).to_frame(), 10)))) 
        #p2=rank((-1 * delta(self.close, 3)))
        #p3=sign(scale(df))
        
        #return p1.CLOSE+p2+p3
    
    
    def arushi31(self):
        adv20 = sma(self.volume, 20)
        df = correlation(adv20, self.low, 12).replace([-np.inf, np.inf], 0).fillna(value=0)         
        p1=ts_rank(ts_rank(ts_rank(decay_linear((-1 * ts_rank(ts_rank(delta(self.close, 10)))).to_frame(), 10)))) 
        p2=ts_rank((-1 * delta(self.close, 3)))
        p3=sign(scale(df))
        
        return p1.CLOSE+p2+p3
    
        

    # Alpha#32  (scale(((sum(close, 7) / 7) - close)) + (20 * scale(correlation(vwap, delay(close, 5),230))))
    def alpha032(self):
        return scale(((sma(self.close, 7) / 7) - self.close)) + (20 * scale(correlation(self.vwap, delay(self.close, 5),230)))
    
    # Alpha#33  rank((-1 * ((1 - (open / close))^1)))
    #def alpha033(self):
        #return rank(-1 + (self.open / self.close))
    
    def arushi33(self):
        return ts_rank(-1 + (self.open / self.close))
        
    
    # Alpha#34  rank(((1 - rank((stddev(returns, 2) / stddev(returns, 5)))) + (1 - rank(delta(close, 1)))))
    #def alpha034(self):
        #inner = stddev(self.returns, 2) / stddev(self.returns, 5)
        #inner = inner.replace([-np.inf, np.inf], 1).fillna(value=1)
        #return rank(2 - rank(inner) - rank(delta(self.close, 1)))
    
    def arushi34(self):
        inner = stddev(self.returns, 2) / stddev(self.returns, 5)
        inner = inner.replace([-np.inf, np.inf], 1).fillna(value=1)
        return ts_rank(2 - ts_rank(inner) - ts_rank(delta(self.close, 1)))
        

    # Alpha#35  ((Ts_Rank(volume, 32) * (1 - Ts_Rank(((close + high) - low), 16))) * (1 -Ts_Rank(returns, 32)))
    def alpha035(self):
        return ((ts_rank(self.volume, 32) *
                 (1 - ts_rank(self.close + self.high - self.low, 16))) *
                (1 - ts_rank(self.returns, 32)))
            
    # Alpha#36  (((((2.21 * rank(correlation((close - open), delay(volume, 1), 15))) + (0.7 * rank((open- close)))) + (0.73 * rank(Ts_Rank(delay((-1 * returns), 6), 5)))) + rank(abs(correlation(vwap,adv20, 6)))) + (0.6 * rank((((sum(close, 200) / 200) - open) * (close - open)))))
    #def alpha036(self):
        #adv20 = sma(self.volume, 20)
        #return (((((2.21 * rank(correlation((self.close - self.open), delay(self.volume, 1), 15))) + (0.7 * rank((self.open- self.close)))) + (0.73 * rank(ts_rank(delay((-1 * self.returns), 6), 5)))) + rank(abs(correlation(self.vwap,adv20, 6)))) + (0.6 * rank((((sma(self.close, 200) / 200) - self.open) * (self.close - self.open)))))
    
    def arushi36(self):
        adv20 = sma(self.volume, 20)
        return (((((2.21 * ts_rank(correlation((self.close - self.open), delay(self.volume, 1), 15))) + (0.7 * ts_rank((self.open- self.close)))) + (0.73 * ts_rank(ts_rank(delay((-1 * self.returns), 6), 5)))) + ts_rank(abs(correlation(self.vwap,adv20, 6)))) + (0.6 * ts_rank((((sma(self.close, 200) / 200) - self.open) * (self.close - self.open)))))
    
        
    # Alpha#37  (rank(correlation(delay((open - close), 1), close, 200)) + rank((open - close)))
    #def alpha037(self):
        #return rank(correlation(delay(self.open - self.close, 1), self.close, 200)) + rank(self.open - self.close)
    
    def arushi37(self):
        return ts_rank(correlation(delay(self.open - self.close, 1), self.close, 200)) + ts_rank(self.open - self.close)
    
        
    # Alpha#38  ((-1 * rank(Ts_Rank(close, 10))) * rank((close / open)))
    #def alpha038(self):
        #inner = self.close / self.open
        #inner = inner.replace([-np.inf, np.inf], 1).fillna(value=1)
        #return -1 * rank(ts_rank(self.open, 10)) * rank(inner)
    
    def arushi38(self):
        inner = self.close / self.open
        inner = inner.replace([-np.inf, np.inf], 1).fillna(value=1)
        return -1 * ts_rank(ts_rank(self.open, 10)) * ts_rank(inner)
        
    
    # Alpha#39  ((-1 * rank((delta(close, 7) * (1 - rank(decay_linear((volume / adv20), 9)))))) * (1 +rank(sum(returns, 250))))
    #def alpha039(self):
        #adv20 = sma(self.volume, 20)
        #return ((-1 * rank(delta(self.close, 7) * (1 - rank(decay_linear((self.volume / adv20).to_frame(), 9).CLOSE)))) *
                #(1 + rank(sma(self.returns, 250))))
    
    def arushi39(self):
        adv20 = sma(self.volume, 20)
        return ((-1 * ts_rank(delta(self.close, 7) * (1 - ts_rank(decay_linear((self.volume / adv20).to_frame(), 9).CLOSE)))) *
                (1 + ts_rank(sma(self.returns, 250))))
    
        
    
    # Alpha#40  ((-1 * rank(stddev(high, 10))) * correlation(high, volume, 10))
    #def alpha040(self):
        #return -1 * rank(stddev(self.high, 10)) * correlation(self.high, self.volume, 10)
    
    
    def arushi40(self):
        return -1 * ts_rank(stddev(self.high, 10)) * correlation(self.high, self.volume, 10)
    
    
     # Alpha#41  (((high * low)^0.5) - vwap)
    def alpha041(self):
        return pow((self.high * self.low),0.5) - self.vwap
    
    # Alpha#42  (rank((vwap - close)) / rank((vwap + close)))
    #def alpha042(self):
        #return rank((self.vwap - self.close)) / rank((self.vwap + self.close))
    
    def arushi42(self):
        return ts_rank((self.vwap - self.close)) / ts_rank((self.vwap + self.close))
        
    
    # Alpha#43  (ts_rank((volume / adv20), 20) * ts_rank((-1 * delta(close, 7)), 8))
    def alpha043(self):
        adv20 = sma(self.volume, 20)
        return ts_rank(self.volume / adv20, 20) * ts_rank((-1 * delta(self.close, 7)), 8)

    # Alpha#44  (-1 * correlation(high, rank(volume), 5))
    #def alpha044(self):
        #df = correlation(self.high, rank(self.volume), 5)
        #df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        #return -1 * df
    
    def arushi44(self):
        df = correlation(self.high, ts_rank(self.volume), 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * df
        
    

    # Alpha#45  (-1 * ((rank((sum(delay(close, 5), 20) / 20)) * correlation(close, volume, 2)) *rank(correlation(sum(close, 5), sum(close, 20), 2))))
    #def alpha045(self):
        #df = correlation(self.close, self.volume, 2)
        #df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        #return -1 * (rank(sma(delay(self.close, 5), 20)) * df *
                     #rank(correlation(ts_sum(self.close, 5), ts_sum(self.close, 20), 2)))
    
    def arushi45(self):
        df = correlation(self.close, self.volume, 2)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * (ts_rank(sma(delay(self.close, 5), 20)) * df *
                     ts_rank(correlation(ts_sum(self.close, 5), ts_sum(self.close, 20), 2)))
        
    
    
    # Alpha#46  ((0.25 < (((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10))) ?(-1 * 1) : (((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < 0) ? 1 :((-1 * 1) * (close - delay(close, 1)))))
    def alpha046(self):
        inner = ((delay(self.close, 20) - delay(self.close, 10)) / 10) - ((delay(self.close, 10) - self.close) / 10)
        alpha = (-1 * delta(self.close))
        alpha[inner < 0] = 1
        alpha[inner > 0.25] = -1
        return alpha

    # Alpha#47  ((((rank((1 / close)) * volume) / adv20) * ((high * rank((high - close))) / (sum(high, 5) /5))) - rank((vwap - delay(vwap, 5))))
    #def alpha047(self):
        #adv20 = sma(self.volume, 20)
        #return ((((rank((1 / self.close)) * self.volume) / adv20) * ((self.high * rank((self.high - self.close))) / (sma(self.high, 5) /5))) - rank((self.vwap - delay(self.vwap, 5))))
    
    def arushi47(self):
        adv20 = sma(self.volume, 20)
        return ((((ts_rank((1 / self.close)) * self.volume) / adv20) * ((self.high * ts_rank((self.high - self.close))) / (sma(self.high, 5) /5))) - ts_rank((self.vwap - delay(self.vwap, 5))))
    
        
    # Alpha#48  (indneutralize(((correlation(delta(close, 1), delta(delay(close, 1), 1), 250) *delta(close, 1)) / close), IndClass.subindustry) / sum(((delta(close, 1) / delay(close, 1))^2), 250))
     
    
    # Alpha#49  (((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < (-1 *0.1)) ? 1 : ((-1 * 1) * (close - delay(close, 1))))
    def alpha049(self):
        inner = (((delay(self.close, 20) - delay(self.close, 10)) / 10) - ((delay(self.close, 10) - self.close) / 10))
        alpha = (-1 * delta(self.close))
        alpha[inner < -0.1] = 1
        return alpha
    
    # Alpha#50  (-1 * ts_max(rank(correlation(rank(volume), rank(vwap), 5)), 5))
    #def alpha050(self):
        #return (-1 * ts_max(rank(correlation(rank(self.volume), rank(self.vwap), 5)), 5))
    
    def arushi50(self):
        return (-1 * ts_max(ts_rank(correlation(ts_rank(self.volume), ts_rank(self.vwap), 5)), 5))
    
    def alpha_number(self,n):
        if n == 1:
            return self.arushi01()
        elif n == 2:
            return self.arushi02()
        elif n == 3:
            return self.arushi03()
        elif n == 4:
            return self.arushi04()
        elif n == 5:
            return self.arushi05()
        elif n == 6:
            return self.alpha006()
        elif n == 7:
            return self.alpha007()
        elif n == 8:
            return self.arushi08()
        elif n == 9:
            return self.alpha009()
        elif n == 10:
            return self.arushi10()
        elif n == 11:
            return self.arushi11()
        elif n == 12:
            return self.alpha012()
        elif n == 13:
            return self.arushi13()
        elif n == 14:
            return self.arushi14()
        elif n == 15:
            return self.arushi15()
        elif n == 16:
            return self.arushi16()
        elif n == 17:
            return self.arushi17()
        elif n == 18:
            return self.arushi18()
        elif n == 19:
            return self.arushi19()
        elif n == 20:
            return self.arushi20()
        elif n == 21:
            return self.alpha021()
        elif n == 22:
            return self.arushi22()
        elif n == 23:
            return self.alpha023()
        elif n == 24:
            return self.alpha024()
        elif n == 25:
            return self.arushi25()
        elif n == 26:
            return self.alpha026()
        elif n == 27:
            return self.arushi27()
        elif n == 28:
            return self.alpha028()
        elif n == 29:
            return self.arushi29()
        elif n == 30:
            return self.arushi30()
        elif n == 31:
            return self.arushi31()
        elif n == 32:
            return self.alpha032()
        elif n == 33:
            return self.arushi33()
        elif n == 34:
            return self.arushi34()
        elif n == 35:
            return self.alpha035()
        elif n == 36:
            return self.arushi36()
        elif n == 37:
            return self.arushi37()
        elif n == 38:
            return self.arushi38()
        elif n == 39:
            return self.arushi39()
        elif n == 40:
            return self.arushi40()
        elif n == 41:
            return self.alpha041()
        elif n == 42:
            return self.arushi42()
        elif n == 43:
            return self.alpha043()
        elif n == 44:
            return self.arushi44()
        elif n == 45:
            return self.arushi45()
        elif n == 46:
            return self.alpha046()
        elif n == 47:
            return self.arushi47()
        elif n == 49:
            return self.alpha049()
        elif n == 50:
            return self.arushi50()            
        
    
        

In [22]:
def get_alpha(df):
        stock=Alphas(df)
        df['logreturns']=stock.returns
        #df['Kalpha001']=stock.alpha001()
        df['Arushi01']=stock.arushi01()
        #df['Kalpha002']=stock.alpha002()
        df['Arushi02']=stock.arushi02()
        #df['Kalpha003']=stock.alpha003()
        df['Arushi03']=stock.arushi03()
        #df['Kalpha004']=stock.alpha004()
        df['Arushi04']=stock.arushi04()
        #df['Kalpha005']=stock.alpha005()
        df['Arushi05']=stock.arushi05()
        df['Kalpha006']=stock.alpha006()
        df['Kalpha007']=stock.alpha007()
        #df['Kalpha008']=stock.alpha008()
        df['Arushi08']=stock.arushi08()
        df['Kalpha009']=stock.alpha009()
        #df['Kalpha010']=stock.alpha010()
        df['Arushi10']=stock.arushi10()
        #df['Kalpha011']=stock.alpha011()
        df['Arushi11']=stock.arushi11()
        df['Kalpha012']=stock.alpha012()
        #df['Kalpha013']=stock.alpha013()
        df['Arushi13']=stock.arushi13()
        #df['Kalpha014']=stock.alpha014()
        df['Arushi14']=stock.arushi14()
        #df['Kalpha015']=stock.alpha015()
        df['Arushi15']=stock.arushi15()
        #df['Kalpha016']=stock.alpha016()
        df['Arushi16']=stock.arushi16()
        #df['Kalpha017']=stock.alpha017()
        df['Arushi17']=stock.arushi17()
        #df['Kalpha018']=stock.alpha018()
        df['Arushi18']=stock.arushi18()
        #df['Kalpha019']=stock.alpha019()
        df['Arushi19']=stock.arushi19()
        #df['Kalpha020']=stock.alpha020()
        df['Arushi20']=stock.arushi20()
        df['Kalpha021']=stock.alpha021()
        #df['Kalpha022']=stock.alpha022()
        df['Arushi22']=stock.arushi22()
        df['Kalpha023']=stock.alpha023()
        df['Kalpha024']=stock.alpha024()
        #df['Kalpha025']=stock.alpha025()
        df['Arushi25']=stock.arushi25()
        df['Kalpha026']=stock.alpha026()
        #df['Kalpha027']=stock.alpha027()
        df['Arushi27']=stock.arushi27()
        df['Kalpha028']=stock.alpha028()
        #df['Kalpha029']=stock.alpha029()
        df['Arushi29']=stock.arushi29()
        #df['Kalpha030']=stock.alpha030()
        df['Arushi30']=stock.arushi30()
        #df['Kalpha031']=stock.alpha031()
        df['Arushi31']=stock.arushi31()
        df['Kalpha032']=stock.alpha032()
        #df['Kalpha033']=stock.alpha033()
        df['Arushi33']=stock.arushi33()
        #df['Kalpha034']=stock.alpha034()
        df['Arushi34']=stock.arushi34()
        df['Kalpha035']=stock.alpha035()
        #df['Kalpha036']=stock.alpha036()
        df['Arushi36']=stock.arushi36()
        #df['Kalpha037']=stock.alpha037()
        df['Arushi37']=stock.arushi37()
        #df['Kalpha038']=stock.alpha038()
        df['Arushi38']=stock.arushi38()
        #df['Kalpha039']=stock.alpha039()
        df['Arushi39']=stock.arushi39()
        #df['Kalpha040']=stock.alpha040()
        df['Arushi40']=stock.arushi40()
        df['Kalpha041']=stock.alpha041()
        #df['Kalpha042']=stock.alpha042()
        df['Arushi42']=stock.arushi42()
        df['Kalpha043']=stock.alpha043()
        #df['Kalpha044']=stock.alpha044()
        df['Arushi44']=stock.arushi44()
        #df['Kalpha045']=stock.alpha045()
        df['Arushi45']=stock.arushi45()
        df['Kalpha046']=stock.alpha046()
        #df['Kalpha047']=stock.alpha047()
        df['Arushi47']=stock.arushi47()
        df['Kalpha049']=stock.alpha049()
        #df['Kalpha050']=stock.alpha050()
        df['Arushi50']=stock.arushi50()
        #to convert this into numpy array just uncomment the below code and comment the last code
        #return df.to_numpy()
        return df

In [23]:
#%run Utils_PseudoPrices.ipynb
import yfinance as yf
import investpy
#%run Utils_DailyData.ipynb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
import scipy.stats as st

In [24]:
#############################
#HELPER FUNCTIONS
############
#List Data retrieval of OHLCV data
#Inputs: Tickers, Years
#Outputs: List of Dataframes of Daily Stock Data
def get_daily_ohlcv2(tickers,years=4):
    outo=[]
    outh=[]
    outl=[]
    outc=[]
    outv=[]
    oute=[]
    outdf=[]
    months=str(years*12)
    for ticker in tickers:
        tick = yf.Ticker(ticker)
        #df=tick.history(start="2017-01-01",  end="2021-07-04")
        df=tick.history(period=months+"mo")
        df=df.reset_index()
        df.dropna(inplace=True)
        outdf.append(df)
    #return np.asarray(outo),np.asarray(outh),np.asarray(outl),np.asarray(outc),np.asarray(outv),np.asarray(oute)
    return outdf

################
#Dataframe retrieval of OHLCV data
#Inputs: Tickers, Years
#Outputs: Dataframe of Daily Stock Data (all tickers in one dataframe)
def get_daily_ohlcv3(tickers,years=4):
    tick = yf.Tickers(concat_tickers(tickers))
    months=str(years*12)
    #hist2=tick.history(start="2017-01-01",  end="2021-07-04")
    hist2=tick.history(period=months+"mo")
    #return np.transpose(np.asarray(hist2['Open'])),np.transpose(np.asarray(hist2['High'])),np.transpose(np.asarray(hist2['Low'])),np.transpose(np.asarray(hist2['Close'])),np.transpose(np.asarray(hist2['Volume'])),np.transpose(np.asarray(hist2['Stock Splits']))
    return hist2
def concat_tickers(tickers):
    out=""
    for ticker in tickers:
        out=out+" "+ticker
    return out

#Adds NS to tickers
def addNS(ticks):
    return [tick+'.NS' for tick in ticks]
s = np.random.normal(0, 1, 1000)
s2 = np.random.normal(0, 1, 1000)

#Shuffles a 1d array or list
def sh(a):
    a2=a.copy()
    np.random.shuffle(a2)
    return a2

In [25]:
####################
#UNIVERSE 4: SP 500 tickers
spytickers= ["SPY","A","AAL","AAP","AAPL","ABBV","ABC","ABMD","ABT","ACN","ADBE","ADI","ADM","ADP","ADS","ADSK","AEE","AEP","AES","AFL","AGN","AIG","AIV","AIZ","AJG","AKAM","ALB","ALGN","ALK","ALL","ALLE","ALXN","AMAT","AMCR","AMD","AME","AMG","AMGN","AMP","AMT","AMZN","ANET","ANSS","ANTM","AON","AOS","APA","APD","APH","APTV","ARE","ARNC","ATO","ATVI","AVB","AVGO","AVY","AWK","AXP","AZO","BA","BAC","BAX","BBY","BDX","BEN","BIIB","BK","BKNG","BKR","BLK","BLL","BMY","BR","BSX","BWA","BXP","C","CAG","CAH","CAT","CB","CBOE","CBRE","CCI","CCL","CDNS","CDW","CE","CERN","CF","CFG","CHD","CHRW","CHTR","CI","CINF","CL","CLX","CMA","CMCSA","CME","CMG","CMI","CMS","CNC","CNP","COF","COG","COO","COP","COST","COTY","CPB","CPRI","CPRT","CRM","CSCO","CSX","CTAS","CTL","CTSH","CTVA","CTXS","CVS","CVX","CXO","D","DAL","DD","DE","DFS","DG","DGX","DHI","DHR","DIS","DISCA","DISCK","DISH","DLR","DLTR","DOV","DOW","DRE","DRI","DTE","DUK","DVA","DVN","DXC","EA","EBAY","ECL","ED","EFX","EIX","EL","EMN","EMR","EOG","EQIX","EQR","ES","ESS","ETFC","ETN","ETR","EVRG","EW","EXC","EXPD","EXPE","EXR","F","FANG","FAST","FB","FBHS","FCX","FDX","FE","FFIV","FIS","FISV","FITB","FL","FLIR","FLS","FLT","FMC","FOX","FOXA","FRC","FRT","FTI","FTNT","FTV","GD","GE","GILD","GIS","GL","GLW","GM","GOOG","GOOGL","GPC","GPN","GPS","GRMN","GS","GWW","HAL","HAS","HBAN","HBI","HCA","PEAK","HD","HES","HFC","HIG","HII","HLT","HOG","HOLX","HON","HP","HPE","HPQ","HRB","HRL","HSIC","HST","HSY","HUM","IBM","ICE","IDXX","IEX","IFF","ILMN","INCY","INFO","INTC","INTU","IP","IPG","IPGP","IQV","IR","IRM","ISRG","IT","ITW","IVZ","JBHT","JCI","J","JEF","JKHY","JNJ","JNPR","JPM","JWN","K","KEY","KEYS","KHC","KIM","KLAC","KMB","KMI","KMX","KO","KR","KSS","KSU","L","LB","LDOS","LEG","LEN","LH","LHX","LIN","LKQ","LLY","LMT","LNC","LNT","LOW","LRCX","LUV","LVS","LW","LYB","M","MA","MAA","MAC","MAR","MAS","MCD","MCHP","MCK","MCO","MDLZ","MDT","MET","MGM","MHK","MKC","MKTX","MLM","MMC","MMM","MNST","MO","MOS","MPC","MRK","MRO","MS","MSCI","MSFT","MSI","MTB","MTD","MU","MXIM","MYL","NBL","NCLH","NDAQ","NEE","NEM","NFLX","NI","NKE","NKTR","NLOK","NLSN","NOC","NOV","NRG","NSC","NTAP","NTRS","NUE","NVDA","NVR","NWL","NWS","NWSA","O","OKE","OMC","ORCL","ORLY","OXY","PAYX","PBCT","PCAR","PEAK","PEG","PEP","PFE","PFG","PG","PGR","PH","PHM","PKG","PKI","PLD","PM","PNC","PNR","PNW","PPG","PPL","PRGO","PRU","PSA","PSX","PVH","PWR","PXD","PYPL","QCOM","QRVO","RCL","RE","REG","REGN","RF","RHI","RJF","RL","RMD","ROK","ROL","ROP","ROST","RSG","RTN","SBAC","SBUX","SCHW","SEE","SHW","SIVB","SJM","SLB","SLG","SNA","SNPS","SO","SPG","SPGI","SRE","STT","STX","STZ","SWK","SWKS","SYF","SYK","SYY","T","TAP","TDG","TEL","TFC","TFX","TGT","TIF","TJX","TMO","TMUS","TPR","TRIP","TROW","TRV","TSCO","TSN","TTWO","TWTR","TXN","TXT","UA","UAA","UAL","UDR","UHS","ULTA","UNH","UNM","UNP","UPS","URI","USB","UTX","V","VAR","VFC","VIAC","VLO","VMC","VNO","VRSK","VRSN","VRTX","VTR","VZ","WAB","WAT","WBA","WDC","WEC","WELL","WFC","WHR","WLTW","WM","WMB","WMT","WRK","WU","WY","WYNN","XEC","XEL","XLNX","XOM","XRAY","XRX","XYL","YUM","ZBH","ZION","ZTS"];

In [26]:
################################
#LOW TRADING COSTS UNIVERSES

In [27]:
##########################
#Universe 1: DISTANCE TO LTP
#Low Costs in terms of percentage slippage costs
#Costs are 0.04 percent per round trip trade
#variationMinusDistToLtpTickers = pd.read_csv(filepath_or_buffer="/home/ec2-user/Chris/Data/SortedVariationMinusDistToLtp_2021-02-05_freq1_qVar50_qDist50.csv", sep=',', delimiter=None, header='infer') 
#keys = variationMinusDistToLtpTickers['ticker'].values[0:50]
tickers_ltp= ['TSLA', 'ROKU', 'MRNA', 'PDD', 'NVDA', 'ETSY', 'FB', 'SQ', 'BA',
       'DIS', 'PLUG', 'IWM', 'XBI', 'MSFT', 'QQQ', 'AAPL', 'BABA', 'FSLY',
       'PYPL', 'SPY', 'TSM', 'CHWY', 'JPM', 'AMAT', 'XOP', 'SNAP', 'QCOM',
       'NET', 'IBB', 'MU', 'AXP', 'CVX', 'V', 'BRK.B', 'UBER', 'ABBV',
       'JD', 'TWTR', 'LYFT', 'KSS', 'CZR', 'MPC', 'XLK', 'SBUX', 'COF',
       'VLO', 'JNJ', 'MA', 'WDC', 'IBM']

In [28]:
##########################
#Universe 2: TICKS PER MINUTE
#Stocks ordered by number of trades per minute
#These are the most liquid; hence lower costs are expected as the ltp is supposed to reflect the traded price
#Costs can be taken to be 0.06 percent per round trip trade
#tickers_tpm(50) in Utils_Backtests
tickers_tpm=['AAPL', 'TSLA', 'SPY', 'QQQ', 'SQQQ', 'MSFT', 'AMZN', 'NVDA',
       'ORCL', 'FB', 'INTC', 'SNAP', 'XOM', 'UBER', 'NLOK', 'XLK', 'CHWY',
       'BA', 'PYPL', 'IWM', 'T', 'CVX', 'DOCU', 'WFC', 'GDX', 'GE', 'BAC',
       'KR', 'CSCO', 'MU', 'OXY', 'C', 'WMT', 'PFE', 'KO', 'SQ', 'CMCSA',
       'XLE', 'UAL', 'EWZ', 'FSLY', 'OSTK', 'BABA', 'JPM', 'XLI', 'NFLX',
       'CVS', 'V', 'DAL', 'MGM']

In [29]:
spy_data2 = get_daily_ohlcv2(tickers_ltp)

- BRK.B: No data found, symbol may be delisted


In [30]:
#spy_data2 = get_daily_ohlcv2(tickers_tpm)

In [31]:
#spy_data2 = get_daily_ohlcv2(spytickers[0:10])

In [32]:
spy_data2[9]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2017-08-11,97.680348,98.895928,97.468103,98.394257,9410400,0.0,0
1,2017-08-14,99.175701,99.185343,97.689994,97.825058,9707300,0.0,0
2,2017-08-15,98.046940,98.500371,97.786462,97.931175,8868500,0.0,0
3,2017-08-16,98.249539,99.030987,98.133774,98.596848,6698800,0.0,0
4,2017-08-17,98.635444,98.973103,97.767173,97.805763,6612400,0.0,0
...,...,...,...,...,...,...,...,...
1002,2021-08-05,172.490005,177.119995,172.490005,176.710007,8864200,0.0,0
1003,2021-08-06,177.429993,178.809998,176.630005,177.130005,5504100,0.0,0
1004,2021-08-09,177.000000,177.309998,175.059998,176.720001,5339000,0.0,0
1005,2021-08-10,176.250000,177.399994,175.580002,177.070007,5180800,0.0,0


In [33]:
def alpha_name(m):
    if m == 1:
        return 'Arushi01'
    elif m == 2:
        return 'Arushi02'
    elif m == 3:
        return 'Arushi03'
    elif m == 4:
        return 'Arushi04'
    elif m == 5:
        return 'Arushi05'
    elif m == 6:
        return 'Kalpha006'
    elif m == 7:
        return 'Kalpha007'
    elif m == 8:
        return 'Arushi08'
    elif m == 9:
        return 'Kalpha009'
    elif m == 10:
        return 'Arushi10'
    elif m == 11:
        return 'Arushi11'
    elif m == 12:
        return 'Kalpha012'
    elif m == 13:
        return 'Arushi13'
    elif m == 14:
        return 'Arushi14'
    elif m == 15:
        return 'Arushi15'
    elif m == 16:
        return 'Arushi16'
    elif m == 17:
        return 'Arushi17'
    elif m == 18:
        return 'Arushi18'
    elif m == 19:
        return 'Arushi19'
    elif m == 20:
        return 'Arushi20'
    elif m == 21:
        return 'Kalpha021'
    elif m == 22:
        return 'Arushi22'
    elif m == 23:
        return 'Kalpha023'
    elif m == 24:
        return 'Kalpha024'
    elif m == 25:
        return 'Arushi25'
    elif m == 26:
        return 'Kalpha026'
    elif m == 27:
        return 'Arushi27'
    elif m == 28:
        return 'Kalpha028'
    elif m == 29:
        return 'Arushi29'
    elif m == 30:
        return 'Arushi30'
    elif m == 31:
        return 'Arushi31'
    elif m == 32:
        return 'Kalpha032'
    elif m == 33:
        return 'Arushi33'
    elif m == 34:
        return 'Arushi34'
    elif m == 35:
        return 'Kalpha035'
    elif m == 36:
        return 'Arushi36'
    elif m == 37:
        return 'Arushi37'
    elif m == 38:
        return 'Arushi38'
    elif m == 39:
        return 'Arushi39'
    elif m == 40:
        return 'Arushi40'
    elif m == 41:
        return 'Kalpha041'
    elif m == 42:
        return 'Arushi42'
    elif m == 43:
        return 'Kalpha043'
    elif m == 44:
        return 'Arushi44'
    elif m == 45:
        return 'Arushi45'
    elif m == 46:
        return 'Kalpha046'
    elif m == 47:
        return 'Arushi47'
    elif m == 49:
        return 'Kalpha049'
    elif m == 50:
        return 'Arushi50'                

In [34]:
def getarray_fromalpha(df,m, n):
    out=[]
    for i in range(len(df)):
        stock=Alphas(df[i])
        #print(df[alpha_name(m)])
        df[i][alpha_name(m)]=stock.alpha_number(n)
        out.append(np.asarray(list(df[i][alpha_name(m)])))
    return np.nan_to_num(np.asarray(out),0)

In [35]:
def getarray_fromreturns(df):
    out=[]
    for i in range(len(df)):
        stock=Alphas(df[i])
        df[i]['logreturns']=stock.returns
        out.append(np.asarray(list(df[i]['logreturns'])))
    return np.nan_to_num(np.asarray(out),0)

In [36]:
arushi=getarray_fromalpha(spy_data2,1, 1)

In [37]:
logrets=getarray_fromreturns(spy_data2)

In [38]:
np.shape(logrets)

(50,)

In [39]:
%store logrets

Stored 'logrets' (ndarray)


In [123]:
%store arushi

Stored 'arushi' (ndarray)


In [None]:
out

In [None]:
result.head(10)

In [None]:
result.tail(10)