In [1]:
import numpy as np
import pandas as pd
from numpy import abs
from numpy import log
from numpy import sign
from scipy.stats import rankdata
import yfinance as yf

In [3]:


# region Auxiliary functions
def ts_sum(df, window=10):
    """
    Wrapper function to estimate rolling sum.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """

    return df.rolling(window).sum()

def sma(df, window=10):
    """
    Wrapper function to estimate SMA.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).mean()

def stddev(df, window=10):
    """
    Wrapper function to estimate rolling standard deviation.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).std()

def correlation(x, y, window=10):
    """
    Wrapper function to estimate rolling corelations.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).corr(y)

def covariance(x, y, window=10):
    """
    Wrapper function to estimate rolling covariance.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return x.rolling(window).cov(y)

def rolling_rank(na):
    """
    Auxiliary function to be used in pd.rolling_apply
    :param na: numpy array.
    :return: The rank of the last value in the array.
    """
    return rankdata(na)[-1]

def ts_rank(df, window=10):
    """
    Wrapper function to estimate rolling rank.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series rank over the past window days.
    """
    return df.rolling(window).apply(rolling_rank)

def rolling_prod(na):
    """
    Auxiliary function to be used in pd.rolling_apply
    :param na: numpy array.
    :return: The product of the values in the array.
    """
    return np.prod(na)

def product(df, window=10):
    """
    Wrapper function to estimate rolling product.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series product over the past 'window' days.
    """
    return df.rolling(window).apply(rolling_prod)

def ts_min(df, window=10):
    """
    Wrapper function to estimate rolling min.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series min over the past 'window' days.
    """
    return df.rolling(window).min()

def ts_max(df, window=10):
    """
    Wrapper function to estimate rolling min.
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: a pandas DataFrame with the time-series max over the past 'window' days.
    """
    return df.rolling(window).max()

def delta(df, period=1):
    """
    Wrapper function to estimate difference.
    :param df: a pandas DataFrame.
    :param period: the difference grade.
    :return: a pandas DataFrame with today’s value minus the value 'period' days ago.
    """
    return df.diff(period)

def delay(df, period=1):
    """
    Wrapper function to estimate lag.
    :param df: a pandas DataFrame.
    :param period: the lag grade.
    :return: a pandas DataFrame with lagged time series
    """
    return df.shift(period)

def rank(df):
    """
    Cross sectional rank
    :param df: a pandas DataFrame.
    :return: a pandas DataFrame with rank along columns.
    """
    #return df.rank(axis=1, pct=True)
    return df.rank(pct=True)

def scale(df, k=1):
    """
    Scaling time serie.
    :param df: a pandas DataFrame.
    :param k: scaling factor.
    :return: a pandas DataFrame rescaled df such that sum(abs(df)) = k
    """
    return df.mul(k).div(np.abs(df).sum())

def ts_argmax(df, window=10):
    """
    Wrapper function to estimate which day ts_max(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that :)
    """
    return df.rolling(window).apply(np.argmax) + 1

def ts_argmin(df, window=10):
    """
    Wrapper function to estimate which day ts_min(df, window) occurred on
    :param df: a pandas DataFrame.
    :param window: the rolling window.
    :return: well.. that :)
    """
    return df.rolling(window).apply(np.argmin) + 1

def decay_linear(df, period=10):
    """
    Linear weighted moving average implementation.
    :param df: a pandas DataFrame.
    :param period: the LWMA period
    :return: a pandas DataFrame with the LWMA.
    """
    # Clean data
    if df.isnull().values.any():
        df.fillna(method='ffill', inplace=True)
        df.fillna(method='bfill', inplace=True)
        df.fillna(value=0, inplace=True)
    na_lwma = np.zeros_like(df)
    na_lwma[:period, :] = df.iloc[:period, :]
    na_series = df.values

    divisor = period * (period + 1) / 2
    y = (np.arange(period) + 1) * 1.0 / divisor
    # Estimate the actual lwma with the actual close.
    # The backtest engine should assure to be snooping bias free.
    for row in range(period - 1, df.shape[0]):
        x = na_series[row - period + 1: row + 1, :]
        na_lwma[row, :] = (np.dot(x.T, y))
    return pd.DataFrame(na_lwma, index=df.index, columns=['CLOSE'])
# endregion

def get_alpha(df):
        stock=Alphas(df)
        df['alpha001']=stock.alpha001()
        df['alpha002']=stock.alpha002()
        df['alpha003']=stock.alpha003()
        df['alpha004']=stock.alpha004()
        df['alpha005']=stock.alpha005()
        df['alpha006']=stock.alpha006()
        df['alpha007']=stock.alpha007()
        df['alpha008']=stock.alpha008()
        df['alpha009']=stock.alpha009()
        df['alpha010']=stock.alpha010()
        df['alpha011']=stock.alpha011()
        df['alpha012']=stock.alpha012()
        df['alpha013']=stock.alpha013()
        df['alpha014']=stock.alpha014()
        df['alpha015']=stock.alpha015()
        df['alpha016']=stock.alpha016()
        df['alpha017']=stock.alpha017()
        df['alpha018']=stock.alpha018()
        df['alpha019']=stock.alpha019()
        df['alpha020']=stock.alpha020()
        df['alpha021']=stock.alpha021()
        df['alpha022']=stock.alpha022()
        df['alpha023']=stock.alpha023()
        df['alpha024']=stock.alpha024()
        df['alpha025']=stock.alpha025()
        df['alpha026']=stock.alpha026()
        df['alpha027']=stock.alpha027()
        df['alpha028']=stock.alpha028()
        df['alpha029']=stock.alpha029()
        df['alpha030']=stock.alpha030()
        df['alpha031']=stock.alpha031()
        df['alpha032']=stock.alpha032()
        df['alpha033']=stock.alpha033()
        df['alpha034']=stock.alpha034()
        df['alpha035']=stock.alpha035()
        df['alpha036']=stock.alpha036()
        df['alpha037']=stock.alpha037()
        df['alpha038']=stock.alpha038()
        df['alpha039']=stock.alpha039()
        df['alpha040']=stock.alpha040()
        df['alpha041']=stock.alpha041()
        df['alpha042']=stock.alpha042()
        df['alpha043']=stock.alpha043()
        df['alpha044']=stock.alpha044()
        df['alpha045']=stock.alpha045()
        df['alpha046']=stock.alpha046()
        df['alpha047']=stock.alpha047()
        df['alpha049']=stock.alpha049()
        df['alpha050']=stock.alpha050()
        df['alpha051']=stock.alpha051()
        df['alpha052']=stock.alpha052()
        df['alpha053']=stock.alpha053()
        df['alpha054']=stock.alpha054()
        df['alpha055']=stock.alpha055()
        df['alpha057']=stock.alpha057()
        df['alpha060']=stock.alpha060()
        df['alpha061']=stock.alpha061()
        df['alpha062']=stock.alpha062()
        df['alpha064']=stock.alpha064()
        df['alpha065']=stock.alpha065()
        df['alpha066']=stock.alpha066()
        df['alpha068']=stock.alpha068()
        df['alpha071']=stock.alpha071()
        df['alpha072']=stock.alpha072()
        df['alpha073']=stock.alpha073()
        df['alpha074']=stock.alpha074()
        df['alpha075']=stock.alpha075()
        df['alpha077']=stock.alpha077()
        df['alpha078']=stock.alpha078()
        df['alpha081']=stock.alpha081()
        df['alpha083']=stock.alpha083()
        df['alpha084']=stock.alpha084()
        df['alpha085']=stock.alpha085()
        df['alpha086']=stock.alpha086()
        df['alpha088']=stock.alpha088()
        df['alpha092']=stock.alpha092()
        df['alpha094']=stock.alpha094()
        df['alpha095']=stock.alpha095()
        df['alpha096']=stock.alpha096()
        df['alpha098']=stock.alpha098()
        df['alpha099']=stock.alpha099()
        df['alpha101']=stock.alpha101()
        return df

class Alphas(object):
    def __init__(self, df_data):

        self.open = df_data['S_DQ_OPEN']
        self.high = df_data['S_DQ_HIGH']
        self.low = df_data['S_DQ_LOW']
        self.close = df_data['S_DQ_CLOSE']
        self.volume = df_data['S_DQ_VOLUME']*100
        self.returns = df_data['S_DQ_PCTCHANGE']
        self.vwap = (df_data['S_DQ_AMOUNT']*1000)/(df_data['S_DQ_VOLUME']*100+1)

    # Alpha#1	 (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) -0.5)
    def alpha001(self):
        inner = self.close
        inner[self.returns < 0] = stddev(self.returns, 20)
        return rank(ts_argmax(inner ** 2, 5))

    # Alpha#2	 (-1 * correlation(rank(delta(log(volume), 2)), rank(((close - open) / open)), 6))
    def alpha002(self):
        df = -1 * correlation(rank(delta(log(self.volume), 2)), rank((self.close - self.open) / self.open), 6)
        return df.replace([-np.inf, np.inf], 0).fillna(value=0)

    # Alpha#3	 (-1 * correlation(rank(open), rank(volume), 10))
    def alpha003(self):
        df = -1 * correlation(rank(self.open), rank(self.volume), 10)
        return df.replace([-np.inf, np.inf], 0).fillna(value=0)

    # Alpha#4	 (-1 * Ts_Rank(rank(low), 9))
    def alpha004(self):
        return -1 * ts_rank(rank(self.low), 9)

    # Alpha#5	 (rank((open - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))
    def alpha005(self):
        return  (rank((self.open - (sum(self.vwap, 10) / 10))) * (-1 * abs(rank((self.close - self.vwap)))))

    # Alpha#6	 (-1 * correlation(open, volume, 10))
    def alpha006(self):
        df = -1 * correlation(self.open, self.volume, 10)
        return df.replace([-np.inf, np.inf], 0).fillna(value=0)

    # Alpha#7	 ((adv20 < volume) ? ((-1 * ts_rank(abs(delta(close, 7)), 60)) * sign(delta(close, 7))) : (-1* 1))
    def alpha007(self):
        adv20 = sma(self.volume, 20)
        alpha = -1 * ts_rank(abs(delta(self.close, 7)), 60) * sign(delta(self.close, 7))
        alpha[adv20 >= self.volume] = -1
        return alpha

    # Alpha#8	 (-1 * rank(((sum(open, 5) * sum(returns, 5)) - delay((sum(open, 5) * sum(returns, 5)),10))))
    def alpha008(self):
        return -1 * (rank(((ts_sum(self.open, 5) * ts_sum(self.returns, 5)) -
                           delay((ts_sum(self.open, 5) * ts_sum(self.returns, 5)), 10))))

    # Alpha#9	 ((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ?delta(close, 1) : (-1 * delta(close, 1))))
    def alpha009(self):
        delta_close = delta(self.close, 1)
        cond_1 = ts_min(delta_close, 5) > 0
        cond_2 = ts_max(delta_close, 5) < 0
        alpha = -1 * delta_close
        alpha[cond_1 | cond_2] = delta_close
        return alpha

    # Alpha#10	 rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0)? delta(close, 1) : (-1 * delta(close, 1)))))
    def alpha010(self):
        delta_close = delta(self.close, 1)
        cond_1 = ts_min(delta_close, 4) > 0
        cond_2 = ts_max(delta_close, 4) < 0
        alpha = -1 * delta_close
        alpha[cond_1 | cond_2] = delta_close
        return alpha

    # Alpha#11	 ((rank(ts_max((vwap - close), 3)) + rank(ts_min((vwap - close), 3))) *rank(delta(volume, 3)))
    def alpha011(self):
        return ((rank(ts_max((self.vwap - self.close), 3)) + rank(ts_min((self.vwap - self.close), 3))) *rank(delta(self.volume, 3)))

    # Alpha#12	 (sign(delta(volume, 1)) * (-1 * delta(close, 1)))
    def alpha012(self):
        return sign(delta(self.volume, 1)) * (-1 * delta(self.close, 1))

    # Alpha#13	 (-1 * rank(covariance(rank(close), rank(volume), 5)))
    def alpha013(self):
        return -1 * rank(covariance(rank(self.close), rank(self.volume), 5))

    # Alpha#14	 ((-1 * rank(delta(returns, 3))) * correlation(open, volume, 10))
    def alpha014(self):
        df = correlation(self.open, self.volume, 10)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * rank(delta(self.returns, 3)) * df

    # Alpha#15	 (-1 * sum(rank(correlation(rank(high), rank(volume), 3)), 3))
    def alpha015(self):
        df = correlation(rank(self.high), rank(self.volume), 3)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * ts_sum(rank(df), 3)

    # Alpha#16	 (-1 * rank(covariance(rank(high), rank(volume), 5)))
    def alpha016(self):
        return -1 * rank(covariance(rank(self.high), rank(self.volume), 5))

    # Alpha#17	 (((-1 * rank(ts_rank(close, 10))) * rank(delta(delta(close, 1), 1))) *rank(ts_rank((volume / adv20), 5)))
    def alpha017(self):
        adv20 = sma(self.volume, 20)
        return -1 * (rank(ts_rank(self.close, 10)) *
                     rank(delta(delta(self.close, 1), 1)) *
                     rank(ts_rank((self.volume / adv20), 5)))

    # Alpha#18	 (-1 * rank(((stddev(abs((close - open)), 5) + (close - open)) + correlation(close, open,10))))
    def alpha018(self):
        df = correlation(self.close, self.open, 10)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * (rank((stddev(abs((self.close - self.open)), 5) + (self.close - self.open)) +
                          df))

    # Alpha#19	 ((-1 * sign(((close - delay(close, 7)) + delta(close, 7)))) * (1 + rank((1 + sum(returns,250)))))
    def alpha019(self):
        return ((-1 * sign((self.close - delay(self.close, 7)) + delta(self.close, 7))) *
                (1 + rank(1 + ts_sum(self.returns, 250))))

    # Alpha#20	 (((-1 * rank((open - delay(high, 1)))) * rank((open - delay(close, 1)))) * rank((open -delay(low, 1))))
    def alpha020(self):
        return -1 * (rank(self.open - delay(self.high, 1)) *
                     rank(self.open - delay(self.close, 1)) *
                     rank(self.open - delay(self.low, 1)))

    # Alpha#21	 ((((sum(close, 8) / 8) + stddev(close, 8)) < (sum(close, 2) / 2)) ? (-1 * 1) : (((sum(close,2) / 2) < ((sum(close, 8) / 8) - stddev(close, 8))) ? 1 : (((1 < (volume / adv20)) || ((volume /adv20) == 1)) ? 1 : (-1 * 1))))
    def alpha021(self):
        cond_1 = sma(self.close, 8) + stddev(self.close, 8) < sma(self.close, 2)
        cond_2 = sma(self.volume, 20) / self.volume < 1
        alpha = pd.DataFrame(np.ones_like(self.close), index=self.close.index
                             )
#        alpha = pd.DataFrame(np.ones_like(self.close), index=self.close.index,
#                             columns=self.close.columns)
        alpha[cond_1 | cond_2] = -1
        return alpha

    # Alpha#22	 (-1 * (delta(correlation(high, volume, 5), 5) * rank(stddev(close, 20))))
    def alpha022(self):
        df = correlation(self.high, self.volume, 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * delta(df, 5) * rank(stddev(self.close, 20))

    # Alpha#23	 (((sum(high, 20) / 20) < high) ? (-1 * delta(high, 2)) : 0)
    def alpha023(self):
        cond = sma(self.high, 20) < self.high
        alpha = pd.DataFrame(np.zeros_like(self.close),index=self.close.index,columns=['close'])
        alpha.loc[cond,'close'] = -1 * delta(self.high, 2).fillna(value=0)
        return alpha

    # Alpha#24	 ((((delta((sum(close, 100) / 100), 100) / delay(close, 100)) < 0.05) ||((delta((sum(close, 100) / 100), 100) / delay(close, 100)) == 0.05)) ? (-1 * (close - ts_min(close,100))) : (-1 * delta(close, 3)))
    def alpha024(self):
        cond = delta(sma(self.close, 100), 100) / delay(self.close, 100) <= 0.05
        alpha = -1 * delta(self.close, 3)
        alpha[cond] = -1 * (self.close - ts_min(self.close, 100))
        return alpha

    # Alpha#25	 rank(((((-1 * returns) * adv20) * vwap) * (high - close)))
    def alpha025(self):
        adv20 = sma(self.volume, 20)
        return rank(((((-1 * self.returns) * adv20) * self.vwap) * (self.high - self.close)))

    # Alpha#26	 (-1 * ts_max(correlation(ts_rank(volume, 5), ts_rank(high, 5), 5), 3))
    def alpha026(self):
        df = correlation(ts_rank(self.volume, 5), ts_rank(self.high, 5), 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * ts_max(df, 3)

    # Alpha#27	 ((0.5 < rank((sum(correlation(rank(volume), rank(vwap), 6), 2) / 2.0))) ? (-1 * 1) : 1)
    ###
    ## Some Error, still fixing!!
    def alpha027(self):
        alpha = rank((sma(correlation(rank(self.volume), rank(self.vwap), 6), 2) / 2.0))
        alpha[alpha > 0.5] = -1
        alpha[alpha <= 0.5]=1
        return alpha

    # Alpha#28	 scale(((correlation(adv20, low, 5) + ((high + low) / 2)) - close))
    def alpha028(self):
        adv20 = sma(self.volume, 20)
        df = correlation(adv20, self.low, 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return scale(((df + ((self.high + self.low) / 2)) - self.close))

    # Alpha#29	 (min(product(rank(rank(scale(log(sum(ts_min(rank(rank((-1 * rank(delta((close - 1),5))))), 2), 1))))), 1), 5) + ts_rank(delay((-1 * returns), 6), 5))
    def alpha029(self):
        return (ts_min(rank(rank(scale(log(ts_sum(rank(rank(-1 * rank(delta((self.close - 1), 5)))), 2))))), 5) +
                ts_rank(delay((-1 * self.returns), 6), 5))

    # Alpha#30	 (((1.0 - rank(((sign((close - delay(close, 1))) + sign((delay(close, 1) - delay(close, 2)))) +sign((delay(close, 2) - delay(close, 3)))))) * sum(volume, 5)) / sum(volume, 20))
    def alpha030(self):
        delta_close = delta(self.close, 1)
        inner = sign(delta_close) + sign(delay(delta_close, 1)) + sign(delay(delta_close, 2))
        return ((1.0 - rank(inner)) * ts_sum(self.volume, 5)) / ts_sum(self.volume, 20)

    # Alpha#31	 ((rank(rank(rank(decay_linear((-1 * rank(rank(delta(close, 10)))), 10)))) + rank((-1 *delta(close, 3)))) + sign(scale(correlation(adv20, low, 12))))
    def alpha031(self):
        adv20 = sma(self.volume, 20)
        df = correlation(adv20, self.low, 12).replace([-np.inf, np.inf], 0).fillna(value=0)
        p1=rank(rank(rank(decay_linear((-1 * rank(rank(delta(self.close, 10)))).to_frame(), 10))))
        p2=rank((-1 * delta(self.close, 3)))
        p3=sign(scale(df))

        return p1.CLOSE+p2+p3

    # Alpha#32	 (scale(((sum(close, 7) / 7) - close)) + (20 * scale(correlation(vwap, delay(close, 5),230))))
    def alpha032(self):
        return scale(((sma(self.close, 7) / 7) - self.close)) + (20 * scale(correlation(self.vwap, delay(self.close, 5),230)))

    # Alpha#33	 rank((-1 * ((1 - (open / close))^1)))
    def alpha033(self):
        return rank(-1 + (self.open / self.close))

    # Alpha#34	 rank(((1 - rank((stddev(returns, 2) / stddev(returns, 5)))) + (1 - rank(delta(close, 1)))))
    def alpha034(self):
        inner = stddev(self.returns, 2) / stddev(self.returns, 5)
        inner = inner.replace([-np.inf, np.inf], 1).fillna(value=1)
        return rank(2 - rank(inner) - rank(delta(self.close, 1)))

    # Alpha#35	 ((Ts_Rank(volume, 32) * (1 - Ts_Rank(((close + high) - low), 16))) * (1 -Ts_Rank(returns, 32)))
    def alpha035(self):
        return ((ts_rank(self.volume, 32) *
                 (1 - ts_rank(self.close + self.high - self.low, 16))) *
                (1 - ts_rank(self.returns, 32)))

    # Alpha#36	 (((((2.21 * rank(correlation((close - open), delay(volume, 1), 15))) + (0.7 * rank((open- close)))) + (0.73 * rank(Ts_Rank(delay((-1 * returns), 6), 5)))) + rank(abs(correlation(vwap,adv20, 6)))) + (0.6 * rank((((sum(close, 200) / 200) - open) * (close - open)))))
    def alpha036(self):
        adv20 = sma(self.volume, 20)
        return (((((2.21 * rank(correlation((self.close - self.open), delay(self.volume, 1), 15))) + (0.7 * rank((self.open- self.close)))) + (0.73 * rank(ts_rank(delay((-1 * self.returns), 6), 5)))) + rank(abs(correlation(self.vwap,adv20, 6)))) + (0.6 * rank((((sma(self.close, 200) / 200) - self.open) * (self.close - self.open)))))

    # Alpha#37	 (rank(correlation(delay((open - close), 1), close, 200)) + rank((open - close)))
    def alpha037(self):
        return rank(correlation(delay(self.open - self.close, 1), self.close, 200)) + rank(self.open - self.close)

    # Alpha#38	 ((-1 * rank(Ts_Rank(close, 10))) * rank((close / open)))
    def alpha038(self):
        inner = self.close / self.open
        inner = inner.replace([-np.inf, np.inf], 1).fillna(value=1)
        return -1 * rank(ts_rank(self.open, 10)) * rank(inner)

    # Alpha#39	 ((-1 * rank((delta(close, 7) * (1 - rank(decay_linear((volume / adv20), 9)))))) * (1 +rank(sum(returns, 250))))
    def alpha039(self):
        adv20 = sma(self.volume, 20)
        return ((-1 * rank(delta(self.close, 7) * (1 - rank(decay_linear((self.volume / adv20).to_frame(), 9).CLOSE)))) *
                (1 + rank(sma(self.returns, 250))))

    # Alpha#40	 ((-1 * rank(stddev(high, 10))) * correlation(high, volume, 10))
    def alpha040(self):
        return -1 * rank(stddev(self.high, 10)) * correlation(self.high, self.volume, 10)

    # Alpha#41	 (((high * low)^0.5) - vwap)
    def alpha041(self):
        return pow((self.high * self.low),0.5) - self.vwap

    # Alpha#42	 (rank((vwap - close)) / rank((vwap + close)))
    def alpha042(self):
        return rank((self.vwap - self.close)) / rank((self.vwap + self.close))

    # Alpha#43	 (ts_rank((volume / adv20), 20) * ts_rank((-1 * delta(close, 7)), 8))
    def alpha043(self):
        adv20 = sma(self.volume, 20)
        return ts_rank(self.volume / adv20, 20) * ts_rank((-1 * delta(self.close, 7)), 8)

    # Alpha#44	 (-1 * correlation(high, rank(volume), 5))
    def alpha044(self):
        df = correlation(self.high, rank(self.volume), 5)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * df

    # Alpha#45	 (-1 * ((rank((sum(delay(close, 5), 20) / 20)) * correlation(close, volume, 2)) *rank(correlation(sum(close, 5), sum(close, 20), 2))))
    def alpha045(self):
        df = correlation(self.close, self.volume, 2)
        df = df.replace([-np.inf, np.inf], 0).fillna(value=0)
        return -1 * (rank(sma(delay(self.close, 5), 20)) * df *
                     rank(correlation(ts_sum(self.close, 5), ts_sum(self.close, 20), 2)))

    # Alpha#46	 ((0.25 < (((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10))) ?(-1 * 1) : (((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < 0) ? 1 :((-1 * 1) * (close - delay(close, 1)))))
    def alpha046(self):
        inner = ((delay(self.close, 20) - delay(self.close, 10)) / 10) - ((delay(self.close, 10) - self.close) / 10)
        alpha = (-1 * delta(self.close))
        alpha[inner < 0] = 1
        alpha[inner > 0.25] = -1
        return alpha

    # Alpha#47	 ((((rank((1 / close)) * volume) / adv20) * ((high * rank((high - close))) / (sum(high, 5) /5))) - rank((vwap - delay(vwap, 5))))
    def alpha047(self):
        adv20 = sma(self.volume, 20)
        return ((((rank((1 / self.close)) * self.volume) / adv20) * ((self.high * rank((self.high - self.close))) / (sma(self.high, 5) /5))) - rank((self.vwap - delay(self.vwap, 5))))

    # Alpha#48	 (indneutralize(((correlation(delta(close, 1), delta(delay(close, 1), 1), 250) *delta(close, 1)) / close), IndClass.subindustry) / sum(((delta(close, 1) / delay(close, 1))^2), 250))


    # Alpha#49	 (((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < (-1 *0.1)) ? 1 : ((-1 * 1) * (close - delay(close, 1))))
    def alpha049(self):
        inner = (((delay(self.close, 20) - delay(self.close, 10)) / 10) - ((delay(self.close, 10) - self.close) / 10))
        alpha = (-1 * delta(self.close))
        alpha[inner < -0.1] = 1
        return alpha

    # Alpha#50	 (-1 * ts_max(rank(correlation(rank(volume), rank(vwap), 5)), 5))
    def alpha050(self):
        return (-1 * ts_max(rank(correlation(rank(self.volume), rank(self.vwap), 5)), 5))

    # Alpha#51	 (((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < (-1 *0.05)) ? 1 : ((-1 * 1) * (close - delay(close, 1))))
    def alpha051(self):
        inner = (((delay(self.close, 20) - delay(self.close, 10)) / 10) - ((delay(self.close, 10) - self.close) / 10))
        alpha = (-1 * delta(self.close))
        alpha[inner < -0.05] = 1
        return alpha

    # Alpha#52	 ((((-1 * ts_min(low, 5)) + delay(ts_min(low, 5), 5)) * rank(((sum(returns, 240) -sum(returns, 20)) / 220))) * ts_rank(volume, 5))
    def alpha052(self):
        return (((-1 * delta(ts_min(self.low, 5), 5)) *
                 rank(((ts_sum(self.returns, 240) - ts_sum(self.returns, 20)) / 220))) * ts_rank(self.volume, 5))

    # Alpha#53	 (-1 * delta((((close - low) - (high - close)) / (close - low)), 9))
    def alpha053(self):
        inner = (self.close - self.low).replace(0, 0.0001)
        return -1 * delta((((self.close - self.low) - (self.high - self.close)) / inner), 9)

    # Alpha#54	 ((-1 * ((low - close) * (open^5))) / ((low - high) * (close^5)))
    def alpha054(self):
        inner = (self.low - self.high).replace(0, -0.0001)
        return -1 * (self.low - self.close) * (self.open ** 5) / (inner * (self.close ** 5))

    # Alpha#55	 (-1 * correlation(rank(((close - ts_min(low, 12)) / (ts_max(high, 12) - ts_min(low,12)))), rank(volume), 6))
    def alpha055(self):
        divisor = (ts_max(self.high, 12) - ts_min(self.low, 12)).replace(0, 0.0001)
        inner = (self.close - ts_min(self.low, 12)) / (divisor)
        df = correlation(rank(inner), rank(self.volume), 6)
        return -1 * df.replace([-np.inf, np.inf], 0).fillna(value=0)

    # Alpha#56	 (0 - (1 * (rank((sum(returns, 10) / sum(sum(returns, 2), 3))) * rank((returns * cap)))))
    # This Alpha uses the Cap, however I have not acquired the data yet
#    def alpha056(self):
#        return (0 - (1 * (rank((sma(self.returns, 10) / sma(sma(self.returns, 2), 3))) * rank((self.returns * self.cap)))))

    # Alpha#57	 (0 - (1 * ((close - vwap) / decay_linear(rank(ts_argmax(close, 30)), 2))))
    def alpha057(self):
        return (0 - (1 * ((self.close - self.vwap) / decay_linear(rank(ts_argmax(self.close, 30)).to_frame(), 2).CLOSE)))

    # Alpha#58	 (-1 * Ts_Rank(decay_linear(correlation(IndNeutralize(vwap, IndClass.sector), volume,3.92795), 7.89291), 5.50322))

    # Alpha#59	 (-1 * Ts_Rank(decay_linear(correlation(IndNeutralize(((vwap * 0.728317) + (vwap *(1 - 0.728317))), IndClass.industry), volume, 4.25197), 16.2289), 8.19648))


    # Alpha#60	 (0 - (1 * ((2 * scale(rank(((((close - low) - (high - close)) / (high - low)) * volume)))) -scale(rank(ts_argmax(close, 10))))))
    def alpha060(self):
        divisor = (self.high - self.low).replace(0, 0.0001)
        inner = ((self.close - self.low) - (self.high - self.close)) * self.volume / divisor
        return - ((2 * scale(rank(inner))) - scale(rank(ts_argmax(self.close, 10))))

	# Alpha#61	 (rank((vwap - ts_min(vwap, 16.1219))) < rank(correlation(vwap, adv180, 17.9282)))
    def alpha061(self):
        adv180 = sma(self.volume, 180)
        return (rank((self.vwap - ts_min(self.vwap, 16))) < rank(correlation(self.vwap, adv180, 18)))

	# Alpha#62	 ((rank(correlation(vwap, sum(adv20, 22.4101), 9.91009)) < rank(((rank(open) +rank(open)) < (rank(((high + low) / 2)) + rank(high))))) * -1)
    def alpha062(self):
        adv20 = sma(self.volume, 20)
        return ((rank(correlation(self.vwap, sma(adv20, 22), 10)) < rank(((rank(self.open) +rank(self.open)) < (rank(((self.high + self.low) / 2)) + rank(self.high))))) * -1)

    # Alpha#63	 ((rank(decay_linear(delta(IndNeutralize(close, IndClass.industry), 2.25164), 8.22237))- rank(decay_linear(correlation(((vwap * 0.318108) + (open * (1 - 0.318108))), sum(adv180,37.2467), 13.557), 12.2883))) * -1)


    # Alpha#64	 ((rank(correlation(sum(((open * 0.178404) + (low * (1 - 0.178404))), 12.7054),sum(adv120, 12.7054), 16.6208)) < rank(delta(((((high + low) / 2) * 0.178404) + (vwap * (1 -0.178404))), 3.69741))) * -1)
    def alpha064(self):
        adv120 = sma(self.volume, 120)
        return ((rank(correlation(sma(((self.open * 0.178404) + (self.low * (1 - 0.178404))), 13),sma(adv120, 13), 17)) < rank(delta(((((self.high + self.low) / 2) * 0.178404) + (self.vwap * (1 -0.178404))), 3.69741))) * -1)

    # Alpha#65	 ((rank(correlation(((open * 0.00817205) + (vwap * (1 - 0.00817205))), sum(adv60,8.6911), 6.40374)) < rank((open - ts_min(open, 13.635)))) * -1)
    def alpha065(self):
        adv60 = sma(self.volume, 60)
        return ((rank(correlation(((self.open * 0.00817205) + (self.vwap * (1 - 0.00817205))), sma(adv60,9), 6)) < rank((self.open - ts_min(self.open, 14)))) * -1)

    # Alpha#66	 ((rank(decay_linear(delta(vwap, 3.51013), 7.23052)) + Ts_Rank(decay_linear(((((low* 0.96633) + (low * (1 - 0.96633))) - vwap) / (open - ((high + low) / 2))), 11.4157), 6.72611)) * -1)
    def alpha066(self):
        return ((rank(decay_linear(delta(self.vwap, 4).to_frame(), 7).CLOSE) + ts_rank(decay_linear(((((self.low* 0.96633) + (self.low * (1 - 0.96633))) - self.vwap) / (self.open - ((self.high + self.low) / 2))).to_frame(), 11).CLOSE, 7)) * -1)

    # Alpha#67	 ((rank((high - ts_min(high, 2.14593)))^rank(correlation(IndNeutralize(vwap,IndClass.sector), IndNeutralize(adv20, IndClass.subindustry), 6.02936))) * -1)


    # Alpha#68	 ((Ts_Rank(correlation(rank(high), rank(adv15), 8.91644), 13.9333) <rank(delta(((close * 0.518371) + (low * (1 - 0.518371))), 1.06157))) * -1)
    def alpha068(self):
        adv15 = sma(self.volume, 15)
        return ((ts_rank(correlation(rank(self.high), rank(adv15), 9), 14) <rank(delta(((self.close * 0.518371) + (self.low * (1 - 0.518371))), 1.06157))) * -1)

    # Alpha#69	 ((rank(ts_max(delta(IndNeutralize(vwap, IndClass.industry), 2.72412),4.79344))^Ts_Rank(correlation(((close * 0.490655) + (vwap * (1 - 0.490655))), adv20, 4.92416),9.0615)) * -1)

    # Alpha#70	 ((rank(delta(vwap, 1.29456))^Ts_Rank(correlation(IndNeutralize(close,IndClass.industry), adv50, 17.8256), 17.9171)) * -1)


    # Alpha#71	 max(Ts_Rank(decay_linear(correlation(Ts_Rank(close, 3.43976), Ts_Rank(adv180,12.0647), 18.0175), 4.20501), 15.6948), Ts_Rank(decay_linear((rank(((low + open) - (vwap +vwap)))^2), 16.4662), 4.4388))
    def alpha071(self):
        adv180 = sma(self.volume, 180)
        p1=ts_rank(decay_linear(correlation(ts_rank(self.close, 3), ts_rank(adv180,12), 18).to_frame(), 4).CLOSE, 16)
        p2=ts_rank(decay_linear((rank(((self.low + self.open) - (self.vwap +self.vwap))).pow(2)).to_frame(), 16).CLOSE, 4)
        df=pd.DataFrame({'p1':p1,'p2':p2})
        df.loc[df['p1']>=df['p2'],'max']=df['p1']
        df.loc[df['p2']>=df['p1'],'max']=df['p2']
        return df['max']
        #return max(ts_rank(decay_linear(correlation(ts_rank(self.close, 3), ts_rank(adv180,12), 18).to_frame(), 4).CLOSE, 16), ts_rank(decay_linear((rank(((self.low + self.open) - (self.vwap +self.vwap))).pow(2)).to_frame(), 16).CLOSE, 4))

    # Alpha#72	 (rank(decay_linear(correlation(((high + low) / 2), adv40, 8.93345), 10.1519)) /rank(decay_linear(correlation(Ts_Rank(vwap, 3.72469), Ts_Rank(volume, 18.5188), 6.86671),2.95011)))
    def alpha072(self):
        adv40 = sma(self.volume, 40)
        return (rank(decay_linear(correlation(((self.high + self.low) / 2), adv40, 9).to_frame(), 10).CLOSE) /rank(decay_linear(correlation(ts_rank(self.vwap, 4), ts_rank(self.volume, 19), 7).to_frame(),3).CLOSE))

    # Alpha#73	 (max(rank(decay_linear(delta(vwap, 4.72775), 2.91864)),Ts_Rank(decay_linear(((delta(((open * 0.147155) + (low * (1 - 0.147155))), 2.03608) / ((open *0.147155) + (low * (1 - 0.147155)))) * -1), 3.33829), 16.7411)) * -1)
    def alpha073(self):
        p1=rank(decay_linear(delta(self.vwap, 5).to_frame(), 3).CLOSE)
        p2=ts_rank(decay_linear(((delta(((self.open * 0.147155) + (self.low * (1 - 0.147155))), 2) / ((self.open *0.147155) + (self.low * (1 - 0.147155)))) * -1).to_frame(), 3).CLOSE, 17)
        df=pd.DataFrame({'p1':p1,'p2':p2})
        df.loc[df['p1']>=df['p2'],'max']=df['p1']
        df.loc[df['p2']>=df['p1'],'max']=df['p2']
        return -1*df['max']
        #return (max(rank(decay_linear(delta(self.vwap, 5).to_frame(), 3).CLOSE),ts_rank(decay_linear(((delta(((self.open * 0.147155) + (self.low * (1 - 0.147155))), 2) / ((self.open *0.147155) + (self.low * (1 - 0.147155)))) * -1).to_frame(), 3).CLOSE, 17)) * -1)

    # Alpha#74	 ((rank(correlation(close, sum(adv30, 37.4843), 15.1365)) <rank(correlation(rank(((high * 0.0261661) + (vwap * (1 - 0.0261661)))), rank(volume), 11.4791)))* -1)
    def alpha074(self):
        adv30 = sma(self.volume, 30)
        return ((rank(correlation(self.close, sma(adv30, 37), 15)) <rank(correlation(rank(((self.high * 0.0261661) + (self.vwap * (1 - 0.0261661)))), rank(self.volume), 11)))* -1)

    # Alpha#75	 (rank(correlation(vwap, volume, 4.24304)) < rank(correlation(rank(low), rank(adv50),12.4413)))
    def alpha075(self):
        adv50 = sma(self.volume, 50)
        return (rank(correlation(self.vwap, self.volume, 4)) < rank(correlation(rank(self.low), rank(adv50),12)))

    # Alpha#76	 (max(rank(decay_linear(delta(vwap, 1.24383), 11.8259)),Ts_Rank(decay_linear(Ts_Rank(correlation(IndNeutralize(low, IndClass.sector), adv81,8.14941), 19.569), 17.1543), 19.383)) * -1)


    # Alpha#77	 min(rank(decay_linear(((((high + low) / 2) + high) - (vwap + high)), 20.0451)),rank(decay_linear(correlation(((high + low) / 2), adv40, 3.1614), 5.64125)))
    def alpha077(self):
        adv40 = sma(self.volume, 40)
        p1=rank(decay_linear(((((self.high + self.low) / 2) + self.high) - (self.vwap + self.high)).to_frame(), 20).CLOSE)
        p2=rank(decay_linear(correlation(((self.high + self.low) / 2), adv40, 3).to_frame(), 6).CLOSE)
        df=pd.DataFrame({'p1':p1,'p2':p2})
        df.loc[df['p1']>=df['p2'],'min']=df['p2']
        df.loc[df['p2']>=df['p1'],'min']=df['p1']
        return df['min']
        #return min(rank(decay_linear(((((self.high + self.low) / 2) + self.high) - (self.vwap + self.high)).to_frame(), 20).CLOSE),rank(decay_linear(correlation(((self.high + self.low) / 2), adv40, 3).to_frame(), 6).CLOSE))

    # Alpha#78	 (rank(correlation(sum(((low * 0.352233) + (vwap * (1 - 0.352233))), 19.7428),sum(adv40, 19.7428), 6.83313))^rank(correlation(rank(vwap), rank(volume), 5.77492)))
    def alpha078(self):
        adv40 = sma(self.volume, 40)
        return (rank(correlation(ts_sum(((self.low * 0.352233) + (self.vwap * (1 - 0.352233))), 20),ts_sum(adv40,20), 7)).pow(rank(correlation(rank(self.vwap), rank(self.volume), 6))))

    # Alpha#79	 (rank(delta(IndNeutralize(((close * 0.60733) + (open * (1 - 0.60733))),IndClass.sector), 1.23438)) < rank(correlation(Ts_Rank(vwap, 3.60973), Ts_Rank(adv150,9.18637), 14.6644)))

    # Alpha#80	 ((rank(Sign(delta(IndNeutralize(((open * 0.868128) + (high * (1 - 0.868128))),IndClass.industry), 4.04545)))^Ts_Rank(correlation(high, adv10, 5.11456), 5.53756)) * -1)


    # Alpha#81	 ((rank(Log(product(rank((rank(correlation(vwap, sum(adv10, 49.6054),8.47743))^4)), 14.9655))) < rank(correlation(rank(vwap), rank(volume), 5.07914))) * -1)
    def alpha081(self):
        adv10 = sma(self.volume, 10)
        return ((rank(log(product(rank((rank(correlation(self.vwap, ts_sum(adv10, 50),8)).pow(4))), 15))) < rank(correlation(rank(self.vwap), rank(self.volume), 5))) * -1)

    # Alpha#82	 (min(rank(decay_linear(delta(open, 1.46063), 14.8717)),Ts_Rank(decay_linear(correlation(IndNeutralize(volume, IndClass.sector), ((open * 0.634196) +(open * (1 - 0.634196))), 17.4842), 6.92131), 13.4283)) * -1)


    # Alpha#83	 ((rank(delay(((high - low) / (sum(close, 5) / 5)), 2)) * rank(rank(volume))) / (((high -low) / (sum(close, 5) / 5)) / (vwap - close)))
    def alpha083(self):
        return ((rank(delay(((self.high - self.low) / (ts_sum(self.close, 5) / 5)), 2)) * rank(rank(self.volume))) / (((self.high -self.low) / (ts_sum(self.close, 5) / 5)) / (self.vwap - self.close)))

    # Alpha#84	 SignedPower(Ts_Rank((vwap - ts_max(vwap, 15.3217)), 20.7127), delta(close,4.96796))
    def alpha084(self):
        return pow(ts_rank((self.vwap - ts_max(self.vwap, 15)), 21), delta(self.close,5))

    # Alpha#85	 (rank(correlation(((high * 0.876703) + (close * (1 - 0.876703))), adv30,9.61331))^rank(correlation(Ts_Rank(((high + low) / 2), 3.70596), Ts_Rank(volume, 10.1595),7.11408)))
    def alpha085(self):
        adv30 = sma(self.volume, 30)
        return (rank(correlation(((self.high * 0.876703) + (self.close * (1 - 0.876703))), adv30,10)).pow(rank(correlation(ts_rank(((self.high + self.low) / 2), 4), ts_rank(self.volume, 10),7))))

    # Alpha#86	 ((Ts_Rank(correlation(close, sum(adv20, 14.7444), 6.00049), 20.4195) < rank(((open+ close) - (vwap + open)))) * -1)

    def alpha086(self):
        adv20 = sma(self.volume, 20)
        return ((ts_rank(correlation(self.close, sma(adv20, 15), 6), 20) < rank(((self.open+ self.close) - (self.vwap +self.open)))) * -1)

    # Alpha#87	 (max(rank(decay_linear(delta(((close * 0.369701) + (vwap * (1 - 0.369701))),1.91233), 2.65461)), Ts_Rank(decay_linear(abs(correlation(IndNeutralize(adv81,IndClass.industry), close, 13.4132)), 4.89768), 14.4535)) * -1)


    # Alpha#88	 min(rank(decay_linear(((rank(open) + rank(low)) - (rank(high) + rank(close))),8.06882)), Ts_Rank(decay_linear(correlation(Ts_Rank(close, 8.44728), Ts_Rank(adv60,20.6966), 8.01266), 6.65053), 2.61957))
    def alpha088(self):
        adv60 = sma(self.volume, 60)
        p1=rank(decay_linear(((rank(self.open) + rank(self.low)) - (rank(self.high) + rank(self.close))).to_frame(),8).CLOSE)
        p2=ts_rank(decay_linear(correlation(ts_rank(self.close, 8), ts_rank(adv60,21), 8).to_frame(), 7).CLOSE, 3)
        df=pd.DataFrame({'p1':p1,'p2':p2})
        df.loc[df['p1']>=df['p2'],'min']=df['p2']
        df.loc[df['p2']>=df['p1'],'min']=df['p1']
        return df['min']
        #return min(rank(decay_linear(((rank(self.open) + rank(self.low)) - (rank(self.high) + rank(self.close))).to_frame(),8).CLOSE), ts_rank(decay_linear(correlation(ts_rank(self.close, 8), ts_rank(adv60,20.6966), 8).to_frame(), 7).CLOSE, 3))

    # Alpha#89	 (Ts_Rank(decay_linear(correlation(((low * 0.967285) + (low * (1 - 0.967285))), adv10,6.94279), 5.51607), 3.79744) - Ts_Rank(decay_linear(delta(IndNeutralize(vwap,IndClass.industry), 3.48158), 10.1466), 15.3012))

    # Alpha#90	 ((rank((close - ts_max(close, 4.66719)))^Ts_Rank(correlation(IndNeutralize(adv40,IndClass.subindustry), low, 5.38375), 3.21856)) * -1)

    # Alpha#91	 ((Ts_Rank(decay_linear(decay_linear(correlation(IndNeutralize(close,IndClass.industry), volume, 9.74928), 16.398), 3.83219), 4.8667) -rank(decay_linear(correlation(vwap, adv30, 4.01303), 2.6809))) * -1)


    # Alpha#92	 min(Ts_Rank(decay_linear(((((high + low) / 2) + close) < (low + open)), 14.7221),18.8683), Ts_Rank(decay_linear(correlation(rank(low), rank(adv30), 7.58555), 6.94024),6.80584))
    def alpha092(self):
        adv30 = sma(self.volume, 30)
        p1=ts_rank(decay_linear(((((self.high + self.low) / 2) + self.close) < (self.low + self.open)).to_frame(), 15).CLOSE,19)
        p2=ts_rank(decay_linear(correlation(rank(self.low), rank(adv30), 8).to_frame(), 7).CLOSE,7)
        df=pd.DataFrame({'p1':p1,'p2':p2})
        df.loc[df['p1']>=df['p2'],'min']=df['p2']
        df.loc[df['p2']>=df['p1'],'min']=df['p1']
        return df['min']
        #return  min(ts_rank(decay_linear(((((self.high + self.low) / 2) + self.close) < (self.low + self.open)).to_frame(), 15).CLOSE,19), ts_rank(decay_linear(correlation(rank(self.low), rank(adv30), 8).to_frame(), 7).CLOSE,7))

    # Alpha#93	 (Ts_Rank(decay_linear(correlation(IndNeutralize(vwap, IndClass.industry), adv81,17.4193), 19.848), 7.54455) / rank(decay_linear(delta(((close * 0.524434) + (vwap * (1 -0.524434))), 2.77377), 16.2664)))


    # Alpha#94	 ((rank((vwap - ts_min(vwap, 11.5783)))^Ts_Rank(correlation(Ts_Rank(vwap,19.6462), Ts_Rank(adv60, 4.02992), 18.0926), 2.70756)) * -1)
    def alpha094(self):
        adv60 = sma(self.volume, 60)
        return ((rank((self.vwap - ts_min(self.vwap, 12))).pow(ts_rank(correlation(ts_rank(self.vwap,20), ts_rank(adv60, 4), 18), 3)) * -1))

    # Alpha#95	 (rank((open - ts_min(open, 12.4105))) < Ts_Rank((rank(correlation(sum(((high + low)/ 2), 19.1351), sum(adv40, 19.1351), 12.8742))^5), 11.7584))
    def alpha095(self):
        adv40 = sma(self.volume, 40)
        return (rank((self.open - ts_min(self.open, 12))) < ts_rank((rank(correlation(sma(((self.high + self.low)/ 2), 19), sma(adv40, 19), 13)).pow(5)), 12))

    # Alpha#96	 (max(Ts_Rank(decay_linear(correlation(rank(vwap), rank(volume), 3.83878),4.16783), 8.38151), Ts_Rank(decay_linear(Ts_ArgMax(correlation(Ts_Rank(close, 7.45404),Ts_Rank(adv60, 4.13242), 3.65459), 12.6556), 14.0365), 13.4143)) * -1)
    def alpha096(self):
        adv60 = sma(self.volume, 60)
        p1=ts_rank(decay_linear(correlation(rank(self.vwap), rank(self.volume).to_frame(), 4),4).CLOSE, 8)
        p2=ts_rank(decay_linear(ts_argmax(correlation(ts_rank(self.close, 7),ts_rank(adv60, 4), 4), 13).to_frame(), 14).CLOSE, 13)
        df=pd.DataFrame({'p1':p1,'p2':p2})
        df.loc[df['p1']>=df['p2'],'max']=df['p1']
        df.loc[df['p2']>=df['p1'],'max']=df['p2']
        return -1*df['max']
        #return (max(ts_rank(decay_linear(correlation(rank(self.vwap), rank(self.volume).to_frame(), 4),4).CLOSE, 8), ts_rank(decay_linear(ts_argmax(correlation(ts_rank(self.close, 7),ts_rank(adv60, 4), 4), 13).to_frame(), 14).CLOSE, 13)) * -1)

    # Alpha#97	 ((rank(decay_linear(delta(IndNeutralize(((low * 0.721001) + (vwap * (1 - 0.721001))),IndClass.industry), 3.3705), 20.4523)) - Ts_Rank(decay_linear(Ts_Rank(correlation(Ts_Rank(low,7.87871), Ts_Rank(adv60, 17.255), 4.97547), 18.5925), 15.7152), 6.71659)) * -1)


    # Alpha#98	 (rank(decay_linear(correlation(vwap, sum(adv5, 26.4719), 4.58418), 7.18088)) -rank(decay_linear(Ts_Rank(Ts_ArgMin(correlation(rank(open), rank(adv15), 20.8187), 8.62571),6.95668), 8.07206)))
    def alpha098(self):
        adv5 = sma(self.volume, 5)
        adv15 = sma(self.volume, 15)
        return (rank(decay_linear(correlation(self.vwap, sma(adv5, 26), 5).to_frame(), 7).CLOSE) -rank(decay_linear(ts_rank(ts_argmin(correlation(rank(self.open), rank(adv15), 21), 9),7).to_frame(), 8).CLOSE))

    # Alpha#99	 ((rank(correlation(sum(((high + low) / 2), 19.8975), sum(adv60, 19.8975), 8.8136)) <rank(correlation(low, volume, 6.28259))) * -1)
    def alpha099(self):
        adv60 = sma(self.volume, 60)
        return ((rank(correlation(ts_sum(((self.high + self.low) / 2), 20), ts_sum(adv60, 20), 9)) <rank(correlation(self.low, self.volume, 6))) * -1)

    # Alpha#100	 (0 - (1 * (((1.5 * scale(indneutralize(indneutralize(rank(((((close - low) - (high -close)) / (high - low)) * volume)), IndClass.subindustry), IndClass.subindustry))) -scale(indneutralize((correlation(close, rank(adv20), 5) - rank(ts_argmin(close, 30))),IndClass.subindustry))) * (volume / adv20))))


    # Alpha#101	 ((close - open) / ((high - low) + .001))
    def alpha101(self):
        return (self.close - self.open) /((self.high - self.low) + 0.001)



## Apple

In [6]:
import yfinance as yf

start_date = "2015-05-01"
end_date = "2024-05-08"

stock_data = yf.Ticker('AAPL')
stock_data = stock_data.history(start=start_date, end=end_date)
stock_data.index = pd.to_datetime(stock_data.index)
stock_data['date'] = stock_data.index
stock_data.reset_index(drop=True, inplace=True)

stock_data.set_index('date',inplace=True)
stock_data.index = stock_data.index.date

stock_data.drop(['Dividends','Stock Splits'],axis=1, inplace=True)

stock_data

Unnamed: 0,Open,High,Low,Close,Volume
2015-05-01,28.124737,29.023571,27.946310,28.760387,234050400
2015-05-04,28.883061,29.121710,28.606496,28.704632,203953200
2015-05-05,28.581965,28.648877,28.053373,28.057835,197085600
2015-05-06,28.227333,28.269711,27.513622,27.881630,288564000
2015-05-07,27.944344,28.237741,27.776368,28.054089,175763600
...,...,...,...,...,...
2024-05-01,168.561979,171.673194,168.094799,168.283661,50383100
2024-05-02,171.474389,172.378930,169.864119,171.991272,94214900
2024-05-03,185.529503,185.877408,181.563465,182.279144,163224100
2024-05-06,181.255329,183.094214,179.336907,180.619171,78569700


In [7]:
df = stock_data.rename(columns={
    'Open': 'S_DQ_OPEN',
    'High': 'S_DQ_HIGH',
    'Low': 'S_DQ_LOW',
    'Close': 'S_DQ_CLOSE',
    'Volume': 'S_DQ_VOLUME'
})

df['S_DQ_PCTCHANGE'] = df['S_DQ_CLOSE'].pct_change() * 100
df['S_DQ_AMOUNT'] = (df['S_DQ_CLOSE'] * df['S_DQ_VOLUME'] * 100) / 1000

In [9]:
df_with_alphas = get_alpha(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inner[self.returns < 0] = stddev(self.returns, 20)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill',

In [10]:
df_with_alphas = df_with_alphas.fillna(0)
df_with_alphas = df_with_alphas.astype({col: int for col in df_with_alphas.columns if df_with_alphas[col].dtype == 'bool'})
df_with_alphas

Unnamed: 0,S_DQ_OPEN,S_DQ_HIGH,S_DQ_LOW,S_DQ_CLOSE,S_DQ_VOLUME,S_DQ_PCTCHANGE,S_DQ_AMOUNT,alpha001,alpha002,alpha003,...,alpha085,alpha086,alpha088,alpha092,alpha094,alpha095,alpha096,alpha098,alpha099,alpha101
2015-05-01,28.124737,29.023571,27.946310,28.760387,234050400,0.000000,6.731380e+08,0.000000,0.000000,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,-0.903084,0,0.589514
2015-05-04,28.883061,29.121710,28.606496,0.000000,203953200,-0.193863,5.854402e+08,0.000000,0.000000,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,-0.903084,0,0.000000
2015-05-05,28.581965,28.648877,28.053373,0.000000,197085600,-2.253285,5.529795e+08,0.000000,0.000000,0.000000,...,0.000000,0,0.020044,0.0,0.000000,0,0.0,-0.903084,0,0.000000
2015-05-06,28.227333,28.269711,27.513622,0.000000,288564000,-0.628005,8.045635e+08,0.000000,0.000000,0.000000,...,0.000000,0,0.020044,0.0,0.000000,0,0.0,-0.903084,0,0.000000
2015-05-07,27.944344,28.237741,27.776368,28.054089,175763600,0.618539,4.930888e+08,0.000000,0.000000,0.000000,...,0.000000,0,0.029956,0.0,0.000000,0,0.0,-0.903084,0,0.237352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-01,168.561979,171.673194,168.094799,1.548857,50383100,-0.604719,8.478653e+08,0.355744,-0.210730,-0.574772,...,0.596294,0,0.904846,1.0,-0.710491,1,-8.0,-0.407048,-1,-46.659601
2024-05-02,171.474389,172.378930,169.864119,171.991272,94214900,2.203191,1.620414e+09,0.201024,-0.481146,-0.588975,...,0.865420,0,0.867841,1.0,-0.864099,1,-8.0,-0.251101,-1,0.205454
2024-05-03,185.529503,185.877408,181.563465,182.279144,163224100,5.981625,2.975235e+09,0.817453,-0.642454,-0.889424,...,0.942471,0,0.824229,2.0,-0.978902,1,-13.0,-0.007930,-1,-0.753280
2024-05-06,181.255329,183.094214,179.336907,2.089555,78569700,-0.910676,1.419119e+09,0.535619,-0.644042,-0.817802,...,0.926141,0,0.887665,5.0,-0.960686,1,-13.0,0.253744,-1,-47.671939


In [2]:
pip install ppscore

Collecting ppscore
  Downloading ppscore-1.3.0.tar.gz (17 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pandas<2.0.0,>=1.0.0 (from ppscore)
  Downloading pandas-1.5.3-cp39-cp39-win_amd64.whl.metadata (12 kB)
Downloading pandas-1.5.3-cp39-cp39-win_amd64.whl (10.9 MB)
   ---------------------------------------- 0.0/10.9 MB ? eta -:--:--
    --------------------------------------- 0.2/10.9 MB 7.0 MB/s eta 0:00:02
   -- ------------------------------------- 0.7/10.9 MB 9.1 MB/s eta 0:00:02
   ---- ----------------------------------- 1.3/10.9 MB 11.7 MB/s eta 0:00:01
   ------ --------------------------------- 1.9/10.9 MB 11.0 MB/s eta 0:00:01
   --------- ------------------------------ 2.5/10.9 MB 10.5 MB/s eta 0:00:01
   ----------- ---------------------------- 3.1/10.9 MB 11.4 MB/s eta 0:00:01
   ------------ --------------------------- 3.5/10.9 MB 11.1 MB/s eta 0:00:01
   -------------- ------------------------- 4.0/

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
eodhd 1.0.30 requires pandas>=2.1.0, but you have pandas 1.5.3 which is incompatible.
eodhd 1.0.30 requires rich>=13.5.2, but you have rich 12.4.4 which is incompatible.
ludwig 0.10.4 requires bitsandbytes<0.41.0, but you have bitsandbytes 0.41.3 which is incompatible.
ludwig 0.10.4 requires pyarrow<15.0.0, but you have pyarrow 19.0.0 which is incompatible.

[notice] A new release of pip is available: 24.1.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import pandas as pd
import ppscore as pps

In [12]:
df_with_alphas.drop(['S_DQ_OPEN', 'S_DQ_HIGH', 'S_DQ_LOW' , 'S_DQ_VOLUME', 'S_DQ_PCTCHANGE', 'S_DQ_AMOUNT'], axis=1, inplace=True)

In [13]:
pps_scores = []

target_column = 'S_DQ_CLOSE'
for col in df_with_alphas.columns:
    if col != target_column:
        score = pps.score(df_with_alphas, x=col, y=target_column)
        pps_scores.append(score)

# 5. Create DataFrame of scores and sort
pps_df = pd.DataFrame(pps_scores)
pps_df = pps_df[pps_df['ppscore'] > 0].sort_values(by='ppscore', ascending=False)

# 6. Display top predictors of Close price
print(pps_df[['x', 'y', 'ppscore']])

           x           y   ppscore
17  alpha018  S_DQ_CLOSE  0.883559
4   alpha005  S_DQ_CLOSE  0.849897
41  alpha042  S_DQ_CLOSE  0.817031
23  alpha024  S_DQ_CLOSE  0.706264
8   alpha009  S_DQ_CLOSE  0.623553
9   alpha010  S_DQ_CLOSE  0.618580
24  alpha025  S_DQ_CLOSE  0.425434
27  alpha028  S_DQ_CLOSE  0.422461
54  alpha057  S_DQ_CLOSE  0.350063
46  alpha047  S_DQ_CLOSE  0.347839
70  alpha083  S_DQ_CLOSE  0.342350
47  alpha049  S_DQ_CLOSE  0.334769
49  alpha051  S_DQ_CLOSE  0.322045
32  alpha033  S_DQ_CLOSE  0.217023
52  alpha054  S_DQ_CLOSE  0.209428
81  alpha101  S_DQ_CLOSE  0.190911
11  alpha012  S_DQ_CLOSE  0.135906
38  alpha039  S_DQ_CLOSE  0.129279
0   alpha001  S_DQ_CLOSE  0.127226
34  alpha035  S_DQ_CLOSE  0.085496
40  alpha041  S_DQ_CLOSE  0.074260
51  alpha053  S_DQ_CLOSE  0.073464
36  alpha037  S_DQ_CLOSE  0.055572
30  alpha031  S_DQ_CLOSE  0.030196
6   alpha007  S_DQ_CLOSE  0.023290


  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)


## HSBC

In [6]:
pip install --upgrade yfinance

Collecting yfinance
  Downloading yfinance-0.2.63-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting curl_cffi>=0.7 (from yfinance)
  Downloading curl_cffi-0.11.3-cp39-abi3-win_amd64.whl.metadata (15 kB)
Collecting websockets>=13.0 (from yfinance)
  Downloading websockets-15.0.1-cp39-cp39-win_amd64.whl.metadata (7.0 kB)
Collecting cffi>=1.12.0 (from curl_cffi>=0.7->yfinance)
  Downloading cffi-1.17.1-cp39-cp39-win_amd64.whl.metadata (1.6 kB)
Collecting pycparser (from cffi>=1.12.0->curl_cffi>=0.7->yfinance)
  Downloading pycparser-2.22-py3-none-any.whl.metadata (943 bytes)
Downloading yfinance-0.2.63-py2.py3-none-any.whl (118 kB)
   ---------------------------------------- 0.0/118.4 kB ? eta -:--:--
   ---------------------------------------- 118.4/118.4 kB 3.5 MB/s eta 0:00:00
Downloading curl_cffi-0.11.3-cp39-abi3-win_amd64.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ------------ --------------------------- 0.4/1.4 MB 8.9 MB/s eta 0:00:01
   --

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
eodhd 1.0.30 requires pandas>=2.1.0, but you have pandas 1.5.3 which is incompatible.
eodhd 1.0.30 requires rich>=13.5.2, but you have rich 12.4.4 which is incompatible.
polygon-api-client 1.14.2 requires websockets<13.0,>=10.3, but you have websockets 15.0.1 which is incompatible.

[notice] A new release of pip is available: 24.1.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
start_date = "2015-05-01"
end_date = "2024-05-08"

stock_data = yf.Ticker('0005.HK')
stock_data = stock_data.history(start=start_date, end=end_date)
stock_data.index = pd.to_datetime(stock_data.index)
stock_data['date'] = stock_data.index
stock_data.reset_index(drop=True, inplace=True)
stock_data

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,date
0,44.409758,44.467885,43.799413,43.944733,30752542,0.0,0.0,2015-05-04 00:00:00+08:00
1,44.148177,44.293496,43.421577,43.508770,25859252,0.0,0.0,2015-05-05 00:00:00+08:00
2,43.101875,43.799409,43.043748,43.276257,33206893,0.0,0.0,2015-05-06 00:00:00+08:00
3,43.334391,43.915671,43.218133,43.450645,25310481,0.0,0.0,2015-05-07 00:00:00+08:00
4,43.799406,44.758518,43.625025,44.496944,37785690,0.0,0.0,2015-05-08 00:00:00+08:00
...,...,...,...,...,...,...,...,...
2217,61.122278,62.664355,60.935355,62.617622,39704597,0.0,0.0,2024-04-30 00:00:00+08:00
2218,63.552212,64.720451,63.505480,64.720451,43579704,0.0,0.0,2024-05-02 00:00:00+08:00
2219,64.860644,64.907369,64.533538,64.813911,19885139,0.0,0.0,2024-05-03 00:00:00+08:00
2220,64.813911,64.907369,64.299890,64.673721,22339805,0.0,0.0,2024-05-06 00:00:00+08:00


In [4]:
df = stock_data.rename(columns={
    'Open': 'S_DQ_OPEN',
    'High': 'S_DQ_HIGH',
    'Low': 'S_DQ_LOW',
    'Close': 'S_DQ_CLOSE',
    'Volume': 'S_DQ_VOLUME'
})

df['S_DQ_PCTCHANGE'] = df['S_DQ_CLOSE'].pct_change() * 100
df['S_DQ_AMOUNT'] = (df['S_DQ_CLOSE'] * df['S_DQ_VOLUME'] * 100) / 1000

df_with_alphas = get_alpha(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inner[self.returns < 0] = stddev(self.returns, 20)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [5]:
df_with_alphas.drop(['S_DQ_OPEN', 'S_DQ_HIGH', 'S_DQ_LOW' , 'S_DQ_VOLUME', 'S_DQ_PCTCHANGE', 'S_DQ_AMOUNT'], axis=1, inplace=True)

In [8]:
pps_scores = []

target_column = 'S_DQ_CLOSE'
for col in df_with_alphas.columns:
    if col != target_column:
        score = pps.score(df_with_alphas, x=col, y=target_column)
        pps_scores.append(score)

# 5. Create DataFrame of scores and sort
pps_df = pd.DataFrame(pps_scores)
pps_df = pps_df[pps_df['ppscore'] > 0].sort_values(by='ppscore', ascending=False)

# 6. Display top predictors of Close price
print(pps_df[['x', 'y', 'ppscore']])

           x           y   ppscore
20  alpha018  S_DQ_CLOSE  0.879730
44  alpha042  S_DQ_CLOSE  0.834099
57  alpha057  S_DQ_CLOSE  0.798987
30  alpha028  S_DQ_CLOSE  0.785552
35  alpha033  S_DQ_CLOSE  0.782940
84  alpha101  S_DQ_CLOSE  0.762325
27  alpha025  S_DQ_CLOSE  0.752678
11  alpha009  S_DQ_CLOSE  0.750484
55  alpha054  S_DQ_CLOSE  0.750285
12  alpha010  S_DQ_CLOSE  0.715634
7   alpha005  S_DQ_CLOSE  0.685217
26  alpha024  S_DQ_CLOSE  0.627706
49  alpha047  S_DQ_CLOSE  0.607318
54  alpha053  S_DQ_CLOSE  0.582158
37  alpha035  S_DQ_CLOSE  0.455198
50  alpha049  S_DQ_CLOSE  0.445078
52  alpha051  S_DQ_CLOSE  0.421212
58  alpha060  S_DQ_CLOSE  0.411862
41  alpha039  S_DQ_CLOSE  0.397756
40  alpha038  S_DQ_CLOSE  0.372280
33  alpha031  S_DQ_CLOSE  0.363128
3   alpha001  S_DQ_CLOSE  0.355669
21  alpha019  S_DQ_CLOSE  0.251563
14  alpha012  S_DQ_CLOSE  0.218868
45  alpha043  S_DQ_CLOSE  0.202285
19  alpha017  S_DQ_CLOSE  0.199156
9   alpha007  S_DQ_CLOSE  0.196519
39  alpha037  S_DQ_C

  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)


## Pepsi

In [12]:
start_date = "2015-05-01"
end_date = "2024-05-08"

stock_data = yf.Ticker('PEP')
stock_data = stock_data.history(start=start_date, end=end_date)
stock_data.index = pd.to_datetime(stock_data.index)
stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-05-01 00:00:00-04:00,70.214216,70.553629,69.919074,70.509354,4622300,0.0,0.0
2015-05-04 00:00:00-04:00,70.428192,71.018478,70.288003,70.797119,3729400,0.0,0.0
2015-05-05 00:00:00-04:00,70.848755,70.959435,70.273230,70.406044,3502800,0.0,0.0
2015-05-06 00:00:00-04:00,70.774992,70.811887,70.059273,70.450340,3376400,0.0,0.0
2015-05-07 00:00:00-04:00,70.206841,70.590522,70.059266,70.206841,3157400,0.0,0.0
...,...,...,...,...,...,...,...
2024-05-01 00:00:00-04:00,167.968606,168.705720,166.245455,167.671829,4340500,0.0,0.0
2024-05-02 00:00:00-04:00,168.083469,168.791879,167.097447,167.959015,3964800,0.0,0.0
2024-05-03 00:00:00-04:00,168.294072,168.954614,167.528226,168.629120,3491900,0.0,0.0
2024-05-06 00:00:00-04:00,169.213101,169.213101,167.652705,168.313248,3576200,0.0,0.0


In [13]:
df = stock_data.rename(columns={
    'Open': 'S_DQ_OPEN',
    'High': 'S_DQ_HIGH',
    'Low': 'S_DQ_LOW',
    'Close': 'S_DQ_CLOSE',
    'Volume': 'S_DQ_VOLUME'
})

df['S_DQ_PCTCHANGE'] = df['S_DQ_CLOSE'].pct_change() * 100
df['S_DQ_AMOUNT'] = (df['S_DQ_CLOSE'] * df['S_DQ_VOLUME'] * 100) / 1000

df_with_alphas = get_alpha(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inner[self.returns < 0] = stddev(self.returns, 20)


In [14]:
df_with_alphas = df_with_alphas.fillna(0)
df_with_alphas = df_with_alphas.astype({col: int for col in df_with_alphas.columns if df_with_alphas[col].dtype == 'bool'})
df_with_alphas

Unnamed: 0_level_0,S_DQ_OPEN,S_DQ_HIGH,S_DQ_LOW,S_DQ_CLOSE,S_DQ_VOLUME,Dividends,Stock Splits,S_DQ_PCTCHANGE,S_DQ_AMOUNT,alpha001,...,alpha085,alpha086,alpha088,alpha092,alpha094,alpha095,alpha096,alpha098,alpha099,alpha101
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-05-01 00:00:00-04:00,70.214216,70.553629,69.919074,70.509354,4622300,0.0,0.0,0.000000,3.259154e+07,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,0.257489,0,0.464377
2015-05-04 00:00:00-04:00,70.428192,71.018478,70.288003,70.797119,3729400,0.0,0.0,0.408124,2.640308e+07,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,0.257489,0,0.504361
2015-05-05 00:00:00-04:00,70.848755,70.959435,70.273230,0.000000,3502800,0.0,0.0,-0.552388,2.466183e+07,0.000000,...,0.000000,0,0.001542,0.0,0.000000,0,0.0,0.257489,0,0.000000
2015-05-06 00:00:00-04:00,70.774992,70.811887,70.059273,70.450340,3376400,0.0,0.0,0.062915,2.378685e+07,0.000000,...,0.000000,0,0.010352,0.0,0.000000,0,0.0,0.257489,0,-0.430793
2015-05-07 00:00:00-04:00,70.206841,70.590522,70.059266,0.000000,3157400,0.0,0.0,-0.345633,2.216711e+07,0.000000,...,0.000000,0,0.010352,0.0,0.000000,0,0.0,0.257489,0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-01 00:00:00-04:00,167.968606,168.705720,166.245455,1.286672,4340500,0.0,0.0,-0.432041,7.277796e+07,0.373776,...,0.740546,0,0.936123,2.0,-0.944223,1,-13.0,-0.174229,0,-67.722079
2024-05-02 00:00:00-04:00,168.083469,168.791879,167.097447,167.959015,3964800,0.0,0.0,0.171278,6.659239e+07,0.214604,...,0.566593,0,0.891630,1.0,-0.884462,1,-13.0,-0.127093,-1,-0.073406
2024-05-03 00:00:00-04:00,168.294072,168.954614,167.528226,168.629120,3491900,0.0,0.0,0.398969,5.888360e+07,0.824354,...,0.382752,0,0.831718,1.0,-0.834440,1,-13.0,0.002863,0,0.234728
2024-05-06 00:00:00-04:00,169.213101,169.213101,167.652705,1.284801,3576200,0.0,0.0,-0.187318,6.019218e+07,0.552538,...,0.556008,0,0.888987,3.0,-0.664164,1,-13.0,0.074890,0,-107.550099


In [15]:
df_with_alphas.drop(['S_DQ_OPEN', 'S_DQ_HIGH', 'S_DQ_LOW' , 'S_DQ_VOLUME', 'S_DQ_PCTCHANGE', 'S_DQ_AMOUNT'], axis=1, inplace=True)

In [16]:
pps_scores = []

target_column = 'S_DQ_CLOSE'
for col in df_with_alphas.columns:
    if col != target_column:
        score = pps.score(df_with_alphas, x=col, y=target_column)
        pps_scores.append(score)

# 5. Create DataFrame of scores and sort
pps_df = pd.DataFrame(pps_scores)
pps_df = pps_df[pps_df['ppscore'] > 0].sort_values(by='ppscore', ascending=False)

# 6. Display top predictors of Close price
print(pps_df[['x', 'y', 'ppscore']])

           x           y   ppscore
19  alpha018  S_DQ_CLOSE  0.894605
43  alpha042  S_DQ_CLOSE  0.791687
25  alpha024  S_DQ_CLOSE  0.742105
72  alpha083  S_DQ_CLOSE  0.729540
10  alpha009  S_DQ_CLOSE  0.718258
56  alpha057  S_DQ_CLOSE  0.717813
29  alpha028  S_DQ_CLOSE  0.716486
11  alpha010  S_DQ_CLOSE  0.704653
26  alpha025  S_DQ_CLOSE  0.684170
6   alpha005  S_DQ_CLOSE  0.662450
48  alpha047  S_DQ_CLOSE  0.643366
34  alpha033  S_DQ_CLOSE  0.641831
54  alpha054  S_DQ_CLOSE  0.640274
83  alpha101  S_DQ_CLOSE  0.638834
53  alpha053  S_DQ_CLOSE  0.462497
36  alpha035  S_DQ_CLOSE  0.382561
49  alpha049  S_DQ_CLOSE  0.376089
51  alpha051  S_DQ_CLOSE  0.366604
2   alpha001  S_DQ_CLOSE  0.366073
40  alpha039  S_DQ_CLOSE  0.282657
32  alpha031  S_DQ_CLOSE  0.259357
57  alpha060  S_DQ_CLOSE  0.250246
8   alpha007  S_DQ_CLOSE  0.204055
38  alpha037  S_DQ_CLOSE  0.198238
44  alpha043  S_DQ_CLOSE  0.170066
39  alpha038  S_DQ_CLOSE  0.156122
35  alpha034  S_DQ_CLOSE  0.131584
42  alpha041  S_DQ_C

  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)


## Tencent

In [17]:
start_date = "2015-05-01"
end_date = "2024-05-08"

stock_data = yf.Ticker('0700.HK')
stock_data = stock_data.history(start=start_date, end=end_date)
stock_data.index = pd.to_datetime(stock_data.index)
stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-05-04 00:00:00+08:00,140.035559,141.440309,138.367425,138.718613,24186374,0.0,0.0
2015-05-05 00:00:00+08:00,139.157612,141.176934,136.699313,138.367447,21200652,0.0,0.0
2015-05-06 00:00:00+08:00,136.084753,139.772209,136.084753,136.962708,21188428,0.0,0.0
2015-05-07 00:00:00+08:00,136.084735,136.874900,133.275233,134.328796,21528958,0.0,0.0
2015-05-08 00:00:00+08:00,136.962696,138.718634,134.943374,135.206772,19643557,0.0,0.0
...,...,...,...,...,...,...,...
2024-04-30 00:00:00+08:00,343.606468,344.785877,338.102468,341.247620,15957023,0.0,0.0
2024-05-02 00:00:00+08:00,343.016761,355.007616,339.478473,354.221313,22479610,0.0,0.0
2024-05-03 00:00:00+08:00,364.639596,366.212172,354.417899,358.152740,23592345,0.0,0.0
2024-05-06 00:00:00+08:00,358.349332,366.605318,356.776757,363.853333,26208326,0.0,0.0


In [18]:
df = stock_data.rename(columns={
    'Open': 'S_DQ_OPEN',
    'High': 'S_DQ_HIGH',
    'Low': 'S_DQ_LOW',
    'Close': 'S_DQ_CLOSE',
    'Volume': 'S_DQ_VOLUME'
})

df['S_DQ_PCTCHANGE'] = df['S_DQ_CLOSE'].pct_change() * 100
df['S_DQ_AMOUNT'] = (df['S_DQ_CLOSE'] * df['S_DQ_VOLUME'] * 100) / 1000

df_with_alphas = get_alpha(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inner[self.returns < 0] = stddev(self.returns, 20)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [19]:
df_with_alphas = df_with_alphas.fillna(0)
df_with_alphas = df_with_alphas.astype({col: int for col in df_with_alphas.columns if df_with_alphas[col].dtype == 'bool'})
df_with_alphas

Unnamed: 0_level_0,S_DQ_OPEN,S_DQ_HIGH,S_DQ_LOW,S_DQ_CLOSE,S_DQ_VOLUME,Dividends,Stock Splits,S_DQ_PCTCHANGE,S_DQ_AMOUNT,alpha001,...,alpha085,alpha086,alpha088,alpha092,alpha094,alpha095,alpha096,alpha098,alpha099,alpha101
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-05-04 00:00:00+08:00,140.035559,141.440309,138.367425,138.718613,24186374,0.0,0.0,0.000000,3.355100e+08,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,0.616562,0,-0.428431
2015-05-05 00:00:00+08:00,139.157612,141.176934,136.699313,0.000000,21200652,0.0,0.0,-0.253150,2.933480e+08,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,0.616562,0,0.000000
2015-05-06 00:00:00+08:00,136.084753,139.772209,136.084753,0.000000,21188428,0.0,0.0,-1.015224,2.902024e+08,0.000000,...,0.000000,0,0.013276,0.0,0.000000,0,0.0,0.616562,0,0.000000
2015-05-07 00:00:00+08:00,136.084735,136.874900,133.275233,0.000000,21528958,0.0,0.0,-1.923086,2.891959e+08,0.000000,...,0.000000,0,0.013276,0.0,0.000000,0,0.0,0.616562,0,0.000000
2015-05-08 00:00:00+08:00,136.962696,138.718634,134.943374,135.206772,19643557,0.0,0.0,0.653602,2.655942e+08,0.000000,...,0.000000,0,0.008551,0.0,0.000000,0,0.0,0.616562,0,-0.464990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-30 00:00:00+08:00,343.606468,344.785877,338.102468,2.038762,15957023,0.0,0.0,-0.115079,5.445296e+08,0.401364,...,0.931011,0,0.675068,7.0,-0.919041,1,-7.0,0.513501,0,-51.099164
2024-05-02 00:00:00+08:00,343.016761,355.007616,339.478473,354.221313,22479610,0.0,0.0,3.801842,7.962757e+08,0.839318,...,0.891864,0,0.582808,6.0,-0.853157,1,-3.0,0.629838,0,0.721471
2024-05-03 00:00:00+08:00,364.639596,366.212172,354.417899,358.152740,23592345,0.0,0.0,1.109879,8.449663e+08,0.839318,...,0.906449,0,0.552205,1.0,-0.866654,1,-5.0,0.716697,0,-0.549954
2024-05-06 00:00:00+08:00,358.349332,366.605318,356.776757,363.853333,26208326,0.0,0.0,1.591665,9.535987e+08,0.839318,...,0.690258,0,0.500900,1.0,-0.912453,1,-7.0,0.779253,0,0.559944


In [20]:
df_with_alphas.drop(['S_DQ_OPEN', 'S_DQ_HIGH', 'S_DQ_LOW' , 'S_DQ_VOLUME', 'S_DQ_PCTCHANGE', 'S_DQ_AMOUNT'], axis=1, inplace=True)

In [21]:
pps_scores = []

target_column = 'S_DQ_CLOSE'
for col in df_with_alphas.columns:
    if col != target_column:
        score = pps.score(df_with_alphas, x=col, y=target_column)
        pps_scores.append(score)

# 5. Create DataFrame of scores and sort
pps_df = pd.DataFrame(pps_scores)
pps_df = pps_df[pps_df['ppscore'] > 0].sort_values(by='ppscore', ascending=False)

# 6. Display top predictors of Close price
print(pps_df[['x', 'y', 'ppscore']])

           x           y   ppscore
19  alpha018  S_DQ_CLOSE  0.880789
25  alpha024  S_DQ_CLOSE  0.832249
10  alpha009  S_DQ_CLOSE  0.820213
11  alpha010  S_DQ_CLOSE  0.792650
6   alpha005  S_DQ_CLOSE  0.710565
29  alpha028  S_DQ_CLOSE  0.703939
56  alpha057  S_DQ_CLOSE  0.699137
43  alpha042  S_DQ_CLOSE  0.687499
26  alpha025  S_DQ_CLOSE  0.668966
83  alpha101  S_DQ_CLOSE  0.609516
34  alpha033  S_DQ_CLOSE  0.595041
48  alpha047  S_DQ_CLOSE  0.579743
54  alpha054  S_DQ_CLOSE  0.552394
49  alpha049  S_DQ_CLOSE  0.441228
51  alpha051  S_DQ_CLOSE  0.433947
53  alpha053  S_DQ_CLOSE  0.419604
36  alpha035  S_DQ_CLOSE  0.349995
2   alpha001  S_DQ_CLOSE  0.348694
57  alpha060  S_DQ_CLOSE  0.284642
40  alpha039  S_DQ_CLOSE  0.265320
13  alpha012  S_DQ_CLOSE  0.263279
32  alpha031  S_DQ_CLOSE  0.230514
8   alpha007  S_DQ_CLOSE  0.191248
39  alpha038  S_DQ_CLOSE  0.176900
38  alpha037  S_DQ_CLOSE  0.166173
44  alpha043  S_DQ_CLOSE  0.154634
20  alpha019  S_DQ_CLOSE  0.150661
35  alpha034  S_DQ_C

  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)


## Toyota

In [22]:
start_date = "2015-05-01"
end_date = "2024-05-08"

stock_data = yf.Ticker('7203.T')
stock_data = stock_data.history(start=start_date, end=end_date)
stock_data.index = pd.to_datetime(stock_data.index)

stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-05-01 00:00:00+09:00,1225.572542,1228.083374,1212.870814,1228.083374,37228500,0.0,0.0
2015-05-07 00:00:00+09:00,1218.630655,1220.846079,1211.689067,1213.166016,40812500,0.0,0.0
2015-05-08 00:00:00+09:00,1212.871021,1232.957500,1209.621714,1222.766602,44073500,0.0,0.0
2015-05-11 00:00:00+09:00,1240.637487,1240.637487,1213.313923,1216.267822,55217000,0.0,0.0
2015-05-12 00:00:00+09:00,1216.267795,1223.504883,1209.916931,1223.504883,35281500,0.0,0.0
...,...,...,...,...,...,...,...
2024-04-26 00:00:00+09:00,3389.625362,3435.117193,3356.716378,3397.368652,30255300,0.0,0.0
2024-04-30 00:00:00+09:00,3462.218883,3523.197297,3441.892744,3521.261475,35459100,0.0,0.0
2024-05-01 00:00:00+09:00,3484.480756,3541.587524,3461.250884,3489.320312,27076000,0.0,0.0
2024-05-02 00:00:00+09:00,3466.090332,3505.774695,3444.796284,3466.090332,20164900,0.0,0.0


In [23]:
df = stock_data.rename(columns={
    'Open': 'S_DQ_OPEN',
    'High': 'S_DQ_HIGH',
    'Low': 'S_DQ_LOW',
    'Close': 'S_DQ_CLOSE',
    'Volume': 'S_DQ_VOLUME'
})

df['S_DQ_PCTCHANGE'] = df['S_DQ_CLOSE'].pct_change() * 100
df['S_DQ_AMOUNT'] = (df['S_DQ_CLOSE'] * df['S_DQ_VOLUME'] * 100) / 1000

df_with_alphas = get_alpha(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inner[self.returns < 0] = stddev(self.returns, 20)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [24]:
df_with_alphas = df_with_alphas.fillna(0)
df_with_alphas = df_with_alphas.astype({col: int for col in df_with_alphas.columns if df_with_alphas[col].dtype == 'bool'})
df_with_alphas

Unnamed: 0_level_0,S_DQ_OPEN,S_DQ_HIGH,S_DQ_LOW,S_DQ_CLOSE,S_DQ_VOLUME,Dividends,Stock Splits,S_DQ_PCTCHANGE,S_DQ_AMOUNT,alpha001,...,alpha085,alpha086,alpha088,alpha092,alpha094,alpha095,alpha096,alpha098,alpha099,alpha101
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-05-01 00:00:00+09:00,1225.572542,1228.083374,1212.870814,1228.083374,37228500,0.0,0.0,0.000000,4.571970e+09,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,0.690144,0,0.165039
2015-05-07 00:00:00+09:00,1218.630655,1220.846079,1211.689067,0.000000,40812500,0.0,0.0,-1.214686,4.951234e+09,0.000000,...,0.000000,0,0.000000,0.0,0.000000,0,0.0,0.690144,0,0.000000
2015-05-08 00:00:00+09:00,1212.871021,1232.957500,1209.621714,1222.766602,44073500,0.0,0.0,0.791366,5.389160e+09,0.000000,...,0.000000,0,0.156391,0.0,0.000000,0,0.0,0.690144,0,0.424034
2015-05-11 00:00:00+09:00,1240.637487,1240.637487,1213.313923,0.000000,55217000,0.0,0.0,-0.531482,6.715866e+09,0.000000,...,0.000000,0,0.156391,0.0,0.000000,0,0.0,0.690144,0,0.000000
2015-05-12 00:00:00+09:00,1216.267795,1223.504883,1209.916931,1223.504883,35281500,0.0,0.0,0.595022,4.316709e+09,0.000000,...,0.000000,0,0.187219,0.0,0.000000,0,0.0,0.690144,0,0.532571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-26 00:00:00+09:00,3389.625362,3435.117193,3356.716378,3397.368652,30255300,0.0,0.0,0.371745,1.027884e+10,0.389950,...,0.797553,0,0.947345,5.0,-0.016248,1,-8.0,0.571782,0,0.098764
2024-04-30 00:00:00+09:00,3462.218883,3523.197297,3441.892744,3521.261475,35459100,0.0,0.0,3.646729,1.248608e+10,0.833561,...,0.482696,0,0.869487,3.0,-0.559602,1,-8.0,0.309406,0,0.726182
2024-05-01 00:00:00+09:00,3484.480756,3541.587524,3461.250884,1.871617,27076000,0.0,0.0,-0.907094,9.447684e+09,0.570487,...,0.590276,0,0.923042,1.0,-0.593966,1,-8.0,0.085959,0,-43.349657
2024-05-02 00:00:00+09:00,3466.090332,3505.774695,3444.796284,1.875786,20164900,0.0,0.0,-0.665745,6.989336e+09,0.389950,...,0.696017,0,0.954095,1.0,-0.714156,1,-7.0,-0.243249,0,-56.809577


In [25]:
df_with_alphas.drop(['S_DQ_OPEN', 'S_DQ_HIGH', 'S_DQ_LOW' , 'S_DQ_VOLUME', 'S_DQ_PCTCHANGE', 'S_DQ_AMOUNT'], axis=1, inplace=True)

In [26]:
pps_scores = []

target_column = 'S_DQ_CLOSE'
for col in df_with_alphas.columns:
    if col != target_column:
        score = pps.score(df_with_alphas, x=col, y=target_column)
        pps_scores.append(score)

# 5. Create DataFrame of scores and sort
pps_df = pd.DataFrame(pps_scores)
pps_df = pps_df[pps_df['ppscore'] > 0].sort_values(by='ppscore', ascending=False)

# 6. Display top predictors of Close price
print(pps_df[['x', 'y', 'ppscore']])

           x           y   ppscore
19  alpha018  S_DQ_CLOSE  0.888498
25  alpha024  S_DQ_CLOSE  0.843457
10  alpha009  S_DQ_CLOSE  0.818809
11  alpha010  S_DQ_CLOSE  0.806941
43  alpha042  S_DQ_CLOSE  0.739475
29  alpha028  S_DQ_CLOSE  0.725320
56  alpha057  S_DQ_CLOSE  0.682904
6   alpha005  S_DQ_CLOSE  0.667808
26  alpha025  S_DQ_CLOSE  0.652988
48  alpha047  S_DQ_CLOSE  0.622382
34  alpha033  S_DQ_CLOSE  0.611803
83  alpha101  S_DQ_CLOSE  0.582433
54  alpha054  S_DQ_CLOSE  0.573761
49  alpha049  S_DQ_CLOSE  0.444716
51  alpha051  S_DQ_CLOSE  0.436642
53  alpha053  S_DQ_CLOSE  0.392292
36  alpha035  S_DQ_CLOSE  0.342159
2   alpha001  S_DQ_CLOSE  0.314568
40  alpha039  S_DQ_CLOSE  0.314552
57  alpha060  S_DQ_CLOSE  0.274800
32  alpha031  S_DQ_CLOSE  0.255594
8   alpha007  S_DQ_CLOSE  0.206701
38  alpha037  S_DQ_CLOSE  0.202551
39  alpha038  S_DQ_CLOSE  0.195395
20  alpha019  S_DQ_CLOSE  0.179556
13  alpha012  S_DQ_CLOSE  0.169521
44  alpha043  S_DQ_CLOSE  0.168061
47  alpha046  S_DQ_C

  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)
  array = numpy.asarray(array, order=order, dtype=dtype)


In [35]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import math
from torch.nn.utils.parametrizations import weight_norm

In [47]:
X = ['Close', 'alpha018','alpha024','alpha009','alpha010','alpha042']

In [44]:
df_with_alphas.rename(columns={'S_DQ_CLOSE':'Close'}, inplace=True)

In [48]:
stock_data_train = df_with_alphas[X].iloc[:round(len(df_with_alphas)*0.7)]
stock_data_test = df_with_alphas[X][round(len(df_with_alphas)*0.7):]

In [None]:
window = 5
predict_forward = 1

# =========================
# Feature Scaling (TRAIN)
# =========================
scaler_x = MinMaxScaler(feature_range=(0, 1))
train_data = scaler_x.fit_transform(stock_data_train)

scaler_y = MinMaxScaler(feature_range=(0, 1))
train_target = scaler_y.fit_transform(
    stock_data_train['Close'].values.reshape(-1, 1)
)

# =========================
# Build Training Sequences
# =========================
X_train, y_train = [], []

for i in range(len(train_data) - window - predict_forward + 1):
    X_train.append(train_data[i:i + window])
    y_train.append(train_target[i + window:i + window + predict_forward])

X_train = np.array(X_train)
y_train = np.array(y_train).reshape(-1, predict_forward)

# =========================
# Feature Scaling (TEST)
# =========================
test_data = scaler_x.transform(stock_data_test)
test_target = scaler_y.transform(
    stock_data_test['Close'].values.reshape(-1, 1)
)

# =========================
# Build Testing Sequences
# =========================
X_test, y_test = [], []

for i in range(len(test_data) - window - predict_forward + 1):
    X_test.append(test_data[i:i + window])
    y_test.append(test_target[i + window:i + window + predict_forward])

X_test = np.array(X_test)
y_test = np.array(y_test).reshape(-1, predict_forward)

In [50]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1550, 5, 6), (1550, 1), (662, 5, 6), (662, 1))