In [1]:
def _as_lag_list(spec):
    """Normalize a lag specification to a list of lag offsets.

    An integer ``n`` expands to ``[1, 2, ..., n]``; any other iterable is
    materialized as-is, keeping its own order.
    """
    if isinstance(spec, (int, np.integer)):
        return list(range(1, int(spec) + 1))
    return list(spec)


def create_lagged_series(symbol, bars, start_date, end_date, lags=5, lags_hilo=5):
    """Build a DataFrame of percentage returns and lagged features from ``bars``.

    Parameters
    ----------
    symbol : unused by this function; kept for interface compatibility.
    bars : pandas DataFrame providing the columns ``"Close"``, ``"Volume"``
        and ``"HiLo"``. Its index is compared against ``str(start_date)``,
        so it is presumably a DatetimeIndex -- confirm against the caller.
    start_date : only rows whose index is strictly greater than
        ``str(start_date)`` are returned.
    end_date : unused by this function; kept for interface compatibility.
    lags : int or iterable of int. Lag offsets (in rows) for the close-price
        return columns. An int ``n`` means lags ``1..n``. The last lag is
        also used as the window of the rolling volume average.
    lags_hilo : int or iterable of int. Lag offsets for the ``"HiLo"``
        columns; an int ``n`` means lags ``1..n``.

    Returns
    -------
    pandas DataFrame with the columns, in this order:
    ``"Volume"``; ``"Today"`` (percentage change of ``Close``);
    ``"Lag<k>"`` (percentage change of ``Close`` shifted by ``k`` rows);
    ``"HiLo Lag<k>"`` (raw ``HiLo`` value shifted by ``k`` rows, not a
    return); ``"avg_vol_lags"`` (rolling mean of ``Volume``, current row
    included); ``"Direction"`` (sign of ``"Today"``).

    Raises
    ------
    ValueError
        If ``lags`` is empty, because the rolling window is taken from its
        last element.
    """
    close_lags = _as_lag_list(lags)
    hilo_lags = _as_lag_list(lags_hilo)
    if not close_lags:
        raise ValueError("lags must contain at least one lag")

    close = bars["Close"]
    hilo = bars["HiLo"]

    tsret = pd.DataFrame(index=bars.index)
    tsret["Volume"] = bars["Volume"]

    # TODO: check if this is still necessary.
    # Returns with magnitude below 0.0001 (either sign) are replaced by
    # +0.0001 (stops issues with the QDA model in scikit-learn). NaN rows
    # compare False and are left untouched. Done with a vectorized mask
    # rather than per-element chained assignment, which is a silent no-op
    # under pandas copy-on-write.
    today = close.pct_change() * 100.0
    tsret["Today"] = today.mask(today.abs() < 0.0001, 0.0001)

    # Lagged percentage returns of the close price.
    for lag in close_lags:
        tsret["Lag%s" % str(lag)] = close.shift(lag).pct_change() * 100.0

    # Lagged raw HiLo values (deliberately not converted to returns).
    for lag in hilo_lags:
        tsret["HiLo Lag%s" % str(lag)] = hilo.shift(lag)

    # Rolling mean volume; min_periods=1 means the first rows average over
    # however many observations are available instead of yielding NaN.
    window = int(close_lags[-1])
    tsret["avg_vol_lags"] = tsret["Volume"].rolling(window, min_periods=1).mean()

    # "Direction" is +1 / -1 for an up / down row (NaN where "Today" is NaN).
    tsret["Direction"] = np.sign(tsret["Today"])

    # Strictly-greater comparison: a row exactly at start_date is dropped.
    tsret = tsret[tsret.index > str(start_date)]
    return tsret