In [1]:
import os
import sys
import warnings
import numpy as np
import rrsBdtDevDependencies
import dataFunctions as dataFun
from datetime import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import quandl
# The API key is read from the environment so that it is never committed with the notebook.
# NOTE(review): the key that used to be hard-coded here has been published with this
# file and should be revoked/rotated.
QAPIKEY = os.environ.get("QUANDL_API_KEY")
if QAPIKEY is None:
    warnings.warn("QUANDL_API_KEY is not set; Quandl requests will be sent without an API key.")
quandl.ApiConfig.api_key = QAPIKEY
import yfinance as yf
import pickle
from sklearn.metrics import mean_squared_error
import logging
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
from keras import optimizers
from keras.callbacks import CSVLogger


In [46]:
### CONFIGURE ###
# Scenario inputs (not referenced by the cells visible below).
barrels, costPerDay, daysToPredict = 750000, 30000, 1

# Rows dated after trainDataDate are kept; rows dated after testSplitDate form the test set.
trainDataDate, testSplitDate = '2018-01-01', '2020-01-01'

# LSTM hyper-parameters.
# batch_size note from earlier experiments: 20<16<10, 25 was a bust
params = dict(
    batch_size=20,
    epochs=300,
    lr=1e-4,
    time_steps=10,
)


In [3]:
print('Running...')


def show_more(df, lines):
    """Display ``df`` with pandas' ``display.max_rows`` option temporarily set to ``lines``.

    Relies on the ``display`` function that IPython/Jupyter injects into the
    notebook namespace.
    """
    row_limit = pd.option_context("display.max_rows", lines)
    with row_limit:
        display(df)

Running...


In [4]:
def create_features(df, features, label=None, shift = 0, nonShiftFeatures = None):
    """Build the feature matrix (and optionally the target) from ``df``.

    Parameters
    ----------
    df : DataFrame with a 'Date' column, which becomes the index of the result.
    features : list of column names to use as features. The list is NOT modified.
    label : optional column name (or list of names) to return as the target.
    shift : number of rows to lag the features by; 0 (default) disables lagging.
    nonShiftFeatures : names from ``features`` that must not be lagged.
        ``None`` means every feature is lagged.

    Returns
    -------
    ``X`` when ``label`` is falsy, otherwise the tuple ``(X, y)``. When lagging is
    applied, the lagged columns come first, followed by the un-lagged ones.
    """
    df = df.set_index('Date')
    X = df[features]

    if shift > 0:
        unshifted_names = list(nonShiftFeatures) if nonShiftFeatures is not None else []
        unshifted = X[unshifted_names]
        # Build a new list instead of calling features.remove(): removing in place
        # would silently shrink the caller's feature list on every call.
        shifted_names = [name for name in features if name not in unshifted_names]
        X = X[shifted_names].shift(shift)
        if unshifted_names:
            X = X.merge(unshifted, how='inner', left_index=True, right_index=True)

    if label:
        return X, df[label]
    return X

In [5]:
"""
Getting WTI price data 
"""

# Historical WTI prices from Quandl, with the date index turned into a 'Date' column.
wtiData         = quandl.get("FRED/DCOILWTICO")
wtiData         = wtiData.reset_index(level=0)
wtiData         = wtiData.rename(columns={"Value": "Prices"})

# Continue the series with rows from dataFun.yFinData, starting at the last Quandl date.
yfStartDate     = wtiData['Date'].iloc[-1].strftime('%Y-%m-%d')
stocks          = "CL=F"
period          = "1d"
Stocks, yfInfo  = dataFun.yFinData(yfStartDate)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
wtiData         = pd.concat([wtiData, Stocks], ignore_index=True)
wtiData         = wtiData.sort_values(by = ["Date"])

# Getting Oil production data and combining dataframes
oilDF   = dataFun.oilProduction()
df      = dataFun.combineFrames(wtiData,oilDF)
# Keep only rows with a finite price, then renumber the rows.
df      = df[np.isfinite(df['Prices'])]
df      = df.reset_index().drop(["index"], axis = 1)

# Getting natural gas data and combining frames
natGasData          = quandl.get("EIA/NG_RNGWHHD_D")
natGasData          = natGasData.reset_index(level=0)
natGasData          = natGasData.rename(columns={"Value": "NatGasPrices"})

# Continue the series with rows from dataFun.yFinData, starting at the last Quandl date.
yfStartDate         = natGasData['Date'].iloc[-1].strftime('%Y-%m-%d')
stocks              = "NG=F"
period              = "1d"
NGStocks, yfInfo    = dataFun.yFinData(yfStartDate,stock=stocks,name ="NatGasPrices")
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
natGasData          = pd.concat([natGasData, NGStocks], ignore_index=True)
natGasData          = natGasData.sort_values(by = ["Date"])

# Left join: every row of df is kept; dates without a gas price get NaN.
newdf               = pd.merge(df, natGasData, on=['Date'], how ="left")

"""
Getting Brent oil data and combining dataframes
"""

brentData = quandl.get("FRED/DCOILBRENTEU")
brentData = brentData.reset_index(level=0)
name = "BrentPrices"
brentData = brentData.rename(columns={"Value": name})

# Continue the series with rows from dataFun.yFinData, starting at the last Quandl date.
yfStartDate = brentData['Date'].iloc[-1].strftime('%Y-%m-%d')
stocks = "BZ=F"
period = "1d"
BStocks, yfInfo = dataFun.yFinData(yfStartDate,stock=stocks,name = name)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
brentData = pd.concat([brentData, BStocks], ignore_index=True)
brentData = brentData.sort_values(by = ["Date"])

# Left join onto the WTI/production/gas frame, then fill the gaps the left joins
# introduced with the nearest available observation.
df = pd.merge(newdf, brentData, on=['Date'], how ="left")
for price_column in ("BrentPrices", "NatGasPrices"):
    df[price_column] = df[price_column].interpolate(method='nearest')

# Calculating the technical indicators for price data
df = df.reset_index(drop=True)

# Simple moving averages; the window order fixes the column order in the frame.
sma_windows = (20, 10, 5, 50, 200)
for window in sma_windows:
    df["{}dSMA".format(window)] = dataFun.SMA(window, df["Prices"])

# Bollinger bands are computed once; element 0 is stored as the low band, element 1 as the high band.
bollinger_bands = dataFun.bollinger(df['Prices'])
df["boll_lo"] = bollinger_bands[0]
df["boll_hi"] = bollinger_bands[1]

df = dataFun.momentum(df, 14)
df = dataFun.macd(df, 12, 26)
df = dataFun.rate_of_change(df, 14)
df = dataFun.relative_strength_index(df)

# Force the helper outputs to numeric dtype before doing arithmetic on them.
numeric_columns = ["boll_hi", "boll_lo"] + ["{}dSMA".format(window) for window in sma_windows]
for column in numeric_columns:
    df[column] = pd.to_numeric(df[column])

# Derived band/average distance features.
df["bollAmplitude"] = df["boll_hi"] - df["boll_lo"]
df["distFromTopBoll"] = df["boll_hi"] - df["Prices"]
df["distFromLowBoll"] = df["boll_lo"] - df["Prices"]
df["20d200dDist"] = np.abs(df["20dSMA"] - df["200dSMA"])



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [6]:
"""
Processing the resultant data frame
"""

# One pipeline: keep rows after trainDataDate with a finite 200-day SMA, rename the
# production column, keep the first row per date, and renumber the rows.
df = (
    df.loc[df["Date"] > trainDataDate]
      .loc[lambda frame: np.isfinite(frame['200dSMA'])]
      .rename(columns={"Production of Crude Oil": "OilProduction"})
      .drop_duplicates("Date", keep="first")
      .reset_index()
      .drop(["index"], axis=1)
)

In [7]:
"""
Creating time series features from datetime index
"""

# Calendar features derived from the 'Date' column.
date_parts = df['Date'].dt
df['dayofweek'] = date_parts.dayofweek
df['quarter'] = date_parts.quarter
df['month'] = date_parts.month
df['year'] = date_parts.year
df['dayofyear'] = date_parts.dayofyear
df['dayofmonth'] = date_parts.day
# Series.dt.weekofyear was deprecated in pandas 1.1 and removed in 2.0; isocalendar().week
# is its replacement. Older pandas versions without isocalendar keep the original attribute.
if hasattr(date_parts, 'isocalendar'):
    df['weekofyear'] = date_parts.isocalendar().week.astype('int64')
else:
    df['weekofyear'] = date_parts.weekofyear

In [48]:
# Chronological split: rows dated up to and including testSplitDate train the model,
# later rows are held out. Copies are taken so later edits do not touch df.
df_train = df.loc[df["Date"] <= testSplitDate].copy()
df_test = df.loc[df["Date"] > testSplitDate].copy()

# The model is currently fed the price series only.
# Wider feature set tried previously:
# ['OilProduction', '20dSMA', 'Momentum_14', 'MACD_12_26', 'MACDdiff_12_26', 'ROC_14', 'RSI_14', 'bollAmplitude', 'distFromTopBoll', 'distFromLowBoll', '20d200dDist','dayofyear','dayofmonth','weekofyear']
features = ["Prices"]

# Calendar columns that create_features should leave un-lagged.
nonShiftFeatures = ['dayofyear', 'dayofmonth', 'weekofyear']

In [49]:
# Index both splits by date, then pull the training feature columns out as a NumPy array.
df_train, df_test = (split.set_index('Date') for split in (df_train, df_test))
x = df_train[features].values


In [55]:
from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training values only and then reused for the test
# split, so no test-set statistics leak into the scaling.
sc = MinMaxScaler(feature_range=(0, 1)).fit(x)
x_train = sc.transform(x)

x_test = sc.transform(df_test[features])
df_test

Unnamed: 0_level_0,Prices,OilProduction,NatGasPrices,BrentPrices,20dSMA,10dSMA,5dSMA,50dSMA,200dSMA,boll_lo,...,distFromTopBoll,distFromLowBoll,20d200dDist,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,61.17,12900.0,2.05,67.05,60.26,61.179,61.49,57.9114,57.7858,57.978542,...,1.371458,-3.191458,2.4742,3,1,1,2020,2,2,1
2020-01-03,63.0,12900.0,2.06,69.08,60.487,61.386,61.746,58.1058,57.80535,58.060783,...,-0.086783,-4.939217,2.68165,4,1,1,2020,3,3,1
2020-01-06,63.27,12900.0,2.1,70.25,60.7295,61.583,62.048,58.287,57.8261,58.205614,...,-0.016614,-5.064386,2.9034,0,1,1,2020,6,6,2
2020-01-07,62.7,12900.0,2.17,68.74,60.9045,61.81,62.256,58.423,57.839,58.342079,...,0.766921,-4.357921,3.0655,1,1,1,2020,7,7,2
2020-01-08,59.65,12900.0,2.09,67.31,60.9375,61.724,61.958,58.4938,57.83735,58.463416,...,3.761584,-1.186584,3.10015,2,1,1,2020,8,8,2
2020-01-09,59.56,12900.0,2.09,66.58,60.9545,61.563,61.636,58.5546,57.8408,58.525851,...,3.823149,-1.034149,3.1137,3,1,1,2020,9,9,2
2020-01-10,59.02,13000.0,2.05,66.77,60.9685,61.293,60.84,58.623,57.84235,58.590909,...,4.326091,-0.429091,3.12615,4,1,1,2020,10,10,2
2020-01-13,58.17,13000.0,2.03,64.14,60.918,60.934,59.82,58.6796,57.83385,58.345542,...,5.320458,0.175542,3.08415,0,1,1,2020,13,13,3
2020-01-14,58.34,13000.0,2.15,64.45,60.8295,60.602,58.948,58.7494,57.8286,58.028376,...,5.290624,-0.311624,3.0009,1,1,1,2020,14,14,3
2020-01-15,57.86,13000.0,2.01,63.29,60.712,60.274,58.59,58.8262,57.82145,57.619465,...,5.944535,-0.240535,2.89055,2,1,1,2020,15,15,3


In [62]:
from tqdm import tqdm_notebook

def build_timeseries(mat, y_col_index, time_steps=None):
    """Slice a 2-D matrix into overlapping windows for sequence models.

    Parameters
    ----------
    mat : 2-D array of shape (rows, columns).
    y_col_index : index of the column that acts as the output column.
    time_steps : window length. Defaults to the notebook-level ``TIME_STEPS`` when
        omitted, so existing two-argument calls behave as before.

    Returns
    -------
    (x, y) where ``x`` has shape (rows - time_steps, time_steps, columns) and
    ``y[i]`` is the ``y_col_index`` value of the row right after window ``i``.

    Raises
    ------
    ValueError if ``mat`` has fewer rows than ``time_steps``.
    """
    # The progress bar is cosmetic; tqdm.auto replaces the deprecated tqdm_notebook
    # alias, and plain iteration is used when tqdm is not installed.
    try:
        from tqdm.auto import tqdm
    except ImportError:
        def tqdm(iterable):
            return iterable

    if time_steps is None:
        time_steps = TIME_STEPS

    # total number of time-series samples would be len(mat) - time_steps
    dim_0 = mat.shape[0] - time_steps
    if dim_0 < 0:
        raise ValueError(
            "build_timeseries needs at least {} rows, got {}".format(time_steps, mat.shape[0])
        )
    dim_1 = mat.shape[1]
    x = np.zeros((dim_0, time_steps, dim_1))
    y = np.zeros((dim_0,))

    for i in tqdm(range(dim_0)):
        x[i] = mat[i:time_steps+i]
        y[i] = mat[time_steps+i, y_col_index]
    print("length of time-series i/o",x.shape,y.shape)
    return x, y


def trim_dataset(mat, batch_size):
    """
    Drop trailing rows so that the row count is a multiple of batch_size.

    The input is returned unchanged when nothing needs to be dropped.
    """
    leftover = mat.shape[0] % batch_size
    return mat[:-leftover] if leftover > 0 else mat

In [56]:
# NOTE(review): params["batch_size"] is 20 in the configuration cell, but this cell
# (and the model built from it) uses 5 — confirm which value is intended.
BATCH_SIZE = 5
TIME_STEPS = 10

# Training windows, trimmed to a whole number of batches.
x_t, y_t = build_timeseries(x_train, 0)
x_t = trim_dataset(x_t, BATCH_SIZE)
y_t = trim_dataset(y_t, BATCH_SIZE)

# Held-out windows, split into equal validation and test halves. Trimming to a
# multiple of 2 * BATCH_SIZE keeps np.split from failing on an odd number of
# batches and leaves each half a whole number of batches.
x_temp, y_temp = build_timeseries(x_test, 0)
x_val, x_test_t = np.split(trim_dataset(x_temp, 2 * BATCH_SIZE), 2)
y_val, y_test_t = np.split(trim_dataset(y_temp, 2 * BATCH_SIZE), 2)

print("Test size", x_test_t.shape, y_test_t.shape, x_val.shape, y_val.shape)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # This is added back by InteractiveShellApp.init_path()


HBox(children=(FloatProgress(value=0.0, max=489.0), HTML(value='')))


length of time-series i/o (489, 10, 1) (489,)


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


length of time-series i/o (14, 10, 1) (14,)


In [61]:
# lr = 0.1
# lstm_model = Sequential()
# lstm_model.add(LSTM(100, batch_input_shape=(BATCH_SIZE, TIME_STEPS, x_t.shape[2]), dropout=0.0, recurrent_dropout=0.0, stateful=True,     kernel_initializer='random_uniform'))
# lstm_model.add(Dropout(0.5))
# lstm_model.add(Dense(20,activation='relu'))
# lstm_model.add(Dense(1,activation='sigmoid'))
# optimizer = optimizers.RMSprop(lr=lr)
# lstm_model.compile(loss='mean_squared_error', optimizer=optimizer)

def create_model():
    """Build and compile the two-layer LSTM regressor.

    Reads the notebook-level BATCH_SIZE, TIME_STEPS, x_t (for the feature count)
    and params["lr"]. The first LSTM layer is stateful, so the batch size is
    fixed through batch_input_shape. Returns the compiled Sequential model.
    """
    feature_count = x_t.shape[2]

    # (batch_size, timesteps, data_dim)
    layer_stack = [
        LSTM(100, batch_input_shape=(BATCH_SIZE, TIME_STEPS, feature_count),
             dropout=0.0, recurrent_dropout=0.0, stateful=True, return_sequences=True,
             kernel_initializer='random_uniform'),
        Dropout(0.4),
        LSTM(60, dropout=0.0),
        Dropout(0.4),
        Dense(20, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]

    lstm_model = Sequential()
    for layer in layer_stack:
        lstm_model.add(layer)

    # Alternative tried earlier: optimizers.SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True)
    lstm_model.compile(loss='mean_squared_error',
                       optimizer=optimizers.RMSprop(lr=params["lr"]))
    return lstm_model

# model = Sequential()
# model.add(LSTM(units=50,return_sequences=True,input_shape=(X_train.shape[1], 1)))
# model.add(Dropout(0.2))
# model.add(LSTM(units=50,return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(units=50,return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(units=50))
# model.add(Dropout(0.2))
# model.add(Dense(units=1))
# model.compile(optimizer='adam',loss='mean_squared_error')
# model.fit(X_train,y_train,epochs=100,batch_size=32)

In [None]:
# Try to reuse a previously pickled model; `model` stays None when there is none.
model = None
try:
    # NOTE: pickle.load can execute arbitrary code from the file — only load
    # model files that you created yourself.
    with open("lstm_model", 'rb') as model_file:
        model = pickle.load(model_file)
    print("Loaded saved model...")
except FileNotFoundError:
    print("Model not found")


In [None]:
# Set to False to keep a model loaded from disk instead of retraining.
is_update_model = True
if model is None or is_update_model:
    # Local imports: the notebook's import cell only brings in CSVLogger.
    import time
    from keras import backend as K
    from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

    # Reuse OUTPUT_PATH when another cell has defined it; otherwise write into the
    # current working directory.
    OUTPUT_PATH = globals().get("OUTPUT_PATH", os.getcwd())

    print("Building model...")
    print("checking if GPU available", K.tensorflow_backend._get_available_gpus())
    model = create_model()

    # Stop when val_loss has not improved by at least min_delta for 40 epochs.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=40, min_delta=0.0001)

    # Keep the best full model (not just weights) seen so far on val_loss.
    mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH,
                          "best_model.h5"), monitor='val_loss', verbose=1,
                          save_best_only=True, save_weights_only=False, mode='min', period=1)

    # Not used here. But leaving it here as a reminder for future
    r_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30,
                                  verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)

    csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'training_log_' + time.ctime().replace(" ","_") + '.log'), append=True)

    # shuffle=False keeps the windows in chronological order.
    history = model.fit(x_t, y_t, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
                        shuffle=False, validation_data=(trim_dataset(x_val, BATCH_SIZE),
                        trim_dataset(y_val, BATCH_SIZE)), callbacks=[es, mcp, csv_logger])

    print("saving model...")
    # df_train is indexed by 'Date', so its last index entry is the last training date.
    modDate = df_train.index[-1].strftime('%Y-%m-%d')
    fileName = "LSTM_Model_"+modDate+".sav"
    # NOTE(review): the loading cell reads a file named "lstm_model", which this
    # name never matches — confirm which file name is intended.
    with open(fileName, "wb") as model_file:
        pickle.dump(model, model_file)

In [66]:
# OUTPUT_PATH = "/Users/qw19176/Documents/Courses/Team-Cpp/"
# csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'LSTMRegressor' + '.log'), append=True)
# epochs = 100
# history = lstm_model.fit(x_t, y_t, epochs=epochs, verbose=2, batch_size=BATCH_SIZE,
#                     shuffle=False, validation_data=(trim_dataset(x_val, BATCH_SIZE),
#                     trim_dataset(y_val, BATCH_SIZE)), callbacks=[csv_logger])

Instructions for updating:
Use tf.cast instead.
Train on 485 samples, validate on 5 samples
Epoch 1/100
 - 1s - loss: 0.2875 - val_loss: 0.3662
Epoch 2/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 3/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 4/100
 - 1s - loss: 0.2892 - val_loss: 0.3662
Epoch 5/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 6/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 7/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 8/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 9/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 10/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 11/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 12/100
 - 1s - loss: 0.2892 - val_loss: 0.3662
Epoch 13/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 14/100
 - 1s - loss: 0.2892 - val_loss: 0.3662
Epoch 15/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 16/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 17/100
 - 0s - loss: 0.2892 - val_loss: 0.3662
Epoch 18/100
 - 

In [None]:
# Candidate values per hyper-parameter; the key order determines the position of
# each value inside the tuples produced by get_all_combinations.
search_params = dict(
    batch_size=[20, 30, 40],
    time_steps=[30, 60, 90],
    lr=[0.01, 0.001, 0.0001],
    epochs=[30, 50, 70],
)

def eval_model(mat=None, combination=None, combination_no=None):
    """
    Placeholder for evaluating one hyper-parameter combination.

    implement your logic to build a model, train it and then calculate validation loss.
    Save this validation loss using CSVLogger of Keras or in a text file. Later you can
    query to get the best combination.

    Parameters (all optional so the stub can still be called without arguments):
        mat: the data matrix handed to run_search.
        combination: one tuple of hyper-parameter values.
        combination_no: position of the combination in the search.

    Returns None until the logic is implemented.
    """
    pass

def get_all_combinations(params):
    """Return every combination of the candidate values in ``params``.

    ``params`` maps a name to a list of candidate values. The result is a list of
    tuples, one value per key, ordered like the keys of ``params``.
    """
    # Imported locally: itertools is not imported in the notebook's import cell.
    from itertools import product

    return list(product(*params.values()))

def run_search(mat, params):
    """Evaluate every hyper-parameter combination in ``params`` against ``mat``.

    Each combination is logged at INFO level and handed to eval_model together
    with its position in the search.
    """
    candidates = get_all_combinations(params) # list of tuples
    logging.info("Total combinations to try = {}".format(len(candidates)))
    combo_no = 0
    for combo in candidates:
        logging.info("Trying combo no. {} {}".format(combo_no, combo))
        eval_model(mat, combo, combo_no)
        combo_no += 1

# NOTE(review): `x_input` is not defined in any cell visible in this notebook —
# confirm which matrix the search is meant to run over.
run_search(x_input, search_params)

In [None]:
"""
TALOS OPTIMISATION
"""

def data(search_params):
    """
    The function that prepares the data for LSTM training specific to this problem as per values in search_params.

    Reads the notebook-level matrix ``mat``, splits it 80/20 in order (no shuffling),
    min-max scales both parts with a scaler fitted on the training part, windows them
    and trims them to a whole number of batches.

    Returns (x_train_ts, y_train_ts, x_test_ts, y_test_ts).
    """
    # Local imports: neither name is imported in the notebook's import cell.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler

    # NOTE(review): `mat` is not defined in any cell visible in this notebook.
    global mat

    batch_size = search_params["batch_size"]
    time_steps = search_params["time_steps"]
    x_train, x_test = train_test_split(mat, train_size=0.8, test_size=0.2, shuffle=False)

    # scale the train and test dataset
    min_max_scaler = MinMaxScaler()
    x_train = min_max_scaler.fit_transform(x_train)
    x_test = min_max_scaler.transform(x_test)

    # NOTE(review): column 3 is used as the target and time_steps is passed as a
    # third positional argument — confirm both against build_timeseries and `mat`.
    x_train_ts, y_train_ts = build_timeseries(x_train, 3, time_steps)
    x_test_ts, y_test_ts = build_timeseries(x_test, 3, time_steps)
    x_train_ts = trim_dataset(x_train_ts, batch_size)
    y_train_ts = trim_dataset(y_train_ts, batch_size)
    x_test_ts = trim_dataset(x_test_ts, batch_size)
    y_test_ts = trim_dataset(y_test_ts, batch_size)
    print("Test size(trimmed) {}, {}".format(x_test_ts.shape, y_test_ts.shape))
    return x_train_ts, y_train_ts, x_test_ts, y_test_ts


def create_model_talos(x_train_ts, y_train_ts, x_test_ts, y_test_ts, params):
    """
    function that builds model, trains, evaluates on validation data and returns Keras history object and model for
    talos scanning. Here I am creating data inside function because data preparation varies as per the selected value of
    batch_size and time_steps during searching. So we ignore data that's received here as argument from scan method of Talos.

    ``params`` is read for: batch_size, time_steps, lstm1_nodes, lstm_layers, lstm2_nodes,
    dense_layers, dense2_nodes, optimizer, lr and epochs.

    Returns (history, lstm_model).
    """
    # Local import: Flatten is not imported in the notebook's import cell.
    from keras.layers import Flatten

    # The arrays passed in by Talos are deliberately replaced (see docstring).
    x_train_ts, y_train_ts, x_test_ts, y_test_ts = data(params)
    batch_size = params["batch_size"]
    time_steps = params["time_steps"]

    lstm_model = Sequential()
    # (batch_size, timesteps, data_dim)
    lstm_model.add(LSTM(params["lstm1_nodes"], batch_input_shape=(batch_size, time_steps, x_train_ts.shape[2]), dropout=0.2,
                        recurrent_dropout=0.2, stateful=True, return_sequences=True,
                        kernel_initializer='random_uniform'))
    if params["lstm_layers"] == 2:
        lstm_model.add(LSTM(params["lstm2_nodes"], dropout=0.2))
    else:
        # With a single LSTM layer the per-step outputs are flattened for the dense head.
        lstm_model.add(Flatten())

    if params["dense_layers"] == 2:
        lstm_model.add(Dense(params["dense2_nodes"], activation='relu'))

    lstm_model.add(Dense(1, activation='sigmoid'))
    if params["optimizer"] == 'rms':
        optimizer = optimizers.RMSprop(lr=params["lr"])
    else:
        optimizer = optimizers.SGD(lr=params["lr"], decay=1e-6, momentum=0.9, nesterov=True)
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizer)  # binary_crossentropy

    # NOTE(review): LogMetrics is not defined in any cell visible in this notebook, and
    # csv_logger only exists after the training cell has run — confirm both.
    history = lstm_model.fit(x_train_ts, y_train_ts, epochs=params["epochs"], verbose=2, batch_size=batch_size,
                             validation_data=(x_test_ts, y_test_ts),
                             callbacks=[LogMetrics(search_params, params, -1), csv_logger])
    return history, lstm_model
  
# NOTE(review): `ta`, `mat` and OUTPUT_PATH are not defined in any cell visible in this
# notebook (`ta` is presumably the talos package) — confirm before running.
print("Starting Talos scanning...")
t = ta.Scan(x=mat, # data parameter is ignored in this example as here data varies based on batch_size & time_steps
            y=mat[:,0], # dummy data just to avoid errors. input and output calculated in create_model_talos
            model=create_model_talos,
            params=search_params,
            dataset_name='stock_ge',
            experiment_no='1',
            reduction_interval=10)

# Persist the scan result; the context manager closes the file even if pickling fails.
with open(os.path.join(OUTPUT_PATH,"talos_res"),"wb") as result_file:
    pickle.dump(t, result_file)

In [5]:
# Scratch values for checking how list.remove behaves on a shared list.
nonShiftFeatures = ["easy", "easter"]
features = nonShiftFeatures + ["eastmas", "estover"]


In [7]:
# Removes each non-shift name from `features` in place (first occurrence only);
# the final expression displays what is left.
for name in nonShiftFeatures:
    del features[features.index(name)]
features

['eastmas', 'estover']