# Feature Selection and Analysis

In this notebook, we will perform various forms of analysis to determine which features that are available to us may be most useful when training multivariate models.

## Setup

In [6]:
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from Functions import tsPlot
from copy import deepcopy as dc
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from skopt import gp_minimize
from tqdm import tqdm  # for progress bar
from IPython.display import clear_output, display

from ta.trend import SMAIndicator, EMAIndicator, MACD
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

### Enable GPU

In [2]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cuda:0'

### Import Data

In [3]:
# Read the csv file into a pandas DataFrame
df_ret = pd.read_csv('../DataManagement/daily_data.csv', parse_dates=['DATE'], index_col='DATE')

# Drop everything except the columns we need for this scenario
df_ret = df_ret.filter(['DATE', 'NVDA_OPEN', 'NVDA_HIGH', 'NVDA_LOW', 'NVDA_CLOSE', 'NVDA_VOLUME'])

df_ret.head()

Unnamed: 0_level_0,NVDA_OPEN,NVDA_HIGH,NVDA_LOW,NVDA_CLOSE,NVDA_VOLUME
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-05,4.225095,4.348957,4.225095,4.303082,79416540.0
2010-01-06,4.30079,4.339784,4.259502,4.330608,70753870.0
2010-01-07,4.307669,4.326019,4.213625,4.245738,59704760.0
2010-01-08,4.211332,4.284732,4.1861,4.254913,52116320.0
2010-01-11,4.27785,4.2962,4.135638,4.195275,60666070.0


## Data Pre Processing

In [7]:
# Calculate Simple Moving Averages (SMA)
df_ret['SMA_20'] = SMAIndicator(df_ret['NVDA_CLOSE'], window=20).sma_indicator()
df_ret['SMA_50'] = SMAIndicator(df_ret['NVDA_CLOSE'], window=50).sma_indicator()

# Calculate Exponential Moving Averages (EMA)
df_ret['EMA_20'] = EMAIndicator(df_ret['NVDA_CLOSE'], window=20).ema_indicator()
df_ret['EMA_50'] = EMAIndicator(df_ret['NVDA_CLOSE'], window=50).ema_indicator()

# Calculate Relative Strength Index (RSI)
df_ret['RSI'] = RSIIndicator(df_ret['NVDA_CLOSE']).rsi()

# Calculate Moving Average Convergence Divergence (MACD)
macd = MACD(df_ret['NVDA_CLOSE'])
df_ret['MACD'] = macd.macd()
df_ret['MACD_signal'] = macd.macd_signal()

# Calculate Bollinger Bands (BB)
bollinger = BollingerBands(df_ret['NVDA_CLOSE'])
df_ret['BB_upper'] = bollinger.bollinger_hband()
df_ret['BB_middle'] = bollinger.bollinger_mavg()
df_ret['BB_lower'] = bollinger.bollinger_lband()

# Specify the date range
start_date = '2012-06-30'
end_date = '2023-06-30'

# Slice the DataFrame for the desired date range
df_ret = df_ret.loc[start_date:end_date].copy()

df_ret.head()

Unnamed: 0_level_0,NVDA_OPEN,NVDA_HIGH,NVDA_LOW,NVDA_CLOSE,NVDA_VOLUME,SMA_20,SMA_50,EMA_20,EMA_50,RSI,MACD,MACD_signal,BB_upper,BB_middle,BB_lower
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2012-07-02,3.186024,3.188318,3.059868,3.085099,63398690.0,2.908938,2.900635,2.953446,2.972066,57.663951,0.049553,0.019496,3.159471,2.908938,2.658406
2012-07-03,3.085098,3.172261,3.082804,3.16538,23900130.0,2.928779,2.90325,2.97363,2.979647,61.019761,0.059685,0.027534,3.194115,2.928779,2.663443
2012-07-05,3.144736,3.158499,3.085099,3.133268,30525940.0,2.943345,2.907103,2.988834,2.985671,59.005034,0.06438,0.034903,3.219769,2.943345,2.666921
2012-07-06,3.11033,3.119505,3.039223,3.07363,41717650.0,2.960662,2.908571,2.99691,2.989121,55.349888,0.062568,0.040436,3.223851,2.960662,2.697474
2012-07-09,3.05528,3.069042,3.007111,3.032342,33388070.0,2.973278,2.909168,3.000284,2.990816,52.906453,0.057142,0.043778,3.224542,2.973278,2.722015


In [8]:
def prepare_data_lstm(data, n_steps, column):
    column_names = [column]
    data = dc(data)  # make deep copy of the input data

    for i in range(1, n_steps+1):
        column_name = f'{column}(t-{i})'
        column_names.append(column_name)
        data[column_name] = data[column].shift(i)

    data.dropna(inplace=True)
    data = data.loc[:, data.columns.intersection(column_names)]

    return data

In [9]:
lookback = 7

timeseries_columns = ['NVDA_CLOSE', 'NVDA_OPEN', 'NVDA_HIGH', 'NVDA_LOW', 'NVDA_VOLUME']
indicator_columns = ['SMA_20', 'SMA_50', 'EMA_20', 'EMA_50', 'RSI', 'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 'BB_lower']

# Same as the univariate case
shifted_close = prepare_data_lstm(df_ret, lookback, 'NVDA_CLOSE')    

# New: now we also perform the same procedure on each of the additional features that we wish to include in our X matrix
shifted_open = prepare_data_lstm(df_ret, lookback, 'NVDA_OPEN')
shifted_high = prepare_data_lstm(df_ret, lookback, 'NVDA_HIGH')
shifted_low = prepare_data_lstm(df_ret, lookback, 'NVDA_LOW')
shifted_volume = prepare_data_lstm(df_ret, lookback, 'NVDA_VOLUME')

# Then the same process for all of the technical indicators we want to include in our X matrix
shifted_SMA20 = prepare_data_lstm(df_ret, lookback, 'SMA_20')
shifted_SMA50 = prepare_data_lstm(df_ret, lookback, 'SMA_50')
shifted_EMA20 = prepare_data_lstm(df_ret, lookback, 'EMA_20')
shifted_EMA50 = prepare_data_lstm(df_ret, lookback, 'EMA_50')
shifted_RSI = prepare_data_lstm(df_ret, lookback, 'RSI')
shifted_MACD = prepare_data_lstm(df_ret, lookback, 'MACD')
shifted_MACD_SIGNAL = prepare_data_lstm(df_ret, lookback, 'MACD_signal')
shifted_BB_UPPER = prepare_data_lstm(df_ret, lookback, 'BB_upper')
shifted_BB_MIDDLE = prepare_data_lstm(df_ret, lookback, 'BB_middle')
shifted_BB_LOWER = prepare_data_lstm(df_ret, lookback, 'BB_lower')

# Now we convert the dataframes into numpy matrices
shifted_close_np = shifted_close.to_numpy()
shifted_open_np = shifted_open.to_numpy()
shifted_high_np = shifted_high.to_numpy()
shifted_low_np = shifted_low.to_numpy()
shifted_volume_np = shifted_volume.to_numpy()
shifted_SMA20_np = shifted_SMA20.to_numpy()
shifted_SMA50_np = shifted_SMA50.to_numpy()
shifted_EMA20_np = shifted_EMA20.to_numpy()
shifted_EMA50_np = shifted_EMA50.to_numpy()
shifted_RSI_np = shifted_RSI.to_numpy()
shifted_MACD_np = shifted_MACD.to_numpy()
shifted_MACD_SIGNAL_np = shifted_MACD_SIGNAL.to_numpy()
shifted_BB_UPPER_np = shifted_BB_UPPER.to_numpy()
shifted_BB_MIDDLE_np = shifted_BB_MIDDLE.to_numpy()
shifted_BB_LOWER_np = shifted_BB_LOWER.to_numpy()

In [10]:
price_scaler = StandardScaler()    # Scaler for price data
volume_scaler = StandardScaler()    # Scaler for volume data
indicator_scaler = StandardScaler()    # Scaler for indicators except MACD and MACD Signal
macd_scaler = StandardScaler()    # Scaler for MACD
macd_signal_scaler = StandardScaler()    # Scaler for MACD signal

# Scale the price data
shifted_open_np_scaled = price_scaler.fit_transform(shifted_open_np)
shifted_high_np_scaled = price_scaler.fit_transform(shifted_high_np)
shifted_low_np_scaled = price_scaler.fit_transform(shifted_low_np)
shifted_close_np_scaled = price_scaler.fit_transform(shifted_close_np)

# Scale the volume data
shifted_volume_np_scaled = volume_scaler.fit_transform(shifted_volume_np)

# Scale the indicators exc. MACD and MACD Signal
shifted_SMA20_np_scaled = indicator_scaler.fit_transform(shifted_SMA20_np)
shifted_SMA50_np_scaled = indicator_scaler.fit_transform(shifted_SMA50_np)
shifted_EMA20_np_scaled = indicator_scaler.fit_transform(shifted_EMA20_np)
shifted_EMA50_np_scaled = indicator_scaler.fit_transform(shifted_EMA50_np)
shifted_RSI_np_scaled = indicator_scaler.fit_transform(shifted_RSI_np)
shifted_BB_UPPER_np_scaled = indicator_scaler.fit_transform(shifted_BB_UPPER)
shifted_BB_MIDDLE_np_scaled = indicator_scaler.fit_transform(shifted_BB_MIDDLE)
shifted_BB_LOWER_np_scaled = indicator_scaler.fit_transform(shifted_BB_LOWER_np)

# Scale MACD
shifted_MACD_np_scaled = macd_scaler.fit_transform(shifted_MACD_np)

# Scale MACD Signal
shifted_MACD_SIGNAL_np_scaled = macd_signal_scaler.fit_transform(shifted_MACD_SIGNAL_np)

In [11]:
# Our y vector does not change in the multivariate case since we are still predicting the close prices
y = shifted_close_np_scaled[:, 0]

# Our X matrix does change though as we need to add additional dimensions to store the extra variables
# We start by slicing out the time t column from each of the X components
X_close = shifted_close_np_scaled[:, 1:]
X_open = shifted_open_np_scaled[:, 1:]
X_high = shifted_high_np_scaled[:, 1:]
X_low = shifted_low_np_scaled[:, 1:]
X_volume = shifted_volume_np_scaled[:, 1:]

X_SMA20 = shifted_SMA20_np_scaled[:, 1:]
X_SMA50 = shifted_SMA50_np_scaled[:, 1:]
X_EMA20 = shifted_EMA20_np_scaled[:, 1:]
X_EMA50 = shifted_EMA50_np_scaled[:, 1:]
X_RSI = shifted_RSI_np_scaled[:, 1:]
X_BB_UPPER = shifted_BB_UPPER_np_scaled[:, 1:]
X_BB_MIDDLE = shifted_BB_MIDDLE_np_scaled[:, 1:]
X_BB_LOWER = shifted_BB_LOWER_np_scaled[:, 1:]
X_MACD = shifted_MACD_np_scaled[:, 1:]
X_MACD_SIGNAL = shifted_MACD_SIGNAL_np_scaled[:, 1:]

# Then we individually "flip" each X component so that it goes, for example, t-7, t-6, t-5....
X_close = dc(np.flip(X_close, axis=1))
X_open = dc(np.flip(X_open, axis=1))
X_high = dc(np.flip(X_high, axis=1))
X_low = dc(np.flip(X_low, axis=1))
X_volume = dc(np.flip(X_volume, axis=1))

X_SMA20 = dc(np.flip(X_SMA20, axis=1))
X_SMA50 = dc(np.flip(X_SMA50, axis=1))
X_EMA20 = dc(np.flip(X_EMA20, axis=1))
X_EMA50 = dc(np.flip(X_EMA50, axis=1))
X_RSI = dc(np.flip(X_RSI, axis=1))
X_BB_UPPER = dc(np.flip(X_BB_UPPER, axis=1))
X_BB_MIDDLE = dc(np.flip(X_BB_MIDDLE, axis=1))
X_BB_LOWER = dc(np.flip(X_BB_LOWER, axis=1))
X_MACD = dc(np.flip(X_MACD, axis=1))
X_MACD_SIGNAL = dc(np.flip(X_MACD_SIGNAL, axis=1))

# Finally, we combine each X component into a single X matrix with the shape (samples, time steps, features) i.e. (1251, 7, 4) in this case
X = np.stack((
    X_close, 
    X_open, 
    X_high, 
    X_low, 
    X_volume,
    X_SMA20,
    X_SMA50,
    X_EMA20,
    X_EMA50,
    X_RSI,
    X_BB_UPPER,
    X_BB_MIDDLE,
    X_BB_LOWER,
    X_MACD,
    X_MACD_SIGNAL
    ), axis=-1)

# Also, y currently has shape (1251) but it needs to have shape (1251, 1) in this framework
y = y.reshape((-1, 1))

In [13]:
# Define the split proportions first
train_ratio = 0.80  # 80% of data for training
valid_ratio = 0.15  # 15% of data for validation
test_ratio = 0.05   # 5% of data for testing

# First split: separate out the test set
train_valid_index = int(len(X) * (train_ratio + valid_ratio))
X_train_valid, X_test = X[:train_valid_index], X[train_valid_index:]
y_train_valid, y_test = y[:train_valid_index], y[train_valid_index:]

# Second split: separate out the validation set from the remaining data
train_index = int(len(X_train_valid) * train_ratio / (train_ratio + valid_ratio))
X_train, X_valid = X_train_valid[:train_index], X_train_valid[train_index:]
y_train, y_valid = y_train_valid[:train_index], y_train_valid[train_index:]


In [14]:
X_train, y_train = torch.tensor(X_train).float(), torch.tensor(y_train).float()
X_test, y_test = torch.tensor(X_test).float(), torch.tensor(y_test).float()
X_valid, y_valid = torch.tensor(X_valid).float(), torch.tensor(y_valid).float()

In [15]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]
    
train_dataset = TimeSeriesDataset(X_train, y_train)
valid_dataset = TimeSeriesDataset(X_valid, y_valid)
test_dataset = TimeSeriesDataset(X_test, y_test)

In [16]:
batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [17]:
for _, batch in enumerate(train_loader):
    x_batch, y_batch = batch[0].to(device), batch[1].to(device)
    print(x_batch.shape, y_batch.shape)
    break

torch.Size([16, 7, 15]) torch.Size([16, 1])


## Correlation Analysis

This is a simple yet effective method to determine if a feature should be included in the model. Correlation measures the degree of relationship between two variables. If a feature is highly correlated with the target variable, it's often beneficial to include it in the model.

In [23]:
columns = ['Close', 'Open', 'High', 'Low', 'Volume','SMA20','SMA50','EMA20','EMA50','RSI','BB_UPPER','BB_MIDDLE','BB_LOWER','MACD','MACD_SIGNAL']

# Select the t-1 timestep for each feature
t_1_close = shifted_close_np_scaled[:, -1]
t_1_open = shifted_open_np_scaled[:, -1]
t_1_high = shifted_high_np_scaled[:, -1]
t_1_low = shifted_low_np_scaled[:, -1]
t_1_volume = shifted_volume_np_scaled[:, -1]

t_1_SMA20 = shifted_SMA20_np_scaled[:, -1]
t_1_SMA50 = shifted_SMA50_np_scaled[:, -1]
t_1_EMA20 = shifted_EMA20_np_scaled[:, -1]
t_1_EMA50 = shifted_EMA50_np_scaled[:, -1]
t_1_RSI = shifted_RSI_np_scaled[:, -1]
t_1_BB_UPPER = shifted_BB_UPPER_np_scaled[:, -1]
t_1_BB_MIDDLE = shifted_BB_MIDDLE_np_scaled[:, -1]
t_1_BB_LOWER = shifted_BB_LOWER_np_scaled[:, -1]
t_1_MACD = shifted_MACD_np_scaled[:, -1]
t_1_MACD_SIGNAL = shifted_MACD_SIGNAL_np_scaled[:, -1]

# Combine into a 2D array
data_t_1 = np.stack((t_1_close, 
                     t_1_open, 
                     t_1_high, 
                     t_1_low, 
                     t_1_volume,
                     t_1_SMA20,
                     t_1_SMA50,
                     t_1_EMA20,
                     t_1_EMA50,
                     t_1_RSI,
                     t_1_BB_UPPER,
                     t_1_BB_MIDDLE,
                     t_1_BB_LOWER,
                     t_1_MACD,
                     t_1_MACD_SIGNAL), axis=-1)

# Create a dataframe
df_t_1 = pd.DataFrame(data_t_1, columns=columns)

# Calculate the correlation matrix
correlation_matrix_t_1 = df_t_1.corr()
print(correlation_matrix_t_1)


                Close      Open      High       Low    Volume     SMA20  \
Close        1.000000  0.999349  0.999702  0.999709  0.075900  0.993292   
Open         0.999349  1.000000  0.999738  0.999727  0.075455  0.993845   
High         0.999702  0.999738  1.000000  0.999675  0.079995  0.993835   
Low          0.999709  0.999727  0.999675  1.000000  0.071455  0.993261   
Volume       0.075900  0.075455  0.079995  0.071455  1.000000  0.076169   
SMA20        0.993292  0.993845  0.993835  0.993261  0.076169  1.000000   
SMA50        0.982329  0.982757  0.983184  0.981945  0.081354  0.993783   
EMA20        0.995182  0.995626  0.995666  0.995146  0.076626  0.999680   
EMA50        0.986660  0.987105  0.987444  0.986340  0.081687  0.996435   
RSI          0.057088  0.051143  0.051778  0.056674 -0.016010 -0.008195   
BB_UPPER     0.992981  0.993522  0.993782  0.992776  0.086204  0.997730   
BB_MIDDLE    0.993292  0.993845  0.993835  0.993261  0.076169  1.000000   
BB_LOWER     0.987093  0.

## Permutation Importance

This method involves training the model with all the features first, then for each feature, the values are permuted in the validation data and the decrease in the model score is computed. The features which cause the largest decrease in score are considered the most important.

First, we want to understand the importance of multiple forms of time series data, i.e. open, close, high, low and volume on the performance of an LSTM model that we have already trained.

As a result, the first step is to load in the model.

In [21]:
# Optimal Parameters
hidden_size_optimal = 128
num_stacked_layers_optimal = 1
dropout_optimal = 0.5

# Fixed parameters
input_size = 15
num_epochs = 20
loss_function = nn.MSELoss()

# Define custom LSTM class (same as before)
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, 
                            batch_first=True, dropout=dropout)
        
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Initialize the model
model_optimal = LSTM(input_size, hidden_size_optimal, num_stacked_layers_optimal, dropout_optimal)
model_optimal.to(device)

# Load the saved model
model_optimal.load_state_dict(torch.load("../Models/MvLSTM/28-07-2023_10-49-44/MvLSTM.pth"))    # choose which saved model to load

# Ensure the model is in evaluation mode
model_optimal.eval()

LSTM(
  (lstm): LSTM(15, 128, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=128, out_features=1, bias=True)
)

New we have loaded the LSTM model, we can implement our permutation importance procedure using the eli5 library

**Note:** Our features are 'Close', 'Open', 'High', 'Low', 'Volume'

In [28]:
def score(X, y):
    model_optimal.eval()
    X, y = X.to(device), y.to(device)
    with torch.no_grad():
        y_pred = model_optimal(X).detach().cpu().numpy()
    return np.sqrt(mean_squared_error(y.cpu().numpy(), y_pred))

baseline = score(X_valid, y_valid)
imp = []
for feature in range(X_valid.shape[2]):  # Iterate over features, not sequence steps
    save = X_valid[:, :, feature].clone()  # Use clone for PyTorch tensor
    X_valid[:, :, feature] = torch.tensor(np.random.permutation(X_valid[:, :, feature].cpu().numpy()), device=device)  # Convert back to tensor after permutation
    m = score(X_valid, y_valid)
    X_valid[:, :, feature] = save  # Restore original values
    imp.append(baseline - m)

# Create a DataFrame from the lists
df_perm_imp = pd.DataFrame({'Column': columns, 'Importance': imp})

# Display the transposed DataFrame
display(df_perm_imp)

Unnamed: 0,Column,Importance
0,Close,-0.016998
1,Open,-0.002097
2,High,-0.011073
3,Low,-0.020116
4,Volume,0.000405
5,SMA20,-0.003552
6,SMA50,-0.007731
7,EMA20,-0.011972
8,EMA50,-0.006716
9,RSI,-0.022038


Permutation feature importance is a technique used to determine the most important features in our dataset. It works by shuffling the values of each feature and measuring how much the performance of the model decreases. The more the performance decreases, the more important the feature is.

Now to interpret these specific results:

* **Close:** The "Close" feature has a negative permutation importance of approximately -0.017. This suggests that the LSTM model relies somewhat on the original order of the "Close" values for making predictions. If you were to randomly shuffle the "Close" values, the model's performance would decrease by around 1.7%.

* **Open:** The "Open" feature has a negative permutation importance of approximately -0.002. Similar to "Close," the LSTM model also relies slightly on the order of "Open" values for making predictions. Shuffling "Open" values would lead to a performance decrease of about 0.2%.

* **High and Low:** The "High" and "Low" features have negative permutation importances of -0.011 and -0.020, respectively. Both features seem to be slightly more important than "Open" and "Close," indicating that the model uses the order of "High" and "Low" values to some extent in its predictions.

* **Volume:** The "Volume" feature has a positive permutation importance of approximately 0.0004. This value is close to zero, suggesting that the model is not strongly reliant on the order of "Volume" values for making predictions.

* **SMA20 and SMA50:** The "SMA20" and "SMA50" features have negative permutation importances of -0.0036 and -0.0077, respectively. These moving average features have a small impact on the model's performance, but they are still considered somewhat relevant.

* **EMA20 and EMA50:** The "EMA20" and "EMA50" features have negative permutation importances of -0.012 and -0.0067, respectively. Like the SMA features, the exponential moving averages have a small influence on the model.

* **RSI:** The "RSI" feature has a negative permutation importance of -0.022, indicating that it has a slightly larger impact on the model's predictions compared to other features mentioned above.

* **BB_UPPER, BB_MIDDLE, and BB_LOWER:** The Bollinger Bands features have negative permutation importances, suggesting they have some influence on the model's predictions, but their impact is not substantial.

* **MACD:** The "MACD" feature has a significant negative permutation importance of approximately -0.081. This indicates that the model heavily relies on the original order of the MACD values for making predictions.

* **MACD_SIGNAL:** The "MACD_SIGNAL" feature has a positive permutation importance of approximately 0.010, which is relatively small compared to the negative impact of "MACD."