In [139]:
american_politics_values = american_politics_df[american_politics_df.columns.difference(["Category","Title"])]
american_politics_values

Unnamed: 0_level_0,Compound,DOW JONES,RUSSELL 2000,S&P 500
Date Only,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-03,-0.6486,-0.008103,-0.003540,-0.007060
2020-01-03,0.4754,-0.008103,-0.003540,-0.007060
2020-01-03,0.0000,-0.008103,-0.003540,-0.007060
2020-01-03,-0.7184,-0.008103,-0.003540,-0.007060
2020-01-03,-0.4939,-0.008103,-0.003540,-0.007060
...,...,...,...,...
2020-01-17,-0.4215,0.001722,-0.003272,0.003862
2020-01-17,0.7906,0.001722,-0.003272,0.003862
2020-01-17,0.0000,0.001722,-0.003272,0.003862
2020-01-17,0.1027,0.001722,-0.003272,0.003862


In [141]:
# This function accepts the column number for the features (X) and the target (y)
# It chunks the data up with a rolling window of Xt-n to predict Xt
# It returns a numpy array of X any y
def window_data(american_politics_values, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(american_politics_values) - window - 1):
        features = american_politics_values.iloc[i:(i + window), feature_col_number]
        target = american_politics_values.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [142]:
# Then, experiment with window sizes anywhere from 1 to 10 and see how the model performance changes
window_size = 10

# Column index 0 is the 'fng_value' column
# Column index 1 is the `Close` column
feature_column = 1
target_column = 1
X, y = window_data(american_politics_values, window_size, feature_column, target_column)

In [163]:
# Manually splitting the data
split = int(0.7 * len(X))

X_train = X[: split]
X_test = X[split:]

y_train = y[: split]
y_test = y[split:]

In [165]:
# Importing the MinMaxScaler from sklearn
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Fit the MinMaxScaler object with the features data X
scaler.fit(X)

# Scale the features training and testing sets
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the MinMaxScaler object with the target data Y
scaler.fit(y)

# Scale the target training and testing sets
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)

In [166]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [167]:
# Import Keras modules for model creation
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

In [168]:
model = Sequential()

number_units = 5
dropout_fraction = 0.2

# Layer 1
model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))
# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))
model.add(Dropout(dropout_fraction))
# Layer 3
model.add(LSTM(units=number_units))
model.add(Dropout(dropout_fraction))
# Output layer
model.add(Dense(1))

In [169]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error") 

In [170]:
# Summarize the model
model.summary() 

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 10, 5)             140       
_________________________________________________________________
dropout_3 (Dropout)          (None, 10, 5)             0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 10, 5)             220       
_________________________________________________________________
dropout_4 (Dropout)          (None, 10, 5)             0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 5)                 220       
_________________________________________________________________
dropout_5 (Dropout)          (None, 5)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                

In [171]:
model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=1, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fd149833af0>

In [172]:
model.evaluate(X_test, y_test)



0.07848215848207474

In [180]:
# Make sentiment predictions
predicted = model.predict(X_test)

In [181]:
# Recover the original prices instead of the scaled version
predicted_values = scaler.inverse_transform(predicted)
compound_values = scaler.inverse_transform(y_test.reshape(-1, 1))

ValueError: non-broadcastable output operand with shape (10,1) doesn't match the broadcast shape (10,140)

In [None]:
# Create a DataFrame of Real and Predicted values
american_politics = pd.DataFrame({
    "Real": compound_values.ravel(),
    "Predicted": predicted_values.ravel()
}, index = american_politics_df.index[-len(compound_values): ]) 
american_politics.head()

In [None]:
# Plot the real vs predicted values as a line chart
american_politics.plot()