In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV

# Function to create the LSTM model
def create_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, input_shape=input_shape))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# Function to fit the LSTM model
def fit_lstm_model(X_train, y_train, batch_size, epochs, input_shape):
    model = create_lstm_model(input_shape)
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
    return model

In [4]:
# Load your dataset
data = pd.read_csv("modeldata.csv", index_col='Date', parse_dates=True)
print(data)

# Assign variables to independent and target variables
X = data[['returns', 'msci', 'sentiment']].values
y = data['returns'].values

# Normalize the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y.reshape(-1, 1))

# Split the data into train and test sets
train_size = int(len(X_scaled) * 0.8)
X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

# Reshape the data for LSTM input
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)  # Update reshaping
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)  # Update reshaping
input_shape = (X_train.shape[1], 1)  # Update input shape

             returns  msci  sentiment
Date                                 
2012-01-01  0.078040   5.1  -0.008786
2012-02-01  0.146806   0.3   0.148969
2012-03-01  0.083715   0.9  -0.140396
2012-04-01  0.001341   0.2  -0.019119
2012-05-01 -0.047849   2.9  -0.070178
...              ...   ...        ...
2022-08-01 -0.031358   6.7  -0.371627
2022-09-01 -0.128034   0.4   0.699164
2022-10-01  0.095568   1.3  -0.017947
2022-11-01 -0.017879  -3.2  -0.370013
2022-12-01 -0.117456   3.1  -0.709726

[132 rows x 3 columns]


In [5]:
# Base Model: Only using historical returns
base_model = fit_lstm_model(X_train[:, [0]], y_train, batch_size=1, epochs=100, input_shape=input_shape)
base_y_pred = base_model.predict(X_test[:, [0]])
base_y_pred = scaler.inverse_transform(base_y_pred)
base_rmse = np.sqrt(mean_squared_error(y_test, base_y_pred))

# MSCI Model: Using only MSCI and historical returns
msci_model = fit_lstm_model(X_train[:, [0, 1]], y_train, batch_size=1, epochs=100, input_shape=input_shape)
msci_y_pred = msci_model.predict(X_test[:, [0, 1]])
msci_y_pred = scaler.inverse_transform(msci_y_pred)
msci_rmse = np.sqrt(mean_squared_error(y_test, msci_y_pred))

# Sentiment Model: Using only Sentiment Scores and historical returns
sent_model = fit_lstm_model(X_train[:, [0, 2]], y_train, batch_size=1, epochs=100, input_shape=input_shape)
sent_y_pred = sent_model.predict(X_test[:, [0, 2]])
sent_y_pred = scaler.inverse_transform(sent_y_pred)
sent_rmse = np.sqrt(mean_squared_error(y_test, sent_y_pred))

# Both Model: Using MSCI, Sentiment, and historical returns
both_model = fit_lstm_model(X_train, y_train, batch_size=1, epochs=100, input_shape=input_shape)
both_y_pred = both_model.predict(X_test)
both_y_pred = scaler.inverse_transform(both_y_pred)
both_rmse = np.sqrt(mean_squared_error(y_test, both_y_pred))

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step


In [7]:
# Print the RMSE scores for each model
print("Base Model RMSE:", base_rmse)
print("MSCI Model RMSE:", msci_rmse)
print("Sentiment Model RMSE:", sent_rmse)
print("Both Model RMSE:", both_rmse)

Base Model RMSE: 0.44515726476595463
MSCI Model RMSE: 0.44373214215841816
Sentiment Model RMSE: 0.44472949266400713
Both Model RMSE: 0.4448538070978331
