In [None]:
#LSTM
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Set random seeds for reproducibility.
# Seeding NumPy alone is not enough for a Keras model: Python's `random`
# module and TensorFlow keep their own generator state, so seed all three
# before any layer is created.
import random
import tensorflow as tf

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load the data
data = pd.read_csv('IBM.csv') #change the dataset here

# Normalize the data
# 'Close' is the column being predicted.
close_prices = data['Close'].values.reshape(-1, 1)

# Fit the scaler on the chronologically first 90% of rows only -- the same
# fraction as the train/test split used further down in this cell -- so the
# min/max of the held-out tail never leaks into preprocessing. The whole
# series is then transformed with those training-only statistics, which means
# test values may fall slightly outside [-1, 1]; that is expected.
TRAIN_FRACTION = 0.9
n_fit_rows = max(1, int(len(close_prices) * TRAIN_FRACTION))
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(close_prices[:n_fit_rows])
normalized_data = scaler.transform(close_prices).flatten()

# Data Preprocessing Function modified for multi-step
def create_sequences(data, sequence_length, steps):
    """Slice a series into overlapping (input window, future target) pairs.

    Window ``i`` takes ``data[i : i + sequence_length]`` as input and the
    ``steps`` values that immediately follow it as the target.

    Parameters
    ----------
    data : array-like
        Series to window along its first axis (anything ``np.asarray``
        accepts). Trailing dimensions, if any, are carried through unchanged.
    sequence_length : int
        Number of consecutive observations in each input window.
    steps : int
        Number of consecutive future observations in each target.

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``X`` with shape ``(n_windows, sequence_length, ...)`` and ``y`` with
        shape ``(n_windows, steps, ...)``, where
        ``n_windows = len(data) - sequence_length - steps + 1``.
        When the series is too short to yield a single window, both arrays
        are empty but keep those trailing dimensions, so callers that index
        ``X.shape[1]`` do not hit an ``IndexError``.
    """
    values = np.asarray(data)
    n_windows = len(values) - sequence_length - steps + 1

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); build explicitly shaped
        # empties instead so downstream reshapes still see a 2-D+ array.
        trailing = values.shape[1:]
        empty_x = np.empty((0, max(sequence_length, 0)) + trailing, dtype=values.dtype)
        empty_y = np.empty((0, max(steps, 0)) + trailing, dtype=values.dtype)
        return empty_x, empty_y

    xs = [values[i:i + sequence_length] for i in range(n_windows)]
    ys = [values[i + sequence_length:i + sequence_length + steps] for i in range(n_windows)]
    return np.array(xs), np.array(ys)

# Create sequences with modified function
sequence_length = 1  # Number of past observations fed to the LSTM per sample
steps = 1  # Number of future steps to predict
X, y = create_sequences(normalized_data, sequence_length, steps)

# Split the data chronologically (no shuffling): first 90% train, last 10% test
split = int(len(X) * 0.9)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Reshape for LSTM [samples, time steps, features]; the series is univariate,
# so the feature axis has size 1.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Build the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(sequence_length, 1)))
model.add(Dropout(0.2))
model.add(Dense(steps))  # The output layer should have 'steps' units if predicting multiple steps

# Compile the model
model.compile(optimizer=Adam(0.001), loss='mean_squared_error')

# Train the model.
# NOTE: the test split doubles as validation data here, so the reported
# val_loss is for monitoring only and is not an independent estimate.
model.fit(X_train, y_train, epochs=10, batch_size=64, verbose=1, validation_data=(X_test, y_test))

# Predicting
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Evaluate the model (errors are in normalized units, not prices)
mae_train = mean_absolute_error(y_train, train_predict)
rmse_train = np.sqrt(mean_squared_error(y_train, train_predict))

mae_test = mean_absolute_error(y_test, test_predict)
rmse_test = np.sqrt(mean_squared_error(y_test, test_predict))

print(f"Training Data - MAE: {mae_train}, RMSE: {rmse_train}")
print(f"Testing Data - MAE: {mae_test}, RMSE: {rmse_test}")

# Plot actual vs. predicted values on the test split.
# Every curve gets a label and the axes are titled so the figure can be read
# on its own; one actual/predicted pair is drawn per forecast step.
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
for k in range(steps):
    ax.plot(y_test[:, k], label=f'Actual (t+{k + 1})')
    ax.plot(test_predict[:, k], linestyle='--', label=f'Predicted (t+{k + 1})')
ax.set_title('LSTM: actual vs. predicted Close (test set)')
ax.set_xlabel('Test sample')
ax.set_ylabel('Close (normalized)')
ax.legend()
plt.show()


In [None]:
#SVR
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from scipy.stats import pearsonr

# Set random seed for reproducibility
np.random.seed(42)

# Load the data
data_path = 'IBM.csv'  # Make sure to update this to your data path
raw_frame = pd.read_csv(data_path)

# 'Close' is the column being predicted; keep it as an (n, 1) column vector.
# `data` holds this array, as the rest of the cell expects.
data = raw_frame['Close'].values.reshape(-1, 1)

# Normalize the data
# Fit the scaler on the chronologically first 90% of rows only -- the same
# fraction as the train/test split used further down in this cell -- so the
# min/max of the held-out tail never leaks into preprocessing. The whole
# series is then transformed with those training-only statistics, which means
# test values may fall slightly outside [-1, 1]; that is expected.
n_fit_rows = max(1, int(len(data) * 0.9))
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(data[:n_fit_rows])
normalized_data = scaler.transform(data).flatten()

# Data Preprocessing Function modified for multi-step
def create_sequences(data, sequence_length, steps):
    """Slice a series into overlapping (input window, future target) pairs.

    Window ``i`` takes ``data[i : i + sequence_length]`` as input and the
    ``steps`` values that immediately follow it as the target.

    Parameters
    ----------
    data : array-like
        Series to window along its first axis (anything ``np.asarray``
        accepts). Trailing dimensions, if any, are carried through unchanged.
    sequence_length : int
        Number of consecutive observations in each input window.
    steps : int
        Number of consecutive future observations in each target.

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``X`` with shape ``(n_windows, sequence_length, ...)`` and ``y`` with
        shape ``(n_windows, steps, ...)``, where
        ``n_windows = len(data) - sequence_length - steps + 1``.
        When the series is too short to yield a single window, both arrays
        are empty but keep those trailing dimensions, so callers that rely on
        a 2-D ``X`` (e.g. an estimator's ``fit``) get a well-formed array.
    """
    values = np.asarray(data)
    n_windows = len(values) - sequence_length - steps + 1

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); build explicitly shaped
        # empties instead so the result is still 2-D+.
        trailing = values.shape[1:]
        empty_x = np.empty((0, max(sequence_length, 0)) + trailing, dtype=values.dtype)
        empty_y = np.empty((0, max(steps, 0)) + trailing, dtype=values.dtype)
        return empty_x, empty_y

    xs = [values[i:i + sequence_length] for i in range(n_windows)]
    ys = [values[i + sequence_length:i + sequence_length + steps] for i in range(n_windows)]
    return np.array(xs), np.array(ys)

# Create sequences with modified function
sequence_length = 1  # Number of past observations used as features
steps = 1  # Number of future steps to predict
X, y = create_sequences(normalized_data, sequence_length, steps)

# Split the data chronologically (no shuffling): first 90% train, last 10% test
split = int(len(X) * 0.9)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Base SVR estimator; C, gamma and epsilon below are only starting values,
# the grid search overrides all three.
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)

# Hyperparameter search.
# An integer `cv` would use plain unshuffled K-fold, where most folds train on
# observations that come *after* the validation fold. For a time series that
# scores hyperparameters with knowledge of the future, so use forward-chaining
# splits instead: each fold trains on a prefix and validates on what follows.
from sklearn.model_selection import TimeSeriesSplit

parameters = {'C': [1e-1, 1e0, 1e1, 1e2], 'gamma': [1e-1, 1e-2, 1e-3, 1e-4], 'epsilon': [1e-1, 1e-2, 1e-3, 1e-4]}
grid_search = GridSearchCV(svr_rbf, parameters, cv=TimeSeriesSplit(n_splits=5))
# SVR expects a 1-D target, hence ravel() on the (n, 1) array.
grid_search.fit(X_train, y_train.ravel())

best_svr = grid_search.best_estimator_

# Predictions with the optimized model
train_predict = best_svr.predict(X_train)
test_predict = best_svr.predict(X_test)

# Evaluate the model using MAE and RMSE on the scaled data
mae_train = mean_absolute_error(y_train, train_predict)
rmse_train = np.sqrt(mean_squared_error(y_train, train_predict))

mae_test = mean_absolute_error(y_test, test_predict)
rmse_test = np.sqrt(mean_squared_error(y_test, test_predict))

print(f"Training Data - MAE: {mae_train}, RMSE: {rmse_train}")
print(f"Testing Data - MAE: {mae_test}, RMSE: {rmse_test}")

In [None]:
#CNN-LSTM
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten, TimeDistributed, Reshape
from tensorflow.keras.optimizers import Adam

# Set random seeds for reproducibility.
# Seeding NumPy alone is not enough for a Keras model: Python's `random`
# module and TensorFlow keep their own generator state, so seed all three
# before any layer is created.
import random
import tensorflow as tf

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load the data from a CSV file
data_path = 'IBM.csv'  # Update this to the correct path where IBM.csv is located
raw_frame = pd.read_csv(data_path)

# 'Close' is the column being predicted (adjust if needed); keep it as an
# (n, 1) column vector. `data` holds this array, as the rest of the cell expects.
data = raw_frame['Close'].values.reshape(-1, 1)

# Normalize the data
# Fit the scaler on the chronologically first 90% of rows only -- the same
# fraction as the train/test split used further down in this cell -- so the
# min/max of the held-out tail never leaks into preprocessing. The whole
# series is then transformed with those training-only statistics, which means
# test values may fall slightly outside [-1, 1]; that is expected.
n_fit_rows = max(1, int(len(data) * 0.9))
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(data[:n_fit_rows])
normalized_data = scaler.transform(data).flatten()

# Data Preprocessing Function for multi-step
def create_sequences(data, sequence_length, steps):
    """Slice a series into overlapping (input window, future target) pairs.

    Window ``i`` takes ``data[i : i + sequence_length]`` as input and the
    ``steps`` values that immediately follow it as the target.

    Parameters
    ----------
    data : array-like
        Series to window along its first axis (anything ``np.asarray``
        accepts). Trailing dimensions, if any, are carried through unchanged.
    sequence_length : int
        Number of consecutive observations in each input window.
    steps : int
        Number of consecutive future observations in each target.

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``X`` with shape ``(n_windows, sequence_length, ...)`` and ``y`` with
        shape ``(n_windows, steps, ...)``, where
        ``n_windows = len(data) - sequence_length - steps + 1``.
        When the series is too short to yield a single window, both arrays
        are empty but keep those trailing dimensions, so callers that index
        ``X.shape[1]`` do not hit an ``IndexError``.
    """
    values = np.asarray(data)
    n_windows = len(values) - sequence_length - steps + 1

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); build explicitly shaped
        # empties instead so downstream reshapes still see a 2-D+ array.
        trailing = values.shape[1:]
        empty_x = np.empty((0, max(sequence_length, 0)) + trailing, dtype=values.dtype)
        empty_y = np.empty((0, max(steps, 0)) + trailing, dtype=values.dtype)
        return empty_x, empty_y

    xs = [values[i:i + sequence_length] for i in range(n_windows)]
    ys = [values[i + sequence_length:i + sequence_length + steps] for i in range(n_windows)]
    return np.array(xs), np.array(ys)

# Windowing configuration and supervised samples
sequence_length = 7  # Observations per input window
steps = 1  # Forecast horizon
X, y = create_sequences(normalized_data, sequence_length, steps)

# Chronological hold-out: the first 90% of the samples train the model,
# the remaining tail is the test set.
split = int(0.9 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# The CNN-LSTM consumes [samples, subsequences, timesteps, features].
# With a single subsequence the whole window goes through the CNN in one piece;
# raise `subsequences` to let the CNN process several shorter chunks.
subsequences = 1
timesteps = X_train.shape[1] // subsequences
cnn_input_shape = (subsequences, timesteps, 1)
X_train = X_train.reshape((X_train.shape[0],) + cnn_input_shape)
X_test = X_test.reshape((X_test.shape[0],) + cnn_input_shape)

# CNN-LSTM model: a TimeDistributed convolutional front-end applied to each
# subsequence, an LSTM across the subsequences, then a dense regression head.
model = Sequential([
    TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None, timesteps, 1)),
    TimeDistributed(MaxPooling1D(pool_size=2)),
    TimeDistributed(Flatten()),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(50, activation='relu'),
    Dense(steps),  # one output unit per forecast step
])

# Optimise mean squared error with Adam
model.compile(loss='mean_squared_error', optimizer=Adam(0.001))

# Fit; the test split is passed as validation data for per-epoch monitoring
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=64,
    verbose=1,
)

# Model outputs on both splits
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# MAE and RMSE on both splits (in normalized units)
mae_train = mean_absolute_error(y_train, train_predict)
mae_test = mean_absolute_error(y_test, test_predict)
rmse_train = np.sqrt(mean_squared_error(y_train, train_predict))
rmse_test = np.sqrt(mean_squared_error(y_test, test_predict))

print(f"Training Data - MAE: {mae_train}, RMSE: {rmse_train}")
print(f"Testing Data - MAE: {mae_test}, RMSE: {rmse_test}")

In [None]:
#CNN-BiLSTM 
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Dropout, Conv1D, MaxPooling1D
from tensorflow.keras.losses import MeanSquaredError
import matplotlib.pyplot as plt
from math import sqrt

# Seed every generator the pipeline can draw from so reruns are comparable.
# Python's `random`, NumPy and TensorFlow each keep their own state.
import random
import tensorflow as tf

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load data
data = pd.read_csv('IBM.csv')

# Preprocessing: parse dates, order chronologically, index by date
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date').set_index('Date')

# Windowing / split configuration
lookback = 5          # rows of history per sample
target_col = 'Close'  # column being predicted
n_samples = len(data) - lookback
train_size = int(n_samples * 0.8)

# Sample k uses rows k .. k+lookback-1 as input and row k+lookback as target,
# so the training samples touch exactly rows [0, lookback + train_size).
# Fit the scalers on those rows only, so statistics of the test period do not
# leak into preprocessing; the full frame is then transformed with them.
n_fit_rows = lookback + train_size

# Normalize all feature columns
scaler = MinMaxScaler()
scaler.fit(data.iloc[:n_fit_rows])
data_normalized = pd.DataFrame(scaler.transform(data), columns=data.columns, index=data.index)

# Separate scaler for the target ('Close') column so predictions can be mapped
# back to price units later. (The variable name is historical; it scales
# 'Close', not 'Adj Close'.)
scaler_adj_close = MinMaxScaler()
scaler_adj_close.fit(data[[target_col]].iloc[:n_fit_rows])
data_normalized[target_col] = scaler_adj_close.transform(data[[target_col]]).ravel()

# Create the sequences.
# Look the target column up by name rather than by a hard-coded position, and
# slice a plain NumPy array instead of calling .iloc once per row.
values = data_normalized.to_numpy()
target_idx = data_normalized.columns.get_loc(target_col)
X = np.array([values[i - lookback:i] for i in range(lookback, len(values))])
Y = values[lookback:, target_idx]

# Split data into training and test sets (chronological, first 80% train)
trainX, testX = X[:train_size], X[train_size:]
trainY, testY = Y[:train_size], Y[train_size:]

# Define the model
model = Sequential()

# Add a 1D Convolutional layer with 64 filters
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(lookback, data_normalized.shape[1])))

# Add a MaxPooling layer
model.add(MaxPooling1D(pool_size=2))

# Add a Bidirectional LSTM layer with 50 units
model.add(Bidirectional(LSTM(50, activation='relu')))

# Add a Dense layer with 1 unit
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model.
# NOTE: the test split doubles as validation data here, so val_loss is for
# monitoring only and is not an independent estimate.
history = model.fit(trainX, trainY, epochs=20, validation_data=(testX, testY), verbose=1)

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Predict on the test data
preds = model.predict(testX)

# Un-normalize the predicted values
preds_unscaled = scaler_adj_close.inverse_transform(preds)

# Un-normalize the actual values
actual_unscaled = scaler_adj_close.inverse_transform(testY.reshape(-1, 1))

# Errors on the normalized scale (same units as the training loss)
mse = mean_squared_error(testY, preds)
mae = mean_absolute_error(testY, preds)
rmse = sqrt(mse)
print(f'MSE: {mse}, MAE: {mae}, RMSE: {rmse}')

# Errors in original price units, using the un-normalized arrays above
mse_unscaled = mean_squared_error(actual_unscaled, preds_unscaled)
mae_unscaled = mean_absolute_error(actual_unscaled, preds_unscaled)
rmse_unscaled = sqrt(mse_unscaled)
print(f'Unscaled (price units) - MSE: {mse_unscaled}, MAE: {mae_unscaled}, RMSE: {rmse_unscaled}')