In [None]:
# Data handling and processing
import numpy as np
import pandas as pd

# Preprocessing and metrics
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Model building (TensorFlow and Keras)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    AveragePooling1D,
    Conv1D,
    Dense,
    Flatten,
    Input,
    MaxPooling1D,
)

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read Apple's daily stock prices from the CSV file and preview the first rows.
data = pd.read_csv('AAPL_2006-01-01_to_2018-01-01.csv')
print(data.head())

# Turn the frame into a time series: parse 'Date' as datetimes and use it as the index.
data = data.assign(Date = pd.to_datetime(data['Date'])).set_index('Date')

# The forecasting target is the 'Close' price column, extracted as a NumPy array.
dat = data['Close'].to_numpy()

In [None]:
# Feature scaling

# MinMaxScaler expects a 2D array of shape (n_samples, n_features).
dat = dat.reshape(-1, 1)

# Fit the scaler on the training period only. The notebook later holds out the
# most recent 30% of samples in chronological order (shuffle = False), so
# fitting on the full series would leak the test period's min/max into the
# training features. The first 70% of rows fall inside the training portion.
# Keep this fraction in sync with the test_size used for the train/test split.
train_fraction = 0.7
n_fit_rows = max(1, int(len(dat) * train_fraction))

scaler = MinMaxScaler(feature_range = (0, 1))
scaler.fit(dat[:n_fit_rows])

# Transform the whole series with the training-period statistics. Values from
# the held-out period may therefore fall outside [0, 1]; that is expected.
dat_scaled = scaler.transform(dat)

In [None]:
# Sliding-window construction of supervised samples from the price time series
def create_sequences(dat, window_size):
    """Cut a series into overlapping input windows and their next-step targets.

    Parameters
    ----------
    dat : sequence or array
        Ordered observations; anything supporting ``len`` and slicing.
    window_size : int
        Number of consecutive observations in each input window.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` where ``X[k]`` is ``dat[k:k + window_size]`` and ``y[k]`` is
        the observation that immediately follows that window. There are
        ``len(dat) - window_size`` samples; both arrays are empty when the
        series is not longer than the window.
    """
    starts = range(len(dat) - window_size)
    windows = [dat[s:s + window_size] for s in starts]
    targets = [dat[s + window_size] for s in starts]
    return np.array(windows), np.array(targets)

# Length of the look-back window: how many past time steps feed each prediction
window_size = 10

# Build (window, next value) pairs from the scaled series
X, y = create_sequences(dat = dat_scaled, window_size = window_size)

In [None]:
# Reshape for CNN input

# Conv1D consumes [samples, timesteps, features]; make the single feature axis explicit.
n_samples, n_timesteps = X.shape[0], X.shape[1]
X = X.reshape((n_samples, n_timesteps, 1))
print(X.shape)

In [None]:
# Hold out 30% of the samples for testing. Shuffling is disabled so the
# original (chronological) order of the samples is preserved across the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.3,
    shuffle = False,
)

In [None]:
# Build the CNN model
#
# Layer by layer:
# - Input: one sample is a window of `window_size` time steps with one feature.
#   Declared as an explicit Input layer instead of passing `input_shape` to the
#   first layer, which recent Keras versions warn against.
# - Conv1D: 1D convolution slides along the time axis, which suits time-series
#   data (Conv2D would be the choice for images). `filters` is the number of
#   kernels learned, each able to pick up a different pattern; `kernel_size` is
#   how many consecutive time steps each kernel looks at.
# - AveragePooling1D: halves the temporal resolution by averaging pairs of steps.
# - Flatten: collapses (time, channels) into a single feature vector.
# - Dense(50): fully connected hidden layer.
# - Dense(1): output layer, predicts the next value in the sequence.
model = Sequential([
    Input(shape = (window_size, 1)),
    Conv1D(filters = 64, kernel_size = 3, activation = 'relu'),
    AveragePooling1D(pool_size = 2),
    Flatten(),
    Dense(50, activation = 'relu'),
    Dense(1),
])

# Compile the model with the Adam optimizer and mean squared error loss
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [None]:
# Fit the network on the training windows. The test split is passed as
# validation data, so the per-epoch validation loss is measured on the test set.
history = model.fit(
    X_train,
    y_train,
    validation_data = (X_test, y_test),
    batch_size = 32,
    epochs = 50,
)

In [None]:
# Predict on the held-out test windows (outputs are still in the scaled space)
y_pred = model.predict(X_test)

# Map predictions and targets back to the original price scale so the error and
# the plot are expressed in price units. These two arrays were previously used
# without ever being defined, which raised a NameError on a fresh kernel.
y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1))
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Evaluate the model using mean squared error
mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
print(f'Mean Squared Error: {mse}')

# Plotting the actual vs predicted values
plt.figure(figsize = (10, 6))
plt.plot(y_test_rescaled, label = 'Actual Price')
plt.plot(y_pred_rescaled, label = 'Predicted Price')
plt.title('Stock Price Prediction using CNN')
plt.xlabel('Time Steps')
plt.ylabel('Stock price')
plt.legend()
plt.show()

In [None]:
# Number of most recent test windows used to seed the recursive forecasts
num_windows = 10

# Number of recursive (multi-step) predictions to make from each window
num_future_predictions = 5

# Actual test prices on the original scale, needed for the plot below.
# Computed here so this cell does not rely on a variable from another cell.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Take the latest `num_windows` test windows as one batch of shape
# (num_windows, window_size, 1). Slicing from the end (rather than computing a
# start index) avoids negative-index wrap-around when X_test is shorter than
# num_windows; in that case fewer windows are used.
windows = X_test[-num_windows:].copy()

# Roll every window forward one step at a time. All windows are predicted in a
# single batched call per step (verbose = 0 keeps the output free of one
# progress bar per call).
step_predictions = []
for _ in range(num_future_predictions):
    # Next value for each window, in scaled space; shape (n_windows, 1)
    next_scaled = model.predict(windows, verbose = 0)

    # Store this step's predictions on the original price scale
    step_predictions.append(scaler.inverse_transform(next_scaled)[:, 0])

    # Slide each window: drop its oldest value and append its own prediction
    windows = np.concatenate(
        [windows[:, 1:, :], next_scaled[:, np.newaxis, :]],
        axis = 1,
    )

# Rows are seed windows, columns are forecast steps:
# shape (n_windows, num_future_predictions)
all_future_predictions = np.stack(step_predictions, axis = 1)

# Average the predictions from each window for a final prediction sequence
final_future_predictions = np.mean(all_future_predictions, axis = 0)
std_dev_future = np.std(all_future_predictions, axis = 0)

# Band of +/- 1.96 standard deviations around the mean (the 95% range of a
# normal distribution). It measures the spread across the seed windows only.
# NOTE(review): this is not a calibrated statistical confidence interval for
# the forecast - confirm the label is acceptable.
ci_upper = final_future_predictions + 1.96 * std_dev_future
ci_lower = final_future_predictions - 1.96 * std_dev_future

# NOTE(review): the seed windows end at different time steps, and each test
# window's first prediction targets that window's own (already observed) test
# value. Step k of different windows therefore refers to different dates, and
# the averaged curve is drawn after the last actual price only as an
# approximation - confirm this alignment is intended.
future_steps = range(len(y_test_rescaled), len(y_test_rescaled) + num_future_predictions)

# Plotting the actual prices followed by the aggregated future predictions
plt.figure(figsize = (10, 6))
plt.plot(range(len(y_test_rescaled)), y_test_rescaled, label = 'Actual Price', color = 'blue')
plt.plot(future_steps, final_future_predictions, label = 'Future Predictions', color = 'red')
plt.fill_between(future_steps, ci_lower, ci_upper, color = 'red', alpha = 0.2, label = '95% Confidence Interval')
plt.title('Stock Price Prediction using CNN (Aggregated Future Predictions)')
plt.xlabel('Time Steps')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
