# Prediction On Daily Data

In [50]:
import pandas as pd
import numpy as np
import requests
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import plotly.graph_objs as go
import datetime

In [51]:
# API Key and Fetch Data
API_KEY = 'F6U8EWWSXP0JBP5O'
SYMBOL = 'QQQ'
TIME_SERIES_SIZE = 'full'
URL = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={SYMBOL}&outputsize={TIME_SERIES_SIZE}&apikey={API_KEY}"


In [52]:
response = requests.get(URL)
data = response.json()['Time Series (Daily)']


In [53]:
df = pd.DataFrame.from_dict(data).T


In [54]:
df

Unnamed: 0,1. open,2. high,3. low,4. close,5. volume
2023-10-11,369.9400,371.4120,368.1900,371.2200,38215088
2023-10-10,366.9000,371.2800,366.3900,368.5900,46968806
2023-10-09,362.3000,367.1100,360.7800,366.5600,45304766
2023-10-06,355.6500,365.9100,354.8500,364.7000,75049119
2023-10-05,359.7400,360.1500,355.1400,358.6900,45897390
...,...,...,...,...,...
1999-11-05,137.8000,138.4000,136.4000,136.4000,7567300
1999-11-04,135.4000,135.6000,133.6000,135.0000,10024300
1999-11-03,132.8000,134.3000,132.4000,133.5000,9376300
1999-11-02,131.5000,133.1000,130.4000,130.9000,6417400


In [55]:
df = df.iloc[::-1]  # Reverse the order


In [56]:
df

Unnamed: 0,1. open,2. high,3. low,4. close,5. volume
1999-11-01,131.5000,133.1000,130.6000,130.8000,4840900
1999-11-02,131.5000,133.1000,130.4000,130.9000,6417400
1999-11-03,132.8000,134.3000,132.4000,133.5000,9376300
1999-11-04,135.4000,135.6000,133.6000,135.0000,10024300
1999-11-05,137.8000,138.4000,136.4000,136.4000,7567300
...,...,...,...,...,...
2023-10-05,359.7400,360.1500,355.1400,358.6900,45897390
2023-10-06,355.6500,365.9100,354.8500,364.7000,75049119
2023-10-09,362.3000,367.1100,360.7800,366.5600,45304766
2023-10-10,366.9000,371.2800,366.3900,368.5900,46968806


In [57]:
df = df.astype(float)

In [58]:
df

Unnamed: 0,1. open,2. high,3. low,4. close,5. volume
1999-11-01,131.50,133.100,130.60,130.80,4840900.0
1999-11-02,131.50,133.100,130.40,130.90,6417400.0
1999-11-03,132.80,134.300,132.40,133.50,9376300.0
1999-11-04,135.40,135.600,133.60,135.00,10024300.0
1999-11-05,137.80,138.400,136.40,136.40,7567300.0
...,...,...,...,...,...
2023-10-05,359.74,360.150,355.14,358.69,45897390.0
2023-10-06,355.65,365.910,354.85,364.70,75049119.0
2023-10-09,362.30,367.110,360.78,366.56,45304766.0
2023-10-10,366.90,371.280,366.39,368.59,46968806.0


In [59]:
# Data Preprocessing
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(df['4. close'].values.reshape(-1, 1))

In [60]:
training_data_len = int(np.ceil(len(scaled_data) * 0.95))
train_data = scaled_data[0:training_data_len, :]
x_train, y_train = [], []

In [61]:
sequence_length = 60
for i in range(sequence_length, len(train_data)):
    x_train.append(train_data[i-sequence_length:i, 0])
    y_train.append(train_data[i, 0])

In [62]:
len(x_train)

5664

In [63]:
len(y_train)

5664

In [64]:
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

In [65]:
# Model Definition and Training
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=25))
model.add(Dense(units=1))

In [66]:
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=16, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x789d4d20e2f0>

In [67]:
# Creating Testing Dataset and Predicting Prices
test_data = scaled_data[training_data_len - sequence_length:, :]
x_test, y_test = [], scaled_data[training_data_len:, :]

In [68]:
for i in range(sequence_length, len(test_data)):
    x_test.append(test_data[i-sequence_length:i, 0])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))


In [69]:
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)



In [71]:
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error

In [73]:
import math

In [74]:
# Calculating Metrics
actual_prices = df['4. close'].iloc[training_data_len:].values
mse = mean_squared_error(actual_prices, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(actual_prices, predictions)
mape = mean_absolute_percentage_error(actual_prices, predictions)

In [75]:
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Squared Error (MSE): 84.1327
Root Mean Squared Error (RMSE): 9.1724
Mean Absolute Error (MAE): 7.6342
Mean Absolute Percentage Error (MAPE): 0.02%


In [76]:
# Predicting into the Future (120 units)
future_units = 120
current_input = x_test[-1].reshape((1, sequence_length, 1))  # Taking the last sequence of known output
forecasted_output = []

In [77]:
for i in range(future_units):
    # Predict the next future unit
    next_output = model.predict(current_input)
    forecasted_output.append(next_output[0])
    
    # Update the current_input with the predicted value
    current_input = np.roll(current_input, -1)
    current_input[0, -1, 0] = next_output



In [78]:
# Inverse transform the forecasted output to original scale
forecasted_output_original_scale = scaler.inverse_transform(np.array(forecasted_output).reshape(-1, 1))

# Generating future timestamps
last_timestamp = pd.to_datetime(df.index[-1])
future_timestamps = [last_timestamp + datetime.timedelta(days=i) for i in range(1, future_units+1)]

# Append future timestamps and forecasted_output to the plot
future_df = pd.DataFrame(forecasted_output_original_scale, index=future_timestamps, columns=['Predicted Future'])
concat_df = pd.concat([df, future_df], axis=0)

In [79]:
# Visualization using Plotly
fig = go.Figure()

fig.add_trace(go.Scatter(x=concat_df.index, y=concat_df['4. close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=concat_df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

fig.show()

# Daily Data Frome YahooFinance

In [146]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import plotly.graph_objs as go
import math

In [147]:
def fetch_daily_data(ticker_symbol, years=25):
    """
    Fetches daily stock data from Yahoo Finance.
    
    Parameters:
    - ticker_symbol (str): The stock ticker symbol.
    - years (int): Number of years to fetch data for. Default is 5.
    
    Returns:
    - DataFrame with daily stock data.
    """
    return yf.download(ticker_symbol, period=f"{years}y", interval="1d")

# Fetch data
df = fetch_daily_data("QQQ")

[*********************100%%**********************]  1 of 1 completed


In [148]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-03-10,51.125000,51.156250,50.281250,51.062500,43.715050,5232000
1999-03-11,51.437500,51.734375,50.312500,51.312500,43.929092,9688600
1999-03-12,51.125000,51.156250,49.656250,50.062500,42.858936,8743600
1999-03-15,50.437500,51.562500,49.906250,51.500000,44.089622,6369000
1999-03-16,51.718750,52.156250,51.156250,51.937500,44.464161,4905800
...,...,...,...,...,...,...
2023-10-05,359.739990,360.149994,355.140015,358.690002,358.690002,45897400
2023-10-06,355.649994,365.910004,354.850006,364.700012,364.700012,74959200
2023-10-09,362.299988,367.109985,360.779999,366.559998,366.559998,45304800
2023-10-10,366.899994,371.279999,366.390015,368.589996,368.589996,46968800


In [153]:
# Data Preprocessing
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

training_data_len = int(np.ceil(len(scaled_data) * 0.95))
train_data = scaled_data[0:training_data_len, :]
x_train, y_train = [], []

# Adjusted Sequence Length for daily data
sequence_length = 60

for i in range(sequence_length, len(train_data)):
    x_train.append(train_data[i-sequence_length:i, 0])
    y_train.append(train_data[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Model Definition with Dropout
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=16, epochs=5)

# Test Dataset and Predictions
test_data = scaled_data[training_data_len - sequence_length:, :]
x_test, y_test = [], scaled_data[training_data_len:, :]

for i in range(sequence_length, len(test_data)):
    x_test.append(test_data[i-sequence_length:i, 0])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [155]:
# Calculating Metrics
actual_prices = df['Close'].iloc[training_data_len:].values
mse = mean_squared_error(actual_prices, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(actual_prices, predictions)
mape = mean_absolute_percentage_error(actual_prices, predictions)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Squared Error (MSE): 72.8958
Root Mean Squared Error (RMSE): 8.5379
Mean Absolute Error (MAE): 7.1784
Mean Absolute Percentage Error (MAPE): 0.02%


In [156]:
# Future Forecasting
last_timestamp = df.index[-1]
start_of_next_day = last_timestamp + pd.Timedelta(days=1)

future_units = 120  # Predicting for the next 120 days
future_timestamps = [start_of_next_day + pd.Timedelta(days=i) for i in range(future_units)]

current_input = x_test[-1].reshape((1, sequence_length, 1))
forecasted_output = []

for i in range(future_units):
    next_output = model.predict(current_input)
    forecasted_output.append(next_output[0])
    current_input = np.roll(current_input, -1)
    current_input[0, -1, 0] = next_output

forecasted_output_original_scale = scaler.inverse_transform(np.array(forecasted_output).reshape(-1, 1))



In [157]:
# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

fig.update_layout(
    title="Stock Price Prediction",
    xaxis_title="Date",
    yaxis_title="Price"
)

fig.show()

## 5 Minute Interval

In [80]:
# API Key and Fetch Data for 5-minute intervals
API_KEY = 'F6U8EWWSXP0JBP5O'
SYMBOL = 'QQQ'
TIME_SERIES_SIZE = 'full'
URL = f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={SYMBOL}&interval=5min&outputsize={TIME_SERIES_SIZE}&apikey={API_KEY}"

response = requests.get(URL)
data = response.json()['Time Series (5min)']
df = pd.DataFrame.from_dict(data).T
df = df.iloc[::-1]  # Reverse the order
df = df.astype(float)

In [81]:
df

Unnamed: 0,1. open,2. high,3. low,4. close,5. volume
2023-09-20 04:00:00,370.05,370.05,369.67,369.70,4456.0
2023-09-20 04:05:00,369.67,369.85,369.67,369.77,11275.0
2023-09-20 04:10:00,369.75,369.87,369.75,369.78,1589.0
2023-09-20 04:15:00,369.79,370.10,369.79,370.09,2477.0
2023-09-20 04:20:00,370.10,370.29,370.07,370.29,7597.0
...,...,...,...,...,...
2023-10-11 19:35:00,372.09,372.09,371.95,371.96,8054.0
2023-10-11 19:40:00,371.97,372.02,371.95,372.02,2119.0
2023-10-11 19:45:00,372.02,372.08,372.00,372.02,3856.0
2023-10-11 19:50:00,372.05,372.10,372.01,372.07,1563.0


In [83]:
pip install yfinance

Collecting yfinance
  Downloading yfinance-0.2.31-py2.py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.6/65.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.16.3.tar.gz (928 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m928.0/928.0 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: peewee
  Building wheel for peewee (pyproject.toml) ... [?25ldone
[?25h  Created wheel for peewee: filename=peewee-3.16.3-cp310-cp310-linux_x86_64.whl size=314460 sha256=18e27beaef51b9b4a0e9a363d4d2a750ccc9cb5d9209077fec65740f31c33ab3
  Stored in directory

In [92]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import plotly.graph_objs as go
import math

def fetch_intraday_data(ticker_symbol, days=60):
    """
    Fetches intraday stock data (5-minute intervals) from Yahoo Finance.
    
    Parameters:
    - ticker_symbol (str): The stock ticker symbol.
    - days (int): Number of days to fetch data for. Default is 60.
    
    Returns:
    - DataFrame with intraday stock data.
    """
    return yf.download(ticker_symbol, period=f"{days}d", interval="5m")

# Fetch data
df = fetch_intraday_data("QQQ")

# Make sure data is in ascending order by date
df = df.sort_index(ascending=True)




[*********************100%%**********************]  1 of 1 completed


In [93]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-19 09:30:00-04:00,386.820007,387.329987,385.954987,386.160004,386.160004,2325944
2023-07-19 09:35:00-04:00,386.149994,386.921387,386.029999,386.899994,386.899994,1179127
2023-07-19 09:40:00-04:00,386.920013,387.380005,386.779999,386.801697,386.801697,1101693
2023-07-19 09:45:00-04:00,386.809998,386.910004,386.350006,386.459991,386.459991,835945
2023-07-19 09:50:00-04:00,386.429993,386.565002,386.070007,386.390015,386.390015,788257
...,...,...,...,...,...,...
2023-10-11 15:35:00-04:00,370.290009,370.660004,370.170105,370.600006,370.600006,292746
2023-10-11 15:40:00-04:00,370.609985,370.880005,370.515015,370.790009,370.790009,647061
2023-10-11 15:45:00-04:00,370.790009,371.095001,370.750000,370.929993,370.929993,616198
2023-10-11 15:50:00-04:00,370.940002,371.209991,370.839996,371.049988,371.049988,925628


In [94]:
# Data Preprocessing
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

training_data_len = int(np.ceil(len(scaled_data) * 0.95))
train_data = scaled_data[0:training_data_len, :]
x_train, y_train = [], []

sequence_length = 60  # We can adjust this value based on intraday data patterns
for i in range(sequence_length, len(train_data)):
    x_train.append(train_data[i-sequence_length:i, 0])
    y_train.append(train_data[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Model Definition and Training
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=25))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=1, epochs=1)

# Creating Testing Dataset and Predicting Prices
test_data = scaled_data[training_data_len - sequence_length:, :]
x_test, y_test = [], scaled_data[training_data_len:, :]

for i in range(sequence_length, len(test_data)):
    x_test.append(test_data[i-sequence_length:i, 0])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Calculating Metrics
actual_prices = df['Close'].iloc[training_data_len:].values
mse = mean_squared_error(actual_prices, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(actual_prices, predictions)
mape = mean_absolute_percentage_error(actual_prices, predictions)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Visualization using Plotly
fig = go.Figure()

fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))

fig.show()

Mean Squared Error (MSE): 0.4823
Root Mean Squared Error (RMSE): 0.6945
Mean Absolute Error (MAE): 0.5413
Mean Absolute Percentage Error (MAPE): 0.00%


In [95]:
# Predicting into the Future (e.g., 120 time points for 5-minute intervals)
future_units = 120
current_input = x_test[-1].reshape((1, sequence_length, 1))  # Taking the last sequence of known output
forecasted_output = []

for i in range(future_units):
    # Predict the next future unit
    next_output = model.predict(current_input)
    forecasted_output.append(next_output[0])
    
    # Update the current_input with the predicted value
    current_input = np.roll(current_input, -1)
    current_input[0, -1, 0] = next_output

# Inverse transform the forecasted output to original scale
forecasted_output_original_scale = scaler.inverse_transform(np.array(forecasted_output).reshape(-1, 1))

# Generating future timestamps
last_timestamp = df.index[-1]
delta = pd.Timedelta(minutes=5)
future_timestamps = [last_timestamp + i*delta for i in range(1, future_units+1)]

# Append future timestamps and forecasted_output to the plot
future_df = pd.DataFrame(forecasted_output_original_scale, index=future_timestamps, columns=['Predicted Future'])
concat_df = pd.concat([df, future_df], axis=0)

# Visualization using Plotly
fig = go.Figure()

fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

fig.show()



## Another IntraDay Approach 

- Sequence length adjusted to capture roughly a full trading day.
- Dropout layers added to the LSTM model.
- Prediction units adjusted to predict for the next trading day.

In [114]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import plotly.graph_objs as go
import math

def fetch_intraday_data(ticker_symbol, days=60):
    """
    Fetches intraday stock data (5-minute intervals) from Yahoo Finance.
    
    Parameters:
    - ticker_symbol (str): The stock ticker symbol.
    - days (int): Number of days to fetch data for. Default is 60.
    
    Returns:
    - DataFrame with intraday stock data.
    """
    return yf.download(ticker_symbol, period=f"{days}d", interval="5m")

# Fetch data
df = fetch_intraday_data("QQQ")

# Make sure data is in ascending order by date
df = df.sort_index(ascending=True)

# Data Preprocessing
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

training_data_len = int(np.ceil(len(scaled_data) * 0.95))
train_data = scaled_data[0:training_data_len, :]
x_train, y_train = [], []

# Adjusted Sequence Length
sequence_length = 78  # This captures roughly a full trading day for intraday data

for i in range(sequence_length, len(train_data)):
    x_train.append(train_data[i-sequence_length:i, 0])
    y_train.append(train_data[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Model Definition with Dropout
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=16, epochs=5)

# Creating Testing Dataset and Predicting Prices
test_data = scaled_data[training_data_len - sequence_length:, :]
x_test, y_test = [], scaled_data[training_data_len:, :]

for i in range(sequence_length, len(test_data)):
    x_test.append(test_data[i-sequence_length:i, 0])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Calculating Metrics
actual_prices = df['Close'].iloc[training_data_len:].values
mse = mean_squared_error(actual_prices, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(actual_prices, predictions)
mape = mean_absolute_percentage_error(actual_prices, predictions)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Predicting into the Future (adjusted to 78 time points for 5-minute intervals)
future_units = 78  # Predicting for the next trading day
current_input = x_test[-1].reshape((1, sequence_length, 1))
forecasted_output = []

for i in range(future_units):
    next_output = model.predict(current_input)
    forecasted_output.append(next_output[0])
    current_input = np.roll(current_input, -1)
    current_input[0, -1, 0] = next_output

forecasted_output_original_scale = scaler.inverse_transform(np.array(forecasted_output).reshape(-1, 1))

# Generating future timestamps
last_timestamp = df.index[-1]
delta = pd.Timedelta(minutes=5)
future_timestamps = [last_timestamp + i*delta for i in range(1, future_units+1)]

# Visualization using Plotly
future_df = pd.DataFrame(forecasted_output_original_scale, index=future_timestamps, columns=['Predicted Future'])
concat_df = pd.concat([df, future_df], axis=0)

fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

fig.show()

[*********************100%%**********************]  1 of 1 completed
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Mean Squared Error (MSE): 0.6538
Root Mean Squared Error (RMSE): 0.8086
Mean Absolute Error (MAE): 0.6273
Mean Absolute Percentage Error (MAPE): 0.00%


In [115]:
# Define the number of days you want to visualize in detail
days_to_display = 5

# Define the starting point for visualization
start_timestamp = df.index[-days_to_display*78]  # 78 intervals for 5-minute data in a trading day

fig = go.Figure()

fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

# Adjust the x-axis to focus on the last few days and the future prediction
fig.update_layout(
    title="Stock Price Prediction",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_range=[start_timestamp, future_timestamps[-1]]
)

fig.show()

# To Forecast Next Day Predictions

In [116]:
# Adjust the starting point for future timestamps to 9:30 AM of the next day
start_of_next_day = (last_timestamp + pd.Timedelta(days=1)).replace(hour=9, minute=30)
delta = pd.Timedelta(minutes=5)
future_timestamps = [start_of_next_day + i*delta for i in range(future_units)]


fig = go.Figure()

fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

# Adjust the x-axis to focus on the last few days and the future prediction
fig.update_layout(
    title="Stock Price Prediction",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_range=[start_timestamp, future_timestamps[-1]]
)

fig.show()

## Lets Skip The Weekends From the Plot

In [117]:
# Adjust the starting point for future timestamps to 9:30 AM of the next trading day
start_of_next_day = (last_timestamp + pd.Timedelta(days=1)).replace(hour=9, minute=30)

# Skip weekends
while start_of_next_day.weekday() > 4:  # 0-4 denotes Monday to Friday
    start_of_next_day += pd.Timedelta(days=1)

delta = pd.Timedelta(minutes=5)
future_timestamps = [start_of_next_day + i*delta for i in range(future_units)]

# Visualization
fig = go.Figure()

fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

# Adjust the x-axis to focus on the last few days and the future prediction
fig.update_layout(
    title="Stock Price Prediction",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_range=[start_timestamp, future_timestamps[-1]]
)

fig.show()


## 15 Minutes Interval 

In [118]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import plotly.graph_objs as go
import math

def fetch_intraday_data(ticker_symbol, days=60, interval="15m"):
    """
    Fetches intraday stock data from Yahoo Finance.
    
    Parameters:
    - ticker_symbol (str): The stock ticker symbol.
    - days (int): Number of days to fetch data for. Default is 60.
    - interval (str): Interval for fetching data (e.g., "5m", "15m").
    
    Returns:
    - DataFrame with intraday stock data.
    """
    return yf.download(ticker_symbol, period=f"{days}d", interval=interval)

# Fetch data
df = fetch_intraday_data("QQQ")

[*********************100%%**********************]  1 of 1 completed


In [119]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-19 09:30:00,386.820007,387.380005,385.954987,386.801697,386.801697,4606764
2023-07-19 09:45:00,386.809998,386.910004,386.010010,386.529999,386.529999,2207087
2023-07-19 10:00:00,386.535004,386.820007,386.089996,386.109985,386.109985,1765118
2023-07-19 10:15:00,386.109985,386.739990,385.790009,386.140015,386.140015,2024972
2023-07-19 10:30:00,386.119995,387.540009,386.079987,387.380005,387.380005,1489312
...,...,...,...,...,...,...
2023-10-11 14:45:00,370.019989,370.204987,369.790009,369.940002,369.940002,977264
2023-10-11 15:00:00,369.940002,370.320007,369.739990,369.839996,369.839996,721739
2023-10-11 15:15:00,369.850006,370.480011,369.709991,370.300415,370.300415,619777
2023-10-11 15:30:00,370.299988,370.880005,370.170105,370.790009,370.790009,1240152


In [120]:

# Data Preprocessing
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

training_data_len = int(np.ceil(len(scaled_data) * 0.95))
train_data = scaled_data[0:training_data_len, :]
x_train, y_train = [], []

# Adjusted Sequence Length for 15-minute intervals
sequence_length = 104

for i in range(sequence_length, len(train_data)):
    x_train.append(train_data[i-sequence_length:i, 0])
    y_train.append(train_data[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Model Definition with Dropout
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=16, epochs=5)

# Test Dataset and Predictions
test_data = scaled_data[training_data_len - sequence_length:, :]
x_test, y_test = [], scaled_data[training_data_len:, :]

for i in range(sequence_length, len(test_data)):
    x_test.append(test_data[i-sequence_length:i, 0])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Calculating Metrics
actual_prices = df['Close'].iloc[training_data_len:].values
mse = mean_squared_error(actual_prices, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(actual_prices, predictions)
mape = mean_absolute_percentage_error(actual_prices, predictions)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Adjust the starting point for future timestamps to 9:30 AM of the next trading day
last_timestamp = df.index[-1]
start_of_next_day = (last_timestamp + pd.Timedelta(days=1)).replace(hour=9, minute=30)

# Skip weekends
while start_of_next_day.weekday() > 4:  # 0-4 denotes Monday to Friday
    start_of_next_day += pd.Timedelta(days=1)

delta = pd.Timedelta(minutes=15)
future_units = 26  # Predicting for the next trading day with 15-minute intervals
future_timestamps = [start_of_next_day + i*delta for i in range(future_units)]

# Future Forecasting
current_input = x_test[-1].reshape((1, sequence_length, 1))
forecasted_output = []

for i in range(future_units):
    next_output = model.predict(current_input)
    forecasted_output.append(next_output[0])
    current_input = np.roll(current_input, -1)
    current_input[0, -1, 0] = next_output

forecasted_output_original_scale = scaler.inverse_transform(np.array(forecasted_output).reshape(-1, 1))

# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

fig.update_layout(
    title="Stock Price Prediction",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_range=[df.index[-sequence_length], future_timestamps[-1]]
)

fig.show()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Mean Squared Error (MSE): 2.5376
Root Mean Squared Error (RMSE): 1.5930
Mean Absolute Error (MAE): 1.3292
Mean Absolute Percentage Error (MAPE): 0.00%


# 30 Minutes Interval

In [121]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import plotly.graph_objs as go
import math

def fetch_intraday_data(ticker_symbol, days=60, interval="30m"):
    """
    Fetches intraday stock data from Yahoo Finance.
    
    Parameters:
    - ticker_symbol (str): The stock ticker symbol.
    - days (int): Number of days to fetch data for. Default is 60.
    - interval (str): Interval for fetching data (e.g., "15m", "30m").
    
    Returns:
    - DataFrame with intraday stock data.
    """
    return yf.download(ticker_symbol, period=f"{days}d", interval=interval)

# Fetch data
df = fetch_intraday_data("QQQ")

[*********************100%%**********************]  1 of 1 completed


In [122]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-19 09:30:00,386.820007,387.380005,385.954987,386.529999,386.529999,6813851
2023-07-19 10:00:00,386.535004,386.820007,385.790009,386.140015,386.140015,3790090
2023-07-19 10:30:00,386.119995,387.540009,385.839996,386.519897,386.519897,3861277
2023-07-19 11:00:00,386.519989,387.649994,386.260010,387.230011,387.230011,3430335
2023-07-19 11:30:00,387.239990,387.654999,386.849487,387.089691,387.089691,3920703
...,...,...,...,...,...,...
2023-10-11 13:30:00,368.929993,369.040009,368.190002,368.890015,368.890015,2281095
2023-10-11 14:00:00,368.890015,370.334991,368.839996,369.579987,369.579987,2487543
2023-10-11 14:30:00,369.579987,370.204987,369.454987,369.940002,369.940002,1734258
2023-10-11 15:00:00,369.940002,370.480011,369.709991,370.300415,370.300415,1341516


In [124]:
# Data Preprocessing
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

training_data_len = int(np.ceil(len(scaled_data) * 0.95))
train_data = scaled_data[0:training_data_len, :]
x_train, y_train = [], []

# Adjusted Sequence Length for 30-minute intervals
sequence_length = 52

for i in range(sequence_length, len(train_data)):
    x_train.append(train_data[i-sequence_length:i, 0])
    y_train.append(train_data[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Model Definition with Dropout
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=16, epochs=5)

# Test Dataset and Predictions
test_data = scaled_data[training_data_len - sequence_length:, :]
x_test, y_test = [], scaled_data[training_data_len:, :]

for i in range(sequence_length, len(test_data)):
    x_test.append(test_data[i-sequence_length:i, 0])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Calculating Metrics
actual_prices = df['Close'].iloc[training_data_len:].values
mse = mean_squared_error(actual_prices, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(actual_prices, predictions)
mape = mean_absolute_percentage_error(actual_prices, predictions)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Adjust the starting point for future timestamps to 9:30 AM of the next trading day
last_timestamp = df.index[-1]
start_of_next_day = (last_timestamp + pd.Timedelta(days=1)).replace(hour=9, minute=30)

# Skip weekends
while start_of_next_day.weekday() > 4:  # 0-4 denotes Monday to Friday
    start_of_next_day += pd.Timedelta(days=1)

delta = pd.Timedelta(minutes=30)
future_units = 13  # Predicting for the next trading day with 30-minute intervals
future_timestamps = [start_of_next_day + i*delta for i in range(future_units)]

# Future Forecasting
current_input = x_test[-1].reshape((1, sequence_length, 1))
forecasted_output = []

for i in range(future_units):
    next_output = model.predict(current_input)
    forecasted_output.append(next_output[0])
    current_input = np.roll(current_input, -1)
    current_input[0, -1, 0] = next_output

forecasted_output_original_scale = scaler.inverse_transform(np.array(forecasted_output).reshape(-1, 1))

# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Prices'))
fig.add_trace(go.Scatter(x=df.index[training_data_len:], y=predictions[:,0], mode='lines', name='Predicted Prices'))
fig.add_trace(go.Scatter(x=future_timestamps, y=forecasted_output_original_scale[:,0], mode='lines', name='Future Predictions'))

fig.update_layout(
    title="Stock Price Prediction",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_range=[df.index[-sequence_length], future_timestamps[-1]]
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Mean Squared Error (MSE): 3.4089
Root Mean Squared Error (RMSE): 1.8463
Mean Absolute Error (MAE): 1.4026
Mean Absolute Percentage Error (MAPE): 0.00%
