### Data Preprocessing

In [1]:
import pandas as pd

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('/content/GOOGL.csv')

# Display the first few rows of the DataFrame
display(df.head())

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19,50.050049,52.082081,48.028027,50.220219,50.220219,44659096
1,2004-08-20,50.555557,54.594597,50.300301,54.209209,54.209209,22834343
2,2004-08-23,55.430431,56.796799,54.579578,54.754753,54.754753,18256126
3,2004-08-24,55.675674,55.855858,51.836838,52.487488,52.487488,15247337
4,2004-08-25,52.532532,54.054054,51.991993,53.053055,53.053055,9188602


In [2]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Select the 'Close' price for forecasting
data = df['Close'].values.reshape(-1, 1)

# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Function to create sequences for LSTM
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

# Define sequence length
seq_length = 60 # Using 60 days of data to predict the next day

# Create sequences
X, y = create_sequences(scaled_data, seq_length)

# Reshape X for LSTM [samples, time steps, features]
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Split data into training and testing sets
# Using a simple split for now, time series cross-validation would be more robust
train_size = int(len(X) * 0.8)
X_train, X_test = X[0:train_size,:], X[train_size:len(X),:]
y_train, y_test = y[0:train_size], y[train_size:len(y)]

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (3496, 60, 1)
y_train shape: (3496,)
X_test shape: (875, 60, 1)
y_test shape: (875,)


### Build and Train LSTM Model

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Build the LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(seq_length, 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=25, batch_size=32, validation_split=0.1)

model.summary()

  super().__init__(**kwargs)


Epoch 1/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 71ms/step - loss: 0.0017 - val_loss: 1.6101e-04
Epoch 2/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 54ms/step - loss: 1.3423e-04 - val_loss: 1.4993e-04
Epoch 3/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 86ms/step - loss: 1.0608e-04 - val_loss: 1.4148e-04
Epoch 4/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 55ms/step - loss: 9.3540e-05 - val_loss: 1.1976e-04
Epoch 5/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 73ms/step - loss: 7.1314e-05 - val_loss: 1.1877e-04
Epoch 6/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 54ms/step - loss: 6.3515e-05 - val_loss: 1.2762e-04
Epoch 7/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 58ms/step - loss: 6.7571e-05 - val_loss: 1.1240e-04
Epoch 8/25
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 55ms/step - loss: 6.0828e-05 - val_lo

### Evaluate Model

In [4]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

# Make predictions
predictions = model.predict(X_test)

# Inverse transform predictions and actual values to original scale
predictions = scaler.inverse_transform(predictions)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

# Evaluate the model
rmse = math.sqrt(mean_squared_error(y_test_actual, predictions))
mae = mean_absolute_error(y_test_actual, predictions)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")

# Note: F1 score is not appropriate for regression tasks like this.
# RMSE and MAE are common metrics for time series forecasting.

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step
RMSE: 102.73480301581452
MAE: 85.30139334158481


### Prepare for Visualization

In [5]:
# Create a DataFrame to store actual and predicted values for visualization
train_data = scaled_data[:train_size + seq_length]
valid_data = scaled_data[train_size:]

train_df = df[:train_size].copy()
valid_df = df[train_size + seq_length:].copy() # Adjusted index for sequence length


# Add predictions to the DataFrame
valid_df['Predictions'] = predictions

# Display the head of the DataFrame with predictions
display(valid_df.head())

# You can now use this `valid_df` to create visualizations in Streamlit or log to Weights & Biases

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Predictions
3556,2018-10-03,1212.0,1214.180054,1202.150024,1211.530029,1211.530029,1312300,1152.226929
3557,2018-10-04,1205.030029,1205.900024,1163.849976,1177.069946,1177.069946,2328800,1155.801025
3558,2018-10-05,1176.0,1182.0,1154.319946,1167.829956,1167.829956,1592600,1155.002686
3559,2018-10-08,1160.0,1175.859985,1135.400024,1155.920044,1155.920044,2309500,1150.833374
3560,2018-10-09,1151.310059,1161.550049,1144.170044,1145.170044,1145.170044,1684500,1143.938965


# Task
Create a forecasting model for Google stock data ("GOOGL.csv") using an LSTM or RNN. The solution should include data cleaning, splitting data into training and testing sets, model training, and evaluation using metrics like F1 score (or appropriate regression metrics like RMSE/MAE). Additionally, develop a Streamlit dashboard that visualizes historical stock data as a candlestick chart (with red/green for price movements) and the model's predictions as a line chart. The dashboard should allow users to select time ranges (days, months, years) for both historical trends and predictions and incorporate logic for stock market holidays and trading hours. The entire process should run as a single flow.

## Incorporate holiday and trading hour logic

### Subtask:
Research and incorporate logic to handle stock market holidays and potentially trading hours for more accurate future predictions and visualizations. This might involve creating a calendar or using a library to identify non-trading days.


**Reasoning**:
Research and incorporate logic to handle stock market holidays by identifying holidays and marking them in the DataFrame. Trading hour logic is not strictly necessary for daily data.



In [6]:
import holidays

# Research stock market holidays for the US
us_holidays = holidays.US()

# Create a new column in the DataFrame to mark holidays
df['Date'] = pd.to_datetime(df['Date'])
df['Is_Holiday'] = df['Date'].apply(lambda date: date in us_holidays)

# Display the head of the DataFrame with the new 'Is_Holiday' column
display(df.head())

# Note: For this daily dataset, filtering out holidays might not be necessary
# for the model training itself since the data already excludes non-trading days.
# However, this logic is useful for visualization purposes and would be crucial
# for handling higher frequency data.

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Is_Holiday
0,2004-08-19,50.050049,52.082081,48.028027,50.220219,50.220219,44659096,False
1,2004-08-20,50.555557,54.594597,50.300301,54.209209,54.209209,22834343,False
2,2004-08-23,55.430431,56.796799,54.579578,54.754753,54.754753,18256126,False
3,2004-08-24,55.675674,55.855858,51.836838,52.487488,52.487488,15247337,False
4,2004-08-25,52.532532,54.054054,51.991993,53.053055,53.053055,9188602,False


## Generate data for candlestick chart

### Subtask:
Prepare the data in a format suitable for generating a candlestick chart, which requires Open, High, Low, and Close prices for each day.


**Reasoning**:
Create a new DataFrame for the candlestick chart by selecting the required columns from the original DataFrame, setting the 'Date' column as the index, sorting by index, and displaying the head.



In [7]:
# Create a new DataFrame for the candlestick chart
candlestick_df = df[['Date', 'Open', 'High', 'Low', 'Close']].copy()

# Set the 'Date' column as the index
candlestick_df = candlestick_df.set_index('Date')

# Sort the DataFrame by the 'Date' index
candlestick_df = candlestick_df.sort_index()

# Display the first few rows of the prepared candlestick DataFrame
display(candlestick_df.head())

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-08-19,50.050049,52.082081,48.028027,50.220219
2004-08-20,50.555557,54.594597,50.300301,54.209209
2004-08-23,55.430431,56.796799,54.579578,54.754753
2004-08-24,55.675674,55.855858,51.836838,52.487488
2004-08-25,52.532532,54.054054,51.991993,53.053055


## Develop streamlit dashboard

### Subtask:
Create the Streamlit application script.


**Reasoning**:
Set up the basic Streamlit application structure, including importing necessary libraries and adding a title.



In [8]:
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import datetime

st.title('Google Stock Price Forecasting Dashboard')

ModuleNotFoundError: No module named 'streamlit'

**Reasoning**:
Install the missing streamlit library using pip.



In [9]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m54.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m105.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.51.0


**Reasoning**:
Set up the basic Streamlit application structure, including importing necessary libraries and adding a title.



In [10]:
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import datetime

st.title('Google Stock Price Forecasting Dashboard')

2025-10-30 11:05:39.739 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


DeltaGenerator()

**Reasoning**:
Assume the dataframes are available and add a section to display the historical data.



In [11]:
# Assume df, valid_df, and candlestick_df are available from previous steps

st.header('Historical Stock Data')

# Display the historical data DataFrame
st.dataframe(df)

# Add a date range selector for historical data
date_range_hist = st.date_input("Select date range for historical data:",
                                [df['Date'].min(), df['Date'].max()])

if len(date_range_hist) == 2:
    start_date_hist = date_range_hist[0]
    end_date_hist = date_range_hist[1]

    filtered_hist_df = df[(df['Date'] >= pd.to_datetime(start_date_hist)) &
                          (df['Date'] <= pd.to_datetime(end_date_hist))].copy()

    # Display filtered historical data
    st.subheader(f'Historical Data from {start_date_hist.strftime("%Y-%m-%d")} to {end_date_hist.strftime("%Y-%m-%d")}')
    st.dataframe(filtered_hist_df)

    # Display candlestick chart for the selected historical range
    st.subheader('Candlestick Chart')
    fig_candlestick = go.Figure(data=[go.Candlestick(x=filtered_hist_df['Date'],
                                                    open=filtered_hist_df['Open'],
                                                    high=filtered_hist_df['High'],
                                                    low=filtered_hist_df['Low'],
                                                    close=filtered_hist_df['Close'])])

    fig_candlestick.update_layout(title='Candlestick Chart',
                                  xaxis_title='Date',
                                  yaxis_title='Price',
                                  xaxis_rangeslider_visible=False)
    st.plotly_chart(fig_candlestick)




**Reasoning**:
Add a section to display the model's predictions and actual values, including a date range selector and a line chart.



In [12]:
st.header('Stock Price Predictions')

# Display the predictions DataFrame
st.dataframe(valid_df)

# Add a date range selector for predictions
date_range_pred = st.date_input("Select date range for predictions:",
                                [valid_df['Date'].min(), valid_df['Date'].max()])

if len(date_range_pred) == 2:
    start_date_pred = date_range_pred[0]
    end_date_pred = date_range_pred[1]

    filtered_pred_df = valid_df[(valid_df['Date'] >= pd.to_datetime(start_date_pred)) &
                                (valid_df['Date'] <= pd.to_datetime(end_date_pred))].copy()

    # Display filtered predictions
    st.subheader(f'Predictions from {start_date_pred.strftime("%Y-%m-%d")} to {end_date_pred.strftime("%Y-%m-%d")}')
    st.dataframe(filtered_pred_df)

    # Display prediction line chart
    st.subheader('Actual vs Predicted Prices')
    fig_pred = go.Figure()
    fig_pred.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Close'], mode='lines', name='Actual Price'))
    fig_pred.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Predictions'], mode='lines', name='Predicted Price'))

    fig_pred.update_layout(title='Actual vs Predicted Prices',
                           xaxis_title='Date',
                           yaxis_title='Price')
    st.plotly_chart(fig_pred)



TypeError: '>=' not supported between instances of 'str' and 'Timestamp'

**Reasoning**:
Convert the 'Date' column in `valid_df` to datetime objects to resolve the TypeError when comparing with Timestamp objects.



In [13]:
valid_df['Date'] = pd.to_datetime(valid_df['Date'])

st.header('Stock Price Predictions')

# Display the predictions DataFrame
st.dataframe(valid_df)

# Add a date range selector for predictions
date_range_pred = st.date_input("Select date range for predictions:",
                                [valid_df['Date'].min(), valid_df['Date'].max()])

if len(date_range_pred) == 2:
    start_date_pred = date_range_pred[0]
    end_date_pred = date_range_pred[1]

    filtered_pred_df = valid_df[(valid_df['Date'] >= pd.to_datetime(start_date_pred)) &
                                (valid_df['Date'] <= pd.to_datetime(end_date_pred))].copy()

    # Display filtered predictions
    st.subheader(f'Predictions from {start_date_pred.strftime("%Y-%m-%d")} to {end_date_pred.strftime("%Y-%m-%d")}')
    st.dataframe(filtered_pred_df)

    # Display prediction line chart
    st.subheader('Actual vs Predicted Prices')
    fig_pred = go.Figure()
    fig_pred.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Close'], mode='lines', name='Actual Price'))
    fig_pred.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Predictions'], mode='lines', name='Predicted Price'))

    fig_pred.update_layout(title='Actual vs Predicted Prices',
                           xaxis_title='Date',
                           yaxis_title='Price')
    st.plotly_chart(fig_pred)



## Implement interactive controls

### Subtask:
Add interactive elements in the Streamlit dashboard for selecting time ranges (days, months, years) for both historical trends and predictions.


**Reasoning**:
The Streamlit script needs to be updated to include the date range selector for the historical data and ensure both historical and prediction date range selectors are functional and filter the data correctly.



In [14]:
import streamlit as st
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime

st.header('Stock Price Forecasting Dashboard')

# Convert 'Date' columns to datetime objects if not already done
# This was done for valid_df in the previous step, doing it for candlestick_df here
candlestick_df['Date'] = candlestick_df.index
candlestick_df['Date'] = pd.to_datetime(candlestick_df['Date'])


# Display the historical data DataFrame
st.subheader('Historical Stock Data')
st.dataframe(candlestick_df)

# Add a date range selector for historical data
date_range_hist = st.date_input("Select date range for historical data:",
                                [candlestick_df['Date'].min(), candlestick_df['Date'].max()])

filtered_hist_df = candlestick_df.copy()
if len(date_range_hist) == 2:
    start_date_hist = date_range_hist[0]
    end_date_hist = date_range_hist[1]

    filtered_hist_df = candlestick_df[(candlestick_df['Date'] >= pd.to_datetime(start_date_hist)) &
                                      (candlestick_df['Date'] <= pd.to_datetime(end_date_hist))].copy()


# Display historical candlestick chart
st.subheader('Historical Candlestick Chart')
fig_hist = go.Figure(data=[go.Candlestick(x=filtered_hist_df['Date'],
                                          open=filtered_hist_df['Open'],
                                          high=filtered_hist_df['High'],
                                          low=filtered_hist_df['Low'],
                                          close=filtered_hist_df['Close'])])

fig_hist.update_layout(title='Historical Stock Price Candlestick Chart',
                       xaxis_title='Date',
                       yaxis_title='Price')
st.plotly_chart(fig_hist)


# Display the predictions DataFrame
st.subheader('Stock Price Predictions')
st.dataframe(valid_df)

# Add a date range selector for predictions
date_range_pred = st.date_input("Select date range for predictions:",
                                [valid_df['Date'].min(), valid_df['Date'].max()])

filtered_pred_df = valid_df.copy()
if len(date_range_pred) == 2:
    start_date_pred = date_range_pred[0]
    end_date_pred = date_range_pred[1]

    filtered_pred_df = valid_df[(valid_df['Date'] >= pd.to_datetime(start_date_pred)) &
                                (valid_df['Date'] <= pd.to_datetime(end_date_pred))].copy()

    # Display filtered predictions
    st.subheader(f'Predictions from {start_date_pred.strftime("%Y-%m-%d")} to {end_date_pred.strftime("%Y-%m-%d")}')
    st.dataframe(filtered_pred_df)

    # Display prediction line chart
    st.subheader('Actual vs Predicted Prices')
    fig_pred = go.Figure()
    fig_pred.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Close'], mode='lines', name='Actual Price'))
    fig_pred.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Predictions'], mode='lines', name='Predicted Price'))

    fig_pred.update_layout(title='Actual vs Predicted Prices',
                           xaxis_title='Date',
                           yaxis_title='Price')
    st.plotly_chart(fig_pred)



## Visualize historical and predicted data

### Subtask:
Display the historical stock data as a candlestick chart and overlay the model's predictions on a line chart within the Streamlit dashboard. Use appropriate colors (red/green) for price movements in the candlestick chart.


**Reasoning**:
Generate the code for the Streamlit script to display the historical candlestick chart and the predictions line chart with the required configurations.



In [15]:
import streamlit as st
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime

st.header('Stock Price Forecasting Dashboard')

# Convert 'Date' columns to datetime objects if not already done
# This was done for valid_df and candlestick_df in the previous step, ensuring consistency
candlestick_df['Date'] = candlestick_df.index
candlestick_df['Date'] = pd.to_datetime(candlestick_df['Date'])
valid_df['Date'] = pd.to_datetime(valid_df['Date'])


# Display the historical data DataFrame (Optional, for debugging)
# st.subheader('Historical Stock Data')
# st.dataframe(candlestick_df)

# Add a date range selector for historical data
st.subheader('Historical Stock Data and Predictions')
date_range_hist = st.date_input("Select date range for historical data and predictions:",
                                [candlestick_df['Date'].min(), candlestick_df['Date'].max()])

filtered_hist_df = candlestick_df.copy()
filtered_pred_df = valid_df.copy()

if len(date_range_hist) == 2:
    start_date_hist = date_range_hist[0]
    end_date_hist = date_range_hist[1]

    filtered_hist_df = candlestick_df[(candlestick_df['Date'] >= pd.to_datetime(start_date_hist)) &
                                      (candlestick_df['Date'] <= pd.to_datetime(end_date_hist))].copy()

    # Filter predictions based on the selected historical date range as well for combined visualization
    filtered_pred_df = valid_df[(valid_df['Date'] >= pd.to_datetime(start_date_hist)) &
                                (valid_df['Date'] <= pd.to_datetime(end_date_hist))].copy()


# Create the main figure
fig = go.Figure()

# Add the candlestick chart for historical data
fig.add_trace(go.Candlestick(x=filtered_hist_df['Date'],
                             open=filtered_hist_df['Open'],
                             high=filtered_hist_df['High'],
                             low=filtered_hist_df['Low'],
                             close=filtered_hist_df['Close'],
                             name='Historical'))

# Add the line chart for actual and predicted prices
fig.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Close'], mode='lines', name='Actual Price'))
fig.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Predictions'], mode='lines', name='Predicted Price'))


# Update layout for combined chart
fig.update_layout(title='Stock Price Historical Data and Predictions',
                  xaxis_title='Date',
                  yaxis_title='Price',
                  xaxis_rangeslider_visible=False) # Hide the range slider for clarity

st.plotly_chart(fig)

# Display filtered predictions (Optional, for debugging)
# st.subheader(f'Filtered Predictions from {filtered_pred_df["Date"].min().strftime("%Y-%m-%d")} to {filtered_pred_df["Date"].max().strftime("%Y-%m-%d")}')
# st.dataframe(filtered_pred_df)



DeltaGenerator()

## Display model evaluation metrics

### Subtask:
Show the RMSE and MAE values on the Streamlit dashboard.


**Reasoning**:
Add a new section to the Streamlit script to display the model evaluation metrics RMSE and MAE.



In [16]:
# Add a new section for model evaluation metrics
st.subheader("Model Evaluation Metrics")

# Display RMSE and MAE values
st.write(f"RMSE: {rmse}")
st.write(f"MAE: {mae}")



## Run streamlit app

### Subtask:
Provide instructions on how to run the Streamlit application in Colab.


**Reasoning**:
Write the Streamlit application script to a file named `app.py` so it can be executed.



In [17]:
%%writefile app.py
import streamlit as st
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime
import holidays
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('/content/GOOGL.csv')

# Incorporate holiday logic
us_holidays = holidays.US()
df['Date'] = pd.to_datetime(df['Date'])
df['Is_Holiday'] = df['Date'].apply(lambda date: date in us_holidays)

# Prepare data for candlestick chart
candlestick_df = df[['Date', 'Open', 'High', 'Low', 'Close']].copy()
candlestick_df['Date'] = pd.to_datetime(candlestick_df['Date'])

# Select the 'Close' price for forecasting
data = df['Close'].values.reshape(-1, 1)

# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Function to create sequences for LSTM
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

# Define sequence length
seq_length = 60 # Using 60 days of data to predict the next day

# Create sequences
X, y = create_sequences(scaled_data, seq_length)

# Reshape X for LSTM [samples, time steps, features]
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Split data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[0:train_size,:], X[train_size:len(X),:]
y_train, y_test = y[0:train_size], y[train_size:len(y)]

# Build the LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=25, batch_size=32, validation_split=0.1, verbose=0)

# Make predictions
predictions = model.predict(X_test, verbose=0)

# Inverse transform predictions and actual values to original scale
predictions = scaler.inverse_transform(predictions)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

# Evaluate the model
rmse = math.sqrt(mean_squared_error(y_test_actual, predictions))
mae = mean_absolute_error(y_test_actual, predictions)

# Prepare data for visualization
train_data = scaled_data[:train_size + seq_length]
valid_data = scaled_data[train_size:]

valid_df = df[train_size + seq_length:].copy()
valid_df['Predictions'] = predictions


# Streamlit App
st.header('Stock Price Forecasting Dashboard')

# Add a date range selector for historical data and predictions
st.subheader('Historical Stock Data and Predictions')
date_range_hist = st.date_input("Select date range for historical data and predictions:",
                                [candlestick_df['Date'].min().date(), candlestick_df['Date'].max().date()])

filtered_hist_df = candlestick_df.copy()
filtered_pred_df = valid_df.copy()

if len(date_range_hist) == 2:
    start_date_hist = date_range_hist[0]
    end_date_hist = date_range_hist[1]

    filtered_hist_df = candlestick_df[(candlestick_df['Date'] >= pd.to_datetime(start_date_hist)) &
                                      (candlestick_df['Date'] <= pd.to_datetime(end_date_hist))].copy()

    # Filter predictions based on the selected historical date range as well for combined visualization
    filtered_pred_df = valid_df[(valid_df['Date'] >= pd.to_datetime(start_date_hist)) &
                                (valid_df['Date'] <= pd.to_datetime(end_date_hist))].copy()


# Create the main figure
fig = go.Figure()

# Add the candlestick chart for historical data
fig.add_trace(go.Candlestick(x=filtered_hist_df['Date'],
                             open=filtered_hist_df['Open'],
                             high=filtered_hist_df['High'],
                             low=filtered_hist_df['Low'],
                             close=filtered_hist_df['Close'],
                             name='Historical'))

# Add the line chart for actual and predicted prices
fig.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Close'], mode='lines', name='Actual Price'))
fig.add_trace(go.Scatter(x=filtered_pred_df['Date'], y=filtered_pred_df['Predictions'], mode='lines', name='Predicted Price'))


# Update layout for combined chart
fig.update_layout(title='Stock Price Historical Data and Predictions',
                  xaxis_title='Date',
                  yaxis_title='Price',
                  xaxis_rangeslider_visible=False) # Hide the range slider for clarity

st.plotly_chart(fig)

# Add a new section for model evaluation metrics
st.subheader("Model Evaluation Metrics")

# Display RMSE and MAE values
st.write(f"RMSE: {rmse}")
st.write(f"MAE: {mae}")

Writing app.py


**Reasoning**:
Run the Streamlit application using the command `streamlit run app.py`.



In [18]:
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K[1G[0JNeed to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) [20G[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://104.199.243.89:8501[0m
[0m
[34m  Stopping...[0m
^C


# Task
Develop a Streamlit dashboard for predicting Google stock prices using an LSTM model. The dashboard should visualize historical data with a candlestick chart (showing daily trends with red/green colors), display predicted prices, allow users to select time ranges for both historical viewing and prediction, incorporate logic for stock market holidays and trading hours, show model evaluation metrics (RMSE, MAE), save the trained model and scaler as pickle files, and generate a `requirements.txt` file. The dashboard should be runnable in Colab and suitable for deployment on Streamlit Cloud. Use the provided "GOOGL.csv" file for training and evaluation.

## Save model and scaler

### Subtask:
Save the trained LSTM model and the MinMaxScaler object to files (e.g., using joblib or pickle).


**Reasoning**:
Save the trained model and scaler using joblib.



In [19]:
import joblib

# Save the trained LSTM model
joblib.dump(model, 'lstm_model.joblib')

# Save the fitted MinMaxScaler object
joblib.dump(scaler, 'scaler.joblib')

print("Model and scaler saved successfully.")

Model and scaler saved successfully.


## Generate requirements.txt

### Subtask:
Create a `requirements.txt` file listing all the necessary libraries and their versions for the Streamlit application.


**Reasoning**:
Generate the requirements.txt file by listing installed packages and their versions.



In [20]:
!pip freeze > requirements.txt

## Summary:

## Data Analysis Key Findings

*   The trained LSTM model was successfully saved as `lstm_model.joblib`.
*   The fitted `MinMaxScaler` object was successfully saved as `scaler.joblib`.
*   A `requirements.txt` file containing all installed Python packages and their versions was successfully generated using `pip freeze`.

## Insights or Next Steps

*   These saved files (`lstm_model.joblib`, `scaler.joblib`, and `requirements.txt`) are essential for deploying the Streamlit application, allowing the model and scaler to be loaded without retraining and ensuring dependency consistency.
*   The next step is to integrate these components into a Streamlit application, building the user interface and logic for historical data visualization, prediction, and evaluation metric display.
