In [None]:
# Import necessary libraries
import signal
import streamlit as st
import yfinance as yf
import pandas as pd
import numpy as np
import pywt
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Dense, Dropout

In [None]:
# Date window for the price download: a fixed start date and the current
# local date, both written as "YYYY-MM-DD" strings.
START = "2015-01-01"
TODAY = pd.Timestamp.now().date().isoformat()

# Page heading
st.title('Stock Forecast App')

# Optional CSV upload; the result is compared against None further down to
# decide between the uploaded file and a yfinance download.
uploaded_file = st.file_uploader(
    "Upload a CSV file with your own data:",
    type=["csv"],
    key="file_uploader_1",
)

In [None]:
# Ticker symbols offered in the stock dropdown.
# Add more stocks as needed; repeats are harmless because the list is
# de-duplicated right after the literal.
# NOTE(review): some of these symbols may no longer be listed — verify against
# the data provider before relying on them.
stocks = [
    'TSLA', 'AAPL', 'GOOGL', 'AMZN', 'MSFT', 'META', 'NVDA', 'NFLX', 'INTC', 'AMD',
    'BABA', 'DIS', 'V', 'MA', 'PG', 'KO', 'PEP', 'WMT', 'UNH', 'CVX', 'XOM',
    'GE', 'BA', 'IBM', 'ORCL', 'GS', 'SPY', 'QQQ', 'DIA', 'IWM', 'SLV',
    'GOLD', 'C', 'JPM', 'WFC', 'MS', 'T', 'VZ', 'TGT', 'HD', 'LOW', 'MCD',
    'NKE', 'SBUX', 'LULU', 'PYPL', 'AMD', 'SHOP', 'PFE', 'JNJ', 'MRK', 'BMY',
    'ABT', 'LLY', 'AMT', 'BNS', 'TSM', 'ZM', 'SQ', 'TWTR', 'CRM', 'ATVI',
    'MMM', 'AXP', 'BA', 'CAT', 'CSCO', 'KO', 'PEP', 'LMT', 'RTX', 'GD',
    'UNP', 'COST', 'HD', 'LOW', 'PFE', 'MRK', 'BAX', 'GILD', 'VRTX', 'REGN',
    'AMGN', 'VRTX', 'BIIB', 'VRTX', 'ISRG', 'SYK', 'BSX', 'MDT', 'HOLX',
    'EW', 'TMO', 'STE', 'SYY', 'MCK', 'CVS', 'HCA', 'BMY', 'LLY', 'AMGN',
    'DUK', 'SO', 'NEE', 'EXC', 'XEL', 'CNP', 'PPL', 'WEC', 'AES', 'NI',
    'FE', 'AEP', 'NEM', 'F', 'GM', 'RCL', 'LUV', 'UAL', 'DAL', 'AAL',
    'MAR', 'WYNN', 'MGM', 'CCL', 'NCLH', 'QCOM', 'CSX', 'NFLX', 'ETSY',
    'NOW', 'ZS', 'DT', 'VEEV', 'WDAY', 'SQ', 'FISV', 'ADBE', 'INTU', 'EBAY',
    'NVDA', 'IBM', 'T', 'VZ', 'DELL', 'AVGO', 'INTC', 'TXN', 'MRVL', 'NXP',
    'AMAT', 'LRCX', 'KLAC', 'LNG', 'WMB', 'TRGP', 'ET', 'PSX', 'COP', 'CVX',
    'HAL', 'SLB', 'OXY', 'EOG', 'PXD', 'DVN', 'CHK', 'APA', 'RIG', 'BKR',
    'GE', 'BA', 'LMT', 'NOC', 'RTX', 'GD', 'HII', 'KBR', 'FLIR', 'BA',
    'BNTX', 'MRK', 'PFE', 'JNJ', 'ABT', 'AMGN', 'BMY', 'LLY', 'REGN', 'GILD',
    'EXEL', 'VRTX', 'GSK', 'AZN', 'SNY', 'RHHBY', 'BIIB', 'VRTX', 'ISRG', 'SYK',
    'BSX', 'MDT', 'HOLX', 'EW', 'TMO', 'STE', 'SYY', 'MCK', 'CVS', 'HCA',
    'WBA', 'DGX', 'LH', 'ZTS', 'MRK', 'BAX', 'XRX', 'PNC', 'C', 'WFC',
    'JPM', 'GS', 'MS', 'TFC', 'USB', 'FRC', 'KEY', 'HIG', 'PNC', 'BMO',
    'RBC', 'TD', 'DB', 'CS', 'UBS', 'MS', 'BNS', 'CIBC', 'ET', 'PSX', 'MPC',
    'EOG', 'RDS-A', 'XOM', 'COP', 'CVX', 'SLB', 'HAL', 'PXD', 'OXY', 'DVN',
    'APA', 'CHK', 'RIG', 'BKR', 'NG', 'NEX', 'NEE', 'PPL', 'DUK', 'SRE',
    'EXC', 'XEL', 'AES', 'LNT', 'NI', 'SCG', 'WEC', 'FE', 'PPL', 'WEC',
    'NEM', 'AEM', 'GFI', 'LUN', 'ABX', 'KGC', 'DRD', 'XME', 'SLV', 'GDX',
    'SAIL', 'VALE', 'STT', 'MT', 'BX', 'KKR', 'C', 'TGT', 'WMT', 'KSS',
    'SBUX', 'QSR', 'MCD', 'YUM', 'NKE', 'LULU', 'UA', 'VFC', 'HBI', 'CROX',
    'RL', 'PUMA', 'ADDYY', 'GPS', 'TCB', 'KMX', 'CAR', 'AA', 'TSN', 'ADM',
    'NSC', 'CSX', 'UNP', 'KSU', 'JBHT', 'GWW', 'PNR', 'SWK', 'PH', 'ZBRA',
    'ADHI', 'CMS', 'CNP', 'AEE', 'AWK', 'XEL', 'AES', 'DTE', 'ED', 'PPL',
    'FISV', 'ADBE', 'INTU', 'MSFT', 'V', 'PYPL', 'SQ', 'TSM', 'LRCX', 'AVGO',
    'TXN', 'MRVL', 'NVDA', 'QCOM', 'AMD', 'INTC', 'MU', 'WDC', 'SWKS', 'XLNX'
]

# The literal above repeats many symbols (e.g. 'AMD', 'BA', 'VRTX').
# dict.fromkeys() keeps only the first occurrence of each one, in order, so
# every ticker appears exactly once in the dropdown.
stocks = list(dict.fromkeys(stocks))

# Stock picker; the chosen ticker is echoed back on the page.
selected_stock = st.selectbox('Select a stock:', stocks)
st.write(f"You selected: {selected_stock}")

# CSS override for the tags that represent selected multiselect items.
multiselect_tag_css = """
    <style>
    /* Change background color of selected items in multiselect */
    .stMultiSelect [data-baseweb="tag"] {
        background-color: #D3D3D3 !important;  /* Change to your desired color */
        color: white !important;
    }
    </style>
    """
st.markdown(multiselect_tag_css, unsafe_allow_html=True)

# Model picker: every model is pre-selected. The labels double as the keys of
# the `model_builders` dictionary defined further down, so they must match it.
models = ('LSTM', 'GRU', 'RNN', 'HOON')
selected_models = st.multiselect(
    'Select models:',
    models,
    default=models,
    key="model_selection",
)

# Load data function
@st.cache_data
def load_data(ticker):
    """Download the price history for ``ticker`` between START and TODAY.

    Args:
        ticker: Symbol (or symbols) forwarded unchanged to ``yf.download``.

    Returns:
        The downloaded DataFrame with its index moved into a regular column
        (callers read that column as 'Date' — presumably the name yfinance
        gives its date index; verify). The frame may be empty when the
        download yields nothing.
    """
    data = yf.download(ticker, start=START, end=TODAY)

    # Newer yfinance releases can return two-level (field, ticker) columns even
    # for a single symbol. When the second level carries just one value it adds
    # no information, so keep only the field names ('Close', 'Open', ...).
    # NOTE(review): assumes level 0 holds the field names — confirm for the
    # pinned yfinance version.
    columns = data.columns
    if (
        isinstance(columns, pd.MultiIndex)
        and columns.nlevels == 2
        and columns.get_level_values(1).nunique() <= 1
    ):
        data.columns = columns.get_level_values(0)

    # reset_index() returns a new frame instead of mutating the download.
    return data.reset_index()

# Load the price table from the uploaded CSV when one is provided, otherwise
# download it for the selected ticker.
data_load_state = st.text('Loading data...')
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)

    # The rest of the script reads the 'Date' and 'Close' columns; stop with a
    # readable message instead of a KeyError when the upload lacks them.
    missing_columns = [name for name in ('Date', 'Close') if name not in data.columns]
    if missing_columns:
        st.error(
            "The uploaded file is missing required column(s): "
            + ", ".join(missing_columns)
        )
        st.stop()

    data['Date'] = pd.to_datetime(data['Date'])
else:
    data = load_data(selected_stock)

# Nothing to model without rows (e.g. a download that returned no data).
if data.empty:
    st.error('No data is available for the current selection.')
    st.stop()

data_load_state.text('Loading data... done!')

# Check for missing values
if data.isnull().values.any():
    st.warning('The dataset contains missing values. Here are the details:')
    st.write(data.isnull().sum())

    # Render both buttons on every run so neither disappears while the other
    # is being evaluated; dropping takes precedence over filling.
    drop_clicked = st.button('Drop rows with missing values')
    fill_clicked = st.button('Fill missing values with forward fill')
    if drop_clicked:
        data = data.dropna()
        st.success('Rows with missing values have been dropped.')
    elif fill_clicked:
        # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
        data = data.ffill()
        st.success('Missing values have been filled using forward fill.')

# Check for duplicate entries
if data.duplicated().any():
    st.warning('The dataset contains duplicate entries. Here are the details:')
    st.write(data[data.duplicated()])

    # Option to drop duplicate rows
    if st.button('Drop duplicate rows'):
        data = data.drop_duplicates()
        st.success('Duplicate rows have been dropped.')

st.subheader('Cleaning Data')
st.write(data.tail())

# Preprocessing for time series models: index the table by date.
data['Date'] = pd.to_datetime(data['Date'])
data = data.set_index('Date')

# Use the closing price for prediction, as a single-column 2-D array.
close_prices = data['Close'].values
close_prices = close_prices.reshape(-1, 1)

In [None]:
# Apply Wavelet Transform to Denoise Data
def denoise_signal(signal, wavelet='db4', level=3, threshold_mode='soft'):
    """
    Applies wavelet transform to denoise a signal.

    The signal is flattened, decomposed with ``pywt.wavedec``, every detail
    coefficient array is thresholded, and the result is rebuilt with
    ``pywt.waverec``.

    Args:
        signal: Original signal (array-like; flattened before processing).
        wavelet: Type of wavelet (default 'db4').
        level: Decomposition level.
        threshold_mode: Thresholding mode ('soft' or 'hard').
    Returns:
        Denoised signal as a 1-D numpy array with the same number of samples
        as the flattened input.
    """
    flat_signal = np.asarray(signal).flatten()
    coeffs = pywt.wavedec(flat_signal, wavelet, level=level)

    # Threshold derived from the median absolute value of the last detail
    # coefficients, scaled by sqrt(2 * ln(N)) for N input samples.
    threshold = np.median(np.abs(coeffs[-1])) / 0.6745 * np.sqrt(2 * np.log(len(flat_signal)))

    # Leave the approximation coefficients (coeffs[0]) untouched.
    coeffs[1:] = [pywt.threshold(c, value=threshold, mode=threshold_mode) for c in coeffs[1:]]

    # The reconstruction can come back one sample longer than an odd-length
    # input, so trim it to the original length.
    denoised = pywt.waverec(coeffs, wavelet)
    return denoised[:len(flat_signal)]

denoised_prices = denoise_signal(close_prices)

# Chart comparing the raw closing prices with their denoised version.
st.subheader("Original vs Denoised Data")

# Raw closing prices, drawn in blue.
original_trace = go.Scatter(
    x=data.index,
    y=close_prices.flatten(),
    mode='lines',
    name='Original Prices',
    line={'color': 'blue'},
)

# Denoised prices, drawn in red; the dates are cut to the denoised length.
denoised_trace = go.Scatter(
    x=data.index[:len(denoised_prices)],
    y=denoised_prices,
    mode='lines',
    name='Denoised Prices',
    line={'color': 'red'},
)

fig = go.Figure()
for trace in (original_trace, denoised_trace):
    fig.add_trace(trace)

# Render the chart
st.plotly_chart(fig)

# Scale the closing prices into the [0, 1] range.
# NOTE(review): the scaler is fitted on the raw close_prices, not on
# denoised_prices — confirm that the models are meant to see undenoised data.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(close_prices)

# Prepare the dataset for time series prediction
def create_dataset(data, time_step=1):
    """Slice a single-column series into sliding windows and next-step targets.

    Sample ``i`` uses rows ``i .. i + time_step - 1`` of column 0 as its
    features and row ``i + time_step`` as its target, so a series of ``n`` rows
    yields ``n - time_step`` samples (including the one whose target is the
    final row).

    Args:
        data: 2-D array-like; only column 0 is read.
        time_step: Number of consecutive rows in each feature window.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, time_step)`` and
        ``y`` has shape ``(n_samples,)``. When the series is too short for a
        single sample, both arrays are empty but keep those ranks.
    """
    values = np.asarray(data)
    n_samples = len(values) - time_step
    if n_samples <= 0:
        # Keep X two-dimensional so callers can still read X.shape[1].
        return np.empty((0, time_step)), np.empty((0,))

    X = np.array([values[start:start + time_step, 0] for start in range(n_samples)])
    y = np.array(values[time_step:, 0])
    return X, y

# Each sample is a window of 30 consecutive scaled prices.
time_step = 30
X, y = create_dataset(scaled_data, time_step)

# Two encodings of the same windows:
#   * a 3-D array with a trailing feature axis of size 1 for LSTM / GRU / RNN
#   * a degree-2 polynomial expansion (no bias column) for the 'HOON' model
X_lstm_gru_rnn = X.reshape((X.shape[0], X.shape[1], 1))
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_hoon = poly.fit_transform(X)

# Split without shuffling: the first 80% of the samples train the models and
# the remainder is held out for testing.
train_size = int(len(X) * 0.8)

X_train_lstm_gru_rnn = X_lstm_gru_rnn[:train_size]
X_test_lstm_gru_rnn = X_lstm_gru_rnn[train_size:]

X_train_hoon = X_hoon[:train_size]
X_test_hoon = X_hoon[train_size:]

y_train = y[:train_size]
y_test = y[train_size:]

# Functions to build each model
def _build_recurrent_model(layer_cls):
    """Build the recurrent stack shared by the LSTM, GRU and SimpleRNN models.

    Layers: two 50-unit ``layer_cls`` layers (the first returns sequences),
    each followed by Dropout(0.2), then Dense(25) and Dense(1). The input
    shape is ``(window_length, 1)``, with the window length read from the
    module-level ``X_train_lstm_gru_rnn`` when the function is called.

    Args:
        layer_cls: Recurrent layer class to instantiate (LSTM, GRU or SimpleRNN).

    Returns:
        An uncompiled ``Sequential`` model.
    """
    window_length = X_train_lstm_gru_rnn.shape[1]
    return Sequential([
        layer_cls(50, return_sequences=True, input_shape=(window_length, 1)),
        Dropout(0.2),
        layer_cls(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1)
    ])


def build_lstm_model():
    """Return the uncompiled two-layer LSTM model."""
    return _build_recurrent_model(LSTM)


def build_gru_model():
    """Return the uncompiled two-layer GRU model."""
    return _build_recurrent_model(GRU)


def build_rnn_model():
    """Return the uncompiled two-layer SimpleRNN model."""
    return _build_recurrent_model(SimpleRNN)

def build_honn_model(input_shape):
    """Build the dense network used for the polynomial-feature ('HOON') model.

    Layers, in order: Dense(128) with no explicit activation, Dropout(0.3),
    Dense(64, relu), Dropout(0.3), Dense(32, relu), Dense(1).

    Args:
        input_shape: Number of input features per sample.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    network = Sequential()
    network.add(Dense(128, input_shape=(input_shape,)))
    network.add(Dropout(0.3))
    network.add(Dense(64, activation='relu'))
    network.add(Dropout(0.3))
    network.add(Dense(32, activation='relu'))
    network.add(Dense(1))
    return network

def _build_hoon_model():
    """Build the 'HOON' model sized to the polynomial feature count (read at call time)."""
    return build_honn_model(X_train_hoon.shape[1])


# Maps each model label from the multiselect to a zero-argument factory.
model_builders = dict(
    LSTM=build_lstm_model,
    GRU=build_gru_model,
    RNN=build_rnn_model,
    HOON=_build_hoon_model,
)

# Per-model plotting arrays, keyed by model label.
all_predictions = {}

# Column-oriented accumulator for the evaluation table (one entry per model).
metrics = {column: [] for column in ('Model', 'MSE', 'MAE', 'RMSE')}

# Display a single header for the training and forecasting section
st.subheader('Training and Forecasting')

In [None]:
# Train, evaluate, and make predictions for each model

# Test targets converted back to the price scale. The predictions are
# inverse-transformed before scoring, so the targets must be too; otherwise
# the metrics compare prices against values in the scaler's 0-1 range.
y_test_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

# Row of `data` that the first test prediction refers to. Sample i is built
# from rows i .. i + time_step - 1 and targets row i + time_step, and the test
# samples start at sample index `train_size`.
test_start_idx = train_size + time_step

for model_name in selected_models:
    # Build, compile, and train model
    model = model_builders[model_name]()
    model.compile(optimizer='adam', loss='mean_squared_error')

    # 'HOON' consumes the polynomial features; the recurrent models consume
    # the 3-D windowed input.
    if model_name == 'HOON':
        train_inputs, test_inputs = X_train_hoon, X_test_hoon
    else:
        train_inputs, test_inputs = X_train_lstm_gru_rnn, X_test_lstm_gru_rnn

    model.fit(train_inputs, y_train, batch_size=32, epochs=20, verbose=0)

    # Predict, then map the outputs back to the price scale.
    train_predict = scaler.inverse_transform(model.predict(train_inputs).reshape(-1, 1))
    test_predict = scaler.inverse_transform(model.predict(test_inputs).reshape(-1, 1))

    # NaN-filled arrays as long as the price series, with the predictions
    # written at the rows they refer to so they line up with the dates.
    train_predict_plot = np.full(scaled_data.shape, np.nan)
    train_predict_plot[time_step:time_step + len(train_predict), :] = train_predict

    test_predict_plot = np.full(scaled_data.shape, np.nan)
    test_end_idx = min(test_start_idx + len(test_predict), len(scaled_data))
    if test_start_idx < test_end_idx:
        test_predict_plot[test_start_idx:test_end_idx, :] = test_predict[:test_end_idx - test_start_idx, :]

    all_predictions[model_name] = (train_predict_plot, test_predict_plot)

    # Calculate metrics on the price scale
    mse = mean_squared_error(y_test_prices, test_predict)
    mae = mean_absolute_error(y_test_prices, test_predict)
    rmse = np.sqrt(mse)

    metrics['Model'].append(model_name)
    metrics['MSE'].append(mse)
    metrics['MAE'].append(mae)
    metrics['RMSE'].append(rmse)

# Plot using plotly for interactive zoom
actual_prices = scaler.inverse_transform(scaled_data)
fig = go.Figure()

# Add actual prices as a line
fig.add_trace(go.Scatter(
    x=data.index, y=actual_prices.flatten(), mode='lines', name='Actual Prices', line=dict(color='blue')
))

# Add model predictions to the plot (only the test-period predictions are
# drawn; the training-period arrays are stored but not plotted)
for model_name, (_, test_plot) in all_predictions.items():
    fig.add_trace(go.Scatter(
        x=data.index, y=test_plot.flatten(),
        mode='lines', name=f'{model_name} Testing Predictions'
    ))

# Add a vertical line at the first test-prediction date. The position comes
# from the split itself, so it also works when no model was selected.
if test_start_idx < len(data.index):
    prediction_start_date = data.index[test_start_idx]
    fig.add_shape(
        type="line",
        x0=prediction_start_date,
        y0=actual_prices.min(),
        x1=prediction_start_date,
        y1=actual_prices.max()
    )

# Chart title, centred horizontally and anchored just below the top edge.
centered_title = {
    'text': "Stock Price Prediction Comparison",
    'x': 0.5,
    'xanchor': 'center',
    'y': 0.95,
    'yanchor': 'top',
}

# Customize layout
fig.update_layout(
    title=centered_title,
    xaxis_title="Date",
    yaxis_title="Price (USD)",
    template="plotly_white",
    showlegend=True,
)

# Display the interactive plot
st.plotly_chart(fig)

# Collect the per-model metrics into a DataFrame and show it as a table.
# (No ranking of the models is computed here; the table is shown as-is.)
metrics_df = pd.DataFrame(metrics)

st.subheader('Final Model Evaluation Metrics')
st.table(metrics_df)