In [2]:
# install required module
!pip install vaderSentiment


Defaulting to user installation because normal site-packages is not writeable
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 KB[0m [31m979.4 kB/s[0m eta [36m0:00:00[0m[36m0:00:01[0mm eta [36m0:00:01[0m
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [27]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sqlalchemy import create_engine
import mysql.connector

# Step 1: Apply sentiment analysis to the tweets data
analyzer = SentimentIntensityAnalyzer()

# Apply sentiment analysis and store results in a new column 'sentiment'
tweets_df['sentiment'] = tweets_df['tweet'].apply(lambda x: analyzer.polarity_scores(x)['compound'])

# Verify the sentiment column has been added
print(tweets_df[['tweet', 'sentiment']].head())
print(f"Columns in tweets dataframe: {tweets_df.columns}")

# Step 2: Store the DataFrame with sentiment results in a MySQL table
# Create the SQLAlchemy engine with a connection string
db_connection_str = 'mysql+mysqlconnector://root:Root%401234@localhost:3306/Stock_DB'

# Create engine using the connection string
engine = create_engine(db_connection_str)

# Store the DataFrame with sentiment results in a MySQL table
tweets_df.to_sql('tweets_with_sentiment', con=engine, if_exists='replace', index=False)

print("Sentiment data stored in MySQL database.")


                                               tweet  sentiment
0       $AMZN Dow futures up by 100 points already 🥳     0.3818
1  $TSLA Daddy's drinkin' eArly tonight! Here's t...     0.0000
2  $AAPL We’ll been riding since last December fr...     0.0000
3            $TSLA happy new year, 2020, everyone🍷🎉🙏     0.7506
4  $TSLA haha just a collection of greats..."Mars...     0.9371
Columns in tweets dataframe: Index(['id', 'date', 'ticker', 'tweet', 'sentiment'], dtype='object')
Sentiment data stored in MySQL database.


# Time-Series Forecasting

In [34]:
# Install required models if not done yet
!pip install statsmodels
!pip install tensorflow
!pip install scikit-learn
!pip install seaborn

import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import os
import glob
from sqlalchemy import create_engine
from sklearn.metrics import mean_squared_error

# Define paths to your CSV files
tweets_file = "stocktweet/stocktweet.csv"  # Replace with the path to your stock tweets CSV
stock_files_path = "stockprice/*.csv"  # Replace with the path to your stock prices folder

# MySQL connection string
db_connection_str = 'mysql+mysqlconnector://root:Root%401234@localhost:3306/Stock_DB'
engine = create_engine(db_connection_str)

# Step 1: Load tweets with sentiment from MySQL
tweets_df = pd.read_sql('SELECT * FROM tweets_with_sentiment', con=engine)

# Check if the 'sentiment' column exists
if 'sentiment' not in tweets_df.columns:
    raise KeyError("Sentiment column is missing in tweets dataframe")

# Convert the 'date' column in tweets_df to datetime
tweets_df['date'] = pd.to_datetime(tweets_df['date'], errors='coerce')

# Step 2: Load each stock price file and add ticker column
stock_dataframes = {}
for file in glob.glob(stock_files_path):
    ticker = os.path.basename(file).replace(".csv", "")
    df = pd.read_csv(file)
    df['ticker'] = ticker  # Add ticker column
    stock_dataframes[ticker] = df

# Clean stock data by ensuring the 'date' column is in datetime format
for ticker, stock_data in stock_dataframes.items():
    stock_data['date'] = pd.to_datetime(stock_data['Date'], errors='coerce')
    stock_data = stock_data[['date', 'Close', 'ticker']]  # Keep only necessary columns

    # Merge stock data with tweet sentiment data
    try:
        merged_data = pd.merge(stock_data, tweets_df[['date', 'sentiment', 'ticker']], how='left', on=['date', 'ticker'])
        stock_dataframes[ticker] = merged_data  # Update stock data with merged results
    except KeyError as e:
        print(f"KeyError while merging data for {ticker}: {e}")

# Step 3: Time series forecasting with ARIMA and LSTM

# Helper function to create and evaluate ARIMA model
def forecast_arima(df, steps=7):
    # Preparing the data for ARIMA model
    df = df[['date', 'Close']]
    df.set_index('date', inplace=True)
    df = df['Close'].dropna()
    
    # Fit ARIMA model
    model = ARIMA(df, order=(5, 1, 0))  # Adjust order based on your data
    model_fit = model.fit()

    # Forecasting
    forecast = model_fit.forecast(steps=steps)
    forecast_index = pd.date_range(df.index[-1] + timedelta(days=1), periods=steps, freq='D')
    forecast_df = pd.DataFrame(forecast, index=forecast_index, columns=['Forecast'])
    
    return forecast_df

## Helper function to create and evaluate LSTM model
def forecast_lstm(df, steps=7):
    # Normalize the data for LSTM
    df = df[['date', 'Close']]
    df.set_index('date', inplace=True)
    df = df['Close'].dropna()
    
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scaler.fit_transform(df.values.reshape(-1, 1))
    
    # Prepare data for LSTM
    X = []
    y = []
    for i in range(60, len(df_scaled)-steps):
        X.append(df_scaled[i-60:i, 0])
        y.append(df_scaled[i+steps-1, 0])
    X = np.array(X)
    y = np.array(y)
    
    # Reshaping input for LSTM [samples, time steps, features]
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    
    # Create the LSTM model
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)))
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model
    model.fit(X, y, epochs=20, batch_size=32)
    
    # Forecast for multiple days (steps)
    forecast_values = []
    last_60_days = df_scaled[-60:].reshape(1, -1)
    last_60_days = np.reshape(last_60_days, (last_60_days.shape[0], last_60_days.shape[1], 1))
    
    for _ in range(steps):
        forecast_scaled = model.predict(last_60_days)
        forecast_value = scaler.inverse_transform(forecast_scaled)[0, 0]
        forecast_values.append(forecast_value)
        
        # Update last_60_days with the latest forecasted value
        last_60_days = np.append(last_60_days[:, 1:, :], forecast_scaled.reshape(1, 1, 1), axis=1)
    
    forecast_df = pd.DataFrame(forecast_values, 
                               index=pd.date_range(df.index[-1] + timedelta(days=1), periods=steps, freq='D'), 
                               columns=['Forecast'])
    
    return forecast_df


# Step 4: Generate forecasts for each ticker

selected_tickers = list(stock_dataframes.keys())[:5]  # This limits to the first 5 stocks

for ticker in selected_tickers:
    stock_data = stock_dataframes[ticker]
    print(f"\nForecasting for {ticker} using ARIMA and LSTM:")
    
    # ARIMA forecast for 1 day, 3 days, and 7 days
    arima_forecast_1d = forecast_arima(stock_data, steps=1)
    arima_forecast_3d = forecast_arima(stock_data, steps=3)
    arima_forecast_7d = forecast_arima(stock_data, steps=7)
    
    print(f"ARIMA forecast for {ticker} (1 Day):")
    print(arima_forecast_1d)
    
    print(f"ARIMA forecast for {ticker} (3 Days):")
    print(arima_forecast_3d)
    
    print(f"ARIMA forecast for {ticker} (7 Days):")
    print(arima_forecast_7d)
    
    # LSTM forecast for 1 day, 3 days, and 7 days
    lstm_forecast_1d = forecast_lstm(stock_data, steps=1)
    lstm_forecast_3d = forecast_lstm(stock_data, steps=3)
    lstm_forecast_7d = forecast_lstm(stock_data, steps=7)
    
    print(f"LSTM forecast for {ticker} (1 Day):")
    print(lstm_forecast_1d)
    
    print(f"LSTM forecast for {ticker} (3 Days):")
    print(lstm_forecast_3d)
    
    print(f"LSTM forecast for {ticker} (7 Days):")
    print(lstm_forecast_7d)


Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable

Forecasting for META using ARIMA and LSTM:


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


ARIMA forecast for META (1 Day):
            Forecast
2021-01-01       NaN
ARIMA forecast for META (3 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
ARIMA forecast for META (7 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
2021-01-04      NaN
2021-01-05      NaN
2021-01-06      NaN
2021-01-07      NaN
Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 0.3841
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0256
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0108
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0164
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0061
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0066
Epoch 7/20
[1m7/7[0m [32m━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.2609
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0269
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0091
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0183
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0068
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0084
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0062
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0055
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0063
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0055
Epoch 11/20
[1m6/6[0m [32m━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 41ms/step - loss: 0.3044
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0255
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0118
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0158
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0093
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0090
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0072
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0063
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0063
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0070
Epoch 11/20
[1m6/6[0m [32m━

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


ARIMA forecast for UNH (1 Day):
            Forecast
2021-01-01       NaN
ARIMA forecast for UNH (3 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
ARIMA forecast for UNH (7 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
2021-01-04      NaN
2021-01-05      NaN
2021-01-06      NaN
2021-01-07      NaN
Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 0.1888
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step - loss: 0.0350
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0147
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0073
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0072
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0050
Epoch 7/20
[1m7/7[0m [32m━━━━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - loss: 0.3622
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 0.0272
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.0211
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.0145
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.0115
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.0077
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 0.0069
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.0056
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0057
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 0.0057
Epoch 11/20
[1m6/6[0m [32m━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.4261
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0352
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0273
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0180
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0122
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0088
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0069
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0082
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0081
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0072
Epoch 11/20
[1m6/6[0m [32m━

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


ARIMA forecast for NKE (1 Day):
            Forecast
2021-01-01       NaN
ARIMA forecast for NKE (3 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
ARIMA forecast for NKE (7 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
2021-01-04      NaN
2021-01-05      NaN
2021-01-06      NaN
2021-01-07      NaN
Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 0.3206
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0209
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0057
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0104
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0033
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0037
Epoch 7/20
[1m7/7[0m [32m━━━━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 0.2838
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.0262
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.0148
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 0.0158
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 0.0082
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.0067
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.0044
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.0039
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.0035
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0032
Epoch 11/20
[1m6/6[0m [32m━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.3343
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0381
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.0201
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 0.0147
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.0081
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.0068
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0051
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0054
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 0.0042
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.0040
Epoch 11/20
[1m6/6[0m [32m━

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


ARIMA forecast for CVX (1 Day):
            Forecast
2021-01-01       NaN
ARIMA forecast for CVX (3 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
ARIMA forecast for CVX (7 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
2021-01-04      NaN
2021-01-05      NaN
2021-01-06      NaN
2021-01-07      NaN
Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 0.1675
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0195
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0080
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0110
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0062
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0073
Epoch 7/20
[1m7/7[0m [32m━━━━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.1253
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0238
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0096
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0144
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0076
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0086
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0076
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0065
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0062
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0065
Epoch 11/20
[1m6/6[0m [32m━

  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.1976
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0224
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0148
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0143
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0094
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0095
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0094
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0081
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0092
Epoch 10/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0079
Epoch 11/20
[1m6/6[0m [32m━

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ARIMA forecast for AAPL (1 Day):
            Forecast
2021-01-01       NaN
ARIMA forecast for AAPL (3 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
ARIMA forecast for AAPL (7 Days):
           Forecast
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
2021-01-04      NaN
2021-01-05      NaN
2021-01-06      NaN
2021-01-07      NaN
Epoch 1/20


  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 0.0960
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0078
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0028
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0020
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0023
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0019
Epoch 7/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0020
Epoch 8/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0020
Epoch 9/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 0.0021
Epoch 10/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 0.0022
Epoch 11/2

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.1855
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - loss: 0.0124
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - loss: 0.0057
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - loss: 0.0029
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.0024
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.0026
Epoch 7/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 0.0025
Epoch 8/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.0022
Epoch 9/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - loss: 0.0025
Epoch 10/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.0022
Epoch 11/2

  super().__init__(**kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.0835
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0080
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0042
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0036
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0033
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0037
Epoch 7/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0034
Epoch 8/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0033
Epoch 9/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0039
Epoch 10/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0036
Epoch 11/2

# Dynamic Dashboard

In [37]:
import dash
from dash import dcc, html
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from dash.dependencies import Input, Output

# Assuming the stock dataframes are loaded properly and are available in `stock_dataframes`
# Example: stock_dataframes = {'AAPL': pd.DataFrame(...), 'GOOG': pd.DataFrame(...)}

# Sample placeholder functions for ARIMA and LSTM (replace with actual implementations)
def forecast_arima(stock_data, steps):
    # Placeholder for ARIMA forecasting function
    forecast_dates = pd.date_range(start=stock_data.index[-1], periods=steps + 1, freq='D')[1:]
    forecast_values = np.random.randn(steps)  # Replace with actual ARIMA forecast logic
    return pd.DataFrame({'Forecast': forecast_values}, index=forecast_dates)

def forecast_lstm(stock_data, steps):
    # Placeholder for LSTM forecasting function
    forecast_dates = pd.date_range(start=stock_data.index[-1], periods=steps + 1, freq='D')[1:]
    forecast_values = np.random.randn(steps)  # Replace with actual LSTM forecast logic
    return pd.DataFrame({'Forecast': forecast_values}, index=forecast_dates)

# Create the Dash app
app = dash.Dash(__name__)

# Example of available tickers (replace with actual tickers)
selected_tickers = list(stock_dataframes.keys())[:5]  # Limit to first 5 tickers for demonstration

# Create the layout of the dashboard
app.layout = html.Div([
    html.H1('Stock Price Forecasting Dashboard'),

    # Dropdown to select ticker
    html.Div([
        dcc.Dropdown(
            id='ticker-dropdown',
            options=[{'label': ticker, 'value': ticker} for ticker in selected_tickers],
            value=selected_tickers[0],  # Default value
            style={'width': '50%'}
        ),
    ], style={'padding': '10px'}),

    # Dropdown to select forecast type (ARIMA or LSTM)
    html.Div([
        dcc.Dropdown(
            id='forecast-type-dropdown',
            options=[
                {'label': 'ARIMA', 'value': 'ARIMA'},
                {'label': 'LSTM', 'value': 'LSTM'}
            ],
            value='ARIMA',  # Default to ARIMA
            style={'width': '50%'}
        ),
    ], style={'padding': '10px'}),

    # Graphs for forecast (ARIMA or LSTM)
    html.Div([
        dcc.Graph(id='forecast-graph')
    ]),

    # Display Forecasts for 1-day, 3-day, and 7-day
    html.Div([
        html.H3("Forecasts"),
        html.Div(id="forecast-1d"),
        html.Div(id="forecast-3d"),
        html.Div(id="forecast-7d")
    ])
])

# Callback to update the forecast graphs based on selected ticker and forecast type
@app.callback(
    [Output('forecast-graph', 'figure'),
     Output('forecast-1d', 'children'),
     Output('forecast-3d', 'children'),
     Output('forecast-7d', 'children')],
    [Input('ticker-dropdown', 'value'),
     Input('forecast-type-dropdown', 'value')]
)
def update_forecasts(ticker, forecast_type):
    # Get stock data for selected ticker
    stock_data = stock_dataframes[ticker]
    print(f"Stock data for {ticker}:")
    print(stock_data.head())  # Check stock data structure
    
    # Initialize forecast DataFrames for ARIMA and LSTM
    forecast_1d = forecast_3d = forecast_7d = None
    
    # Generate forecasts based on selected forecast type
    if forecast_type == 'ARIMA':
        forecast_1d = forecast_arima(stock_data, steps=1)
        forecast_3d = forecast_arima(stock_data, steps=3)
        forecast_7d = forecast_arima(stock_data, steps=7)
    elif forecast_type == 'LSTM':
        forecast_1d = forecast_lstm(stock_data, steps=1)
        forecast_3d = forecast_lstm(stock_data, steps=3)
        forecast_7d = forecast_lstm(stock_data, steps=7)

    # Debugging prints for forecast data
    print("1 Day Forecast:", forecast_1d.head())
    print("3 Days Forecast:", forecast_3d.head())
    print("7 Days Forecast:", forecast_7d.head())
    
    # Ensure forecast data is not empty or NaN
    if forecast_1d.empty or forecast_3d.empty or forecast_7d.empty:
        print(f"Error: Forecast data for {ticker} is empty.")
        return go.Figure(), "Data not available", "Data not available", "Data not available"

    # Create forecast graph
    forecast_fig = go.Figure()
    forecast_fig.add_trace(go.Scatter(x=forecast_1d.index, y=forecast_1d['Forecast'], mode='lines', name=f'{forecast_type} Forecast (1 Day)'))
    forecast_fig.add_trace(go.Scatter(x=forecast_3d.index, y=forecast_3d['Forecast'], mode='lines', name=f'{forecast_type} Forecast (3 Days)'))
    forecast_fig.add_trace(go.Scatter(x=forecast_7d.index, y=forecast_7d['Forecast'], mode='lines', name=f'{forecast_type} Forecast (7 Days)'))
    forecast_fig.update_layout(title=f'{forecast_type} Forecast for {ticker}', xaxis_title='Date', yaxis_title='Price')

    # Return forecast figure along with numerical results for 1-day, 3-day, and 7-day
    return (
        forecast_fig,
        f"1 Day: {forecast_1d['Forecast'].iloc[0]:.2f}",
        f"3 Days: {forecast_3d['Forecast'].iloc[2]:.2f}",
        f"7 Days: {forecast_7d['Forecast'].iloc[6]:.2f}"
    )

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)


Stock data for META:
        date       Close ticker  sentiment
0 2019-12-31  205.250000   META        NaN
1 2020-01-02  209.779999   META        NaN
2 2020-01-03  208.669998   META        NaN
3 2020-01-06  212.600006   META        NaN
4 2020-01-07  213.059998   META        NaN
1 Day Forecast:                                Forecast
1970-01-02 00:00:00.000000253  0.199267
3 Days Forecast:                                Forecast
1970-01-02 00:00:00.000000253 -1.081927
1970-01-03 00:00:00.000000253  1.003465
1970-01-04 00:00:00.000000253  1.106151
7 Days Forecast:                                Forecast
1970-01-02 00:00:00.000000253  0.135914
1970-01-03 00:00:00.000000253 -0.112316
1970-01-04 00:00:00.000000253  0.157987
1970-01-05 00:00:00.000000253 -0.443866
1970-01-06 00:00:00.000000253  1.039403
Stock data for UNH:
        date       Close ticker  sentiment
0 2019-12-31  293.980011    UNH        NaN
1 2020-01-02  292.500000    UNH        NaN
2 2020-01-03  289.540009    UNH        NaN