# 7 day stock predictor

Steps:
- Stock Data Fetching: Using yfinance
- Technical Indicators: SMA, EMA, RSI, MACD
- Sentiment Analysis: FinBERT + Google Trends + Twitter Sentiment
- Informer Model: Better than LSTM for time-series forecasting
- Hyperparameter Optimization: Using Optuna
- Data Preprocessing: MinMaxScaler for normalization
- Visualization: Matplotlib for insights

### Install dependancies

In [21]:
#pip install yfinance transformers ta optuna tensorflow numpy pandas scikit-learn requests beautifulsoup4


### Import Required Libraries

In [22]:
import numpy as np
import pandas as pd
import yfinance as yf
import tensorflow as tf
import optuna
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from ta.momentum import RSIIndicator
from ta.trend import MACD, EMAIndicator
from ta import add_all_ta_features

### Function to fetch stock data and compute technical indicators

In [23]:
def calculate_rsi(series, period=14):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

def calculate_macd(series, short=12, long=26, signal=9):
    short_ema = series.ewm(span=short, adjust=False).mean()
    long_ema = series.ewm(span=long, adjust=False).mean()
    macd = short_ema - long_ema
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    macd_hist = macd - signal_line
    return macd, signal_line, macd_hist

def calculate_ema(series, period=14):
    return series.ewm(span=period, adjust=False).mean()

def fetch_stock_data(ticker, start, end):
    stock_data = yf.download(ticker, start=start, end=end)
    
    stock_data['RSI'] = calculate_rsi(stock_data['Close'])
    stock_data['MACD'], stock_data['MACD_Signal'], stock_data['MACD_Hist'] = calculate_macd(stock_data['Close'])
    stock_data['EMA'] = calculate_ema(stock_data['Close'])
    
    stock_data.dropna(inplace=True)
    return stock_data

### Function to fetch real-time financial news headlines

In [24]:
def fetch_news_headlines():
    url = "https://finance.yahoo.com/news/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    headlines = [h.text for h in soup.find_all("h3")[:10]]
    return headlines if headlines else ["Stock market update"]

### Function for sentiment analysis on headlines


In [25]:
def fetch_sentiment_data(news_headlines):
    try:
        sentiment_analyzer = pipeline("sentiment-analysis", model="ProsusAI/finbert")
        sentiment_scores = [sentiment_analyzer(text)[0]['score'] for text in news_headlines]
        return np.interp(sentiment_scores, (min(sentiment_scores), max(sentiment_scores)), (0, 1))
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return [0.5] * len(news_headlines)

### Function to prepare the dataset for training


In [26]:
def prepare_data(df, sentiment_scores):
    df['Sentiment'] = [sentiment_scores] * len(df)
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)
    X, y = [], []
    for i in range(30, len(df) - 7):
        X.append(scaled_data[i-30:i])
        y.append(scaled_data[i:i+7, 3])  # Predicting Close prices
    return np.array(X), np.array(y), scaler

### Function to build LSTM model using Optuna hyperparameter tuning


In [27]:
def build_model(trial, input_shape):
    model = Sequential([
        LSTM(trial.suggest_int("units", 50, 200), return_sequences=True, input_shape=input_shape),
        Dropout(trial.suggest_uniform("dropout_1", 0.1, 0.5)),
        LSTM(trial.suggest_int("units_2", 30, 100)),
        Dropout(trial.suggest_uniform("dropout_2", 0.1, 0.5)),
        Dense(7)
    ])
    optimizer = trial.suggest_categorical("optimizer", ["adam", "rmsprop", "sgd"])
    model.compile(optimizer=optimizer, loss='mse')
    return model

### Objective function for hyperparameter tuning


In [28]:
def objective(trial):
    global X_train, y_train, X_test, y_test
    model = build_model(trial, (30, X_train.shape[2]))
    model.fit(X_train, y_train, epochs=trial.suggest_int("epochs", 10, 50), batch_size=trial.suggest_categorical("batch_size", [16, 32, 64]), verbose=0)
    loss = model.evaluate(X_test, y_test, verbose=0)
    return loss

### Function to predict the next 7 days


In [29]:
def predict_next_7_days(model, X, scaler):
    preds = model.predict(X[-1].reshape(1, X.shape[1], X.shape[2]))
    preds = scaler.inverse_transform(np.concatenate((np.zeros((7, X.shape[2] - 1)), preds.T), axis=1))[:, 3]
    return preds

### Run model

In [30]:
# Fetch stock data
stock_data = fetch_stock_data("AAPL", "2023-01-01", "2024-01-01")

# Fetch latest news headlines
news_headlines = fetch_news_headlines()

# Sentiment analysis on news headlines
sentiment_scores = fetch_sentiment_data(news_headlines)

# Prepare data for training
X, y, scaler = prepare_data(stock_data, sentiment_scores)
X_train, X_test = X[:int(0.8*len(X))], X[int(0.8*len(X)):]
y_train, y_test = y[:int(0.8*len(y))], y[int(0.8*len(y)):]

# Perform hyperparameter tuning using Optuna
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

# Train the best model
best_model = build_model(study.best_trial, (30, X_train.shape[2]))
best_model.fit(X_train, y_train, epochs=study.best_trial.params["epochs"], batch_size=study.best_trial.params["batch_size"], verbose=1)

# Predict next 7 days
predictions = predict_next_7_days(best_model, X_test, scaler)
print(predictions)

[*********************100%***********************]  1 of 1 completed
Device set to use cpu
[I 2025-03-10 01:31:58,355] A new study created in memory with name: no-name-2c241fa7-4519-4d69-ae90-f457e931a023
  super().__init__(**kwargs)
  Dropout(trial.suggest_uniform("dropout_1", 0.1, 0.5)),
  Dropout(trial.suggest_uniform("dropout_2", 0.1, 0.5)),
[I 2025-03-10 01:32:02,838] Trial 0 finished with value: 0.017252709716558456 and parameters: {'units': 152, 'dropout_1': 0.2914208331730393, 'units_2': 34, 'dropout_2': 0.11824057112330166, 'optimizer': 'adam', 'epochs': 11, 'batch_size': 16}. Best is trial 0 with value: 0.017252709716558456.
[I 2025-03-10 01:32:09,213] Trial 1 finished with value: 0.020146245136857033 and parameters: {'units': 61, 'dropout_1': 0.4959715276993618, 'units_2': 84, 'dropout_2': 0.16796423884465464, 'optimizer': 'adam', 'epochs': 32, 'batch_size': 32}. Best is trial 0 with value: 0.017252709716558456.
[I 2025-03-10 01:32:15,871] Trial 2 finished with value: 0.0454

Epoch 1/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.2420
Epoch 2/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 0.0529
Epoch 3/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 0.0390
Epoch 4/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0324
Epoch 5/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0382
Epoch 6/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0308
Epoch 7/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0303
Epoch 8/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.0289
Epoch 9/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 0.0298
Epoch 10/22
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 0.0278