# In [None]:
import io
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import talib
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Concatenate, Conv1D, MaxPooling1D, Flatten, Embedding, Bidirectional
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from textblob import TextBlob

# Set up API credentials for news sentiment analysis.
# SECURITY NOTE(review): an API key is committed in this file. Prefer supplying
# it through the RAPIDAPI_KEY environment variable; the literal below is kept
# only so existing runs keep working and should be rotated and removed.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY") or "62a5337beamsh4b06b153b683dcep1db8cfjsn6419e7690224"
RAPIDAPI_HOST = "apidojo-yahoo-finance-v1.p.rapidapi.com"
# Built from the host constant so the two values cannot drift apart.
RAPIDAPI_URL = f"https://{RAPIDAPI_HOST}/news/v2/get-details"

# Set up stock symbol and time window for data retrieval
stock_symbol = "MSFT"
time_window = "10y"

# Set up sliding window parameters.
# Each window holds `window_size` rows; consecutive windows start `stride` rows apart.
window_size = 10
stride = 1

# Define function to retrieve stock data using Yahoo Finance API
def get_stock_data(symbol, window):
    """Download daily price history for ``symbol`` and drop incomplete rows.

    Args:
        symbol: Ticker symbol forwarded to ``yf.download``.
        window: Look-back period forwarded as ``period`` (e.g. ``"10y"``).

    Returns:
        The downloaded DataFrame with every row containing a missing value
        removed. If the download comes back with MultiIndex columns, the
        first level holding a single distinct value is dropped so the
        columns can be addressed by plain name.
    """
    df = yf.download(symbol, period=window, interval="1d", group_by='ticker')

    # With group_by='ticker', recent yfinance releases return two-level
    # columns even for one symbol, which breaks plain lookups such as
    # df["Close"]. A level with one distinct value carries no information,
    # so removing it is safe; multi-symbol downloads are left untouched.
    if isinstance(df.columns, pd.MultiIndex):
        for level in range(df.columns.nlevels):
            if df.columns.get_level_values(level).nunique() == 1:
                df.columns = df.columns.droplevel(level)
                break

    # Drop any rows with missing values
    return df.dropna()

# Define function to retrieve news sentiment data using Yahoo Finance API
def get_news_sentiment_data(symbol, num_articles):
    """Fetch news for ``symbol`` and score each article summary with TextBlob.

    Args:
        symbol: Ticker symbol sent as the ``symbols`` query parameter and
            used as the lookup key in the response payload.
        num_articles: Value sent as the ``count`` query parameter.

    Returns:
        List of TextBlob polarity scores, one per article that has a
        non-empty ``summary``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within 30 seconds.
        KeyError: If the payload lacks the ``data -> symbol -> news`` path.
    """
    querystring = {"symbols": symbol, "count": num_articles}
    headers = {
        "X-RapidAPI-Key": RAPIDAPI_KEY,
        "X-RapidAPI-Host": RAPIDAPI_HOST,
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(RAPIDAPI_URL, headers=headers, params=querystring, timeout=30)
    # Surface quota/auth failures as HTTP errors instead of an opaque
    # JSON-decoding or KeyError failure further down.
    response.raise_for_status()

    # NOTE(review): the payload layout is taken from how this code indexes it;
    # confirm against the API's documented response schema.
    articles = response.json()["data"][symbol]["news"]

    # Articles with a missing or empty summary are skipped rather than
    # aborting the whole batch.
    summaries = (article.get("summary") for article in articles)
    return [TextBlob(text).sentiment.polarity for text in summaries if text]

# Define function to retrieve technical indicator data using TA-Lib library
def get_technical_indicator_data(df):
    """Add scaled ``rsi``, ``macd`` and ``cci`` columns to ``df`` in place.

    The indicators are computed from the ``Close``, ``High`` and ``Low``
    columns. Rows containing a missing value in any column are then removed
    from ``df`` itself, and the three indicator columns are rescaled with a
    ``MinMaxScaler`` fitted on the remaining rows.

    Args:
        df: Price DataFrame with ``Close``, ``High`` and ``Low`` columns.
            It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    indicator_columns = ["rsi", "macd", "cci"]
    close_prices = df["Close"]

    # Only the first of MACD's three outputs is kept.
    macd_line, _second, _third = talib.MACD(close_prices)

    df["rsi"] = talib.RSI(close_prices)
    df["macd"] = macd_line
    df["cci"] = talib.CCI(df["High"], df["Low"], close_prices)

    # Discard rows that still contain missing values before fitting the scaler.
    df.dropna(inplace=True)

    scaled_values = MinMaxScaler().fit_transform(df[indicator_columns])
    df[indicator_columns] = scaled_values
    return df

# Step 1: download the price history for the configured symbol and period
df_stock = get_stock_data(symbol=stock_symbol, window=time_window)
# Step 2: request one news article per downloaded price row and score each summary
# NOTE(review): the article count is tied to the row count taken *before* indicator
# rows are dropped below; confirm this pairing is intended.
sentiment_scores = get_news_sentiment_data(symbol=stock_symbol, num_articles=len(df_stock))
# Step 3: append the TA-Lib indicator columns to the price frame
df_stock = get_technical_indicator_data(df=df_stock)
# Define function to create sliding windows of data
def create_sliding_windows(data, window_size, stride):
    """Cut ``data`` into overlapping windows and split off each window's last row.

    Every window covers ``window_size`` consecutive rows. The first
    ``window_size - 1`` rows become one input sample and the final row
    becomes its target.

    Args:
        data: Sliceable sequence (e.g. a NumPy array) ordered along axis 0.
        window_size: Number of rows per window, target row included.
        stride: Step between the start indices of consecutive windows.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the input parts and
        ``y`` stacks the target rows. Both are empty when ``data`` holds
        fewer than ``window_size`` rows.
    """
    # The last full window starts at len(data) - window_size, so the stop
    # bound must be one past it; otherwise the final row never appears as a
    # target.
    starts = range(0, len(data) - window_size + 1, stride)
    windows = [data[start:start + window_size] for start in starts]

    # Split every window into its input part and its target row
    X = np.array([window[:-1] for window in windows])
    y = np.array([window[-1] for window in windows])
    return X, y

# Window the price/indicator matrix and the sentiment series (as a single-column array)
stock_matrix = df_stock.values
sentiment_column = np.array(sentiment_scores).reshape(-1, 1)
X_stock, y_stock = create_sliding_windows(stock_matrix, window_size, stride)
X_sentiment, y_sentiment = create_sliding_windows(sentiment_column, window_size, stride)

# Partition sizes are derived from the stock windows: 70% train, 20% validation, remainder test
n_stock_samples = len(X_stock)
train_size = int(0.7 * n_stock_samples)
val_size = int(0.2 * n_stock_samples)
test_size = n_stock_samples - train_size - val_size

# One slice object per partition, applied to both the stock and sentiment arrays.
# NOTE(review): the same index ranges are applied to the sentiment arrays even though
# their length comes from the number of scored articles; confirm the two are aligned.
train_part = slice(None, train_size)
val_part = slice(train_size, train_size + val_size)
test_part = slice(-test_size, None)

# Training partition
X_stock_train, y_stock_train = X_stock[train_part], y_stock[train_part]
X_sentiment_train, y_sentiment_train = X_sentiment[train_part], y_sentiment[train_part]

# Validation partition
X_stock_val, y_stock_val = X_stock[val_part], y_stock[val_part]
X_sentiment_val, y_sentiment_val = X_sentiment[val_part], y_sentiment[val_part]

# Test partition (taken from the tail of each array)
X_stock_test, y_stock_test = X_stock[test_part], y_stock[test_part]
X_sentiment_test, y_sentiment_test = X_sentiment[test_part], y_sentiment[test_part]

# Define input layers: one (timesteps, features) sequence per branch
input_stock = Input(shape=(X_stock.shape[1], X_stock.shape[2]), name="stock_input")
input_sentiment = Input(shape=(X_sentiment.shape[1], X_sentiment.shape[2]), name="sentiment_input")

# Define LSTM layers for stock data.
# The first LSTM returns the full sequence so the second one can consume it.
lstm_stock = LSTM(64, activation="relu", return_sequences=True)(input_stock)
lstm_stock = Dropout(0.2)(lstm_stock)
lstm_stock = LSTM(32, activation="relu")(lstm_stock)
lstm_stock = Dropout(0.2)(lstm_stock)

# Define Convolutional Neural Network (CNN) layers for sentiment data.
# padding="same" keeps the sequence length through each convolution. With the
# default "valid" padding a (window_size - 1) = 9 step input shrinks
# 9 -> 7 -> 3 -> 1 -> 0 across the conv/pool stack and the model cannot be built.
cnn_sentiment = Conv1D(filters=32, kernel_size=3, activation="relu", padding="same")(input_sentiment)
cnn_sentiment = MaxPooling1D(pool_size=2)(cnn_sentiment)
cnn_sentiment = Conv1D(filters=64, kernel_size=3, activation="relu", padding="same")(cnn_sentiment)
cnn_sentiment = MaxPooling1D(pool_size=2)(cnn_sentiment)
cnn_sentiment = Flatten()(cnn_sentiment)

# Concatenate LSTM and CNN layers
concatenated = Concatenate()([lstm_stock, cnn_sentiment])
# Linear output: the model is trained with MSE as a regressor, and a sigmoid
# would confine every prediction to (0, 1) although nothing in this script
# scales the price columns into that range.
output = Dense(1, activation="linear")(concatenated)

# Define model and compile
model = Model(inputs=[input_stock, input_sentiment], outputs=output)
model.compile(optimizer="adam", loss="mse")

# Define callbacks for early stopping and saving best model
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint("stock_prediction.h5", save_best_only=True)

# Assemble the two-branch inputs in the order the model declares them
# (stock sequence first, sentiment sequence second).
# NOTE(review): Keras requires both inputs to hold the same number of samples;
# confirm the stock and sentiment windows line up before training.
X_train = [X_stock_train, X_sentiment_train]
X_val = [X_stock_val, X_sentiment_val]
X_test = [X_stock_test, X_sentiment_test]

# The model emits one value per sample, while each y_stock row carries every
# DataFrame column, so a single target column has to be selected.
# NOTE(review): assumes the closing price is the intended target - confirm.
close_col = df_stock.columns.get_loc("Close")
y_train = y_stock_train[:, close_col]
y_val = y_stock_val[:, close_col]
y_test = y_stock_test[:, close_col]

# Compile and fit the model (`learning_rate` replaces the removed `lr` argument)
model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), batch_size=64, callbacks=[early_stop, model_checkpoint])

# Evaluate the model on the test set using the best checkpoint written during training
model.load_weights("stock_prediction.h5")
# No extra metrics are compiled, so evaluate() returns the loss as a single scalar.
mse = model.evaluate(X_test, y_test)
rmse = np.sqrt(mse)
print(f"Test set RMSE: {rmse:.2f}")

# Plot the training and validation loss
plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.title("Training and Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss (MSE)")
plt.legend()
plt.show()

# Make predictions on the test set
y_pred = model.predict(X_test)

# Plot the predicted vs actual prices
plt.plot(y_test, label="Actual Prices")
plt.plot(y_pred, label="Predicted Prices")
plt.title("Actual vs Predicted Prices")
plt.xlabel("Time")
plt.ylabel("Price ($)")
plt.legend()
plt.show()