In [57]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Step 1: Download stock data from Yahoo Finance
def download_stock_data(ticker, period='5y'):
    """Download historical price data for ``ticker`` from Yahoo Finance.

    Args:
        ticker: Ticker symbol (or any ticker argument ``yf.download``
            accepts). String tickers are stripped of surrounding whitespace
            before the request is made.
        period: Look-back period string forwarded to ``yf.download``.

    Returns:
        The DataFrame returned by ``yf.download``.

    Raises:
        ValueError: If the download yields no rows.
    """
    # A stray space (e.g. 'GOOG ') is an easy typo; normalise string tickers
    # and leave any other ticker type untouched.
    if isinstance(ticker, str):
        ticker = ticker.strip()

    stock_data = yf.download(ticker, period=period)

    # Fail here with a clear message instead of letting an empty frame reach
    # the preprocessing code.
    if stock_data.empty:
        raise ValueError(
            f"No price data returned for ticker {ticker!r} (period={period!r})."
        )
    return stock_data

# Step 2: Preprocess the data
def preprocess_data(data, feature_col='Close', seq_length=60):
    """Scale one price column to [0, 1] and cut it into LSTM training windows.

    Each sample ``X[k]`` holds ``seq_length`` consecutive scaled values and
    ``y[k]`` is the scaled value that immediately follows that window.

    Args:
        data: DataFrame containing ``feature_col``.
        feature_col: Name of the column to model.
        seq_length: Number of past time steps in each input window.

    Returns:
        Tuple ``(X, y, scaler)`` where ``X`` has shape
        ``(samples, seq_length, 1)``, ``y`` has shape ``(samples,)`` and
        ``scaler`` is the fitted ``MinMaxScaler``.

    Raises:
        ValueError: If ``data`` has too few rows to form a single window.
    """
    prices = data[[feature_col]]

    # At least seq_length + 1 rows are needed for one (window, target) pair;
    # without this check the reshape below fails with an opaque IndexError.
    if len(prices) <= seq_length:
        raise ValueError(
            f"Need more than {seq_length} rows to build sequences, got {len(prices)}."
        )

    # NOTE(review): the scaler is fit on the entire series, so any later
    # train/test split sees min/max statistics from the test period as well.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(prices)

    # Only the first scaled column is used as the model feature.
    series = scaled_data[:, 0]
    window_ends = range(seq_length, len(series))
    X = np.array([series[end - seq_length:end] for end in window_ends])
    y = np.array(series[seq_length:])

    # Add the trailing feature axis: (samples, timesteps, features).
    X = X[..., np.newaxis]

    return X, y, scaler

# Step 3: Build the LSTM model
def create_lstm_model(input_shape):
    """Build and compile the two-layer LSTM regression network.

    The network is LSTM(50) -> Dropout(0.2) -> LSTM(50) -> Dropout(0.2) ->
    Dense(25) -> Dense(1), compiled with the Adam optimizer and a
    mean-squared-error loss.

    Args:
        input_shape: ``(timesteps, features)`` tuple describing one sample.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Imported locally so this definition brings its own dependency; an
    # explicit Input object replaces the `input_shape=` layer argument, which
    # newer Keras versions warn about.
    from tensorflow.keras import Input

    model = Sequential([
        Input(shape=input_shape),
        LSTM(units=50, return_sequences=True),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=25),
        Dense(units=1),  # Single output value: the next (scaled) price
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Step 4: Train the LSTM model
def train_lstm_model(model, X_train, y_train, epochs=50, batch_size=64):
    """Fit ``model`` on the training windows and hand the same model back.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training inputs.
        y_train: Training targets.
        epochs: Number of passes over the training data.
        batch_size: Samples per gradient update.

    Returns:
        The ``model`` object that was passed in, after fitting.
    """
    fit_options = {
        'epochs': epochs,
        'batch_size': batch_size,
        'verbose': 1,  # show the per-epoch progress bar
    }
    model.fit(X_train, y_train, **fit_options)
    return model

# Step 5: Make predictions and evaluate
def predict_and_evaluate(model, X_test, y_test, stock_scaler):
    """Predict on the test windows and score the result in price units.

    Args:
        model: Object exposing ``predict``.
        X_test: Test inputs passed straight to ``model.predict``.
        y_test: Scaled test targets (array with a ``reshape`` method).
        stock_scaler: Object exposing ``inverse_transform`` that maps scaled
            values back to prices.

    Returns:
        Tuple ``(predicted_prices, rmse)``: the un-scaled predictions and the
        root-mean-squared error between them and the un-scaled targets.
    """
    scaled_predictions = model.predict(X_test)

    # Bring both series back to the original price scale before scoring.
    predicted_prices = stock_scaler.inverse_transform(scaled_predictions)
    true_prices = stock_scaler.inverse_transform(y_test.reshape(-1, 1))

    residuals = predicted_prices - true_prices
    rmse = np.sqrt(np.mean(np.square(residuals)))
    return predicted_prices, rmse

# Step 6: Trading simulation logic
def simulate_trading(predictions, actual_prices, dates, initial_balance=10000, shares=0):
    """Replay a long-only, all-in / all-out strategy over a price series.

    For every step ``i >= 1`` the prediction for step ``i`` is compared with
    the actual price at step ``i - 1``:

    * prediction higher and at least one share is affordable: spend as much
      cash as possible on whole shares at ``actual_prices[i]``;
    * prediction lower and shares are held: sell all of them at
      ``actual_prices[i]``.

    Shares still held after the last step are sold at ``actual_prices[-1]``.

    Args:
        predictions: Sequence of predicted prices.
        actual_prices: Sequence of realised prices, indexed like
            ``predictions``.
        dates: Sequence of labels for each step, used only in the log text.
        initial_balance: Starting cash.
        shares: Shares already held at the start.

    Returns:
        Tuple ``(trade_log, profit_loss)``: a list of human-readable trade
        descriptions and the final cash minus ``initial_balance``. Shares
        held at the start are not valued in that baseline, so their sale
        proceeds count entirely as profit.
    """
    balance = initial_balance
    total_shares = shares
    trade_log = []

    for i in range(1, len(predictions)):
        predicted_price = predictions[i]
        previous_price = actual_prices[i - 1]
        actual_price = actual_prices[i]
        date = dates[i]

        # NOTE(review): trades execute at actual_prices[i], i.e. the very
        # price the prediction is about, not at the price known when the
        # signal could have been acted on. Confirm this is intended.
        # `>=` so that cash exactly equal to the price still buys one share.
        if predicted_price > previous_price and balance >= actual_price:
            shares_to_buy = balance // actual_price
            balance -= shares_to_buy * actual_price
            total_shares += shares_to_buy
            trade_log.append(f"Bought {shares_to_buy} shares at {actual_price} on {date}, Balance: {balance}, Shares: {total_shares}")

        elif predicted_price < previous_price and total_shares > 0:
            balance += total_shares * actual_price
            trade_log.append(f"Sold {total_shares} shares at {actual_price} on {date}, Balance: {balance}")
            total_shares = 0

    # Liquidate whatever is still held so the result is expressed in cash.
    if total_shares > 0:
        balance += total_shares * actual_prices[-1]
        trade_log.append(f"Final Sale of {total_shares} shares at {actual_prices[-1]} on {dates[-1]}, Final Balance: {balance}")

    profit_loss = balance - initial_balance
    return trade_log, profit_loss


In [58]:
# Step 6: Trading simulation logic
def simulate_trading(predictions, actual_prices, dates, initial_balance=10000, shares=0):
    """Back-test a simple buy-everything / sell-everything rule.

    Starting at step 1, each step compares the model's prediction for that
    step with the previous step's actual price. A higher prediction buys as
    many whole shares as the cash allows; a lower prediction sells the whole
    position. Both trades are priced at the current step's actual price. Any
    position left at the end is sold at the final actual price.

    Args:
        predictions: Sequence of predicted prices.
        actual_prices: Sequence of realised prices aligned with
            ``predictions``.
        dates: Sequence of step labels; only interpolated into log messages.
        initial_balance: Cash available at the start.
        shares: Number of shares held at the start.

    Returns:
        Tuple ``(trade_log, profit_loss)``. ``trade_log`` is a list of
        message strings, one per trade. ``profit_loss`` is the final cash
        minus ``initial_balance``; the starting ``shares`` are not part of
        that baseline, so selling them is booked as pure profit.
    """
    balance = initial_balance
    total_shares = shares
    trade_log = []

    for i in range(1, len(predictions)):
        predicted_price = predictions[i]
        reference_price = actual_prices[i - 1]
        actual_price = actual_prices[i]
        date = dates[i]

        expects_rise = predicted_price > reference_price
        expects_fall = predicted_price < reference_price

        # NOTE(review): the fill price is actual_prices[i], the same value
        # the prediction targets, so the signal is acted on after the move
        # it forecasts. Confirm this is intended.
        # Affordability uses `>=`: holding exactly one share's worth of cash
        # is enough to buy that share.
        if expects_rise and balance >= actual_price:
            shares_to_buy = balance // actual_price
            balance -= shares_to_buy * actual_price
            total_shares += shares_to_buy
            trade_log.append(f"Bought {shares_to_buy} shares at {actual_price} on {date}, Balance: {balance}, Shares: {total_shares}")

        elif expects_fall and total_shares > 0:
            balance += total_shares * actual_price
            trade_log.append(f"Sold {total_shares} shares at {actual_price} on {date}, Balance: {balance}")
            total_shares = 0

    # Close out any open position at the last available price.
    if total_shares > 0:
        balance += total_shares * actual_prices[-1]
        trade_log.append(f"Final Sale of {total_shares} shares at {actual_prices[-1]} on {dates[-1]}, Final Balance: {balance}")

    profit_loss = balance - initial_balance
    return trade_log, profit_loss

# Step 7: Run the entire pipeline with trading simulation
def run_stock_prediction_with_simulation(ticker, period='5y', seq_length=60):
    """Run the price-only pipeline end to end and print a short report.

    Steps: download prices, build scaled look-back windows, split them
    chronologically 80/20, train the LSTM for 10 epochs, evaluate on the
    test part, and replay the trading simulation on the test predictions.

    Args:
        ticker: Ticker symbol to download.
        period: Look-back period passed to the download step.
        seq_length: Number of time steps per input window.

    Returns:
        Tuple ``(stock_data, predictions, trade_log, profit_loss)``.
    """
    # Fetch prices and turn them into model-ready windows.
    history = download_stock_data(ticker, period)
    features, targets, price_scaler = preprocess_data(history, seq_length=seq_length)

    # Target k corresponds to row seq_length + k of the price history.
    target_dates = history.index[seq_length:]

    # shuffle=False keeps time order, so the test set is the tail of the data.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, shuffle=False
    )
    test_dates = target_dates[-len(X_test):]

    # Build and fit the network on the training windows.
    window_shape = (X_train.shape[1], 1)
    network = create_lstm_model(input_shape=window_shape)
    network = train_lstm_model(network, X_train, y_train, epochs=10)

    # Score the model and recover real prices for the simulation.
    predicted_prices, rmse = predict_and_evaluate(network, X_test, y_test, price_scaler)
    real_prices = price_scaler.inverse_transform(y_test.reshape(-1, 1))

    trade_log, profit_loss = simulate_trading(
        predicted_prices.flatten(), real_prices.flatten(), test_dates
    )

    print(f"Root Mean Squared Error: {rmse}")
    print(f"Final Profit/Loss: {profit_loss}")
    for entry in trade_log:
        print(entry)

    return history, predicted_prices, trade_log, profit_loss

# Example usage:
if __name__ == "__main__":
    # Example run on Alphabet Inc. (GOOG). The symbol must not carry
    # surrounding whitespace.
    ticker_symbol = 'GOOG'
    stock_data, predictions, trade_log, profit_loss = run_stock_prediction_with_simulation(ticker_symbol)


[*********************100%***********************]  1 of 1 completed

Epoch 1/10



  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 67ms/step - loss: 0.0800
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step - loss: 0.0106
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step - loss: 0.0047
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step - loss: 0.0033
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - loss: 0.0028
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - loss: 0.0029
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 110ms/step - loss: 0.0028
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 67ms/step - loss: 0.0028
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step - loss: 0.0024
Epoch 10/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - loss: 0.0025
[1m8/8[

In [2]:
!pip install pytrends

Collecting pytrends
  Downloading pytrends-4.9.2-py3-none-any.whl.metadata (13 kB)
Downloading pytrends-4.9.2-py3-none-any.whl (15 kB)
Installing collected packages: pytrends
Successfully installed pytrends-4.9.2


In [61]:
from pytrends.request import TrendReq


# Step 1.1: Download Google Trends data with additional error handling
def download_trends_data(keyword, start_date, end_date):
    """Fetch Google Trends interest-over-time data for one keyword.

    Args:
        keyword: Search term to query.
        start_date: Start of the timeframe; interpolated into the request as
            ``'<start_date> <end_date>'``.
        end_date: End of the timeframe.

    Returns:
        The interest-over-time DataFrame with missing values replaced by 0,
        or ``None`` (after printing a message) when the result is empty.
    """
    client = TrendReq(hl='en-US', tz=360)
    client.build_payload([keyword], timeframe=f'{start_date} {end_date}')
    interest = client.interest_over_time()

    if not interest.empty:
        # Replace gaps with 0 so later steps never see NaN.
        return interest.fillna(0)

    print(f"No Google Trends data found for keyword: {keyword}")
    return None


from pytrends.request import TrendReq
from textblob import TextBlob
import pandas as pd
import numpy as np
import requests

# Step 1.1: Download Google Trends data with additional error handling and sentiment analysis
def download_trends_data(keyword, start_date, end_date):
    """Fetch Google Trends data and derive a +1 / 0 / -1 direction signal.

    Two columns are added to the interest-over-time frame:
    ``'change'`` (difference between consecutive interest scores) and
    ``'sentiment'`` (1 when the change is positive, -1 when negative,
    0 otherwise, which includes the first row where no change exists).

    Args:
        keyword: Search term to query; also the name of the interest column.
        start_date: Start of the timeframe.
        end_date: End of the timeframe.

    Returns:
        The augmented DataFrame, or ``None`` (after printing a message) when
        Google Trends returns no rows.
    """
    def _direction(delta):
        # NaN compares False both ways and therefore maps to 0.
        if delta > 0:
            return 1
        if delta < 0:
            return -1
        return 0

    client = TrendReq(hl='en-US', tz=360)
    client.build_payload([keyword], timeframe=f'{start_date} {end_date}')
    interest = client.interest_over_time()

    if interest.empty:
        print(f"No Google Trends data found for keyword: {keyword}")
        return None

    interest = interest.fillna(0)

    # Period-over-period movement of the interest score and its sign.
    interest['change'] = interest[keyword].diff()
    interest['sentiment'] = interest['change'].apply(_direction)

    return interest

# Step 2: Preprocess the stock and trends data together
def preprocess_data_with_trends(stock_data, trends_data, feature_col='Close', seq_length=60):
    """Build two-feature LSTM windows from prices and the trends sentiment.

    The ``'sentiment'`` column of ``trends_data`` is forward-filled onto the
    stock dates (0 where nothing precedes), both series are min-max scaled
    independently, and windows of ``seq_length`` steps are paired with the
    scaled price that follows each window.

    Note: the indexes of both input frames are made timezone-naive in place.

    Args:
        stock_data: Price DataFrame with a DatetimeIndex and ``feature_col``.
        trends_data: Trends DataFrame with a DatetimeIndex and a
            ``'sentiment'`` column.
        feature_col: Price column to use.
        seq_length: Number of time steps per window.

    Returns:
        Tuple ``(X, y, stock_scaler)``; ``stock_scaler`` is the scaler fitted
        on the price column only.

    Raises:
        ValueError: If ``trends_data`` is ``None``.
    """
    if trends_data is None:
        raise ValueError("No trends data available, cannot proceed with preprocessing.")

    # Drop timezone information so the two indexes can be aligned.
    trends_data.index = trends_data.index.tz_localize(None)
    stock_data.index = stock_data.index.tz_localize(None)

    # Carry the latest trends row forward onto every trading day.
    aligned_trends = trends_data.reindex(stock_data.index, method='ffill').fillna(0)

    # One scaler per series; only the price scaler is returned.
    stock_scaler = MinMaxScaler(feature_range=(0, 1))
    trend_scaler = MinMaxScaler(feature_range=(0, 1))
    price_scaled = stock_scaler.fit_transform(stock_data[[feature_col]])
    sentiment_scaled = trend_scaler.fit_transform(aligned_trends[['sentiment']])

    # Column 0 = price, column 1 = sentiment.
    features = np.concatenate((price_scaled, sentiment_scaled), axis=1)

    window_ends = range(seq_length, len(features))
    X = np.array([features[end - seq_length:end] for end in window_ends])
    y = np.array([price_scaled[end, 0] for end in window_ends])

    return X, y, stock_scaler

# Step 7: Run the entire pipeline with trading simulation, including trends data
def run_stock_prediction_with_simulation_and_trends(ticker, keyword, period='5y', seq_length=60):
    """Run the price-plus-trends pipeline end to end and print a report.

    Downloads prices, fetches Google Trends data for the same date span,
    builds combined windows, splits them chronologically 80/20, trains the
    LSTM for 10 epochs, evaluates it and replays the trading simulation.

    Args:
        ticker: Ticker symbol to download.
        keyword: Google Trends search term.
        period: Look-back period for the price download.
        seq_length: Number of time steps per input window.

    Returns:
        Tuple ``(stock_data, predictions, trade_log, profit_loss)``.

    Raises:
        ValueError: If no Google Trends data is available.
    """
    history = download_stock_data(ticker, period)

    # Ask Google Trends for exactly the span covered by the price data.
    date_format = '%Y-%m-%d'
    span_start = history.index[0].strftime(date_format)
    span_end = history.index[-1].strftime(date_format)
    trends = download_trends_data(keyword, span_start, span_end)
    if trends is None:
        raise ValueError("Google Trends data not available. Exiting...")

    features, targets, price_scaler = preprocess_data_with_trends(
        history, trends, seq_length=seq_length
    )

    # Target k corresponds to row seq_length + k of the price history.
    target_dates = history.index[seq_length:]

    # Keep time order: the last 20% of the windows become the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, shuffle=False
    )
    test_dates = target_dates[-len(X_test):]

    # Input shape is (timesteps, features) taken from the training windows.
    timesteps, n_features = X_train.shape[1], X_train.shape[2]
    network = create_lstm_model(input_shape=(timesteps, n_features))
    network = train_lstm_model(network, X_train, y_train, epochs=10)

    predicted_prices, rmse = predict_and_evaluate(network, X_test, y_test, price_scaler)
    real_prices = price_scaler.inverse_transform(y_test.reshape(-1, 1))

    trade_log, profit_loss = simulate_trading(
        predicted_prices.flatten(), real_prices.flatten(), test_dates
    )

    print(f"Root Mean Squared Error: {rmse}")
    print(f"Final Profit/Loss: {profit_loss}")
    for entry in trade_log:
        print(entry)

    return history, predicted_prices, trade_log, profit_loss

# Example usage:
if __name__ == "__main__":
    # Example run: train on GOOG prices plus Google Trends data for the term
    # 'Google', then print the evaluation and trade log. The names below are
    # bound at module level.
    ticker_symbol = 'GOOG'  # Ticker symbol passed to the Yahoo Finance download
    keyword = 'Google'  # Search term passed to the Google Trends download
    stock_data, predictions, trade_log, profit_loss = run_stock_prediction_with_simulation_and_trends(ticker_symbol, keyword)

[*********************100%***********************]  1 of 1 completed
  df = df.fillna(False)


NameError: name 'scaled_trends_data' is not defined

In [62]:
from pytrends.request import TrendReq
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Step 1.1: Download Google Trends data with additional error handling
def download_trends_data(keyword, start_date, end_date):
    """Query Google Trends for ``keyword`` between two dates.

    Args:
        keyword: Search term to query.
        start_date: Start of the timeframe.
        end_date: End of the timeframe.

    Returns:
        The interest-over-time DataFrame with NaN replaced by 0, or ``None``
        when Google Trends returns no rows (a message is printed first).
    """
    trend_client = TrendReq(hl='en-US', tz=360)
    requested_span = f'{start_date} {end_date}'
    trend_client.build_payload([keyword], timeframe=requested_span)

    result = trend_client.interest_over_time()
    if result.empty:
        # Signal "nothing found" to the caller instead of returning an
        # empty frame.
        print(f"No Google Trends data found for keyword: {keyword}")
        return None

    filled = result.fillna(0)
    return filled

# Step 2: Preprocess the stock and trends data together
def preprocess_data_with_trends(stock_data, trends_data, feature_col='Close', seq_length=60, keyword=None):
    """Build two-feature LSTM windows from prices and Google Trends interest.

    The trends interest column is forward-filled onto the stock dates (0
    where nothing precedes), both series are min-max scaled independently,
    and windows of ``seq_length`` steps are paired with the scaled price
    that follows each window.

    Note: the indexes of both input frames are made timezone-naive in place.

    Args:
        stock_data: Price DataFrame with a DatetimeIndex and ``feature_col``.
        trends_data: Trends DataFrame with a DatetimeIndex.
        feature_col: Price column to use.
        seq_length: Number of time steps per window.
        keyword: Name of the interest column in ``trends_data``. When
            omitted, the first column other than ``'isPartial'`` is used.

    Returns:
        Tuple ``(X, y, stock_scaler)``; ``stock_scaler`` is the scaler fitted
        on the price column only.

    Raises:
        ValueError: If ``trends_data`` is ``None`` or has no usable interest
            column.
    """
    if trends_data is None:
        raise ValueError("No trends data available, cannot proceed with preprocessing.")

    # Resolve the interest column from the arguments rather than from a
    # module-level `keyword` variable that may not exist or may be stale.
    if keyword is None:
        interest_columns = [col for col in trends_data.columns if col != 'isPartial']
        if not interest_columns:
            raise ValueError("trends_data has no interest column to use as a feature.")
        keyword = interest_columns[0]

    # Drop timezone information so the two indexes can be aligned.
    trends_data.index = trends_data.index.tz_localize(None)
    stock_data.index = stock_data.index.tz_localize(None)

    # Reindex trends data to match stock data dates
    trends_data = trends_data.reindex(stock_data.index, method='ffill').fillna(0)

    # Normalize stock prices and trends data separately
    stock_scaler = MinMaxScaler(feature_range=(0, 1))
    trend_scaler = MinMaxScaler(feature_range=(0, 1))

    scaled_stock_data = stock_scaler.fit_transform(stock_data[[feature_col]])
    scaled_trends_data = trend_scaler.fit_transform(trends_data[[keyword]])

    # Column 0 = price, column 1 = trends interest.
    combined_data = np.hstack((scaled_stock_data, scaled_trends_data))

    # Create sequences for LSTM input
    X, y = [], []
    for i in range(seq_length, len(combined_data)):
        X.append(combined_data[i-seq_length:i])
        y.append(scaled_stock_data[i, 0])  # Stock price (first column)

    X, y = np.array(X), np.array(y)

    return X, y, stock_scaler

# Step 7: Run the entire pipeline with trading simulation, including trends data
def run_stock_prediction_with_simulation_and_trends(ticker, keyword, period='5y', seq_length=60):
    """Train on prices plus Google Trends interest and back-test the result.

    The function downloads both data sources, prepares the combined windows,
    performs a chronological 80/20 split, trains the LSTM for 10 epochs,
    reports the RMSE and profit/loss, and prints every trade.

    Args:
        ticker: Ticker symbol to download.
        keyword: Google Trends search term.
        period: Look-back period for the price download.
        seq_length: Number of time steps per input window.

    Returns:
        Tuple ``(stock_data, predictions, trade_log, profit_loss)``.

    Raises:
        ValueError: If no Google Trends data is available.
    """
    # --- Data acquisition -------------------------------------------------
    prices = download_stock_data(ticker, period)
    first_day = prices.index[0].strftime('%Y-%m-%d')
    last_day = prices.index[-1].strftime('%Y-%m-%d')

    trends = download_trends_data(keyword, first_day, last_day)
    if trends is None:
        raise ValueError("Google Trends data not available. Exiting...")

    # --- Feature construction and split -----------------------------------
    windows, targets, price_scaler = preprocess_data_with_trends(prices, trends, seq_length=seq_length)
    target_dates = prices.index[seq_length:]

    split = train_test_split(windows, targets, test_size=0.2, shuffle=False)
    X_train, X_test, y_train, y_test = split

    # The test set is the tail of the series, and so are its dates.
    test_dates = target_dates[-len(X_test):]

    # --- Model -------------------------------------------------------------
    sample_shape = X_train.shape[1:3]
    lstm = create_lstm_model(input_shape=(sample_shape[0], sample_shape[1]))
    lstm = train_lstm_model(lstm, X_train, y_train, epochs=10)

    # --- Evaluation and simulation ------------------------------------------
    predicted, rmse = predict_and_evaluate(lstm, X_test, y_test, price_scaler)
    observed = price_scaler.inverse_transform(y_test.reshape(-1, 1))
    trade_log, profit_loss = simulate_trading(predicted.flatten(), observed.flatten(), test_dates)

    print(f"Root Mean Squared Error: {rmse}")
    print(f"Final Profit/Loss: {profit_loss}")
    for line in trade_log:
        print(line)

    return prices, predicted, trade_log, profit_loss

# Example usage:
if __name__ == "__main__":
    # Example run of the price-plus-trends pipeline. Both inputs are bound
    # as module-level names before the pipeline is called.
    ticker_symbol = 'GOOG'  # Ticker symbol passed to the Yahoo Finance download
    keyword = 'Google'  # Search term passed to the Google Trends download
    stock_data, predictions, trade_log, profit_loss = run_stock_prediction_with_simulation_and_trends(ticker_symbol, keyword)


[*********************100%***********************]  1 of 1 completed
  df = df.fillna(False)
  super().__init__(**kwargs)


Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 66ms/step - loss: 0.0854
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step - loss: 0.0113
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step - loss: 0.0040
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - loss: 0.0036
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 70ms/step - loss: 0.0029
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 65ms/step - loss: 0.0029
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 65ms/step - loss: 0.0025
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - loss: 0.0028
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step - loss: 0.0027
Epoch 10/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 85ms/step - loss: 0.0024