## Welcome! All code included in the Python Implementation is also included here for ease of use. You can run this entire notebook from start to finish, and look at generated console outputs, and visualizations generated as saved PNGs

### The cell below gathers Fear and Index Data, and combines it with Bitcoin price data

In [2]:
import requests
import pandas as pd
!pip install yfinance
import yfinance as yf


def fetch_fear_and_greed_btc():
    # Define the API endpoint and parameters for Fear and Greed Index
    fng_api_url = "https://api.alternative.me/fng/"
    fng_params = {
        'limit': 0,  # Get all available data
        'format': 'json'
    }

    # Make the GET request to the Fear and Greed Index API
    response = requests.get(fng_api_url, params=fng_params)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        fng_data = response.json()
        # Convert the data to a Pandas DataFrame
        fng_df = pd.DataFrame(fng_data['data'])
        # Ensure the timestamp column is of numeric type before converting to datetime
        fng_df['timestamp'] = pd.to_numeric(fng_df['timestamp'], errors='coerce')
        # Convert the timestamp column to datetime
        fng_df['timestamp'] = pd.to_datetime(fng_df['timestamp'], unit='s')
        # Drop the time_until_update column
        fng_df.drop(columns=['time_until_update'], inplace=True)
        # Set the timestamp as the index
        fng_df.set_index('timestamp', inplace=True)
        # Sort the DataFrame by the index (timestamp) in ascending order
        fng_df.sort_index(inplace=True)
        # Save the Fear and Greed Index DataFrame to a CSV file with timestamp as index and column name 'timestamp'
        fng_df.to_csv('fear_and_greed_index.csv', index=True, index_label='timestamp')
        print("Fear and Greed Index data has been saved to 'fear_and_greed_index.csv'.")
    else:
        print(f"Failed to fetch Fear and Greed Index data. Status code: {response.status_code}")

    # Fetch daily Bitcoin prices using Yahoo Finance
    btc_data = yf.download('BTC-USD', start=fng_df.index.min().strftime('%Y-%m-%d'), end=fng_df.index.max().strftime('%Y-%m-%d'))

    # Concatenate Fear and Greed Index DataFrame with Bitcoin DataFrame based on date index
    combined_df = pd.concat([fng_df, btc_data], axis=1, join='inner')

    # Save the combined DataFrame to a CSV file
    combined_df.to_csv('fear_greed_btc_combined.csv', index=True, index_label='timestamp')
    print("Combined data has been saved to 'fear_greed_btc_combined.csv'.")

    return combined_df



### The code below contains a DataPreprocessor Class for getting data ready for use by our LSTM Model

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

class DataPreprocessor:
    def __init__(self, X_scaler, y_scaler, lag_features=['value', 'Close'], lags=5, target_col='Close', test_size=.25):
        self.lag_features = lag_features
        self.lags = lags
        self.target_col = target_col
        self.test_size = test_size
        self.X_scaler = X_scaler
        self.y_scaler = y_scaler

    def create_lagged_features(self, df):
        for feature in self.lag_features:
            for lag in range(1, self.lags + 1):
                df[f'{feature}_lag_{lag}'] = df[feature].shift(lag)
        df['target'] = df[self.target_col].shift(-1)
        df.dropna(inplace=True)
        return df

    def normalize_data(self, X_train, X_test, y_train, y_test):
        X_train_scaled = self.X_scaler.fit_transform(X_train)
        X_test_scaled = self.X_scaler.transform(X_test)
        
        y_train = y_train.values.reshape(-1, 1)
        y_train_scaled = self.y_scaler.fit_transform(y_train)
        y_test = y_test.values.reshape(-1, 1)
        y_test_scaled = self.y_scaler.transform(y_test)

        return X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled

    def split_train_test(self, data):
        lagged_df = self.create_lagged_features(data)
        X = lagged_df.drop(columns=['target', 'value_classification'])
        y = lagged_df['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size, shuffle=False)
        
        X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled = self.normalize_data(
            X_train, X_test, y_train, y_test
        )
        
        return X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, X_test, y_test

    def preprocess_data(self, data):
        return self.split_train_test(data)

    def inverse_transform_y(self, y_scaled):
        return self.y_scaler.inverse_transform(y_scaled)

### This is the Signal Generation function. Update the code here to implement a new strategy

In [None]:
import pandas as pd
import os
from keras.models import load_model
from sklearn.preprocessing import RobustScaler
import datetime

def generate_signal(test_features, predictions, model_path=None):

    # put your predictions vector back into the test features dataframe
    test_features['predictions'] = predictions

####################################################################################
#------------------------CREATE YOUR STRATEGY HERE---------------------------------#
####################################################################################

    # Initialize an empty list to store signals
    signals = []

    # Iterate through each row of the DataFrame
    for i in range(len(test_features)):
        close_lag_1 = test_features['Close_lag_1'].iloc[i]
        prediction = test_features['predictions'].iloc[i]
        
        # Define your buy and sell conditions here (modular and editable)
        if close_lag_1 < prediction:
            signal = 1  # Buy signal
        else:
            signal = -1  # Sell signal
        
        signals.append(signal)

    # Add the signals list as a new column 'signal' in the DataFrame
    test_features['Signal'] = signals

    # Get the current timestamp and format it
    current_timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

    # Define the full path for the new CSV file
    csv_path = os.path.join(f'{current_timestamp}_new_data_with_positions.csv')

    # Save new_data with positions
    test_features.to_csv(csv_path, index=True)

    return test_features


### This is the ModelEvaluator Class, used for evaluating our model's predictive capabilities on unseen data

In [4]:
from generate_signals import generate_signal
from plotting_utils import plot_predicted_actual, plot_residuals
from tensorflow.keras.models import load_model


class ModelEvaluator:
    def __init__(self, model, X_test, y_test, X_test_scaled, y_test_scaled, y_scaler, model_path=None):
        self.model = model
        self.model_path = model_path
        self.X_test = X_test
        self.y_test = y_test
        self.X_test_scaled = X_test_scaled
        self.y_test_scaled = y_test_scaled
        self.y_scaler = y_scaler
        self.predictions = None
        self.predictions_inversed = None
        self.y_test_inversed = None

        if not self.model and model_path:
            self.load_saved_model(model_path)    
    
    def load_saved_model(self, model_path):
        self.model = load_model(model_path)
        print(f'Model loaded from {model_path}')

    def evaluate_model(self):
        loss, mae = self.model.evaluate(self.X_test_scaled, self.y_test_scaled, verbose=2)
        error_in_dollars = self.y_test.mean() * mae
        print(f'Test Loss: {loss:.4f}')
        print(f'Test MAE: {mae:.2f}')
        print(f'MAE in dollars: +/- ${error_in_dollars:.2f}')

    def atr_to_data(self, window=30):
        self.X_test['ATR'] = self.calculate_atr()
        atr_total_test = self.X_test['ATR'].mean()
        atr_last_window = self.X_test['ATR'].iloc[-window:].mean()
        print(f"ATR for all test observations: ${atr_total_test:.2f}")
        print(f"ATR for last {window} observations: ${atr_last_window:.2f}")

    def calculate_atr(self, window=14):
        high_low = self.X_test['High'] - self.X_test['Low']
        high_close_prev = abs(self.X_test['High'] - self.X_test['Close'].shift(1))
        low_close_prev = abs(self.X_test['Low'] - self.X_test['Close'].shift(1))

        tr = high_low.to_frame(name='HL')
        tr['HC_prev'] = high_close_prev
        tr['LC_prev'] = low_close_prev

        true_range = tr.max(axis=1)

        atr = true_range.rolling(window=window, min_periods=1).mean()

        return atr
        
    def predict_model(self):
        self.predictions = self.model.predict(self.X_test_scaled)
        self.predictions_inversed = self.y_scaler.inverse_transform(self.predictions).flatten()
        self.y_test_inversed = self.y_scaler.inverse_transform(self.y_test_scaled).flatten()
        plot_predicted_actual(self.y_test_inversed, self.predictions_inversed)
        plot_residuals(self.y_test_inversed, self.predictions_inversed)

        return self.predictions_inversed

    def generate_model_signals(self):
        self.X_test = generate_signal(self.X_test_scaled, self.predictions_inversed)
        print(self.X_test)


### These are the functions for generating the visualizations we will see when our LSTMModel trains and tests

In [5]:
import os
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import tensorflow as tf
from sklearn.decomposition import PCA
from tensorflow.keras.utils import plot_model

def plot_loss_training_history(history):
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    
    save_plot("loss_training_history")

def plot_mae_training_history(history):
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['mean_absolute_error'], label='Training MAE')
    plt.plot(history.history['val_mean_absolute_error'], label='Validation MAE')
    plt.title('Model MAE Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('MAE')
    plt.legend()
    plt.grid(True)
    
    save_plot("MAE_training_history")

def plot_predicted_actual(actual, predicted):
    df = pd.DataFrame({'Actual': actual, 'Predicted': predicted})
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Actual', y='Predicted', data=df)
    plt.plot([min(actual), max(actual)], [min(actual), max(actual)], color='red', linestyle='--')
    plt.title('Predicted vs. Actual Values')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.grid(True)
    save_plot("Predicted_vs_Actual")

def plot_residuals(actual, predicted):
    residuals = [actual - predicted for actual, predicted in zip(actual, predicted)]
    df = pd.DataFrame({'Actual': actual, 'Predicted': predicted, 'Residuals': residuals})
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Predicted', y='Residuals', data=df)
    plt.axhline(y=0, color='red', linestyle='--')
    plt.title('Residuals Plot')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.grid(True)
    save_plot("Residuals")

def save_and_visualize_model(model, img_dir=None):
    timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    if img_dir is None:
        img_dir = os.path.dirname(os.path.abspath(__file__))
    os.makedirs(img_dir, exist_ok=True)
    img_path = os.path.join(img_dir, f"model_{timestamp}.png")
    plot_model(
        model,
        to_file=img_path,
        show_shapes=True,
        show_dtype=False,
        show_layer_names=True,
        rankdir="TB",
        expand_nested=False,
        dpi=200,
        show_layer_activations=True,
        show_trainable=True
    )
    print(f"Model visualization saved and displayed from {img_path}")
    # save_plot("Model_Arc")

def plot_PCA(X_scaled):
    # Apply PCA
    pca = PCA(n_components=2)  # Reduce to 2 dimensions
    X_pca = pca.fit_transform(X_scaled)

    # Plot PCA results
    plt.figure(figsize=(10, 8))
    plt.scatter(X_pca[:, 0], X_pca[:, 1])
    plt.title('PCA Visualization')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.grid(True)
    # plt.show()

    save_plot("PCA_graph")


def save_plot(plot_name):
    current_timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    plt.savefig(f'{plot_name}_{current_timestamp}.png')
    plt.close()
    print(f"{plot_name} plot saved as '{plot_name}_{current_timestamp}.png'")


### This is the Backtesting code so we can test the efficacy of our predictions and strategy

In [6]:
import pandas as pd
from datetime import datetime
!pip install backtesting
from backtesting import Backtest, Strategy
    
class SignalStrategy(Strategy):
    def init(self):
        self.signal = self.data.Signal

    def next(self):
        current_signal = self.data.Signal[-1]
        current_date = self.data.index[-1]
        # print(f"Date: {current_date}, Current position size: {self.position.size}, Signal: {current_signal}, Position: {self.position.is_long}")
        
        if current_signal == 1:
            # print("Executing BUY order")
            self.buy(size=1)
        elif current_signal == -1 and self.position.is_long:
            # print("Attempting to SELL entire position")
            try:
                self.position.close()  # This closes the entire position
                # print("SELL order executed - entire position closed")
            except Exception as e:
                print(f"Error executing SELL order: {e}")
        elif current_signal == 0:
            # print("No trade executed")
            pass
    
        
        # print(f"Current position size: {self.position.size}")


def run_backtest(data_path=None, data=None, plot=True, cash=1_000_000, commission=0.002, trade_on_close=True):
    if data_path:
        # Load and preprocess the data from the specified path
        dataframe = pd.read_csv(data_path, index_col='Date', parse_dates=True)
        dataframe = dataframe.sort_index()
        dataframe = dataframe.dropna()
        dataframe = dataframe.drop_duplicates()
        dataframe.columns = [column.capitalize() for column in dataframe.columns]
    elif data is not None:
        # Use self.data if called from LSTMModel instance
        dataframe = data  # Assuming `self.data` is defined in LSTMModel
    
    # Initialize and run the backtest
    bt = Backtest(dataframe, SignalStrategy, cash=cash, commission=commission, trade_on_close=trade_on_close)
    stats = bt.run()

    # Print the statistics and plot the backtest results
    print(stats)

    current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    stats_output_file = f'backtest_stats_{current_time}.txt'

    # Save the statistics to a text file if stats_output_file is provided
    with open(stats_output_file, 'w') as f:
        f.write(str(stats))

    if plot == True:
        bt.plot()
    else:
        pass

### The Main Brain of the code, the LSTMModel Class. This is where your entire model will use all the modules above to create, train and test an LSTM Model

In [8]:
from datetime import datetime
import pandas as pd
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Input, Dropout
from tensorflow.keras.optimizers import Adam
from plotting_utils import *
from fetch_data import fetch_fear_and_greed_btc
from generate_signals import generate_signal
from backtester_utils import *
from DataPreprocessor import DataPreprocessor
from ModelEvaluator import ModelEvaluator


class LSTMModel:
    def __init__(self, model_path=None, data_path=None, lags=5, test_size=.25, learning_rate = 0.001, epochs=50, batch_size=32, validation_split=0.2, plot=True):
        self.model = None
        self.model_path = model_path
        self.history = None
        self.data_path = data_path
        self.lag_features = ['value', 'Close'] # change these if you want to calculate lags on different feature columns
        self.target_col = 'Close' # change this if you want to target a different variable than Close
        self.X_scaler = RobustScaler()
        self.y_scaler = RobustScaler()
        self.preprocessor = DataPreprocessor(self.X_scaler, self.y_scaler, self.lag_features, lags, self.target_col, test_size)
        self.current_timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        self.learning_rate = learning_rate 
        self.loss = 'mean_squared_error' # change this if you're not going to solve for a regression target
        self.metrics = ['mean_absolute_error']  # change this if you're not going to solve for a regression target
        self.epochs = epochs
        self.batch_size = batch_size
        self.validation_split = validation_split

        self.plot = plot # set plot to false when instantiating if you dont want the backtest graph

        if model_path:
            self.load_saved_model(model_path)
    
    def load_saved_model(self, model_path):
        self.model = load_model(model_path)
        print(f'Model loaded from {model_path}')

    # Loads Data from a User-fed CSV Path, if CSV passed
    def load_data(self):
        if self.data_path is None:
            print("No data path preloaded. Downloading Fear and Greed and BTC data...")
            self.data = fetch_fear_and_greed_btc()
        else:
            print("Data path preloaded. saving csv to dataframe...")
            self.data = pd.read_csv(self.data_path, parse_dates=True, index_col='timestamp') 

    def preprocess_data(self):
        (
            self.X_train_scaled,
            self.X_test_scaled,
            self.y_train_scaled,
            self.y_test_scaled,
            self.X_test,
            self.y_test
        ) = self.preprocessor.preprocess_data(self.data)

    def reshape_for_lstm(self):
        # Reshape from (samples, features) to (samples, 1, features)
        self.X_train_scaled = self.X_train_scaled.reshape((self.X_train_scaled.shape[0], 1, self.X_train_scaled.shape[1])) 
        self.X_test_scaled = self.X_test_scaled.reshape((self.X_test_scaled.shape[0], 1, self.X_test_scaled.shape[1])) 

    def build_model_lstm(self):
        self.reshape_for_lstm()
        timesteps = self.X_train_scaled.shape[1] 
        features = self.X_train_scaled.shape[2] 

        model = Sequential()
        model.add(Input(shape=(timesteps, features)))
        model.add(LSTM(150, return_sequences=False))
        model.add(Dropout(0.50)) # Dropout Regularization
        model.add(Dense(20, activation='relu'))
        model.add(Dense(1))  # No activation for regression
        model.compile(optimizer=Adam(learning_rate=self.learning_rate), loss=self.loss, metrics=self.metrics)
        model.summary()
        self.model = model
        save_and_visualize_model(self.model)

    def train_model(self):
        self.history = self.model.fit(
            self.X_train_scaled, 
            self.y_train_scaled, 
            epochs = self.epochs, 
            batch_size = self.batch_size, 
            validation_split = self.validation_split, 
            verbose=1
        )

    def plot_training_history(self):
        plot_loss_training_history(self.history)
        plot_mae_training_history(self.history)
    
    def evaluate_model(self):
        self.evaluator = ModelEvaluator(self.model, self.X_test, self.y_test, self.X_test_scaled, self.y_test_scaled, self.y_scaler)
        self.evaluator.evaluate_model()
        self.evaluator.atr_to_data()
    
    def predict_model(self):
        self.predictions_inversed = self.evaluator.predict_model()

    def save_model(self):  
        self.model_path = f'{self.current_timestamp}_LSTM_model_epochs_{self.epochs}.keras'
        self.model.save(self.model_path)
        print("Model saved successfully.")

    def generate_model_signals(self):
        self.X_test = generate_signal(self.X_test, self.predictions_inversed)
        # print(self.X_test)

    def backtest_signals(self):
        run_backtest(data=self.X_test, plot=self.plot)

    def run_and_train(self):
        self.load_data()
        self.preprocess_data()
        self.build_model_lstm()
        self.train_model()
        self.plot_training_history()
        self.evaluate_model()
        self.predict_model()
        self.save_model()
        self.generate_model_signals()
        self.backtest_signals()

    def run_with_pretrained(self):
        self.load_data()
        self.preprocess_data()
        self.reshape_for_lstm()
        self.evaluate_model()
        self.predict_model()
        self.generate_model_signals()
        self.backtest_signals()


model = LSTMModel(test_size=0.25, 
                  learning_rate=0.001, 
                  epochs=50, 
                  batch_size=32, 
                  validation_split=0.25, 
                  plot=True)

model.run_and_train()



No data path preloaded. Downloading Fear and Greed and BTC data...
Fear and Greed Index data has been saved to 'fear_and_greed_index.csv'.


[*********************100%%**********************]  1 of 1 completed

Combined data has been saved to 'fear_greed_btc_combined.csv'.





You must install pydot (`pip install pydot`) for `plot_model` to work.
Model visualization saved and displayed from /Users/joshbazz/Desktop/Bootcamp/fear-greed-lstm/model_2024_07_09_15_05_04.png
Epoch 1/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1558 - mean_absolute_error: 0.2314 - val_loss: 0.0077 - val_mean_absolute_error: 0.0651
Epoch 2/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0102 - mean_absolute_error: 0.0667 - val_loss: 0.0071 - val_mean_absolute_error: 0.0611
Epoch 3/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0078 - mean_absolute_error: 0.0539 - val_loss: 0.0043 - val_mean_absolute_error: 0.0498
Epoch 4/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0075 - mean_absolute_error: 0.0521 - val_loss: 0.0057 - val_mean_absolute_error: 0.0590
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1m

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  df2 = (df.assign(_width=1).set_index('datetime')
  fig = gridplot(
  fig = gridplot(


### This is the optimizer code. Run this to fine tune your model's hyper parameters and get better predicitve capabilities

In [10]:
!pip install optuna
import optuna
import pandas as pd
from keras.backend import clear_session
from keras.layers import Input, LSTM, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from fetch_data import fetch_fear_and_greed_btc
from DataPreprocessor import DataPreprocessor
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler


data = fetch_fear_and_greed_btc()
X_scaler = RobustScaler()
y_scaler = RobustScaler()
preprocessor = DataPreprocessor(X_scaler, y_scaler)

BATCHSIZE = 64
VALIDATION_SPLIT = 0.25
CLASSES = 10
EPOCHS = 100

def objective(trial):
    # Clear clutter from previous Keras session graphs.
    clear_session()

    X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, _, _ = preprocessor.preprocess_data(data)

    # Print shapes for debugging
    print(f"X_train_scaled shape: {X_train_scaled.shape}")
    print(f"X_test_scaled shape: {X_test_scaled.shape}")

    X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1])) 
    X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

    timesteps = X_train_scaled.shape[1] 
    features = X_train_scaled.shape[2]
    
    model = Sequential()
    model.add(Input(shape=(timesteps, features)))
    model.add(LSTM(units=trial.suggest_int('LSTM Neurons_0', 10, 100), return_sequences=True))
    model.add(Dropout(trial.suggest_float('Dropout Rate_0', .0001, .50)))
    # model.add(LSTM(units=trial.suggest_int('LSTM Neurons_1', 10, 1000), return_sequences=False))
    # model.add(Dropout(trial.suggest_float('Dropout Rate_1 ', .0001, .50)))
    model.add(Dense(trial.suggest_int('Dense Neurons', 1, 50), activation='relu'))
    model.add(Dense(1))  # No activation for regression

    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    model.compile(
        loss='mean_squared_error',
        optimizer=Adam(learning_rate=learning_rate),
        metrics=['mean_absolute_error']
    )
    
    model.fit(
        X_train_scaled,
        y_train_scaled,
        # validation_data=(X_test_scaled, y_test_scaled),
        shuffle=False,
        epochs=EPOCHS,
        batch_size=BATCHSIZE,
        validation_split=VALIDATION_SPLIT,
        verbose=False,
    )

    # Evaluate the model accuracy on the validation set.
    score = model.evaluate(X_test_scaled, y_test_scaled, verbose=0)
    return score[1]


if __name__ == "__main__":
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=10, timeout=100_000)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

  from .autonotebook import tqdm as notebook_tqdm
[*********************100%%**********************]  1 of 1 completed
[I 2024-07-09 15:06:22,736] A new study created in memory with name: no-name-8071a92b-94bc-4536-a87b-a0e5dd688343


Fear and Greed Index data has been saved to 'fear_and_greed_index.csv'.
Combined data has been saved to 'fear_greed_btc_combined.csv'.
X_train_scaled shape: (1755, 17)
X_test_scaled shape: (586, 17)


[I 2024-07-09 15:06:27,315] Trial 0 finished with value: 0.2169150412082672 and parameters: {'LSTM Neurons_0': 48, 'Dropout Rate_0': 0.31450434557263884, 'Dense Neurons': 3, 'learning_rate': 0.0058929355495651}. Best is trial 0 with value: 0.2169150412082672.


X_train_scaled shape: (1751, 17)
X_test_scaled shape: (584, 17)


[I 2024-07-09 15:06:31,155] Trial 1 finished with value: 0.6041640043258667 and parameters: {'LSTM Neurons_0': 18, 'Dropout Rate_0': 0.17374010260975647, 'Dense Neurons': 17, 'learning_rate': 0.04065530598174142}. Best is trial 0 with value: 0.2169150412082672.


X_train_scaled shape: (1746, 17)
X_test_scaled shape: (583, 17)


[I 2024-07-09 15:06:35,962] Trial 2 finished with value: 0.15657788515090942 and parameters: {'LSTM Neurons_0': 89, 'Dropout Rate_0': 0.019938702566134076, 'Dense Neurons': 18, 'learning_rate': 0.0036280585091146107}. Best is trial 2 with value: 0.15657788515090942.


X_train_scaled shape: (1742, 17)
X_test_scaled shape: (581, 17)


[I 2024-07-09 15:06:40,735] Trial 3 finished with value: 0.3412560820579529 and parameters: {'LSTM Neurons_0': 80, 'Dropout Rate_0': 0.40862339197291453, 'Dense Neurons': 46, 'learning_rate': 0.01112844695793177}. Best is trial 2 with value: 0.15657788515090942.


X_train_scaled shape: (1737, 17)
X_test_scaled shape: (580, 17)


[I 2024-07-09 15:06:45,251] Trial 4 finished with value: 0.06925120204687119 and parameters: {'LSTM Neurons_0': 68, 'Dropout Rate_0': 0.4922541129858857, 'Dense Neurons': 4, 'learning_rate': 0.00045031943945022005}. Best is trial 4 with value: 0.06925120204687119.


X_train_scaled shape: (1733, 17)
X_test_scaled shape: (578, 17)


[I 2024-07-09 15:06:49,393] Trial 5 finished with value: 0.08490287512540817 and parameters: {'LSTM Neurons_0': 55, 'Dropout Rate_0': 0.03368772365666216, 'Dense Neurons': 5, 'learning_rate': 0.001711552612433156}. Best is trial 4 with value: 0.06925120204687119.


X_train_scaled shape: (1728, 17)
X_test_scaled shape: (577, 17)


[I 2024-07-09 15:06:53,263] Trial 6 finished with value: 0.0703185647726059 and parameters: {'LSTM Neurons_0': 21, 'Dropout Rate_0': 0.008733710203582785, 'Dense Neurons': 38, 'learning_rate': 0.00020994486102406156}. Best is trial 4 with value: 0.06925120204687119.


X_train_scaled shape: (1724, 17)
X_test_scaled shape: (575, 17)


[I 2024-07-09 15:06:56,863] Trial 7 finished with value: 0.4097180962562561 and parameters: {'LSTM Neurons_0': 15, 'Dropout Rate_0': 0.31472047672445497, 'Dense Neurons': 12, 'learning_rate': 0.027112566864195848}. Best is trial 4 with value: 0.06925120204687119.


X_train_scaled shape: (1719, 17)
X_test_scaled shape: (574, 17)


[I 2024-07-09 15:07:01,044] Trial 8 finished with value: 0.45466476678848267 and parameters: {'LSTM Neurons_0': 57, 'Dropout Rate_0': 0.29107903603258334, 'Dense Neurons': 47, 'learning_rate': 0.027726888159204294}. Best is trial 4 with value: 0.06925120204687119.


X_train_scaled shape: (1715, 17)
X_test_scaled shape: (572, 17)


[I 2024-07-09 15:07:05,653] Trial 9 finished with value: 0.09934979677200317 and parameters: {'LSTM Neurons_0': 68, 'Dropout Rate_0': 0.08680751374139938, 'Dense Neurons': 50, 'learning_rate': 0.0010866993731554748}. Best is trial 4 with value: 0.06925120204687119.


Number of finished trials: 10
Best trial:
  Value: 0.06925120204687119
  Params: 
    LSTM Neurons_0: 68
    Dropout Rate_0: 0.4922541129858857
    Dense Neurons: 4
    learning_rate: 0.00045031943945022005


### Below is another instance of the LSTMModel Class, using example optimization parameters above. You can change this as needed

In [11]:
from datetime import datetime
import pandas as pd
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Input, Dropout
from tensorflow.keras.optimizers import Adam
from plotting_utils import *
from fetch_data import fetch_fear_and_greed_btc
from generate_signals import generate_signal
from backtester_utils import *
from DataPreprocessor import DataPreprocessor
from ModelEvaluator import ModelEvaluator


class LSTMModel:
    def __init__(self, model_path=None, data_path=None, lags=5, test_size=.25, learning_rate = 0.001, epochs=50, batch_size=32, validation_split=0.2, plot=True):
        self.model = None
        self.model_path = model_path
        self.history = None
        self.data_path = data_path
        self.lag_features = ['value', 'Close'] # change these if you want to calculate lags on different feature columns
        self.target_col = 'Close' # change this if you want to target a different variable than Close
        self.X_scaler = RobustScaler()
        self.y_scaler = RobustScaler()
        self.preprocessor = DataPreprocessor(self.X_scaler, self.y_scaler, self.lag_features, lags, self.target_col, test_size)
        self.current_timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        self.learning_rate = learning_rate 
        self.loss = 'mean_squared_error' # change this if you're not going to solve for a regression target
        self.metrics = ['mean_absolute_error']  # change this if you're not going to solve for a regression target
        self.epochs = epochs
        self.batch_size = batch_size
        self.validation_split = validation_split

        self.plot = plot # set plot to false when instantiating if you dont want the backtest graph

        if model_path:
            self.load_saved_model(model_path)
    
    def load_saved_model(self, model_path):
        self.model = load_model(model_path)
        print(f'Model loaded from {model_path}')

    # Loads Data from a User-fed CSV Path, if CSV passed
    def load_data(self):
        if self.data_path is None:
            print("No data path preloaded. Downloading Fear and Greed and BTC data...")
            self.data = fetch_fear_and_greed_btc()
        else:
            print("Data path preloaded. saving csv to dataframe...")
            self.data = pd.read_csv(self.data_path, parse_dates=True, index_col='timestamp') 

    def preprocess_data(self):
        (
            self.X_train_scaled,
            self.X_test_scaled,
            self.y_train_scaled,
            self.y_test_scaled,
            self.X_test,
            self.y_test
        ) = self.preprocessor.preprocess_data(self.data)

    def reshape_for_lstm(self):
        # Reshape from (samples, features) to (samples, 1, features)
        self.X_train_scaled = self.X_train_scaled.reshape((self.X_train_scaled.shape[0], 1, self.X_train_scaled.shape[1])) 
        self.X_test_scaled = self.X_test_scaled.reshape((self.X_test_scaled.shape[0], 1, self.X_test_scaled.shape[1])) 

    def build_model_lstm(self):
        self.reshape_for_lstm()
        timesteps = self.X_train_scaled.shape[1] 
        features = self.X_train_scaled.shape[2] 

        model = Sequential()
        model.add(Input(shape=(timesteps, features)))
        model.add(LSTM(68, return_sequences=False))
        model.add(Dropout(0.4922541129858857)) # Dropout Regularization
        model.add(Dense(4, activation='relu'))
        model.add(Dense(1))  # No activation for regression
        model.compile(optimizer=Adam(learning_rate=self.learning_rate), loss=self.loss, metrics=self.metrics)
        model.summary()
        self.model = model
        save_and_visualize_model(self.model)

    def train_model(self):
        self.history = self.model.fit(
            self.X_train_scaled, 
            self.y_train_scaled, 
            epochs = self.epochs, 
            batch_size = self.batch_size, 
            validation_split = self.validation_split, 
            verbose=1
        )

    def plot_training_history(self):
        plot_loss_training_history(self.history)
        plot_mae_training_history(self.history)
    
    def evaluate_model(self):
        self.evaluator = ModelEvaluator(self.model, self.X_test, self.y_test, self.X_test_scaled, self.y_test_scaled, self.y_scaler)
        self.evaluator.evaluate_model()
        self.evaluator.atr_to_data()
    
    def predict_model(self):
        self.predictions_inversed = self.evaluator.predict_model()

    def save_model(self):  
        self.model_path = f'{self.current_timestamp}_LSTM_model_epochs_{self.epochs}.keras'
        self.model.save(self.model_path)
        print("Model saved successfully.")

    def generate_model_signals(self):
        self.X_test = generate_signal(self.X_test, self.predictions_inversed)

    def backtest_signals(self):
        run_backtest(data=self.X_test, plot=self.plot)

    def run_and_train(self):
        self.load_data()
        self.preprocess_data()
        self.build_model_lstm()
        self.train_model()
        self.plot_training_history()
        self.evaluate_model()
        self.predict_model()
        self.save_model()
        self.generate_model_signals()
        self.backtest_signals()

    def run_with_pretrained(self):
        self.load_data()
        self.preprocess_data()
        self.reshape_for_lstm()
        self.evaluate_model()
        self.predict_model()
        self.generate_model_signals()
        self.backtest_signals()


model = LSTMModel(test_size=0.25, 
                  learning_rate=0.00045031943945022005, 
                  epochs=100, 
                  batch_size=100, 
                  validation_split=0.25, 
                  plot=True)

model.run_and_train()

No data path preloaded. Downloading Fear and Greed and BTC data...


[*********************100%%**********************]  1 of 1 completed

Fear and Greed Index data has been saved to 'fear_and_greed_index.csv'.
Combined data has been saved to 'fear_greed_btc_combined.csv'.





You must install pydot (`pip install pydot`) for `plot_model` to work.
Model visualization saved and displayed from /Users/joshbazz/Desktop/Bootcamp/fear-greed-lstm/model_2024_07_09_15_10_11.png
Epoch 1/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.3113 - mean_absolute_error: 0.3287 - val_loss: 0.9695 - val_mean_absolute_error: 0.8648
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2606 - mean_absolute_error: 0.3084 - val_loss: 0.6299 - val_mean_absolute_error: 0.7087
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1698 - mean_absolute_error: 0.2768 - val_loss: 0.3618 - val_mean_absolute_error: 0.5441
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1001 - mean_absolute_error: 0.2304 - val_loss: 0.1921 - val_mean_absolute_error: 0.3993
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  df2 = (df.assign(_width=1).set_index('datetime')
  fig = gridplot(
  fig = gridplot(
