In [None]:
import torch
import torch.nn as nn
import torchvision.models
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import optuna
import math
from math import sqrt
import numpy as np
import pandas as pd
import yfinance as yf
import pandas as pd
import random
import matplotlib.pyplot as plt
import os
import time
import ta
#KMPFXIZR8T9GDJ1I Alpha Vantage key

# How we can adjust the model

#1 we can actually tune the learning rate
#2 we can tune L1 regularization
#2 We can tune L2 Regularization
#3 batch size we can tune
#4 we can change epoch run time
#5 we can validate the model via data from 2018 and on as planned

# TODO:
We're gonna need to make a class for the generation of the tensors, I want specific things, I need to randomize the datasets because it promotes diversity, I also need to ensure that the dataframe that is hosting the list
retains its values outside of the program thus I need some kind of a save object, we're gonna need to save the credentials inside a json file, the class will have to retain the object and be loadable and saveable when the model is saved
this way when we run a new epoch we can remake the object from its saved state and then continue training, once all the data is used a column inside the dataframe within this data object will be wiped and reset, this will continue forever, until
we choose to stop.

This object will be used in the tensor creation process and it will simply output the tensors we need to use for training and also create the data we need for those tensors prior to extracting numpy frame values, standardizing and normalizing them. 

In [None]:
# Check if CUDA is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Set pandas to display numbers without scientific notation
pd.set_option('display.float_format', lambda x: '%.6f' % x)

look_back = 60
look_forward = 30
standard_scaler = StandardScaler()
min_max_scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
# At the beginning of your notebook or training loop
if os.path.exists('C:\\Machine Learning\\SPY VIX 9 Input Output Model\\last_step.txt'):
    with open('C:\\Machine Learning\\SPY VIX 9 Input Output Model\\last_step.txt', 'r') as f:
        global_step = int(f.read())
else:
    global_step = 0

In [None]:
class TickerDataSystem(): #Simple system to create our list of data and return yFinance datasets
    def __init__(self, start_date = None, end_date = None):
        self.start_date = pd.to_datetime(start_date) #"1994-01-01"
        self.end_date = pd.to_datetime(end_date) #"2017-01-01"  This will specifically be used to cap off data for a specific criteria
        self.new_listing = None

    def modify_dates(self, df, date_column, start_date, end_date): #Specifically to drop specific dates before and after Vix start and end date
        # Convert date_column to datetime
        df[date_column] = pd.to_datetime(df[date_column])
        
        # Set dates earlier than start_date to start_date
        df.loc[df[date_column] < start_date, date_column] = start_date
        
        # Remove rows with dates later than end_date
        df = df[df[date_column] <= end_date]
        
        return df

    def CreateListingStatus(self):
        # Specify the path to your CSV file
        file_path = r'C:\Machine Learning\listing_status.csv' #from alpha vantage listing_status add api call later using BS4

        # Read the CSV file into a DataFrame
        listing_statusDF = pd.read_csv(file_path)

        # List of columns to drop
        columns_to_drop = ['name', 'exchange', 'assetType', 'delistingDate', 'status', '6']

        # Drop the columns
        new_ListingStatusDF = listing_statusDF.drop(columns=columns_to_drop, axis=1)

        # Drop the last 30 rows
        new_ListingStatusDF = new_ListingStatusDF.iloc[:-30]

        # Modify dates and filter rows based on the specified conditions
        new_ListingStatusDF = self.modify_dates(new_ListingStatusDF, 'ipoDate', self.start_date, self.end_date)
        
        self.new_listing = new_ListingStatusDF
        
    def Get_Lists(self):
        
        return self.new_listing

In [None]:
class TensorIntegrationSystem:  # This class will generate tensors on demand at random and retain their values as each creation 
    """
    This class is specifically for creating the tensors, adding the averages we want such as EMA and RSI to various components then finally outputting a tensor.
    """
    def __init__(self, startDate:str, endDate:str) -> None:
        self.endDate = endDate
        self.startDate = startDate
        self.ProgressTracker = ProgressTracker() #Create progress tracker object to parse data from CSV and retain the list to randomize insertion of data
        self.TickerDataSystem = TickerDataSystem(self.startDate, self.endDate) #This tool is specifically to create the date ranges we want from the listing_status CSV, eventually alpha vantage listings APi will be added
        self.FormattedCombinedDataFrame = None
        self.stockDataFrame = None
        self.VIXDataFrame = None
    
    def dataFetcher(self, symbol: str, startDate: str):
        """
        This will pull data from yF and test it if nothing is returned the subsequent method will test it and try again to create another ticker.
        It will also grab the split data and hold it inside a property.
        """
        #we gotta add a simple date modifier to change the enddate and add a year to it so we can pull more data as the IPORange is maxed at the self.endDate so we always have at least a year of data
        time.sleep(0.75)
        if symbol != "^VIX":
            data = yf.download(symbol, start=startDate, end=self.endDate, auto_adjust = True)
        else:
            data = yf.download(symbol, start=startDate, end=self.endDate)
            
        if data.shape == (0, 6):
            print("No Data")
            return None
        else:
            time.sleep(0.5)
            # Create a Ticker object for Pfizer
            tickerData = yf.Ticker(symbol)
            return pd.DataFrame(data)
        
    def CreateCombinedDataFrame(self):
        """
        This will create a random stock and VixData then create the formatted DataFrame and bind it to FormattedCombinedDataFrame.
        """
        max_attempts = 5
        for attempt in range(max_attempts):
            try:
                self.CreateCombinedData()
                break  # If successful, exit the loop
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                if attempt < max_attempts - 1:
                    time.sleep(1)  # Wait for 1 second before next attempt
                else:
                    raise Exception("Failed to create combined data frame after several attempts.")


    def CreateCombinedData(self):
        """
        This method will try to generate the data required for the CreateCombinedDataFrame
        """
        self.VIXDataFrame = None
        self.stockDataFrame = None
        self.CreateNewVixData()
        self.CreateNewStockData()
        Formatter = FormatDataFrame(self.VIXDataFrame, self.stockDataFrame)
        Formatter.CreateDataSet()
        self.FormattedCombinedDataFrame = Formatter.GetCombinedData()
    
    
    def CustomTickerCreator(self, ticker, tickerDate):
        """
        This method will create custom ticker data for process into the predict features.
        """
        self.VIXDataFrame = None
        self.stockDataFrame = self.dataFetcher(ticker, tickerDate)
        self.CreateNewVixData()
        Formatter = FormatDataFrame(self.VIXDataFrame, self.stockDataFrame)
        Formatter.CreateDataSet()
        self.FormattedCombinedDataFrame = Formatter.GetCombinedData()
        
    def CreateNewVixData(self):
        """
        This method is for creating VIX data and checking if it was created via the dataFetcher and Progress Tracker.
        """
        counter = 0
        while True:
            counter += 1
            if counter == 10:
                return
            vixDataFrame = self.dataFetcher("^VIX", "1994-01-01")
            if vixDataFrame is None:
                continue  # Try again in the next iteration
            else:
                self.VIXDataFrame = vixDataFrame
                return
        
    def CreateNewStockData(self):
        """
        This method is for creating stock data and checking if it was created via the dataFetcher and Progress Tracker.
        """
        counter = 0
        while True:
            ticker, tickerDate = self.ProgressTracker.random_next()
            counter += 1
            if counter == 10:
                return
            stockDataFrame = self.dataFetcher(ticker, tickerDate)
            print(f"This Ticker is {ticker}")
            if stockDataFrame is None:
                continue  # Try again in the next iteration
            else:
                self.stockDataFrame = stockDataFrame
                return
            
    def CreateTensors(self):
        """
        Creates tensors that get output as x and y tensors with the required lookback and lookforward setup as public variables outside of the object (for now)
        """
        # Create a new DataFrame with selected features
        features = self.FormattedCombinedDataFrame.values

        # Standardize data
        standardized_features = standard_scaler.fit_transform(features)

        # Normalize data
        scaled_features = min_max_scaler.fit_transform(standardized_features)

        # Prepare data for PyTorch (sequences and targets)
        num_features = scaled_features.shape[1]

        x_train, y_train = [], []
        for i in range(look_back, len(scaled_features) - look_forward + 1):
            x_train.append(scaled_features[i - look_back:i, :])  # Include all features from the past 60 days
            y_train.append(scaled_features[i:i + look_forward, :])  # Include all features for the next 30 days

        x_train, y_train = np.array(x_train), np.array(y_train)
        x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], num_features))

        # Convert to PyTorch tensors
        x_train_tensor = torch.tensor(x_train).float()
        y_train_tensor = torch.tensor(y_train).float()

        # If you're using a GPU, move tensors to GPU (assuming 'device' is defined)
        x_train_tensor = x_train_tensor.to(device)
        y_train_tensor = y_train_tensor.to(device)
        
        return x_train_tensor, y_train_tensor


In [14]:
test = TensorIntegrationSystem("1994-01-01","2024-01-01")

In [15]:
df = test.CustomTickerCreator("AAPL","1994-01-01")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
Data validation completed.


In [16]:
df = test.FormattedCombinedDataFrame

In [17]:
newdf = df.tail(120)

In [18]:
newdf = newdf.head(60)

In [19]:
test.CreateTensors()

(tensor([[[7.4243e-04, 7.5937e-04, 7.4167e-04,  ..., 7.3973e-02,
           9.4312e-01, 7.3163e-01],
          [7.5178e-04, 7.5009e-04, 7.2287e-04,  ..., 7.6712e-02,
           9.3907e-01, 7.3922e-01],
          [7.2372e-04, 7.3154e-04, 7.0406e-04,  ..., 7.9452e-02,
           9.3489e-01, 7.4674e-01],
          ...,
          [4.9374e-04, 5.1283e-04, 4.9166e-04,  ..., 3.0411e-01,
           3.2579e-01, 9.6868e-01],
          [5.1722e-04, 5.1283e-04, 4.8222e-04,  ..., 3.0685e-01,
           3.1775e-01, 9.6561e-01],
          [4.9374e-04, 4.9419e-04, 4.9166e-04,  ..., 3.0959e-01,
           3.0977e-01, 9.6241e-01]],
 
         [[7.5178e-04, 7.5009e-04, 7.2287e-04,  ..., 7.6712e-02,
           9.3907e-01, 7.3922e-01],
          [7.2372e-04, 7.3154e-04, 7.0406e-04,  ..., 7.9452e-02,
           9.3489e-01, 7.4674e-01],
          [7.2372e-04, 7.4081e-04, 6.9466e-04,  ..., 8.2192e-02,
           9.3058e-01, 7.5418e-01],
          ...,
          [5.1722e-04, 5.1283e-04, 4.8222e-04,  ..., 3.068

In [20]:
newdf

Unnamed: 0_level_0,Open_x,High_x,Low_x,Close_x,Volume,EMA20_Close_x,EMA40_Close_x,EMA60_Close_x,RSI20_Close_x,RSI40_Close_x,...,Close_y,EMA20_Close_y,EMA40_Close_y,EMA60_Close_y,RSI20_Close_y,RSI40_Close_y,RSI60_Close_y,shifted_day_of_year,cos_shifted_annual,sin_shifted_annual
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-07-12,189.174968,191.189594,187.968198,189.26474,60750200,186.967469,182.281242,177.955451,62.503099,62.598253,...,13.54,14.388288,15.115892,15.890125,43.573185,44.658958,45.457967,134,-0.670089,0.742281
2023-07-13,189.992798,190.680963,189.274713,190.032684,41342300,187.259395,182.659361,178.351425,63.539759,63.079535,...,13.61,14.314166,15.042434,15.815366,43.901797,44.795659,45.539996,135,-0.682758,0.730644
2023-07-14,189.723515,190.670982,189.125121,190.182297,41573900,187.537766,183.026333,178.739323,63.74531,63.174223,...,13.34,14.221388,14.959388,15.734207,42.887713,44.362148,45.272916,136,-0.695225,0.718792
2023-07-17,191.389049,193.802619,191.299292,193.473495,50520200,188.103074,183.535951,179.222411,67.931691,65.188552,...,13.48,14.150779,14.887223,15.660298,43.598794,44.647029,45.441638,139,-0.731378,0.681972
2023-07-18,192.83521,193.812596,191.907678,193.214188,48353800,188.589847,184.00806,179.681157,67.287306,64.901678,...,13.3,14.069753,14.809797,15.582912,42.87631,44.347594,45.259198,140,-0.743001,0.66929
2023-07-19,192.585876,197.702207,192.137062,194.580551,80507300,189.16039,184.523791,180.169662,68.922418,65.717028,...,13.76,14.040253,14.758588,15.523144,45.314081,45.309005,45.824467,141,-0.754404,0.656411
2023-07-20,194.570572,195.946903,191.987471,192.615799,59581200,189.489476,184.918523,180.577732,64.074717,63.540106,...,13.99,14.035467,14.721096,15.472877,46.51538,45.789263,46.107438,142,-0.765584,0.643337
2023-07-21,193.583223,194.450902,190.720854,191.42897,71917800,189.67419,185.236106,180.93351,61.33177,62.262298,...,13.6,13.993994,14.666408,15.411471,44.760358,45.100495,45.695861,143,-0.776537,0.630072
2023-07-24,192.895048,194.391054,191.738132,192.236801,45377800,189.918248,185.577604,181.30411,62.4825,62.784774,...,13.91,13.985994,14.62951,15.362243,46.450845,45.765579,46.084894,146,-0.808005,0.589176
2023-07-25,192.815256,193.922301,192.406344,193.104477,37283200,190.221699,185.944768,181.691007,63.703722,63.343849,...,13.86,13.973995,14.591973,15.312989,46.210746,45.674045,46.030805,147,-0.81802,0.57519


In [7]:
class FormatDataFrame():
    """
    This class is used to take in a dataFrame and add various different kinds of indicators as columns into the dataset in order to create additional data
    for the tensor inputs in the method chain.
    """
    def __init__(self, VIXDataFrame, TickerDataFrame):
        self.VIXDataFrame = VIXDataFrame
        self.TickerDataFrame = TickerDataFrame
        self.CombinedDataSet = None
        
    def GetCombinedData(self):
        """
        Returns the combined dataset after processing
        """
        return self.CombinedDataSet
        
    def CreateDataSet(self):
        """
        This method will execute all work requirements.
        """
        try:
            if self.TickerDataFrame is None or self.VIXDataFrame is None:
                raise TypeError("Missing DataFrames")

            #print("Transforming Ticker Data...")
            self.TransformTickerData()
            #print("Ticker Data Transformed.")

            #print("Transforming VIX Data...")
            self.TransformVIXData()
            #print("VIX Data Transformed.")

            #print("Creating Combined Data Set...")
            self.CreateCombinedDataSet()
            #print(f"Combined Data Set Created with shape: {self.CombinedDataSet.shape}")

            #print("Validating Data...")
            self.ValidateData()
            #print("Data Validated.")

            #print("Applying Fourier Transform...")
            self.applyFourierTransform()
            #print("Fourier Transform Applied.")

        except Exception as e:
            print(f"An error occurred: {e}")
            raise
        
    def ValidateData(self):
        """
        Validates the combined DataFrame for NaN values and checks date alignment.
        """
        if self.CombinedDataSet is None:
            raise ValueError("Combined DataSet is not available for validation.")

        # Check for NaN values
        nan_counts = self.CombinedDataSet.isna().sum()
        if nan_counts.sum() > 0:
            print("Warning: NaN values found in the following columns:")
            print(nan_counts[nan_counts > 0])

        # Check for date alignment
        if not self.CombinedDataSet.index.is_monotonic_increasing:
            raise ValueError("Date index is not in chronological order.")

        print("Data validation completed.")
        
    def CreateCombinedDataSet(self):
        # Ensure 'Date' is the index and perform the merge
        if self.TickerDataFrame.index.name != 'Date' or self.VIXDataFrame.index.name != 'Date':
            raise ValueError("Date must be the index for both DataFrames.")

        # Merge the DataFrames on the 'Date' index
        self.CombinedDataSet = pd.merge(self.TickerDataFrame, self.VIXDataFrame, left_index=True, right_index=True, how='inner')

        if self.CombinedDataSet.empty:
            raise ValueError("Merging resulted in an empty DataFrame. Check if the indices align correctly.")
        

    def TransformTickerData(self):
        """
        This method should take the dataframe for the Ticker, it'll check if it exists first and if it doesn't throw an error. Else
        it will add 20,40,60 EMA on close, and 20,40,60 EMA on volume, as well as RSI for both.
        """
        if self.TickerDataFrame is None:
            raise TypeError("Missing DataFrame")
            
        # Calculate EMAs for Close prices
        self.TickerDataFrame['EMA20_Close'] = ta.trend.ema_indicator(close=self.TickerDataFrame['Close'], window=20)
        self.TickerDataFrame['EMA40_Close'] = ta.trend.ema_indicator(close=self.TickerDataFrame['Close'], window=40)
        self.TickerDataFrame['EMA60_Close'] = ta.trend.ema_indicator(close=self.TickerDataFrame['Close'], window=60)

        # Calculate RSIs for Close prices
        self.TickerDataFrame['RSI20_Close'] = ta.momentum.rsi(close=self.TickerDataFrame['Close'], window=20)
        self.TickerDataFrame['RSI40_Close'] = ta.momentum.rsi(close=self.TickerDataFrame['Close'], window=40)
        self.TickerDataFrame['RSI60_Close'] = ta.momentum.rsi(close=self.TickerDataFrame['Close'], window=60)
        
        # Drop rows where any of the EMA or RSI columns contain NaN
        columns_to_check = ['EMA20_Close', 'EMA40_Close', 'EMA60_Close', 
                            'RSI20_Close', 'RSI40_Close', 'RSI60_Close']

        self.TickerDataFrame = self.TickerDataFrame.dropna(subset=columns_to_check)
        #self.TickerDataFrame = self.TickerDataFrame.drop(columns=['Adj Close'])
        
    def applyFourierTransform(self):
        """
        Applies a fourier transform on the date column in order to make it more consumable by the model.
        """
        
        # Calculate the shifted day of year for each date in the DataFrame
        self.CombinedDataSet['shifted_day_of_year'] = self.CombinedDataSet.index.map(self.shifted_day_of_year)

        # Define the number of days in this "year" - accounting for leap years
        # This is a simplification and could be further refined for exact leap year handling
        days_in_year = 365.25

        # Add Fourier features to df for the shifted year
        self.CombinedDataSet['cos_shifted_annual'] = np.cos(2 * np.pi * self.CombinedDataSet['shifted_day_of_year'] / days_in_year)
        self.CombinedDataSet['sin_shifted_annual'] = np.sin(2 * np.pi * self.CombinedDataSet['shifted_day_of_year'] / days_in_year)
        
    # Function to calculate the shifted day of year
    def shifted_day_of_year(self, date):
        """
        This will apply the transform specifically starting March 1st as the start of the new year.
        """
        # Set the start of the "new year" to March 1st
        new_year_start = pd.Timestamp(year=date.year, month=3, day=1)
        # Calculate the number of days from the start of the "new year"
        day_of_year = (date - new_year_start).days + 1
        # Handle dates before March 1st
        if date < new_year_start:
            # Shift to the previous year
            previous_year_start = pd.Timestamp(year=date.year-1, month=3, day=1)
            day_of_year = (date - previous_year_start).days + 1
        return day_of_year

    def TransformVIXData(self):
        """
        This will transform the vix data adding EMA and RSI to the closed price at specific ranges as well as applying a fourier transform to the date, starting March 1st as new year.
        """
        if self.VIXDataFrame is None:
            raise TypeError("Missing DataFrame")
        
        # Calculate EMAs for Close prices
        self.VIXDataFrame['EMA20_Close'] = ta.trend.ema_indicator(close=self.VIXDataFrame['Close'], window=20)
        self.VIXDataFrame['EMA40_Close'] = ta.trend.ema_indicator(close=self.VIXDataFrame['Close'], window=40)
        self.VIXDataFrame['EMA60_Close'] = ta.trend.ema_indicator(close=self.VIXDataFrame['Close'], window=60)

        # Calculate RSIs for Close prices
        self.VIXDataFrame['RSI20_Close'] = ta.momentum.rsi(close=self.VIXDataFrame['Close'], window=20)
        self.VIXDataFrame['RSI40_Close'] = ta.momentum.rsi(close=self.VIXDataFrame['Close'], window=40)
        self.VIXDataFrame['RSI60_Close'] = ta.momentum.rsi(close=self.VIXDataFrame['Close'], window=60)

        # Drop rows where any of the EMA or RSI columns contain NaN
        columns_to_check = ['EMA20_Close', 'EMA40_Close', 'EMA60_Close', 
                            'RSI20_Close', 'RSI40_Close', 'RSI60_Close']

        self.VIXDataFrame = self.VIXDataFrame.dropna(subset=columns_to_check)
        self.VIXDataFrame = self.VIXDataFrame.drop(columns=['Adj Close', 'Volume'])
        

        

In [6]:
class ProgressTracker:
    """
    This object is for retaining data in a CSV and randomly returning a ticker and IPO date so we can parse it from yFinance. 
    It also makes sure that nothing is repeated so we have a range of trading data.
    """
    PROGRESS_FILE = 'C:\\Machine Learning\\ProgressTracker.csv'

    def __init__(self):
        self.create_or_load_csv()
        self.load_progress()

    def create_or_load_csv(self):
        file_dir = os.path.dirname(ProgressTracker.PROGRESS_FILE)
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)

        if not os.path.isfile(ProgressTracker.PROGRESS_FILE):
            empty_df = pd.DataFrame({'symbol': [], 'ipoDate': [], 'Completed': []})
            empty_df.to_csv(ProgressTracker.PROGRESS_FILE, index=False)

    def load_dataframe(self, dataframe):
        if os.path.exists(ProgressTracker.PROGRESS_FILE):
            os.replace(ProgressTracker.PROGRESS_FILE, 'backup_' + ProgressTracker.PROGRESS_FILE)
        dataframe.to_csv(ProgressTracker.PROGRESS_FILE, index=False)

    def mark_completed(self, symbol):
        index = next((idx for idx, row in enumerate(self.symbols) if row[0] == symbol), None)
        if index is not None:
            self.completed[index] = True

    def mark_uncompleted(self, symbol):
        index = next((idx for idx, row in enumerate(self.symbols) if row[0] == symbol), None)
        if index is not None:
            self.completed[index] = False

    def random_next(self):
        available_symbols = [sym for sym, comp in zip(self.symbols, self.completed) if not comp]
        if not available_symbols:  # No available symbols
            self.clear_completed()  # Clear all completed flags
            available_symbols = [sym for sym, comp in zip(self.symbols, self.completed) if not comp]  # Recheck available symbols
        selected_symbol = random.choice(available_symbols)
        index = self.symbols.index(selected_symbol)
        self.completed[index] = True
        self.save_progress()
        return selected_symbol[0], selected_symbol[1]  # Return the symbol and its IPO date

    def clear_completed(self):
        self.completed = [False] * len(self.symbols)
        self.save_progress()

    def save_progress(self):
        progress_df = pd.DataFrame(self.symbols, columns=['symbol', 'ipoDate'])
        progress_df['Completed'] = self.completed
        progress_df.to_csv(ProgressTracker.PROGRESS_FILE, index=False)


    def load_progress(self):
        if os.path.exists(ProgressTracker.PROGRESS_FILE):
            ticker_data_frame = pd.read_csv(ProgressTracker.PROGRESS_FILE)
            self.symbols = ticker_data_frame[['symbol', 'ipoDate']].values.tolist()
            self.completed = ticker_data_frame['Completed'].astype(bool).tolist()
        else:
            self.symbols = []
            self.completed = []

    def get_symbols_df(self):
        if os.path.exists(ProgressTracker.PROGRESS_FILE):
            return pd.read_csv(ProgressTracker.PROGRESS_FILE)
        else:
            df = pd.DataFrame({'symbol': [], 'ipoDate': [], 'Completed': []})
            df.to_csv(ProgressTracker.PROGRESS_FILE, index=False)
            return df
        
        
# tracker = ProgressTracker()

# # Mark IBM as completed
# tracker.mark_completed('IBM')

# # Randomly select a symbol to process
# ticker_to_process = tracker.random_next()
# print(ticker_to_process)

# # Clear all completed items
# tracker.clear_completed()

# # Load progress from the CSV file
# tracker.load_progress()

# Understanding self.fc:

    What is self.fc?
        self.fc is a class attribute you're defining in the DCGANGenerator class.
        It stands for a "fully connected" layer or layers in neural network terminology. However, in your case, self.fc is not just a single fully connected layer but a sequential container (nn.Sequential) of several layers.

    Role of nn.Sequential:
        nn.Sequential is a PyTorch module that sequentially applies a list of modules. It is used for encapsulating a simple sequence of layers or operations, automatically handling the forward pass through these layers in the order they are added.
        In nn.Sequential, the output of one layer is automatically passed as input to the next.

    Understanding the Sequence of Methods:
        Linear Layer (nn.Linear): This is the first layer in the sequence. It's a fully connected layer that linearly transforms the input data (input_size) to a higher dimensional space (hidden_layer_size * 4 * 4 * 256). This expansion is typical in GAN generators to prepare for reshaping and further processing in subsequent layers.
        Batch Normalization Layer (nn.BatchNorm1d): Follows the linear layer. Batch normalization is used to stabilize and speed up training by normalizing the output of the previous layer. It can help mitigate issues like vanishing or exploding gradients.
        Activation Function (nn.ReLU): This is the Rectified Linear Unit activation function. It introduces non-linearity into the model, allowing it to learn more complex patterns. The ReLU function is defined as f(x) = max(0, x) and is applied element-wise.

In [None]:
class CustomAdvancedCNNBlock(nn.Module):
    def __init__(self, input_channels, intermediate_channels):
        super().__init__()
        self.model1 = models.resnet50(pretrained=False)
        self.model1.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.model1.norm1 = nn.Identity()
        self.model2 = models.densenet121(pretrained=False)
        self.model3 = models.inception_v3(pretrained=False, aux_logits=None, transform_inputs=False)
        self.model3.AuxLogits = None
        self.model4 = models.efficientnet_b7(pretrained=False)
        self.model4._blocks[0].conv = nn.Conv2d(input_channels, 32, kernel_size=3, stride=2, padding=1, bias=False)

        self.adapter_block = nn.Sequential(
            nn.Conv2d(intermediate_channels*4, intermediate_channels, kernel_size=1),
            nn.BatchNorm2d(intermediate_channels),
            nn.ELU()
        )

    def forward(self, x):
        x = self.model1(x)
        x = self.model2(x)
        x = self.model3(x)
        x = self.model4(x)
        x = torch.cat([x[0][-1], x[-1]], dim=1)
        x = self.adapter_block(x)

        return x

In [None]:
class MyTCNModule(nn.Module):
    def __init__(self, input_channels, nb_filters, kernel_size, dilations, dropout_p):
        super().__init__()
        self.tcn = tcn_module.TemporalConvolution(input_channels, nb_filters, kernel_size, dilations, dropout_p)

    def forward(self, x):
        return self.tcn(x)

In [8]:
class HybridLSTMGRUAttentionWithCNN(nn.Module): # THIS IS MY IN PRODUCTION MODEL
    def __init__(self, input_size: int, hidden_layer_size: int, num_layers: int, output_size: int, dropout_rate: float, num_heads: int, kernel_size: int):
        super().__init__()
        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.num_layers = num_layers
        
        # CNN layers
        self.cnn_layers = nn.Sequential(
            nn.Conv1d(in_channels=input_size, out_channels=hidden_layer_size // 2, kernel_size=kernel_size, padding=kernel_size // 2),
            nn.BatchNorm1d(hidden_layer_size // 2),
            nn.ELU()
        )
        self.cnn_output_size = hidden_layer_size // 2  # Output size of CNN layers

        # Define LSTM and GRU layers alternatively
        self.rnn_layers = nn.ModuleList()
        for i in range(num_layers):
            is_last_layer = i == num_layers - 1
            layer_dropout = 0 if is_last_layer else dropout_rate
            #input_dim = self.cnn_output_size if i == 0 else (hidden_layer_size if i % 2 == 0 else 2 * hidden_layer_size)
            input_dim = self.cnn_output_size if i == 0 else 2 * hidden_layer_size
            rnn_type = nn.LSTM if i % 2 == 0 else nn.GRU
            self.rnn_layers.append(rnn_type(input_dim, hidden_layer_size, batch_first=True, bidirectional=True, dropout=layer_dropout))

        # Multi-Head Attention layer
        self.multihead_attn = nn.MultiheadAttention(embed_dim=2 * hidden_layer_size, num_heads=num_heads, batch_first=True)

        # Attention layer
        self.attention = nn.Linear(2 * hidden_layer_size, 1)

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(2 * hidden_layer_size, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(1024, output_size)
        )

        # Manual dropout layers between RNN layers
        self.dropout_layers = nn.ModuleList([nn.Dropout(dropout_rate) for _ in range(num_layers - 1)])

        # Initialize weights
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.LSTM):
                for name, param in m.named_parameters():
                    if 'weight_ih' in name:
                        nn.init.xavier_uniform_(param.data)
                    elif 'weight_hh' in name:
                        nn.init.orthogonal_(param.data)
                    elif 'bias' in name:
                        param.data.fill_(0)
            elif isinstance(m, nn.GRU):
                for name, param in m.named_parameters():
                    if 'weight_ih' in name:
                        nn.init.xavier_uniform_(param.data)
                    elif 'weight_hh' in name:
                        nn.init.orthogonal_(param.data)
                    elif 'bias' in name:
                        param.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        #print("Pre-Permuation RNN Layer Shape:", x.shape)  # Print statement here
        # Apply CNN layers
        x = x.permute(0, 2, 1)  # Change axes order for CNN operation
        #print("Pre-First RNN Layer Shape:", x.shape)  # Print statement here
        x = self.cnn_layers(x)
        x = x.permute(0, 2, 1)  # Restore original axes order
        #print("Post-CNN Shape:", x.shape)  # Debug print statement

        # Process RNN layers
        for i, rnn in enumerate(self.rnn_layers):
            #print(f"Pre-RNN Layer {i} Shape:", x.shape)  # Print statement here
            #rnn.flatten_parameters()  # Flatten RNN parameters
            x, _ = rnn(x)
            #print(f"Post-RNN Layer {i} Shape:", x.shape)  # Debug print statement
            if i < self.num_layers - 1:  # Apply dropout manually between RNN layers, but not after the last layer
                x = self.dropout_layers[i](x)

        # Separate forward and backward outputs
        fw_outputs, bw_outputs = x.chunk(2, dim=2)

        # Concatenate forward and backward outputs
        outputs = torch.cat([fw_outputs, bw_outputs], dim=-1)

        # Apply Multi-Head Attention
        attn_output, _ = self.multihead_attn(outputs, outputs, outputs)
        #print("Post-Attention Shape:", attn_output.shape)
        
        # Apply attention
        attention_weights = F.softmax(self.attention(attn_output), dim=1)
        x = torch.sum(attn_output * attention_weights, dim=1)
        #print("Post-Weighted Attention Shape:", x.shape)
        
        # Fully connected layers
        outputs = self.fc(x)
        #print("Post-Fully Connected Shape:", outputs.shape)
        return outputs

#MODEL IN PRODUCTION

In [None]:
class HybridLSTMGRUAttentionWithCNN(nn.Module):
    def __init__(self, input_size: int, hidden_layer_size: int, num_layers: int, output_size: int, dropout_rate: float, num_heads: int, kernel_size: int):
        super().__init__()
        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.num_layers = num_layers
        
        # CNN layers
        self.cnn_layers = nn.Sequential(
            nn.Conv1d(in_channels=input_size, out_channels=hidden_layer_size // 2, kernel_size=kernel_size, padding=kernel_size // 2),
            nn.BatchNorm1d(hidden_layer_size // 2),
            nn.ELU()
        )
        self.cnn_output_size = hidden_layer_size // 2  # Output size of CNN layers

        self.my_tcn = MyTCNModule(input_channels=hidden_layer_size // 2, nb_filters=64, kernel_size=2, dilations=[1, 2, 4, 8, 16], dropout_p=0.2)

        # Define LSTM and GRU layers alternatively
        self.rnn_layers = nn.ModuleList()
        for i in range(num_layers):
            is_last_layer = i == num_layers - 1
            layer_dropout = 0 if is_last_layer else dropout_rate
            #input_dim = self.cnn_output_size if i == 0 else (hidden_layer_size if i % 2 == 0 else 2 * hidden_layer_size)
            input_dim = self.cnn_output_size if i == 0 else 2 * hidden_layer_size
            rnn_type = nn.LSTM if i % 2 == 0 else nn.GRU
            self.rnn_layers.append(rnn_type(input_dim, hidden_layer_size, batch_first=True, bidirectional=True, dropout=layer_dropout))

        # Multi-Head Attention layer
        self.multihead_attn = nn.MultiheadAttention(embed_dim=2 * hidden_layer_size, num_heads=num_heads, batch_first=True)

        # Attention layer
        self.attention = nn.Linear(2 * hidden_layer_size, 1)

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(2 * hidden_layer_size, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(1024, output_size)
        )

        # Manual dropout layers between RNN layers
        self.dropout_layers = nn.ModuleList([nn.Dropout(dropout_rate) for _ in range(num_layers - 1)])

        # Initialize weights
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.LSTM):
                for name, param in m.named_parameters():
                    if 'weight_ih' in name:
                        nn.init.xavier_uniform_(param.data)
                    elif 'weight_hh' in name:
                        nn.init.orthogonal_(param.data)
                    elif 'bias' in name:
                        param.data.fill_(0)
            elif isinstance(m, nn.GRU):
                for name, param in m.named_parameters():
                    if 'weight_ih' in name:
                        nn.init.xavier_uniform_(param.data)
                    elif 'weight_hh' in name:
                        nn.init.orthogonal_(param.data)
                    elif 'bias' in name:
                        param.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        #print("Pre-Permuation RNN Layer Shape:", x.shape)  # Print statement here
        # Apply CNN layers
        x = x.permute(0, 2, 1)  # Change axes order for CNN operation
        #print("Pre-First RNN Layer Shape:", x.shape)  # Print statement here
        x = self.cnn_layers(x)
        x = x.permute(0, 2, 1)  # Restore original axes order
        #print("Post-CNN Shape:", x.shape)  # Debug print statement
        
        # Pass the CNN output through the TCN module
        x = self.my_tcn(x)
        # Process RNN layers
        for i, rnn in enumerate(self.rnn_layers):
            #print(f"Pre-RNN Layer {i} Shape:", x.shape)  # Print statement here
            #rnn.flatten_parameters()  # Flatten RNN parameters
            x, _ = rnn(x)
            #print(f"Post-RNN Layer {i} Shape:", x.shape)  # Debug print statement
            if i < self.num_layers - 1:  # Apply dropout manually between RNN layers, but not after the last layer
                x = self.dropout_layers[i](x)

        # Separate forward and backward outputs
        fw_outputs, bw_outputs = x.chunk(2, dim=2)

        # Concatenate forward and backward outputs
        outputs = torch.cat([fw_outputs, bw_outputs], dim=-1)

        # Apply Multi-Head Attention
        attn_output, _ = self.multihead_attn(outputs, outputs, outputs)
        #print("Post-Attention Shape:", attn_output.shape)
        
        # Apply attention
        attention_weights = F.softmax(self.attention(attn_output), dim=1)
        x = torch.sum(attn_output * attention_weights, dim=1)
        #print("Post-Weighted Attention Shape:", x.shape)
        
        # Fully connected layers
        outputs = self.fc(x)
        #print("Post-Fully Connected Shape:", outputs.shape)
        return outputs


In [9]:
# Assuming you have already defined your AdvancedLSTM class with appropriate changes

# Define the number of features and prediction length
num_features = 24  # Number of features in the dataset
prediction_length = 30  # Number of days you want to predict

# Instantiate the model
model = HybridLSTMGRUAttentionWithCNN(input_size=24, 
                     hidden_layer_size=600, 
                     num_layers=26, 
                     output_size=prediction_length * num_features,
                     dropout_rate= 0.0,
                     num_heads= 8, kernel_size= 8).to(device)  # output_size is 30 days * 9 features

# Weight Decay L2 Reg

The weight_decay parameter in neural network training, which applies L2 regularization, has a significant impact on how the model learns. Here's how changing it affects the training process:

    Increasing Weight Decay:
        This means you're increasing the strength of L2 regularization.
        It penalizes larger weights more heavily, encouraging the model to keep the weights smaller and more uniform.
        This can help prevent overfitting, especially in complex models or when you have limited training data, as it encourages the model to be simpler.
        However, if the weight decay is set too high, it might lead to underfitting, where the model becomes overly simple and fails to capture the underlying patterns in the data.

    Decreasing Weight Decay:
        Lowering the weight decay reduces the strength of L2 regularization.
        It allows the model to have larger weights, which can be beneficial if the model is too simple and underfitting.
        This gives the model more flexibility to learn from the data, which can be useful if your model is not capturing complex patterns in the training data.
        On the flip side, too little regularization might lead to overfitting, where the model learns the noise in the training data rather than the actual signal.

In summary, adjusting the weight decay is a balancing act:

    Increase it if your model is overfitting (i.e., it performs well on training data but poorly on validation/test data).
    Decrease it if your model is underfitting (i.e., it's not performing well even on the training data).

Finding the right level of regularization is crucial for training effective neural network models. It often requires some experimentation and tuning, alongside monitoring the model’s performance on both training and validation datasets.

# Learning Rate

Adjusting the learning rate, which is a key hyperparameter in neural network training, significantly impacts the learning dynamics. Here's how changing it affects the training process:

    Increasing the Learning Rate:
        Faster Learning: A higher learning rate can speed up the learning process. The model's weights are updated more significantly with each iteration, potentially leading to faster convergence.
        Risk of Instability: However, a too high learning rate can cause the training to become unstable. The model might overshoot the optimal points in the loss landscape, leading to erratic behavior in loss and other metrics.
        Potential to Skip Optimal Solutions: If the learning rate is excessively high, the optimizer might skip over minima, failing to converge to the best solution.

    Decreasing the Learning Rate:
        More Stable, Gradual Learning: A lower learning rate results in smaller, more precise updates to the weights. This can lead to more stable convergence and finer adjustments to the model parameters.
        Risk of Slow Convergence: While stability is increased, the downside is that learning might be slower. The model might take a longer time to converge to an optimal solution.
        Potential for Getting Stuck in Local Minima: With a very low learning rate, there's a risk that the model might get stuck in local minima or take an excessively long time to escape flat regions in the loss landscape.

In summary, the learning rate needs to be carefully chosen:

    Increase it if the training process is too slow, and there are no signs of convergence.
    Decrease it if the training process is unstable, with erratic loss values or failure to improve the loss over epochs.

Typically, it's common to start with a higher learning rate and then reduce it as training progresses (using learning rate schedulers). This approach allows for rapid learning initially and more refined adjustments later in the training. Experimenting with different learning rates and monitoring the training/validation performance is essential to find the best setting for your specific model and dataset.

# L1 and L2 Regularization techniques

L1 and L2 regularization are powerful techniques to prevent overfitting in machine learning models, including neural networks. Here's how to discern when and how to manipulate these regularization parameters:
L1 Regularization (Lasso):

    Characteristics: L1 regularization adds a penalty equal to the absolute value of the magnitude of coefficients. It can lead to sparse models where some weights become exactly zero, effectively performing feature selection.
    When to Use:
        If you suspect or know that only a subset of features is important, L1 can help in identifying them.
        Useful in models with high dimensionality (more features than samples).
    Adjustment Strategy:
        Increase the L1 penalty (lambda) if the model is overfitting (i.e., performing well on training data but poorly on validation data).
        Decrease if the model is underfitting or too many features are being zeroed out, causing loss of important information.

L2 Regularization (Ridge):

    Characteristics: L2 regularization adds a penalty equal to the square of the magnitude of coefficients. It doesn’t encourage sparse models but distributes the error among all the features.
    When to Use:
        Generally useful in most scenarios where overfitting is a concern.
        Particularly effective in neural networks to prevent weights from becoming too large, leading to overfitting.
    Adjustment Strategy:
        Increase the L2 penalty (weight decay) if the model is overfitting.
        Decrease if the model is underfitting or if training doesn’t converge.

Practical Tips for Manipulating L1/L2:

    Start with L2: L2 regularization is usually the first go-to method as it's less aggressive in terms of feature reduction and generally improves generalization.
    Combining L1 and L2: Elastic Net regularization combines both L1 and L2 penalties. It can be a good middle ground if you’re unsure which to choose.
    Scale of Regularization Parameters: Start with smaller values (e.g., 0.01, 0.001) and increase gradually. Very high values can overly constrain the model, leading to underfitting.
    Monitoring Performance: Regularly monitor training and validation loss. If validation loss decreases but training loss increases, it could indicate over-regularization.
    Automated Hyperparameter Tuning: Consider using techniques like grid search, random search, or Bayesian optimization for systematic tuning of these parameters.
    Regularization and Learning Rate: When adjusting regularization parameters, keep an eye on the learning rate. A higher learning rate may require stronger regularization to combat overfitting.

Conclusion:

The decision to adjust L1 or L2 regularization depends largely on the specific characteristics of your problem and dataset. Regularization is a balancing act – too little might lead to overfitting, too much to underfitting. Careful experimentation and monitoring are key to finding the right levels of L1 and L2 regularization for your model.

# L1 REG more details

L1 regularization, also known as Lasso regularization, encourages sparsity in the parameter weights of the model. This means it not only penalizes the magnitude of the weights to prevent overfitting but also can drive some of those weights to zero, effectively "unlearning" or removing some features from the model.

Here's how it works:

    The L1 penalty is the sum of the absolute values of the weights.
    When applied during training, it can cause some weights to shrink to zero, especially for less important or redundant features.
    This can be particularly useful when you have a high-dimensional dataset with many features that may not all be relevant to the prediction task.

The consequence of this "unlearning" is that L1 regularization can be seen as a form of automatic feature selection. By driving certain weights to zero, the model ends up using fewer features, which can make the model simpler, faster, and more interpretable. However, this also means that L1 regularization can lead to a biased model if the penalty is too strong, as it might remove features that are actually important.

The key with L1 (and all forms of regularization) is to find the right balance between fitting the training data and maintaining a model that can generalize well to unseen data. This balance is usually found through hyperparameter tuning.

In [None]:
load_model(r'C:\Machine Learning\SPY VIX 9 Input Output Model\Saved\MegaNumMuncher2_Checkpoint-current.pth')

In [10]:
def AddDataToTensorboard(avg_tpb, avg_loss, avg_mse, avg_mae, avg_rmse, avg_r2, model, global_step):
    """
    This method will populate the tensorboard with variables we want to watch.
    :param writer: The SummaryWriter object from TensorBoard.
    :param avg_tpb: Average time per batch.
    :param avg_loss: Average loss.
    :param avg_mse: Average mean squared error.
    :param avg_mae: Average mean absolute error.
    :param avg_rmse: Average root mean squared error.
    :param avg_r2: Average R-squared value.
    :param model: The PyTorch model from which to log parameters and gradients.
    :param global_step: The global step value to track progress in the logs.
    """
    writer.add_scalar('Average Time Per Batch', avg_tpb, global_step)
    writer.add_scalar('Average Loss', avg_loss, global_step)
    writer.add_scalar('Average MSE', avg_mse, global_step)
    writer.add_scalar('Average MAE', avg_mae, global_step)
    writer.add_scalar('Average RMSE', avg_rmse, global_step)
    writer.add_scalar('Average R2', avg_r2, global_step)
    
    # Make sure the model's parameters have gradients before trying to log them
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        if value.grad is not None:
            writer.add_histogram(tag, value.data.cpu().numpy(), global_step)
            writer.add_histogram(tag+'/grad', value.grad.data.cpu().numpy(), global_step)

# Function to calculate and log feature metrics
def calculate_and_log_feature_metrics(y_true, y_pred, feature_names, global_step):
    """
    Calculate and log metrics for individual features.
    """
    for i, feature_name in enumerate(feature_names):
        feature_true = y_true[:, i]
        feature_pred = y_pred[:, i]
        
        mse = mean_squared_error(feature_true, feature_pred)
        mae = mean_absolute_error(feature_true, feature_pred)
        r2 = r2_score(feature_true, feature_pred)
        rmse = np.sqrt(mse)  # Calculate RMSE

        # Log the metrics for the current feature
        writer.add_scalar(f'MSE/{feature_name}', mse, global_step)
        writer.add_scalar(f'MAE/{feature_name}', mae, global_step)
        writer.add_scalar(f'R2/{feature_name}', r2, global_step)
        writer.add_scalar(f'RMSE/{feature_name}', rmse, global_step)

def linear_interpolate(x, x_min, x_max, y_min, y_max):
    """ Linearly interpolates a value 'x' in range [x_min, x_max] to range [y_min, y_max] """
    return y_min + (y_max - y_min) * ((x - x_min) / (x_max - x_min))

def FindOptimalTraining(x_train_tensor):
    """
    Takes the x_train_tensor and checks the row count to modify batch size and learning rate based on the iteration to optimize work.
    """
    row_count = x_train_tensor.size()[0]

    # Ensure row_count is within expected bounds
    row_count = max(100, min(5000, row_count))

    # # Linearly interpolate values TURNED OFF TO TEST
    # batch_size = int(linear_interpolate(row_count, 100, 5000, 16, 64))
    # learning_rate = linear_interpolate(row_count, 100, 5000, 0.0015, 0.00270)
    # #weight_decay = 0.0000003333 # Was 0.0000005806 changed 9:58 am 2024-01-07 #It's not uncommon to try values like 0.01, 0.001, or 0.0001 for weight decay          
    # weight_decay = linear_interpolate(row_count, 100, 5000, 0.000000450, 0.000000975)
    
    
        # Linearly interpolate values
    batch_size = 32 #int(linear_interpolate(row_count, 100, 5000, 16, 64))
    learning_rate = 0.00150 #linear_interpolate(row_count, 100, 5000, 0.0015, 0.00270)
    #weight_decay = 0.0000003333 # Was 0.0000005806 changed 9:58 am 2024-01-07 #It's not uncommon to try values like 0.01, 0.001, or 0.0001 for weight decay          
    weight_decay = 0.0000005000 #linear_interpolate(row_count, 100, 5000, 0.000000450, 0.000000975)
    
#WEIGHT DECAY , 0.0000005000 , learning rate 00.0025 to stabalize bath 32
    # Ensure batch size does not exceed the dataset size
    batch_size = min(batch_size, row_count)
    
    print(f"Learning Rate: {learning_rate:.6f}, Batch Size: {batch_size}, Weight Decay: {weight_decay:.10f} , X-Tensor Shape: {x_train_tensor.shape}")
    return (learning_rate, batch_size, weight_decay)


def save_checkpoint(model, optimizer, epoch, loss, filepath='model_checkpoint.pth'):
    """
    Save a model checkpoint.

    :param model: PyTorch model to save.
    :param optimizer: Optimizer whose state we want to save.
    :param epoch: Current epoch number.
    :param loss: Current loss value.
    :param filepath: Path to save the checkpoint.
    """
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss if loss is not None else 'N/A',  # Optionally store the loss value
    }
    
    torch.save(checkpoint, filepath)
    print(f"Checkpoint saved to {filepath}")

def load_checkpoint(filepath, model, optimizer, device):
    """
    Load a model checkpoint.

    :param filepath: Path to the checkpoint file.
    :param model: PyTorch model to load the parameters into.
    :param optimizer: Optimizer to load the state into.
    :param device: The device to load the model onto.
    :return: The checkpoint epoch and loss.
    """
    checkpoint = torch.load(filepath, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint.get('loss', None)  # Retrieve the loss if available, else None
    print(f"Checkpoint loaded from {filepath} at epoch {epoch} with loss {loss}")
    return epoch, loss

def save_model(model, filepath='model.pth'):
    """
    Save a model's state_dict.

    :param model: PyTorch model to save.
    :param filepath: Path to save the model.
    """
    torch.save(model.state_dict(), filepath)
    print(f"Model saved to {filepath}")

def load_model(filepath):
    """
    Load a model's state_dict from a given filepath.

    :param filepath: Path to the model file.
    :return: Loaded PyTorch model.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Define the number of features and prediction length
    num_features = 24  # Number of features in the dataset
    prediction_length = 30  # Number of days you want to predict

    # Instantiate the model
    model = HybridLSTMGRUAttentionWithCNN(input_size=24, 
                        hidden_layer_size=600, 
                        num_layers=26, 
                        output_size=prediction_length * num_features,
                        dropout_rate= 0.0,
                        num_heads= 8, kernel_size= 8).to(device)  # output_size is 30 days * 9 features
    
    try:
        # Load the saved model state into the model
        model_state = torch.load(filepath, map_location=device)
        model.load_state_dict(model_state['model_state_dict'])  # Ensure this matches the key used when saving
        print(f"Model loaded from {filepath}")
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
    except KeyError:
        print("Error: Incompatible state dict keys. Please check the model architecture and the state dict.")
    
    return model


def prepare_data_for_prediction(normalized_data, look_back):
    """
    Prepare data for prediction based on the look_back period.
    :param normalized_data: Normalized data (2D array where rows are timesteps and columns are features).
    :param look_back: Number of timesteps to look back for making the prediction.
    :return: Data prepared for prediction.
    """
    # Use the last 'look_back' timesteps to make the prediction
    return normalized_data[-look_back:]

def preprocess_input(input_data, fitted_standard_scaler, fitted_min_max_scaler):
    """
    Standardize and normalize new input data using the already fitted scalers from the training data.
    :param input_data: New data to be preprocessed (2D array where rows are timesteps and columns are features).
    :param fitted_standard_scaler: The StandardScaler instance fitted on the training data.
    :param fitted_min_max_scaler: The MinMaxScaler instance fitted on the training data.
    :return: Preprocessed data.
    """
    standardized_data = fitted_standard_scaler.transform(input_data)
    normalized_data = fitted_min_max_scaler.transform(standardized_data)
    return normalized_data

# Example usage:
# new_raw_data = ...  # New data to be predicted (should be a 2D array)
# preprocessed_data = preprocess_input(new_raw_data, standard_scaler, min_max_scaler)

def predict(model, prepared_data, device):
    """
    Make a prediction based on prepared input data.
    :param model: Trained LSTM model.
    :param prepared_data: Prepared data for prediction (should be a 2D array of shape (look_back, number_of_features)).
    :param device: The device (CPU or CUDA) for running the prediction.
    :return: Predicted values for future sequence.
    """
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        if not isinstance(prepared_data, np.ndarray):
            raise ValueError("prepared_data should be a numpy array")
        if prepared_data.shape[1] != 30:  # Ensure the second dimension matches the number of features
            raise ValueError("prepared_data shape second dimension must be 9")
        
        # Convert prepared data to a PyTorch tensor and add a batch dimension
        input_tensor = torch.from_numpy(prepared_data).float().to(device)
        input_tensor = input_tensor.unsqueeze(0)  # Add a batch dimension

        # Get the prediction from the model
        prediction = model(input_tensor)

        # Move the prediction back to the CPU if it was on CUDA
        prediction = prediction.cpu()

        # Reshape the output to remove batch dimension and get the actual prediction values
        prediction = prediction.view(-1, 30)  # Reshape to match your output format (e.g., (30, 9))
        return prediction.numpy()  # Convert to numpy array for easier handling
    
def revert_scaling(predictions, min_max_scaler, standard_scaler, feature_columns):
    """
    Revert the scaling of the predictions to their original scale.

    :param predictions: The predictions from the model, assumed to be a numpy array.
    :param min_max_scaler: The MinMaxScaler instance used for scaling the data.
    :param standard_scaler: The StandardScaler instance used for scaling the data.
    :param feature_columns: List of feature names corresponding to the columns.
    :return: DataFrame with predictions reverted to original scale.
    """
    # Inverse MinMax Scaling
    predictions = min_max_scaler.inverse_transform(predictions)

    # Inverse Standard Scaling
    predictions = standard_scaler.inverse_transform(predictions)

    # Convert to DataFrame
    predictions_df = pd.DataFrame(predictions, columns=feature_columns)

    return predictions_df

# Example usage:
# Assuming you have the fitted min_max_scaler, standard_scaler, and the list of feature columns
# predictions = ... # Output from the predict function
# reverted_predictions_df = revert_scaling(predictions, min_max_scaler, standard_scaler, feature_columns)



#   l1_lambda = 0.0000875 AS OF 5:40 PM
#    batch_size = int(linear_interpolate(row_count, 100, 5000, 12, 64))
#    learning_rate = linear_interpolate(row_count, 100, 5000, 0.00135, 0.00350)
#   #weight_decay = 0.0000003333 # Was 0.0000005806 changed 9:58 am 2024-01-07 #It's not uncommon to try values like 0.01, 0.001, or 0.0001 for weight decay          
#   weight_decay = linear_interpolate(row_count, 100, 5000, 0.000000175, 0.0000022)

# L1 and L2

While it's true that Lasso regression utilizing L1 penalty tends to produce sparse coefficient vectors, encouraging feature selection and promoting simpler models, I wouldn't necessarily associate L1 with "flattening out" the weights. Rather, L1 encourages shrinkage of coefficients toward zero, effectively eliminating irrelevant or weak features altogether.

On the other hand, Ridge Regression with L2 penalty preserves nonzero coefficients but distributes strength uniformly across them, thereby discouraging extreme values and promoting cooperation amongst correlated features. Although L2 does not enforce explicit feature removal like L1, it indirectly leads to a degree of generalization by attenuating the influence of less important features.

Summarizing:

    # L1 Penalty (Lasso Regression): Encourages sparsity by driving some coefficients to exact zeros, hence contributing to feature selection and producing simple models.
    # L2 Penalty (Ridge Regression): Promotes equal distribution of importance among correlated features, preventing dominance by any single feature and ultimately facilitating moderate generalization.

Both penalties play vital roles in controlling model complexity, addressing collinearity concerns, and enhancing interpretability. Leveraging either or a blend of both depending on the context propels us toward our ultimate goal of acquiring meaningful insights and generating accurate predictions.


In [11]:
# Train the model <<< THIS SEEMS TO WORK WELL!!!!
writer = SummaryWriter('newRun/MegaNumMuncher3')
global_step = 0

epochs = 20
model.train()
iteration = 0
l1_lambda = 0.0000900 #0.0000900 TO STABALZE
start_time = time.time()
tpb_list = []

loss_function = nn.MSELoss()
learning_rate = 0.0010
weightDecay = 0.00001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weightDecay)
filepath = f"C:\\Machine Learning\\SPY VIX 9 Input Output Model\\MegaNumMuncher3_Checkpoint-0.pth"
load_checkpoint(filepath, model, optimizer, device)

for iteration in range(50):
    TensorIntSystem = TensorIntegrationSystem("1994-01-01", "2018-01-01")
    TensorIntSystem.CreateCombinedDataFrame()
    # Extract feature names from the DataFrame
    feature_names = TensorIntSystem.FormattedCombinedDataFrame.columns.tolist()
    
    x_train_tensor, y_train_tensor = TensorIntSystem.CreateTensors()
    new_learning_rate, batch_size, new_weightDecay = FindOptimalTraining(x_train_tensor)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_learning_rate
        param_group['weight_decay'] = new_weightDecay

    # Initialize metrics for each iteration
    running_loss = 0.0
    total_mse = 0.0
    total_mae = 0.0
    total_r2 = 0.0
    num_batches = 0

    epoch_start_time = time.time()
    for epoch in range(epochs):
        permutation = torch.randperm(x_train_tensor.size(0))

        for i in range(0, x_train_tensor.size(0), batch_size):
            batch_start_time = time.time()

            # Adjust batch size for the last batch
            #end_idx = min(i + batch_size, x_train_tensor.size(0))
            
            end_idx = i + batch_size
            # Adjust for the last batch if it has fewer samples
            # Skip the batch if it does not fit the expected shape
            if end_idx > x_train_tensor.size(0) - batch_size:
                continue

            indices = permutation[i:end_idx]
        
            batch_seq = x_train_tensor[indices].to(device)
            batch_label = y_train_tensor[indices].to(device)

            optimizer.zero_grad()
            y_pred = model(batch_seq)
            y_pred = y_pred.view(batch_label.size())
            loss = loss_function(y_pred, batch_label)

            l1_penalty = sum(p.abs().sum() for p in model.parameters())
            total_loss = loss + l1_lambda * l1_penalty
            running_loss += total_loss.item()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, norm_type=2)
            total_loss.backward()
            optimizer.step()

            y_pred_flat = y_pred.view(y_pred.size(0), -1).cpu().detach().numpy()
            batch_label_flat = batch_label.view(batch_label.size(0), -1).cpu().detach().numpy()

            mse = mean_squared_error(batch_label_flat, y_pred_flat)
            mae = mean_absolute_error(batch_label_flat, y_pred_flat)
            r2 = r2_score(batch_label_flat, y_pred_flat)

            total_mse += mse
            total_mae += mae
            total_r2 += r2
            num_batches += 1

            batch_end_time = time.time()
            tpb_list.append(batch_end_time - batch_start_time)
            
            # Assuming `writer` is your SummaryWriter instance and `global_step` is defined
            calculate_and_log_feature_metrics(batch_label_flat, y_pred_flat, feature_names, global_step)

        # Calculate average metrics for each epoch
        avg_tpb = sum(tpb_list) / len(tpb_list) if tpb_list else 0
        avg_mse = total_mse / num_batches if num_batches else float('nan')
        avg_mae = total_mae / num_batches if num_batches else float('nan')
        avg_r2 = total_r2 / num_batches if num_batches else float('nan')
        avg_rmse = sqrt(avg_mse) if avg_mse >= 0 else float('nan')
        avg_loss = running_loss / num_batches if num_batches else float('nan')
        global_step += 1
        
        print(f'Iteration: {iteration}, Epoch: {epoch}, Avg Time per Batch: {avg_tpb:.4f} sec, '
              f'Loss: {avg_loss:.8f}, MSE: {avg_mse:.4f}, MAE: {avg_mae:.4f}, '
              f'RMSE: {avg_rmse:.4f}, R2: {avg_r2:.4f}')
        AddDataToTensorboard(avg_tpb, avg_loss, avg_mse, avg_mae, avg_rmse, avg_r2, model, global_step) #This will map data to tensorboard
        
        tpb_list.clear()
        # At the end of your training loop
        with open('C:\\Machine Learning\\SPY VIX 9 Input Output Model\\last_step.txt', 'w') as f:
            f.write(str(global_step))

    epoch_end_time = time.time()
    print(f'Epoch {epoch} completed in {epoch_end_time - epoch_start_time:.2f} seconds')

    del x_train_tensor, y_train_tensor  # Delete model and tensors
    torch.cuda.empty_cache()  # Clear cache
    import gc
    gc.collect()  # Invoke garbage collector

    save_checkpoint(model, optimizer, epoch, avg_loss, filepath)
    print(f'Checkpoint saved at Iteration: {iteration}, Epoch: {epoch}, Loss: {avg_loss:.10f}')
    
writer.close()


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
This Ticker is PFG
Data validation completed.
Learning Rate: 0.001500, Batch Size: 32, Weight Decay: 0.0000005000 , X-Tensor Shape: torch.Size([3928, 60, 24])
Iteration: 0, Epoch: 0, Avg Time per Batch: 0.2401 sec, Loss: 39.01580957, MSE: 0.0981, MAE: 0.2308, RMSE: 0.3132, R2: -1.9055
Iteration: 0, Epoch: 1, Avg Time per Batch: 0.2373 sec, Loss: 21.31200872, MSE: 0.0716, MAE: 0.1925, RMSE: 0.2677, R2: -1.1044
Iteration: 0, Epoch: 2, Avg Time per Batch: 0.2381 sec, Loss: 15.39796447, MSE: 0.0611, MAE: 0.1776, RMSE: 0.2472, R2: -0.7826
Iteration: 0, Epoch: 3, Avg Time per Batch: 0.2347 sec, Loss: 12.43427461, MSE: 0.0556, MAE: 0.1699, RMSE: 0.2358, R2: -0.6151
Iteration: 0, Epoch: 4, Avg Time per Batch: 0.2380 sec, Loss: 10.65137999, MSE: 0.0523, MAE: 0.1652, RMSE: 0.2286, R2: -0.5112
Iteration: 0, Epoch: 5, Avg Time per Batch: 0.2376 sec, Loss: 9.4589

In [None]:
TensorIntSystem = TensorIntegrationSystem("2018-01-01", "2024-01-01")
TensorIntSystem.CreateCombinedDataFrame()
# Extract feature names from the DataFrame
x_train_tensor, y_train_tensor = TensorIntSystem.CreateTensors()

In [13]:
# This is the validation Cycle system.

filepath = f"C:\\Machine Learning\\SPY VIX 9 Input Output Model\\MegaNumMuncher3_Checkpoint-0.pth"
load_checkpoint(filepath, model, optimizer, device)

writer = SummaryWriter(f'ValidationTesting/MegaNumMuncher2')
global_step = 0

with torch.no_grad():
    epochs = 5
    model.eval()
    iteration = 0
    l1_lambda = 0.0000900 #0.0000900 TO STABALZE
    start_time = time.time()
    tpb_list = []
    learning_rate = 0.0010
    weightDecay = 0.00001
    
    for iteration in range(50):
        TensorIntSystem = TensorIntegrationSystem("2018-01-01", "2024-01-01")
        TensorIntSystem.CreateCombinedDataFrame()
        # Extract feature names from the DataFrame
        feature_names = TensorIntSystem.FormattedCombinedDataFrame.columns.tolist()
        
        x_train_tensor, y_train_tensor = TensorIntSystem.CreateTensors()
        new_learning_rate, batch_size, new_weightDecay = FindOptimalTraining(x_train_tensor)
        for param_group in optimizer.param_groups:
            param_group['lr'] = new_learning_rate
            param_group['weight_decay'] = new_weightDecay

        # Initialize metrics for each iteration
        running_loss = 0.0
        total_mse = 0.0
        total_mae = 0.0
        total_r2 = 0.0
        num_batches = 0

        epoch_start_time = time.time()
        for epoch in range(epochs):
            permutation = torch.randperm(x_train_tensor.size(0))

            for i in range(0, x_train_tensor.size(0), batch_size):
                batch_start_time = time.time()

                # Adjust batch size for the last batch
                #end_idx = min(i + batch_size, x_train_tensor.size(0))
                
                end_idx = i + batch_size
                # Adjust for the last batch if it has fewer samples
                # Skip the batch if it does not fit the expected shape
                if end_idx > x_train_tensor.size(0) - batch_size:
                    continue

                indices = permutation[i:end_idx]
            
                batch_seq = x_train_tensor[indices].to(device)
                batch_label = y_train_tensor[indices].to(device)

                y_pred = model(batch_seq)
                y_pred = y_pred.view(batch_label.size())
                loss = loss_function(y_pred, batch_label)

                l1_penalty = sum(p.abs().sum() for p in model.parameters())
                total_loss = loss + l1_lambda * l1_penalty
                running_loss += total_loss.item()
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, norm_type=2)

                y_pred_flat = y_pred.view(y_pred.size(0), -1).cpu().detach().numpy()
                batch_label_flat = batch_label.view(batch_label.size(0), -1).cpu().detach().numpy()

                mse = mean_squared_error(batch_label_flat, y_pred_flat)
                mae = mean_absolute_error(batch_label_flat, y_pred_flat)
                r2 = r2_score(batch_label_flat, y_pred_flat)

                total_mse += mse
                total_mae += mae
                total_r2 += r2
                num_batches += 1

                batch_end_time = time.time()
                tpb_list.append(batch_end_time - batch_start_time)
                
                # Assuming `writer` is your SummaryWriter instance and `global_step` is defined
                calculate_and_log_feature_metrics(batch_label_flat, y_pred_flat, feature_names, global_step)

            # Calculate average metrics for each epoch
            avg_tpb = sum(tpb_list) / len(tpb_list) if tpb_list else 0
            avg_mse = total_mse / num_batches if num_batches else float('nan')
            avg_mae = total_mae / num_batches if num_batches else float('nan')
            avg_r2 = total_r2 / num_batches if num_batches else float('nan')
            avg_rmse = sqrt(avg_mse) if avg_mse >= 0 else float('nan')
            avg_loss = running_loss / num_batches if num_batches else float('nan')
            global_step += 1
            
            print(f'Iteration: {iteration}, Epoch: {epoch}, Avg Time per Batch: {avg_tpb:.4f} sec, '
                f'Loss: {avg_loss:.8f}, MSE: {avg_mse:.4f}, MAE: {avg_mae:.4f}, '
                f'RMSE: {avg_rmse:.4f}, R2: {avg_r2:.4f}')
            AddDataToTensorboard(avg_tpb, avg_loss, avg_mse, avg_mae, avg_rmse, avg_r2, model, global_step) #This will map data to tensorboard
            
            tpb_list.clear()
            # At the end of your training loop
            with open('C:\\Machine Learning\\SPY VIX 9 Input Output Model\\last_step.txt', 'w') as f:
                f.write(str(global_step))

        epoch_end_time = time.time()
        print(f'Epoch {epoch} completed in {epoch_end_time - epoch_start_time:.2f} seconds')

        del x_train_tensor, y_train_tensor  # Delete model and tensors
        torch.cuda.empty_cache()  # Clear cache
        import gc
        gc.collect()  # Invoke garbage collector

        save_checkpoint(model, optimizer, epoch, avg_loss, filepath)
        print(f'Checkpoint saved at Iteration: {iteration}, Epoch: {epoch}, Loss: {avg_loss:.10f}')
        
    writer.close()


Checkpoint loaded from C:\Machine Learning\SPY VIX 9 Input Output Model\MegaNumMuncher3_Checkpoint-0.pth at epoch 19 with loss 3.374740635321058
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
This Ticker is WB
Data validation completed.
Learning Rate: 0.001500, Batch Size: 32, Weight Decay: 0.0000005000 , X-Tensor Shape: torch.Size([2295, 60, 24])
Iteration: 0, Epoch: 0, Avg Time per Batch: 0.0822 sec, Loss: 3.35805530, MSE: 0.0521, MAE: 0.1798, RMSE: 0.2283, R2: -0.4668
Iteration: 0, Epoch: 1, Avg Time per Batch: 0.0806 sec, Loss: 3.35805962, MSE: 0.0521, MAE: 0.1798, RMSE: 0.2283, R2: -0.4671
Iteration: 0, Epoch: 2, Avg Time per Batch: 0.0812 sec, Loss: 3.35801347, MSE: 0.0521, MAE: 0.1797, RMSE: 0.2282, R2: -0.4656
Iteration: 0, Epoch: 3, Avg Time per Batch: 0.0785 sec, Loss: 3.35804623, MSE: 0.0521, MAE: 0.1797, RMSE: 0.2283, R2: -0.4729
Iteration: 0, Epoch: 4, Avg Time per Batch: 0.0779 sec,

KeyboardInterrupt: 

In [None]:
# OPTUNA TESTER

# {'learning_rate': 0.0013167734585262175,
#  'weight_decay': 1.2816205616802949e-10,
#  'batch_size': 24}

def CreateTensors():
    """
    Testing different sizes in Optuna
    """
    TensorIntSystem = TensorIntegrationSystem("1994-01-01", "2018-01-01")
    TensorIntSystem.CreateCombinedDataFrame()
    x_train_tensor, y_train_tensor = TensorIntSystem.CreateTensors()
    return x_train_tensor, y_train_tensor

def objective(trial):
    # Hyperparameters to be optimized by Optuna
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-10, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [8, 16, 24, 32, 40, 48, 56, 64])
    l1_lambda = trial.suggest_float('l1_lambda', 0.00005, 0.001, log=True)
    
    epochs = 5
    l1_lambda = 0.001
    tpb_list = []
    
    model = HybridLSTMGRUAttentionWithCNN(input_size=30, 
                                        hidden_layer_size=600, 
                                        num_layers=30, 
                                        output_size=30 * 30,
                                        dropout_rate=0.6,
                                        num_heads=6, 
                                        kernel_size=6).to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    loss_function = nn.MSELoss()
    
    x_train_tensor, y_train_tensor = CreateTensors()
    
    # Initialize metrics for each iteration
    running_loss = 0.0
    total_mse = 0.0
    total_mae = 0.0
    total_r2 = 0.0
    num_batches = 0

    epoch_start_time = time.time()
    for epoch in range(epochs):
        permutation = torch.randperm(x_train_tensor.size(0))

        for i in range(0, x_train_tensor.size(0), batch_size):
            batch_start_time = time.time()

            end_idx = i + batch_size
            # Adjust for the last batch if it has fewer samples
            # Skip the batch if it does not fit the expected shape
            if end_idx > x_train_tensor.size(0) - batch_size:
                continue

            indices = permutation[i:end_idx]
        
            batch_seq = x_train_tensor[indices].to(device)
            batch_label = y_train_tensor[indices].to(device)

            optimizer.zero_grad()
            y_pred = model(batch_seq)
            y_pred = y_pred.view(batch_label.size())
            loss = loss_function(y_pred, batch_label)

            l1_penalty = sum(p.abs().sum() for p in model.parameters())
            total_loss = loss + l1_lambda * l1_penalty
            running_loss += total_loss.item()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, norm_type=2)
            total_loss.backward()
            optimizer.step()

            y_pred_flat = y_pred.view(y_pred.size(0), -1).cpu().detach().numpy()
            batch_label_flat = batch_label.view(batch_label.size(0), -1).cpu().detach().numpy()

            mse = mean_squared_error(batch_label_flat, y_pred_flat)
            mae = mean_absolute_error(batch_label_flat, y_pred_flat)
            r2 = r2_score(batch_label_flat, y_pred_flat)

            total_mse += mse
            total_mae += mae
            total_r2 += r2
            num_batches += 1

            batch_end_time = time.time()
            tpb_list.append(batch_end_time - batch_start_time)

        # Calculate average metrics for each epoch
        avg_tpb = sum(tpb_list) / len(tpb_list) if tpb_list else 0
        avg_mse = total_mse / num_batches if num_batches else float('nan')
        avg_mae = total_mae / num_batches if num_batches else float('nan')
        avg_r2 = total_r2 / num_batches if num_batches else float('nan')
        avg_rmse = sqrt(avg_mse) if avg_mse >= 0 else float('nan')
        avg_loss = running_loss / num_batches if num_batches else float('nan')
        
        # Inside the epoch loop
        print(f'Trial: {trial.number}, Epoch: {epoch}, Avg Time per Batch: {avg_tpb:.4f} sec, '
            f'Loss: {avg_loss:.8f}, MSE: {avg_mse:.4f}, MAE: {avg_mae:.4f}, '
            f'RMSE: {avg_rmse:.4f}, R2: {avg_r2:.4f}')
        
        tpb_list.clear()

    epoch_end_time = time.time()
    print(f'Epoch {epoch} completed in {epoch_end_time - epoch_start_time:.2f} seconds')

    del x_train_tensor, y_train_tensor  # Delete model and tensors
    torch.cuda.empty_cache()  # Clear cache
    import gc
    gc.collect()  # Invoke garbage collector

    #print(f'Checkpoint saved at Epoch: {epoch}, Loss: {avg_loss:.10f}')
    
    # At the end of the objective function
    return avg_loss
    


In [None]:
# Create a study object and specify the direction is 'minimize'.
study = optuna.create_study(direction='minimize')

# Optimize the study, the objective function is being called here.
study.optimize(objective, n_trials=100)

# Get the best hyperparameters
best_params = study.best_params
print(f"Best params: {best_params}")

In [21]:
newdf

Unnamed: 0_level_0,Open_x,High_x,Low_x,Close_x,Volume,EMA20_Close_x,EMA40_Close_x,EMA60_Close_x,RSI20_Close_x,RSI40_Close_x,...,Close_y,EMA20_Close_y,EMA40_Close_y,EMA60_Close_y,RSI20_Close_y,RSI40_Close_y,RSI60_Close_y,shifted_day_of_year,cos_shifted_annual,sin_shifted_annual
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-07-12,189.174968,191.189594,187.968198,189.26474,60750200,186.967469,182.281242,177.955451,62.503099,62.598253,...,13.54,14.388288,15.115892,15.890125,43.573185,44.658958,45.457967,134,-0.670089,0.742281
2023-07-13,189.992798,190.680963,189.274713,190.032684,41342300,187.259395,182.659361,178.351425,63.539759,63.079535,...,13.61,14.314166,15.042434,15.815366,43.901797,44.795659,45.539996,135,-0.682758,0.730644
2023-07-14,189.723515,190.670982,189.125121,190.182297,41573900,187.537766,183.026333,178.739323,63.74531,63.174223,...,13.34,14.221388,14.959388,15.734207,42.887713,44.362148,45.272916,136,-0.695225,0.718792
2023-07-17,191.389049,193.802619,191.299292,193.473495,50520200,188.103074,183.535951,179.222411,67.931691,65.188552,...,13.48,14.150779,14.887223,15.660298,43.598794,44.647029,45.441638,139,-0.731378,0.681972
2023-07-18,192.83521,193.812596,191.907678,193.214188,48353800,188.589847,184.00806,179.681157,67.287306,64.901678,...,13.3,14.069753,14.809797,15.582912,42.87631,44.347594,45.259198,140,-0.743001,0.66929
2023-07-19,192.585876,197.702207,192.137062,194.580551,80507300,189.16039,184.523791,180.169662,68.922418,65.717028,...,13.76,14.040253,14.758588,15.523144,45.314081,45.309005,45.824467,141,-0.754404,0.656411
2023-07-20,194.570572,195.946903,191.987471,192.615799,59581200,189.489476,184.918523,180.577732,64.074717,63.540106,...,13.99,14.035467,14.721096,15.472877,46.51538,45.789263,46.107438,142,-0.765584,0.643337
2023-07-21,193.583223,194.450902,190.720854,191.42897,71917800,189.67419,185.236106,180.93351,61.33177,62.262298,...,13.6,13.993994,14.666408,15.411471,44.760358,45.100495,45.695861,143,-0.776537,0.630072
2023-07-24,192.895048,194.391054,191.738132,192.236801,45377800,189.918248,185.577604,181.30411,62.4825,62.784774,...,13.91,13.985994,14.62951,15.362243,46.450845,45.765579,46.084894,146,-0.808005,0.589176
2023-07-25,192.815256,193.922301,192.406344,193.104477,37283200,190.221699,185.944768,181.691007,63.703722,63.343849,...,13.86,13.973995,14.591973,15.312989,46.210746,45.674045,46.030805,147,-0.81802,0.57519


In [22]:
# Assuming you have a new raw data array to predict
new_raw_data = newdf.values

# Preprocess the new data (standardize and normalize)
preprocessed_data = preprocess_input(new_raw_data, standard_scaler, min_max_scaler)

# Prepare the data for prediction
prepared_data = prepare_data_for_prediction(preprocessed_data, look_back)

# Make a prediction
prepared_data_tensor = torch.from_numpy(prepared_data).float().to(device)
model.eval()
with torch.no_grad():
    prepared_data_tensor = prepared_data_tensor.unsqueeze(0)
    prediction = model(prepared_data_tensor)
    prediction = prediction.cpu().numpy()

# Reshape the predictions to match your output format
predictions_reshaped = prediction.reshape(-1, num_features)  # Adjust this as per your model's output

# Apply the inverse transformations to revert the predictions to original scale
predictions_min_max_inversed = min_max_scaler.inverse_transform(predictions_reshaped)
original_scale_predictions = standard_scaler.inverse_transform(predictions_min_max_inversed)

# Create a DataFrame for the reverted predictions
reverted_df = pd.DataFrame(original_scale_predictions, columns=df.columns[:num_features])


In [23]:
reverted_df

Unnamed: 0,Open_x,High_x,Low_x,Close_x,Volume,EMA20_Close_x,EMA40_Close_x,EMA60_Close_x,RSI20_Close_x,RSI40_Close_x,...,Close_y,EMA20_Close_y,EMA40_Close_y,EMA60_Close_y,RSI20_Close_y,RSI40_Close_y,RSI60_Close_y,shifted_day_of_year,cos_shifted_annual,sin_shifted_annual
0,87.211418,88.378296,86.803787,86.958466,946947520.0,83.730179,82.526726,81.223221,60.528824,58.296509,...,19.839705,19.367441,19.308647,19.410965,51.167896,51.843208,51.288277,190.546097,-0.004181,-0.050945
1,87.042038,88.199036,86.68644,86.900658,890388480.0,83.6259,82.463181,81.228577,60.446342,58.435841,...,19.542809,19.322601,19.360321,19.463125,50.764824,51.567406,51.071781,190.944687,-0.001361,-0.050009
2,86.802452,88.103111,86.402641,86.851761,801209728.0,83.619713,82.461517,81.294991,60.624031,58.475307,...,19.130501,19.35454,19.276316,19.382448,50.036205,51.149681,50.752056,193.016357,-0.00187,-0.049512
3,86.487709,87.830811,86.118042,86.621788,767622784.0,83.614433,82.491531,81.183426,59.727512,57.898697,...,19.8018,19.322884,19.315168,19.45084,50.45459,51.432079,50.973419,193.568359,-0.000645,-0.050532
4,86.368454,87.603996,85.996964,86.493233,1037429504.0,83.578468,82.368446,81.309959,59.756145,57.958862,...,19.341043,19.371004,19.342754,19.374475,49.940075,51.144272,50.755882,193.173004,0.000307,-0.049836
5,86.453842,87.843292,86.128029,86.489441,978492096.0,83.613396,82.286819,81.345818,59.682049,57.875252,...,19.64522,19.339613,19.310829,19.374384,50.268562,51.31572,50.857185,193.804352,0.002148,-0.049477
6,86.729782,88.108459,86.509819,87.024673,833890560.0,83.614944,82.379341,81.290665,60.150543,58.047993,...,19.745634,19.328209,19.348251,19.404387,50.572029,51.580471,51.090702,194.69342,0.005761,-0.048251
7,87.164406,88.562454,86.732292,87.172791,894394432.0,83.500031,82.469276,81.322487,59.954502,57.97607,...,19.982641,19.411467,19.32366,19.398363,51.246265,51.948685,51.261475,191.875198,0.003781,-0.048134
8,87.266518,88.646683,87.127541,87.403305,865003520.0,83.557335,82.519508,81.275597,60.299408,58.239246,...,19.873808,19.386909,19.383232,19.384977,51.052052,51.838978,51.237701,192.713562,0.003433,-0.04842
9,87.323799,88.719864,87.007256,87.5131,815534976.0,83.618416,82.47525,81.390152,60.41539,58.207253,...,19.643829,19.348822,19.328451,19.388992,50.653,51.458393,51.024021,189.656952,0.004507,-0.04636
