<a href="https://colab.research.google.com/github/Sambosis/Historic_Crypto/blob/main/Untitled57.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages
# Uncomment the following lines if running in a new environment
# !pip install fluidstack -q
# !pip install pytorch_lightning tensorflow icecream tensorboardX rich wandb -q

# Standard Library Imports
import os
import glob
import io
import time
import random
from dataclasses import dataclass
import multiprocessing
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from rich import print as rr
# Third-Party Imports
import numpy as np
import pandas as pd
from sklearn.metrics import precision_recall_curve
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, RichProgressBar
from icecream import ic
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from PIL import Image
import requests
from rich.console import Console
from rich.table import Table
from rich.text import Text
from rich.box import ROUNDED
import wandb
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import Callback
import torch.distributed as dist
from scipy.ndimage import gaussian_filter1d

# Report how many CPU cores the machine exposes (informational only)
num_cpus = multiprocessing.cpu_count()
print(f"Number of CPU cores available: {num_cpus}")

# Shared Rich console instance for formatted terminal output
console = Console()

# Configuration Dataclass
def _bump_version_file(path="version"):
    """Increment the run counter stored in ``path`` and return the new value.

    The file holds a single integer. A missing or empty file is treated as
    version 0, so a fresh checkout starts at version 1 instead of crashing
    at import time. Non-numeric content still raises ``ValueError`` so a
    corrupted counter is never silently reset.
    """
    try:
        with open(path, "r") as f:
            previous = int(f.read().strip() or 0)
    except FileNotFoundError:
        previous = 0

    current = previous + 1
    with open(path, "w") as f:
        f.write(str(current))
    return current


@dataclass
class Config:
    """Run configuration for the training script.

    Annotated attributes are dataclass fields and can be overridden through
    the constructor. Un-annotated attributes (``VERSION_N``, ``version``,
    ``num_cpus``, ``NUM_WORKERS``) are plain class attributes evaluated once,
    when the class body executes.
    """

    # Run counter: bumped (and written back to the "version" file) every time
    # this class body is executed, i.e. once per import of the module.
    VERSION_N = _bump_version_file("version")
    version = VERSION_N - 1  # value the file held before the bump

    RECORDS_TO_LOAD: int = 1205040  # number of trailing rows kept from the CSV
    N_PAST: int = 3 * 12 * 3  # 108 past time steps per input window
    N_FUTURE: int = 1 * 12 * 2  # 24 future time steps per target window
    BATCH_SIZE: int = 5000
    HIDDEN_SIZE: int = 256
    NUM_LAYERS: int = 2
    NUM_EPOCHS: int = 150
    HOT_RESTART: bool = True
    TRAIN_FIRST: bool = True
    EPOCH_TO_RESTART: int = 50
    BATCH_FACTOR: int = 81
    DEBUG_FREQ: int = 180
    num_cpus = multiprocessing.cpu_count()
    # Leave most cores free on large machines; use 4 workers otherwise.
    NUM_WORKERS = (num_cpus // 4 - 4) if num_cpus > 16 else 4
    DEBUG_ON: bool = True
    DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'
    DATA_FILE: str = './data/fill_nan_df.csv'
    MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"
    MODEL_SAVE_PATH: str = './yay'
    DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    EPSILON: float = 1e-4  # added to denominators of relative errors

# Build the global configuration object used by the rest of the script
cfg = Config()

# Seed the random number generators so runs are reproducible
pl.seed_everything(40, workers=True)

# Report the selected device and make sure the output directory exists
print(f"Using device: {cfg.DEVICE}")
os.makedirs(cfg.MODEL_SAVE_PATH, exist_ok=True)

# Switch IceCream debug output on or off according to the configuration
toggle_icecream = ic.enable if cfg.DEBUG_ON else ic.disable
toggle_icecream()

# Callback to Update Percentile Cutoff
class UpdatePercentileCutoffCallback(Callback):
    """Tighten the criterion's percentile cutoff once validation reward is high.

    After each validation epoch, if the logged ``val/reward`` metric exceeds
    ``reduction_threshold``, the cutoff held by ``pl_module.criterion`` is
    multiplied by ``reduction_factor``.

    Args:
        reduction_threshold: Reward value that must be exceeded to trigger a
            reduction.
        reduction_factor: Multiplier applied to the current cutoff.
    """

    def __init__(self, reduction_threshold=1.9, reduction_factor=0.9):
        super().__init__()
        self.reduction_threshold = reduction_threshold
        self.reduction_factor = reduction_factor

    def on_validation_epoch_end(self, trainer, pl_module):
        # Skip during sanity check to prevent freezing
        if trainer.sanity_checking:
            return

        # Only the main process (rank 0) determines if reduction is needed
        if not trainer.is_global_zero:
            return

        # callback_metrics may hold a tensor; convert once so the comparison
        # and the logged value are plain Python floats.
        avg_reward = float(trainer.callback_metrics.get('val/reward', 0))
        if not avg_reward > self.reduction_threshold:
            return

        criterion = pl_module.criterion
        old_perc_cutoff = criterion.get_perc_cutoff()
        new_perc_cutoff = old_perc_cutoff * self.reduction_factor
        # The setter is the single place that updates the cutoff; no need to
        # reach into the criterion's buffer a second time.
        criterion.set_perc_cutoff(new_perc_cutoff)
        print(f"PercentileCutoffCallback: Reducing perc_cutoff from {old_perc_cutoff:.5f} to {new_perc_cutoff:.5f}")

        # Log reduction event to WandB
        pl_module.logger.experiment.log({
            "percentile_cutoff_reduction": new_perc_cutoff,
            "avg_reward": avg_reward
        })

# PositionalEncoding Class
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch-first sequence.

    Args:
        d_model: Size of the last (feature) dimension of the input. Both even
            and odd sizes are supported.
        max_len: Number of positions for which encodings are precomputed.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()

        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))  # (ceil(d_model/2),)

        pe[:, 0::2] = torch.sin(position * div_term)  # Even indices
        # For odd d_model there is one fewer odd index than even index, so
        # trim the frequency vector to match (a no-op when d_model is even).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])  # Odd indices
        pe = pe.unsqueeze(0)  # (1, max_len, d_model)

        # Buffer: moves with the module across devices but is not trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        Args:
            x: Tensor indexed as (batch, sequence, d_model).
        """
        seq_len = x.size(1)
        x = x + self.pe[:, :seq_len, :]
        return x

# Transformer-Based Model
class CryptoTransformer(nn.Module):
    """Encoder-decoder transformer mapping a past window to a future window.

    Source and target sequences are projected by the same linear layer,
    given sinusoidal positional encodings, passed through the
    encoder/decoder stacks and projected to ``num_outputs`` values per step.
    """

    def __init__(
        self,
        input_size,
        d_model=256,
        nhead=8,
        num_encoder_layers=2,
        num_decoder_layers=2,
        dim_feedforward=2048,
        dropout=0.3,
        activation="gelu",
        n_future=24,
        num_outputs=24,
        max_seq_length=5000
    ):
        super().__init__()

        self.input_size = input_size
        self.d_model = d_model
        self.n_future = n_future
        self.num_outputs = num_outputs

        # Shared embedding for encoder and decoder inputs
        self.input_fc = nn.Linear(input_size, d_model)

        # Separate positional encoders for the two streams
        self.pos_encoder = PositionalEncoding(d_model, max_len=max_seq_length)
        self.pos_decoder = PositionalEncoding(d_model, max_len=max_seq_length)

        # Encoder and decoder layers share the same hyper-parameters
        layer_options = dict(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_options),
            num_layers=num_encoder_layers,
        )
        self.transformer_decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(**layer_options),
            num_layers=num_decoder_layers,
        )

        # Projection from model width to the output features
        self.output_fc = nn.Linear(d_model, num_outputs)

        # Not used in forward(); kept so the module's state_dict keys are unchanged
        self.layer_norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def generate_square_subsequent_mask(self, sz):
        """Return a (sz, sz) float mask: 0 on/below the diagonal, -inf above."""
        blocked = torch.full((sz, sz), float('-inf'))
        return torch.triu(blocked, diagonal=1)

    def forward(self, src, tgt):
        """
        Args:
            src: (batch_size, n_past, num_features)
            tgt: (batch_size, n_future, num_features)
        Returns:
            out: (batch_size, n_future, num_outputs)
        """
        scale = np.sqrt(self.d_model)

        # Embed, scale and position-encode both streams
        encoder_in = self.pos_encoder(self.input_fc(src) * scale)  # (batch_size, n_past, d_model)
        decoder_in = self.pos_decoder(self.input_fc(tgt) * scale)  # (batch_size, n_future, d_model)

        # Causal mask so each target step only attends to earlier target steps
        causal_mask = self.generate_square_subsequent_mask(decoder_in.size(1)).to(decoder_in.device)

        memory = self.transformer_encoder(encoder_in)  # (batch_size, n_past, d_model)
        decoded = self.transformer_decoder(decoder_in, memory, tgt_mask=causal_mask)  # (batch_size, n_future, d_model)

        return self.output_fc(decoded)  # (batch_size, n_future, num_outputs)

# Custom Balanced Loss Function
class BalancedCryptoLoss(nn.Module):
    """Weighted combination of several error terms minus a hit-rate reward.

    ``forward`` returns a 9-tuple:
    ``(final_loss, mse_loss, mae_loss, perc_diff_loss, max_diff_loss,
    imbalance_loss, direction_loss, reward, perc_cutoff)``.

    Args:
        config: Object exposing ``EPSILON`` (added to denominators of
            relative errors) and ``DEBUG_FREQ``.
    """

    def __init__(self, config):
        super(BalancedCryptoLoss, self).__init__()
        # Held as a buffer so it moves with the module and is part of its state_dict
        self.register_buffer('perc_cutoff_buffer', torch.tensor(0.015))
        self.config = config
        self.mse_weight = 900.0
        self.mae_weight = 25.0
        self.max_diff_weight = 3.0
        self.balance_weight = 3.0
        self.direction_weight = 0.001
        self.mean_diff_weight = 15.0
        self.perc_diff_weight = 15.0
        self.within_1pct_reward_weight = 5.0
        self.reward_scaling = 5.0
        self.epsilon = config.EPSILON
        self.debug_freq = config.DEBUG_FREQ
        self.epoch = 0  # NOTE: incremented on every forward() call, not once per epoch
        self.mean_mean_diff = 0.0
        self.reward = 0.0

    def _relative_error(self, y_pred, y_true):
        """Signed relative error ``(y_pred - y_true) / (epsilon + y_true)``."""
        return (y_pred - y_true) / (self.epsilon + y_true)

    def directional_loss(self, preds, target):
        """Penalise disagreement between step-to-step directions along dim 1.

        NOTE(review): ``sign()`` has zero gradient, so this term does not
        contribute to back-propagation; it only shifts the reported loss.
        """
        direction_pred = (preds[:, 1:] - preds[:, :-1]).sign()
        direction_true = (target[:, 1:] - target[:, :-1]).sign()

        # Map signs {-1, 0, 1} to {0, 0.5, 1}
        direction_pred = (direction_pred + 1) / 2
        direction_true = (direction_true + 1) / 2

        # Clamp values to prevent BCE from receiving exact 0 or 1
        direction_pred = torch.clamp(direction_pred, 1e-7, 1 - 1e-7)
        direction_true = torch.clamp(direction_true, 1e-7, 1 - 1e-7)

        # binary_cross_entropy already averages over all elements
        return F.binary_cross_entropy(direction_pred, direction_true) * self.direction_weight

    def mse_loss_component(self, y_pred, y_true):
        """Weighted mean squared error."""
        return F.mse_loss(y_pred, y_true) * self.mse_weight

    def mae_loss_component(self, y_pred, y_true):
        """Weighted mean absolute error."""
        return F.l1_loss(y_pred, y_true) * self.mae_weight

    def percentage_diff_component(self, y_pred, y_true):
        """Squared weighted mean absolute relative error.

        Also records the unweighted mean in ``self.mean_mean_diff``.
        """
        mean_perc_diff = torch.mean(torch.abs(self._relative_error(y_pred, y_true)))
        self.mean_mean_diff = mean_perc_diff.item()
        return (mean_perc_diff * self.perc_diff_weight) ** 2

    def max_diff_component(self, perc_diff):
        """Weighted mean over the batch of the maximum along dim 1."""
        max_diffs, _ = torch.max(perc_diff, dim=1)
        return torch.mean(max_diffs) * self.max_diff_weight

    def imbalance_component(self, perc_diff):
        """Penalise a systematic bias towards over- or under-prediction.

        Args:
            perc_diff: *Signed* relative errors. Passing absolute values makes
                the under-prediction branch identically zero.
        """
        overpredict = torch.relu(perc_diff)
        underpredict = torch.relu(-perc_diff)
        imbalance = torch.abs(torch.mean(overpredict, dim=1) - torch.mean(underpredict, dim=1))
        return torch.mean(imbalance) * self.balance_weight

    def reward_component(self, y_pred, y_true):
        """
        Calculates the reward component based on the predicted and true values.

        Args:
            y_pred (torch.Tensor): The predicted values.
            y_true (torch.Tensor): The true values.

        Returns:
            torch.Tensor: Fraction of elements whose absolute relative error
                          is at most the current percentile cutoff, times
                          ``within_1pct_reward_weight``. The comparison is
                          not differentiable, so no gradient flows through it.
        """
        percentage_diff = torch.abs(self._relative_error(y_pred, y_true))

        within_1pct = (percentage_diff <= self.perc_cutoff_buffer).float()
        within_1pct_ratio = torch.mean(within_1pct)
        return within_1pct_ratio * self.within_1pct_reward_weight

    def compute_all_losses(self, y_pred, y_true):
        """Return the seven individual components, in the order used by forward()."""
        # Compute the relative error once and share it between components
        signed_diff = self._relative_error(y_pred, y_true)
        abs_diff = torch.abs(signed_diff)

        mse_loss = self.mse_loss_component(y_pred, y_true)
        mae_loss = self.mae_loss_component(y_pred, y_true)
        perc_diff_loss = self.percentage_diff_component(y_pred, y_true)
        max_diff_loss = self.max_diff_component(abs_diff)
        # Needs the signed error: with absolute values the "underpredict"
        # half is always zero and the term stops measuring imbalance.
        imbalance_loss = self.imbalance_component(signed_diff)
        direction_loss = self.directional_loss(y_pred, y_true)
        reward = self.reward_component(y_pred, y_true)
        return mse_loss, mae_loss, perc_diff_loss, max_diff_loss, imbalance_loss, direction_loss, reward

    def forward(self, y_pred, y_true):
        """Compute the combined loss and its components for one batch."""
        self.epoch += 1

        # Compute Loss Components
        mse_loss, mae_loss, perc_diff_loss, max_diff_loss, imbalance_loss, direction_loss, reward = self.compute_all_losses(y_pred, y_true)
        self.reward = reward
        # Combine Loss Components
        final_loss = (mse_loss + mae_loss + perc_diff_loss + max_diff_loss +
                      imbalance_loss + direction_loss - (reward * self.reward_scaling))

        # Clamp Final Loss to prevent negative values
        final_loss = torch.clamp(final_loss, min=0.00001)
        perc_cutoff = self.get_perc_cutoff()
        return final_loss, mse_loss, mae_loss, perc_diff_loss, max_diff_loss, imbalance_loss, direction_loss, reward, perc_cutoff

    def get_reward(self):
        """Return the most recent reward as a float (0.0 before any forward())."""
        # self.reward starts as a plain float and becomes a tensor after forward()
        return float(self.reward)

    def set_perc_cutoff(self, perc_cutoff):
        """Overwrite the cutoff buffer in place."""
        self.perc_cutoff_buffer.fill_(perc_cutoff)

    def get_last_mean_diff(self):
        """Return the mean absolute relative error recorded by the last call."""
        return self.mean_mean_diff

    def get_perc_cutoff(self):
        """Return the current cutoff as a Python float."""
        return self.perc_cutoff_buffer.item()

# Custom Dataset
class CryptoDataset(Dataset):
    """Sliding-window dataset over the rows of a numeric DataFrame.

    Item ``i`` is the pair ``(x, y)`` where ``x`` holds rows
    ``[i, i + n_past)`` and ``y`` holds the following ``n_future`` rows,
    both as float32 tensors of shape (rows, num_features).

    Args:
        data: Source frame; every column must be convertible to float32.
        n_past: Number of rows in each input window.
        n_future: Number of rows in each target window.
    """

    def __init__(self, data: pd.DataFrame, n_past: int, n_future: int):
        self.data = data
        self.n_past = n_past
        self.n_future = n_future
        # Convert once up front: slicing a NumPy array per item is much
        # cheaper than a DataFrame .iloc lookup on every __getitem__ call.
        self._values = data.to_numpy(dtype=np.float32)

    def __len__(self):
        # A frame shorter than one full window yields an empty dataset
        # (a negative value would make len() raise ValueError).
        return max(0, len(self._values) - self.n_past - self.n_future + 1)

    def __getitem__(self, idx):
        size = len(self)
        if idx < 0:
            idx += size
        if not 0 <= idx < size:
            # Required for plain ``for item in dataset`` iteration to stop
            raise IndexError(f"index {idx} out of range for dataset of size {size}")

        split = idx + self.n_past
        x = self._values[idx:split]  # (n_past, num_features)
        y = self._values[split:split + self.n_future]  # (n_future, num_features)
        # torch.tensor copies, so callers never alias the cached array
        return torch.tensor(x), torch.tensor(y)

# Utility Functions
def get_random_sample(dataframe: pd.DataFrame):
    """
    Draw one random (past window, future window) pair from the DataFrame.

    Window lengths come from the global ``cfg`` (``N_PAST`` and ``N_FUTURE``).

    Args:
        dataframe (pd.DataFrame): DataFrame to sample from.

    Returns:
        tuple: (input_data, target_data) as float tensors.
    """
    past_len = cfg.N_PAST
    future_len = cfg.N_FUTURE

    # Latest start position that still leaves room for a full window
    last_start = len(dataframe) - past_len - future_len
    start = random.randint(0, last_start)
    split = start + past_len

    past_rows = dataframe.iloc[start:split].values
    future_rows = dataframe.iloc[split:split + future_len].values
    return torch.FloatTensor(past_rows), torch.FloatTensor(future_rows)

def prepare_input(input_data, device):
    """
    Add a leading batch dimension and move the tensor to ``device``.

    Args:
        input_data (torch.FloatTensor): Input data.
        device: Target device for the returned tensor.

    Returns:
        torch.FloatTensor: Input with shape (1, *input_data.shape) on ``device``.
    """
    batched = torch.unsqueeze(input_data, 0)
    return batched.to(device)

def convert_to_numpy(input_data, target, prediction):
    """
    Detach three tensors from autograd, move them to CPU and convert to NumPy.

    Args:
        input_data (torch.FloatTensor): Input data.
        target (torch.FloatTensor): Target data.
        prediction (torch.FloatTensor): Prediction data.

    Returns:
        tuple: (input_np, target_np, prediction_np)
    """
    return tuple(
        tensor.detach().cpu().numpy()
        for tensor in (input_data, target, prediction)
    )

def gaussian_smoothing(data, window_size, sigma):
    """
    Compute the Gaussian smoothing of the data.

    The kernel is symmetric around zero with unit spacing between taps, so
    the smoothed series is not shifted relative to the input.

    Args:
        data (np.ndarray): 1-D input data.
        window_size (int): Number of kernel taps.
        sigma (float): Standard deviation of the Gaussian kernel.

    Returns:
        np.ndarray: Smoothed data. 'valid' convolution is used, so the result
        has ``len(data) - window_size + 1`` elements when
        ``len(data) >= window_size``.
    """
    # Generate Gaussian kernel. Note that ``-window_size // 2`` would parse as
    # ``(-window_size) // 2`` and give a lopsided range (e.g. -4..3 for 7),
    # so compute the half-width explicitly.
    half_width = (window_size - 1) / 2.0
    x = np.linspace(-half_width, half_width, window_size)
    kernel = np.exp(-(x ** 2) / (2 * sigma ** 2))
    kernel /= kernel.sum()

    # Convolve data with Gaussian kernel
    return np.convolve(data, kernel, 'valid')

# def gaussian_smoothing(data, window_size, sigma):
#     """
#     Compute the Gaussian smoothing of the data.

#     Args:
#         data (np.ndarray): Input data.
#         window_size (int): Window size for Gaussian smoothing.
#         sigma (float): Standard deviation of the Gaussian kernel.

#     Returns:
#         np.ndarray: Gaussian smoothed data.
#     """
#     return gaussian_filter1d(data, sigma=sigma)

# Inverse Transformation Function
def inverse_transform_predictions(scaled_value, scaler, log_transform=True):
    """
    Map scaled value(s) back to the original scale.

    Args:
        scaled_value (np.ndarray or float): Scaled value(s).
        scaler (MinMaxScaler): Fitted scaler used during preprocessing.
        log_transform (bool): If True, exponentiate after unscaling to undo
            a natural-log transform.

    Returns:
        np.ndarray: 1-D array of value(s) in the original scale.
    """
    # The scaler expects a single 2D column; np.array copies the input
    as_column = np.array(scaled_value).reshape(-1, 1)
    unscaled = scaler.inverse_transform(as_column).flatten()

    return np.exp(unscaled) if log_transform else unscaled

# Visualization Function
def visualize_predictions(target_np, prediction_np, n_future, scalers, filtered_df, model_save_path):
    """
    Plot past, target and predicted values for every column and save a PNG.

    One subplot is drawn per column of ``filtered_df`` (at most 4 per row).
    All series are mapped back to the original scale with
    ``inverse_transform_predictions`` before plotting.

    Args:
        target_np: Not used by this function; the plotted target is taken
            from the tail of ``filtered_df`` instead.
        prediction_np: Array indexed as ``[0, step, column]``; only the first
            batch element is plotted.
        n_future (int): Number of predicted steps. NOTE(review): with
            ``n_future == 1`` the past slice ``[:-(n_future-1)]`` becomes
            ``[:-0]`` and is empty.
        scalers (dict): Column name -> fitted scaler.
        filtered_df (pd.DataFrame): Scaled rows covering the past window
            followed by the future window; must contain an ``XBTUSD_price``
            column.
        model_save_path (str): Directory in which the image is written.

    Returns:
        str: Path of the saved image.
    """
    num_features = filtered_df.shape[1]
    max_cols = 4
    num_rows = (num_features - 1) // max_cols + 1
    num_cols = min(num_features, max_cols)

    plt.figure(figsize=(18 * num_cols / max_cols, 6 * num_rows))

    window_size = 7  # Number of taps of the smoothing kernel

    for j in range(num_features):
        plt.subplot(num_rows, num_cols, j + 1)
        col_name = filtered_df.columns[j]

        # "Past" series: every row except the last (n_future - 1), i.e. one
        # row more than the rows preceding the future window.
        past_scaled = filtered_df[col_name].values  # all rows of this column
        past_scaled = past_scaled[:-(n_future-1)]
        past_inverted = inverse_transform_predictions(past_scaled, scalers[col_name])

        # Known target: the last (n_future + 1) rows, so the curve starts one
        # step before the first predicted step.
        target_scaled = filtered_df[col_name].values  # all rows of this column
        target_scaled = target_scaled[-(n_future+1):]
        target_inverted = inverse_transform_predictions(target_scaled, scalers[col_name])

        # Predicted future values for this column (first batch element)
        prediction_scaled = prediction_np[0, :, j]
        prediction_inverted = inverse_transform_predictions(prediction_scaled, scalers[col_name])

        # Last XBTUSD price of the window, in original scale (loop-invariant)
        last_xbtusd_price_scaled = filtered_df['XBTUSD_price'].iloc[-1]
        last_xbtusd_price = inverse_transform_predictions(last_xbtusd_price_scaled, scalers['XBTUSD_price'])

        # Columns ending in 'XBT_price' are multiplied by that price
        # (presumably converting XBT-quoted prices to USD - TODO confirm)
        if col_name.endswith('XBT_price'):
            past_inverted *= last_xbtusd_price
            target_inverted *= last_xbtusd_price
            prediction_inverted *= last_xbtusd_price

        # Past series followed by the prediction, used for smoothing below
        total_predicted = np.concatenate((past_inverted, prediction_inverted))

        # Create time indices. Note that n_past here is the length of the
        # sliced past series, not the caller's configured window length.
        n_past = len(past_inverted)
        total_timesteps = n_past + n_future
        time_indices = range(total_timesteps)

        # Plot the last (n_future + 1) points of the past series

        plt.plot(time_indices[n_past-n_future:(n_past+1)], past_inverted[-(n_future+1):], 'b', label='Past Data' if j == 0 else "")

        # Plot known target data, starting at the last past index
        plt.plot(time_indices[n_past-1:], target_inverted, 'g', alpha=0.7, label='Target Data' if j == 0 else "")

        # Plot prediction data
        plt.plot(time_indices[n_past:], prediction_inverted, 'r', alpha=0.7, label='Prediction Data' if j == 0 else "")

        # Smoothed versions of the past series and of past + prediction
        total_inverted_smooth = gaussian_smoothing(past_inverted, window_size, sigma=10)
        total_predicted_smooth = gaussian_smoothing(total_predicted, window_size, sigma=10)

        # NOTE(review): min_length, start_index and end_index are computed but
        # never used below; total_inverted_smooth is only used for min_length.
        min_length = min(len(total_predicted_smooth), len(total_inverted_smooth))

        start_index = n_past - min_length
        end_index = n_past

        # Plot the smoothed prediction; the x and y slices are offset so that
        # both have (n_future - window_size) points.
        plt.plot(time_indices[n_past+window_size:], total_predicted_smooth[n_past+1:], 'r', linewidth=2, label='Prediction Smoothed' if j == 0 else "")

        # Shade the area between the smoothed prediction and the raw
        # (unsmoothed) target over the last n_future steps
        plt.fill_between(time_indices[-n_future:], total_predicted_smooth[-n_future:], target_inverted[-n_future:], color='blue', alpha=0.1)
        # Adjust plot settings
        plt.title(col_name)
        if j == 0:
            plt.legend(loc='upper right')

    plt.tight_layout()
    # Timestamped file name so successive calls do not overwrite each other
    # (at one-second resolution)
    time_date = time.strftime("%Y%m%d-%H%M%S")
    image_path = os.path.join(model_save_path, f"{time_date}_predictions.png")
    plt.savefig(image_path)
    plt.close()

    return image_path

# Checkpoint Saving Function
def save_checkpoint(state, filename):
    """
    Serialize a training checkpoint with ``torch.save``.

    Args:
        state (dict): State dictionary containing model, optimizer, scheduler states, etc.
        filename (str): Destination path for the checkpoint.
    """
    torch.save(obj=state, f=filename)

# Checkpoint Loading Function
def load_checkpoint(model, optimizer, scheduler, model_path, device):
    """
    Restore model (and optionally optimizer/scheduler) state from a checkpoint.

    The model weights are looked up under ``'model_state_dict'``, then
    ``'state_dict'``; if neither key exists the checkpoint itself is treated
    as the state dict. A leading ``'model.'`` is stripped from every key and
    the result is loaded non-strictly, printing any missing/unexpected keys.

    Args:
        model: Module receiving the weights.
        optimizer: Optimizer to restore, or None to skip.
        scheduler: Scheduler to restore, or None to skip.
        model_path: Checkpoint location passed to ``torch.load``.
        device: ``map_location`` used when loading.

    Returns:
        The loaded checkpoint object.
    """
    from collections import OrderedDict

    print(f"Loading checkpoint from {model_path}")
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    checkpoint = torch.load(model_path, map_location=device)
    print("Checkpoint keys:", checkpoint.keys())

    # Locate the weights: first matching container key wins
    raw_state = checkpoint
    for container_key in ('model_state_dict', 'state_dict'):
        if container_key in checkpoint:
            raw_state = checkpoint[container_key]
            break

    # Strip the wrapper prefix so keys match the bare model
    prefix = 'model.'
    cleaned_state = OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in raw_state.items()
    )

    load_result = model.load_state_dict(cleaned_state, strict=False)
    missing_keys, unexpected_keys = load_result

    if missing_keys:
        print(f"Missing keys: {missing_keys}")
    if unexpected_keys:
        print(f"Unexpected keys: {unexpected_keys}")

    # Restore optimizer first, then scheduler, when both object and state exist
    restorable = (
        (optimizer, 'optimizer_state_dict'),
        (scheduler, 'scheduler_state_dict'),
    )
    for component, state_key in restorable:
        if component is not None and state_key in checkpoint:
            component.load_state_dict(checkpoint[state_key])

    print("Checkpoint loaded successfully.")
    return checkpoint
# Data Loading and Preprocessing
def load_and_preprocess_data(file_path: str, download_url: str = None):
    """
    Load and preprocess data from a CSV file. If the file does not exist, download it.

    Preprocessing keeps the last ``cfg.RECORDS_TO_LOAD`` rows, then for every
    column applies a natural log followed by a per-column ``MinMaxScaler``.

    Args:
        file_path (str): Path to the CSV file. Must contain a ``timestamp``
            column, which becomes the index.
        download_url (str, optional): URL to download the CSV file. Defaults to None.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
        dict: Dictionary of scalers used for each column.

    Raises:
        FileNotFoundError: If the file is missing and no URL was given.
        requests.exceptions.RequestException: If the download fails.
        ValueError: If any column contains non-positive values.
    """
    ic("Starting data loading and preprocessing...")

    # Check if the file exists
    if not os.path.exists(file_path):
        ic(f"File {file_path} does not exist.")
        # dirname is '' for a bare filename, and os.makedirs('') raises
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if download_url:
            ic(f"Downloading file from {download_url}...")
            # Download to a temporary name and rename on success, so an
            # interrupted transfer never leaves a truncated file that later
            # runs would mistake for the complete dataset.
            partial_path = file_path + ".part"
            try:
                with requests.get(download_url, stream=True, timeout=60) as response:
                    response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(partial_path, file_path)
                ic(f"File downloaded and saved to {file_path}")
            except requests.exceptions.RequestException as e:
                ic(f"Failed to download the file: {e}")
                raise
            finally:
                # No-op after a successful rename; cleans up after any failure
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        else:
            ic("Download URL not provided. Cannot download the file.")
            raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

    # Load the DataFrame
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df.set_index('timestamp', inplace=True)
    # Copy so the column assignments below act on an independent frame
    df = df.tail(cfg.RECORDS_TO_LOAD).copy()
    scalers = {}
    start_time_preprocess = time.time()

    for col in df.columns:
        # Ensure no non-positive values before log transform
        if (df[col] <= 0).any():
            raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

        # Apply natural logarithm transformation
        df[col] = np.log(df[col])

        # Initialize and fit MinMaxScaler
        scaler = MinMaxScaler()
        df[col] = scaler.fit_transform(df[[col]])

        # Save the scaler so predictions can be mapped back later
        scalers[col] = scaler

    ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
    ic(f"DataFrame shape: {df.shape}")

    return df, scalers

# Lightning Wrapper
class LightningWrapper(pl.LightningModule):
    """LightningModule that trains ``model`` with ``criterion`` and logs to WandB.

    Args:
        model: Network called as ``model(src, tgt)``.
        criterion: Loss returning the 9-tuple ``(final_loss, mse_loss,
            mae_loss, perc_diff_loss, max_diff_loss, imbalance_loss,
            direction_loss, reward, perc_cutoff)``.
        optimizer: Pre-built optimizer returned from ``configure_optimizers``.
        scheduler: Pre-built learning-rate scheduler.
        num_epochs: Stored on the instance; not otherwise used by this class.
        scaler_dict: Column name -> fitted scaler, used when plotting.
        val_data: Validation frame from which plot samples are drawn.
    """

    # Criterion outputs between the final loss (first) and the percentile
    # cutoff (last), in the order the criterion returns them.
    _COMPONENT_NAMES = (
        'mse_loss', 'mae_loss', 'perc_diff_loss', 'max_diff_loss',
        'imbalance_loss', 'direction_loss', 'reward',
    )

    def __init__(self, model, criterion, optimizer, scheduler, num_epochs: int, scaler_dict: dict, val_data: pd.DataFrame):
        super().__init__()
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.num_epochs = num_epochs
        self.scaler_dict = scaler_dict
        self.val_data = val_data  # For making predictions during logging
        # Per-batch validation rewards; appended to in validation_step and
        # never cleared by this class.
        self.validation_rewards = []

    def forward(self, src, tgt):
        return self.model(src, tgt)

    def _shared_step(self, batch):
        """Run the model with teacher forcing and return the criterion outputs."""
        batch_X, batch_y = batch
        # Shift target sequence to the right and prepend zeros
        tgt_input = torch.zeros_like(batch_y)
        tgt_input[:, 1:, :] = batch_y[:, :-1, :]
        y_pred = self.model(batch_X, tgt_input)
        return self.criterion(y_pred, batch_y)

    def training_step(self, batch, batch_idx):
        final_loss, *components, _perc_cutoff = self._shared_step(batch)

        # Log all loss components
        self.log('train/final_loss', final_loss, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True)
        for name, value in zip(self._COMPONENT_NAMES, components):
            self.log(f'train/{name}', value, on_step=False, on_epoch=True, sync_dist=True)

        return final_loss

    def validation_step(self, batch, batch_idx):
        final_loss, *components, percentile_cutoff = self._shared_step(batch)

        # Log all loss components
        self.log('val_loss', final_loss, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True)
        for name, value in zip(self._COMPONENT_NAMES, components):
            self.log(f'val/{name}', value, on_step=False, on_epoch=True, sync_dist=True)
        self.log('val/perc_cutoff', percentile_cutoff, on_step=False, on_epoch=True, sync_dist=True)

        reward = components[-1]
        self.validation_rewards.append(reward.item())

        return final_loss

    def configure_optimizers(self):
        return {
            'optimizer': self.optimizer,
            'lr_scheduler': {
                'scheduler': self.scheduler,
                'monitor': 'val_loss'
            }
        }

    def on_validation_epoch_end(self):
        # Skip ALL logic during sanity check. Current Lightning exposes
        # `sanity_checking`; `running_sanity_check` is the older name and is
        # still honoured if present.
        trainer = self.trainer
        if getattr(trainer, 'sanity_checking', False) or getattr(trainer, 'running_sanity_check', False):
            self.print("Skipping ALL on_validation_epoch_end logic during sanity check.")
            return  # Exit the method early

        if self.global_rank == 0:
            # Plotting is best-effort: a failure here must not abort training
            try:
                plot_path = self.generate_and_log_plots()
                if plot_path:
                    # Context manager closes the image file once it is logged
                    with Image.open(plot_path) as img:
                        self.logger.experiment.log({
                            "Validation/Prediction_vs_Target": wandb.Image(img),
                            "global_step": self.global_step
                        })
            except Exception as e:
                self.print(f"Error in generate_and_log_plots: {e}")

    def on_train_epoch_end(self):
        # Only the main process should perform logging
        if self.global_rank == 0:
            # Log learning rate
            optimizer = self.optimizers()
            lr = optimizer.param_groups[0]['lr']
            self.logger.experiment.log({'learning_rate': lr, 'epoch': self.current_epoch})

    def on_after_backward(self):
        # Only the main process should perform logging
        if self.global_rank != 0:
            return

        # NOTE(review): hard-coded; should match the trainer's gradient
        # clipping value - confirm against the Trainer configuration.
        clip_value = 50.0

        # Global L2 norm over all parameter gradients, with a single
        # host/device sync instead of one .item() call per parameter.
        grad_norms = [
            p.grad.detach().norm(2)
            for p in self.model.parameters()
            if p.grad is not None
        ]
        total_norm = torch.stack(grad_norms).norm(2).item() if grad_norms else 0.0

        # One log call, so both metrics land in the same WandB row
        self.logger.experiment.log({
            'Gradients/grad_total_norm': total_norm,
            # float so the flag can be plotted in WandB
            'Gradients/clipped': float(total_norm > clip_value),
            'step': self.global_step
        })

    def generate_and_log_plots(self):
        """
        Generate a prediction-vs-target plot for one random validation window.

        The same window provides both the model input and the plotted
        past/target data, so the prediction and the target refer to the same
        time span.

        Returns:
            The path to the saved image, or None if the validation data is
            shorter than one full window.
        """
        last_start = len(self.val_data) - cfg.N_PAST - cfg.N_FUTURE
        if last_start < 0:
            print("Not enough data for plotting.")
            return None

        # Pick a single window and split it into past and future parts
        start_idx = random.randint(0, last_start)
        split_idx = start_idx + cfg.N_PAST
        past_df = self.val_data.iloc[start_idx:split_idx]
        future_df = self.val_data.iloc[split_idx:split_idx + cfg.N_FUTURE]

        input_data = torch.FloatTensor(past_df.values)
        target = torch.FloatTensor(future_df.values)

        # Predict from the past window. The decoder input is all zeros here
        # (no teacher forcing), unlike the shifted targets used in training.
        input_tensor = prepare_input(input_data, self.device)
        tgt_input = torch.zeros_like(target).unsqueeze(0).to(self.device)
        with torch.no_grad():
            prediction = self.model(input_tensor, tgt_input)
        _, target_np, prediction_np = convert_to_numpy(input_tensor, target, prediction)

        # Create filtered_df for plotting: combining past and future data
        filtered_df = pd.concat([past_df, future_df])

        # Generate and save plot
        image_path = visualize_predictions(target_np, prediction_np, cfg.N_FUTURE, self.scaler_dict, filtered_df, cfg.MODEL_SAVE_PATH)

        return image_path

    def set_perc_cutoff(self, perc_cutoff):
        """Forward to the criterion's percentile-cutoff setter."""
        self.criterion.set_perc_cutoff(perc_cutoff)

    def get_perc_cutoff(self):
        """Return the criterion's current percentile cutoff."""
        return self.criterion.get_perc_cutoff()

# Main Execution Block
def train_main(cfg):
    """
    Build the data pipeline, model, optimizer and Lightning trainer, then train.

    Args:
        cfg: Configuration object. This function reads DATA_FILE, DATA_URL,
            BATCH_SIZE, HIDDEN_SIZE, NUM_LAYERS, NUM_EPOCHS, N_PAST, N_FUTURE,
            NUM_WORKERS, DEVICE, HOT_RESTART, MODEL_PATH, TRAIN_FIRST,
            MODEL_SAVE_PATH and VERSION_N from it, and passes the whole object
            to ``BalancedCryptoLoss``.

    Raises:
        Exception: Always, after W&B has been finalized and the CUDA cache
            released, to stop the surrounding script from running further.
    """
    torch.set_float32_matmul_precision("medium")

    # Load and preprocess data
    df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
    NUM_FEATURES = df.shape[1]

    # Single source of truth for the optimizer settings, so the values logged
    # to W&B are exactly the values the optimizer is built with.
    learning_rate = 4e-5
    weight_decay = 1e-4

    # Initialize the Wandb logger and name your Wandb project
    logger = WandbLogger(project='my-awesome-project', log_model=True)  # Set log_model to True

    # Log hyperparameters to Wandb
    logger.log_hyperparams({
        "batch_size": cfg.BATCH_SIZE,
        "hidden_size": cfg.HIDDEN_SIZE,
        "num_layers": cfg.NUM_LAYERS,
        "num_epochs": cfg.NUM_EPOCHS,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
    })

    # Chronological split: first 80% of rows for training, the rest for validation
    train_size = int(0.8 * len(df))
    train_data = df.iloc[:train_size]
    val_data = df.iloc[train_size:]

    # Create Datasets
    train_dataset = CryptoDataset(train_data, cfg.N_PAST, cfg.N_FUTURE)
    val_dataset = CryptoDataset(val_data, cfg.N_PAST, cfg.N_FUTURE)

    # Create DataLoaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=cfg.BATCH_SIZE,
        shuffle=False,
        num_workers=cfg.NUM_WORKERS,
        pin_memory=True
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=cfg.BATCH_SIZE,
        shuffle=False,
        num_workers=cfg.NUM_WORKERS,
        pin_memory=True
    )

    # Initialize Transformer Model
    model = CryptoTransformer(
        input_size=NUM_FEATURES,
        d_model=cfg.HIDDEN_SIZE,
        nhead=8,
        num_encoder_layers=cfg.NUM_LAYERS,
        num_decoder_layers=cfg.NUM_LAYERS,
        dim_feedforward=2048,
        dropout=0.2,
        activation="gelu",
        n_future=cfg.N_FUTURE,
        num_outputs=NUM_FEATURES,
        max_seq_length=cfg.N_PAST + cfg.N_FUTURE
    ).to(cfg.DEVICE)

    # Initialize Loss Function
    criterion = BalancedCryptoLoss(cfg)

    # Initialize Optimizer and Scheduler
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=4, min_lr=4e-6)

    # Create the CosineAnnealingWarmRestarts scheduler
    scheduler = CosineAnnealingWarmRestarts(
        optimizer,
        T_0=12,           # First restart after 12 scheduler steps
        T_mult=2,         # Double the cycle length after each restart
        eta_min=1e-6,     # Minimum learning rate
        last_epoch=-1,    # Start from the beginning
    )

    def build_wrapper():
        """Wrap the current model, criterion, optimizer and scheduler for Lightning."""
        return LightningWrapper(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            num_epochs=cfg.NUM_EPOCHS,
            scaler_dict=scalers,
            val_data=val_data
        )

    # Handle Hot Restart
    if cfg.HOT_RESTART:
        try:
            # Load the checkpoint into LightningWrapper
            wrapped_model = LightningWrapper.load_from_checkpoint(
                checkpoint_path=cfg.MODEL_PATH,
                model=model,  # Pass your model instance
                criterion=criterion,  # Pass your criterion instance
                optimizer=optimizer,  # Pass your optimizer instance
                scheduler=scheduler,  # Pass your scheduler instance
                num_epochs=cfg.NUM_EPOCHS,
                scaler_dict=scalers,
                val_data=val_data
            )

            # Continue with the model instance held by the restored wrapper
            model = wrapped_model.model

        except FileNotFoundError:
            print(f"No checkpoint found at {cfg.MODEL_PATH}. Starting fresh.")
            wrapped_model = build_wrapper()

    if cfg.TRAIN_FIRST:
        # NOTE(review): a fresh wrapper is always built here, so after a hot
        # restart only `model` carries over; the optimizer, scheduler and epoch
        # counter start from scratch - confirm this is intended.
        wrapped_model = build_wrapper()

        # Initialize Callbacks
        early_stopping_callback = EarlyStopping(
            monitor='val_loss',  # Ensure this matches the logged metric
            patience=75,
            mode='min'
        )

        checkpoint_callback = ModelCheckpoint(
            monitor='val_loss',  # Ensure this matches the logged metric
            dirpath=cfg.MODEL_SAVE_PATH,
            filename=f'model-{cfg.VERSION_N}-{{epoch:02d}}-{{val_loss:.2f}}',
            save_top_k=9,
            mode='min',
            save_weights_only=False
        )

        # Initialize Progress Bar Callback
        progress_bar = RichProgressBar(refresh_rate=2)  # Set your desired refresh rate

        # Initialize Percentile Cutoff Callback
        perc_cutoff_callback = UpdatePercentileCutoffCallback(
            reduction_threshold=0.8,  # Set your desired reduction threshold of the reward
            reduction_factor=0.95
        )

        # Initialize Trainer with Wandb logger and all callbacks
        trainer = pl.Trainer(
            max_epochs=cfg.NUM_EPOCHS,
            logger=logger,  # Use Wandb logger here
            accelerator='gpu' if torch.cuda.is_available() else 'cpu',
            devices=torch.cuda.device_count() if torch.cuda.is_available() else 1,
            strategy='ddp_find_unused_parameters_true' if torch.cuda.device_count() > 1 else "ddp_notebook",  # Distributed Data Parallel
            callbacks=[progress_bar, checkpoint_callback, early_stopping_callback, perc_cutoff_callback],
            enable_progress_bar=True,
            log_every_n_steps=10,
            # precision=16,  # Optional: Use mixed precision for faster training
            gradient_clip_val=50.0,  # Optional: Gradient clipping
        )

        # Start Training
        trainer.fit(wrapped_model, train_dataloaders=train_loader, val_dataloaders=val_loader)
    else:
        print("Skipping training as TRAIN_FIRST is set to False.")

    # Finalizing WandB
    wandb.finish()

    # Clean up CUDA cache (best effort: a failure here must not mask the run)
    try:
        torch.cuda.empty_cache()
    except Exception as e:
        print(f"Failed to empty CUDA cache: {e}")

    # Terminate the script: the exception is the deliberate stop signal
    raise Exception("Training completed and script terminated.")

In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.table import Table
from rich.console import Console
import os

# Initialize Rich Console
console = Console()

# Define your CryptoTransformer class as per your implementation
class CryptoTransformer(torch.nn.Module):
    """
    ``torch.nn.Transformer`` sandwiched between two linear projections.

    ``input_fc`` maps ``input_size`` features to ``d_model`` and is shared by
    the source and target sequences; ``output_fc`` maps ``d_model`` to
    ``num_outputs``. ``n_future`` is stored on the instance and
    ``max_seq_length`` is accepted but not used here.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        core_settings = dict(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
        )
        # Creation order fixes both the seeded weight initialisation and the
        # order of keys in the state_dict.
        self.transformer = torch.nn.Transformer(**core_settings)
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Embed ``src`` and ``tgt``, run the transformer, project to outputs."""
        embedded_src = self.input_fc(src)
        embedded_tgt = self.input_fc(tgt)
        return self.output_fc(self.transformer(embedded_src, embedded_tgt))

# Define your CryptoLSTM class as per your implementation
class CryptoLSTM(torch.nn.Module):
    """
    Batch-first LSTM followed by a linear head applied at every time step.

    Args:
        input_size: Number of input features per time step.
        hidden_size: LSTM hidden state size.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between LSTM layers.
        output_size: Number of output features per time step.
        has_ln: When True, a LayerNorm over ``hidden_size`` is applied to the
            LSTM output before the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_ln=False):
        super().__init__()
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers,
                                  dropout=dropout, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        # An explicit flag selects LayerNorm; a membership test on the integer
        # output_size raises TypeError for every real model.
        if has_ln:
            self.ln = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the per-time-step projection of the LSTM output for ``x``."""
        out, _ = self.lstm(x)
        if hasattr(self, 'ln'):
            out = self.ln(out)
        # The linear head acts on the last dimension, so it handles the LSTM
        # output directly whatever its number of leading dimensions.
        return self.fc(out)

# Function to inverse transform data, adjusted to handle varying number of features
def get_original_values(df_columns, input_df, target_df, prediction_df, scalers):
    """
    Undo scaling on the input, target and prediction frames.

    For every column that has a truthy entry in ``scalers``, the column is
    passed through that scaler's ``inverse_transform`` and then ``np.exp``.
    Columns without a scaler are left out of the result. ``df_columns`` is
    accepted but not used.

    Returns:
        Tuple ``(inv_input, inv_target, inv_pred)`` of DataFrames.
    """
    def _restore(frame):
        restored = pd.DataFrame()
        for name in frame.columns:
            fitted = scalers.get(name)
            if not fitted:
                continue
            unscaled = fitted.inverse_transform(frame[[name]])
            restored[name] = np.exp(unscaled.flatten())
        return restored

    # Processed in the order prediction, target, input.
    inv_pred = _restore(prediction_df)
    inv_target = _restore(target_df)
    inv_input = _restore(input_df)
    return inv_input, inv_target, inv_pred

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """
    Compute the mean absolute percentage error of ``inv_pred`` vs ``inv_target``.

    Each target column is compared with the same-named prediction column over
    their common leading length. Infinite or NaN ratios (zero targets) are
    dropped before averaging. The per-column means are then averaged, printed
    as a percentage and returned.
    """
    per_column = {}
    for name in inv_target.columns:
        actual = inv_target[name]
        forecast = inv_pred[name]
        # Compare only the overlapping prefix of the two series
        common_len = min(len(actual), len(forecast))
        actual, forecast = actual[:common_len], forecast[:common_len]
        ratio = np.abs((actual - forecast) / actual)
        # Division by zero yields inf/NaN; discard those rows
        ratio = ratio.replace([np.inf, -np.inf], np.nan).dropna()
        per_column[name] = ratio.mean()

    overall_score = np.mean(list(per_column.values()))
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score



import torch
import os
import traceback
def load_model(model_file, cfg, device='cpu'):
    """
    Rebuild a CryptoTransformer or CryptoLSTM from a saved weights file.

    The architecture is inferred from tensor shapes in the file. Plain
    ``state_dict`` files are accepted, as are checkpoints that nest the weights
    under a ``'state_dict'`` entry and prefix each key with ``'model.'``.

    Args:
        model_file: Path to the saved weights.
        cfg: Config object. ``N_PAST`` and ``N_FUTURE`` are copied into the
            returned parameter dict; an optional ``NHEAD`` attribute overrides
            the default of 8 attention heads (the head count cannot be read
            from the weights).
        device: Device onto which the weights and the model are placed.

    Returns:
        ``(model, params)`` where ``params`` has the keys ``input_size``,
        ``output_size``, ``n_past`` and ``n_future``; or ``None`` when the file
        is missing, has an unrecognized layout, or loading fails.
    """
    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # NOTE: torch.load unpickles the file - only use it on trusted checkpoints.
        state_dict = torch.load(model_file, map_location=device)

        # Unwrap nested checkpoints and drop the wrapper's 'model.' prefix so
        # the keys line up with the bare model's parameter names.
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
        prefix = 'model.'
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

        def count_layers(layer_prefix):
            """Count the distinct layer indices found under ``layer_prefix``."""
            return len({
                key[len(layer_prefix):].split('.', 1)[0]
                for key in state_dict
                if key.startswith(layer_prefix)
            })

        def report_key_mismatches(load_result):
            """Print keys that strict=False silently tolerated."""
            if load_result.missing_keys:
                print(f"Missing keys: {load_result.missing_keys}")
            if load_result.unexpected_keys:
                print(f"Unexpected keys: {load_result.unexpected_keys}")

        # Determine the model type
        if any(key.startswith('transformer') for key in state_dict):
            print(f"Loading {model_file} as CryptoTransformer")
            # in_proj_weight stacks the query/key/value projections, so its
            # shape is (3 * d_model, d_model): the model width is shape[1].
            in_proj_weight = state_dict['transformer.encoder.layers.0.self_attn.in_proj_weight']
            hidden_size = in_proj_weight.shape[1]
            num_heads = getattr(cfg, 'NHEAD', 8)
            num_layers = count_layers('transformer.encoder.layers.')
            num_decoder_layers = count_layers('transformer.decoder.layers.') or num_layers
            dim_feedforward = state_dict['transformer.encoder.layers.0.linear1.weight'].shape[0]
            input_size = state_dict['input_fc.weight'].shape[1]
            output_size = state_dict['output_fc.weight'].shape[0]
            print(f"Inferred Transformer parameters: hidden_size={hidden_size}, num_heads={num_heads}, num_layers={num_layers}, input_size={input_size}, output_size={output_size}")

            # Create the model
            model = CryptoTransformer(
                input_size=input_size,
                d_model=hidden_size,
                nhead=num_heads,
                num_encoder_layers=num_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=0.2,
                activation="gelu",
                n_future=cfg.N_FUTURE,
                num_outputs=output_size,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE,
            ).to(device)

            # Load state dict
            report_key_mismatches(model.load_state_dict(state_dict, strict=False))

            return model, {'input_size': input_size, 'output_size': output_size, 'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE}

        elif any(key.startswith('lstm') for key in state_dict):
            print(f"Loading {model_file} as CryptoLSTM")
            # weight_ih_l0 has shape (4 * hidden_size, input_size): one row
            # block per LSTM gate.
            first_layer_weight = state_dict['lstm.weight_ih_l0']
            hidden_size = first_layer_weight.shape[0] // 4
            num_layers = len([
                key for key in state_dict
                if key.startswith('lstm.weight_ih_l') and not key.endswith('_reverse')
            ])

            input_size = first_layer_weight.shape[1]
            output_size = state_dict['fc.weight'].shape[0]
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, input_size={input_size}, output_size={output_size}")

            # Create the model
            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                output_size=output_size,
                dropout=0.2
            ).to(device)

            # Load state dict
            report_key_mismatches(model.load_state_dict(state_dict, strict=False))

            return model, {'input_size': input_size, 'output_size': output_size, 'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE}

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None

    except Exception as e:
        # Best effort by design: one unreadable file must not abort a sweep
        # over many checkpoints, but the cause is printed for diagnosis.
        print(f"Failed to load model {model_file}: {str(e)}")
        traceback.print_exc()
        return None


def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
    """
    Score each saved model on one random window of ``df`` and rank them.

    Every file is loaded with ``load_model``, run on a random sample, and
    scored by ``calculate_score`` after both target and prediction have been
    mapped back to original units with ``get_original_values``. Files that fail
    at any stage are reported and skipped.

    Args:
        model_files: Iterable of checkpoint paths.
        cfg: Config object; ``MODEL_PATH`` is overwritten with each file and
            ``DEVICE`` selects where tensors live.
        df: Preprocessed DataFrame to sample from.
        scalers: Mapping of column name to fitted scaler.
        top_n: Maximum number of entries to return.

    Returns:
        List of ``(model_file, score)`` tuples sorted by ascending score, at
        most ``top_n`` long.
    """
    top_models = []
    for model_file in model_files:
        cfg.MODEL_PATH = model_file
        print(f"\nLoading model from {cfg.MODEL_PATH}")

        result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
        if result is None:
            print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
            continue

        model, params = result
        input_size = params['input_size']
        output_size = params['output_size']
        n_past = params['n_past']
        n_future = params['n_future']

        # The model's inputs/outputs are taken to be the leading columns of df
        input_cols = df.columns[:input_size]
        target_cols = df.columns[:output_size]

        try:
            input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, target_cols)
        except Exception as e:
            print(f"Error getting random sample: {str(e)}")
            continue

        # Prepare the tensors
        try:
            input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
            target_tensor = torch.tensor(target_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
        except Exception as e:
            print(f"Error preparing tensors: {str(e)}")
            continue

        try:
            model.eval()
            with torch.no_grad():
                if isinstance(model, CryptoTransformer):
                    tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                    prediction = model(input_tensor, tgt_input)
                elif isinstance(model, CryptoLSTM):
                    prediction = model(input_tensor)
                else:
                    print(f"Unknown model type for file: {model_file}")
                    continue
        except Exception as e:
            print(f"Failed to make predictions: {e}")
            continue

        # Score the model in original units
        try:
            input_df = pd.DataFrame(input_tensor.squeeze(0).cpu().numpy(), columns=input_cols)
            target_df = pd.DataFrame(target_tensor.squeeze(0).cpu().numpy(), columns=target_cols)
            prediction_np = prediction.squeeze(0).cpu().numpy()
            if prediction_np.ndim == 1:
                prediction_np = prediction_np.reshape(-1, 1)
            prediction_df = pd.DataFrame(prediction_np, columns=target_cols)

            _, inv_target, inv_pred = get_original_values(
                df.columns, input_df, target_df, prediction_df, scalers
            )
            score = calculate_score(inv_target, inv_pred)
        except Exception as e:
            print(f"Error calculating score: {str(e)}")
            continue

        # A NaN score cannot be ordered meaningfully, so it is left out
        if np.isnan(score):
            print(f"Skipping model {model_file}: score is NaN")
            continue
        top_models.append((model_file, score))

    # Lowest error first
    top_models.sort(key=lambda entry: entry[1])

    # Return the top models
    return top_models[:top_n]


# Function to prepare input data
def prepare_input(input_data, device):
    """Copy ``input_data`` into a float32 tensor, add a leading axis, move it to ``device``."""
    as_float = torch.tensor(input_data, dtype=torch.float32)
    batched = as_float[None, ...]
    return batched.to(device)

# Function to get a random sample from the DataFrame
def get_random_sample(df, n_past, n_future, input_cols, target_cols):
    """
    Draw one random (input, target) window pair from ``df``.

    Args:
        df: Source DataFrame.
        n_past: Number of consecutive rows in the input window.
        n_future: Number of rows, directly after the input window, in the
            target window.
        input_cols: Columns selected for the input window.
        target_cols: Columns selected for the target window.

    Returns:
        Tuple ``(input_data, target_data)`` of NumPy arrays.

    Raises:
        ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
    """
    max_start = len(df) - n_past - n_future
    if max_start < 0:
        raise ValueError("DataFrame is too short for the given n_past and n_future")
    # np.random.randint excludes its upper bound; +1 keeps the last valid
    # window reachable and lets a frame of exactly n_past + n_future rows work.
    start_idx = np.random.randint(0, max_start + 1)
    split_idx = start_idx + n_past
    input_data = df[input_cols].iloc[start_idx:split_idx].values
    target_data = df[target_cols].iloc[split_idx:split_idx + n_future].values
    return input_data, target_data

# Function to convert tensors to numpy arrays
def convert_to_numpy(input_tensor, target, prediction):
    """
    Convert the three tensors to NumPy arrays on the CPU.

    Each tensor is detached first, so outputs of a forward pass made outside
    ``torch.no_grad()`` convert instead of raising.

    Returns:
        Tuple ``(input_np, target_np, prediction_np)``.
    """
    input_np = input_tensor.detach().cpu().numpy()
    target_np = target.detach().cpu().numpy()
    prediction_np = prediction.detach().cpu().numpy()
    return input_np, target_np, prediction_np

# Settings shared by data loading, model reconstruction and evaluation.
# Annotated names are dataclass fields (constructor arguments); the two
# unannotated names, num_cpus and NUM_WORKERS, are plain class attributes
# computed once when the class body runs.
@dataclass
class Config:
    VERSION_N: int = 1
    RECORDS_TO_LOAD: int = 1205040  # load_and_preprocess_data keeps only this many trailing rows
    # NOTE(review): the interval remarks on the next two lines do not match the
    # arithmetic (108 and 24 steps) - confirm the sampling interval.
    N_PAST: int = 3 * 12 * 3  # 108 past time steps per sample (was noted as "1 week of 10-minute intervals")
    N_FUTURE: int = 1 * 12 * 2  # 24 future time steps per sample (was noted as "1 day of 10-minute intervals")
    BATCH_SIZE: int = 5000
    HIDDEN_SIZE: int = 256
    NUM_LAYERS: int = 2
    DROPOUT: float = 0.2
    NUM_EPOCHS: int = 150
    HOT_RESTART: bool = True
    TRAIN_FIRST: bool = True
    EPOCH_TO_RESTART: int = 50
    BATCH_FACTOR: int = 81
    DEBUG_FREQ: int = 180
    # Worker count: a quarter of the cores minus four (but at least 4) on
    # machines with more than 16 cores, otherwise 4.
    num_cpus = multiprocessing.cpu_count()
    NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
    DEBUG_ON: bool = False
    DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'  # download source when DATA_FILE is absent
    DATA_FILE: str = './data/fill_nan_df.csv'
    MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"  # overwritten per file by evaluate_and_get_top_models
    MODEL_SAVE_PATH: str = f'./yay'
    DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # evaluated once, at class definition
    EPSILON: float = 1e-4
    PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # root directory searched recursively for checkpoint files
    # Additional parameters
    NUM_FEATURES: int = None  # To be set after loading data

# Module-level configuration instance used by the functions in this cell.
cfg = Config()

# Load and preprocess data
def load_and_preprocess_data(file_path: str, download_url: str = None):
    """
    Load and preprocess data from a CSV file. If the file does not exist, download it.

    Preprocessing keeps the last ``cfg.RECORDS_TO_LOAD`` rows, applies a natural
    log to every column and then min-max scales each column with its own scaler.

    Args:
        file_path (str): Path to the CSV file.
        download_url (str, optional): URL to download the CSV file. Defaults to None.

    Returns:
        pd.DataFrame: Preprocessed DataFrame indexed by ``timestamp``.
        dict: Dictionary of scalers used for each column.

    Raises:
        FileNotFoundError: The file is missing and no download URL was given.
        requests.exceptions.RequestException: The download failed.
        ValueError: A column contains non-positive values.
    """
    ic("Starting data loading and preprocessing...")

    # Check if the file exists
    if not os.path.exists(file_path):
        ic(f"File {file_path} does not exist.")
        # A bare filename has an empty dirname, which makedirs rejects
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if download_url:
            ic(f"Downloading file from {download_url}...")
            # Download to a side file and rename on success, so an interrupted
            # transfer is never mistaken for a complete dataset on the next run.
            partial_path = f"{file_path}.part"
            try:
                with requests.get(download_url, stream=True, timeout=(10, 60)) as response:
                    response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(partial_path, file_path)
                ic(f"File downloaded and saved to {file_path}")
            except requests.exceptions.RequestException as e:
                ic(f"Failed to download the file: {e}")
                if os.path.exists(partial_path):
                    os.remove(partial_path)
                raise
        else:
            ic("Download URL not provided. Cannot download the file.")
            raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

    # Load the DataFrame
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df.set_index('timestamp', inplace=True)
    # Copy so the column assignments below modify an independent frame
    df = df.tail(cfg.RECORDS_TO_LOAD).copy()
    scalers = {}
    start_time_preprocess = time.time()

    # NOTE(review): each scaler is fitted on the full frame, before any
    # train/validation split made by callers - confirm this is acceptable.
    for col in df.columns:
        # Ensure no non-positive values before log transform
        if (df[col] <= 0).any():
            raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

        # Apply natural logarithm transformation
        df[col] = np.log(df[col])

        # Initialize and fit MinMaxScaler
        scaler = MinMaxScaler()
        df[col] = scaler.fit_transform(df[[col]])

        # Save the scaler
        scalers[col] = scaler

    ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
    ic(f"DataFrame shape: {df.shape}")

    return df, scalers

# Load the preprocessed dataset once for this cell and record its column
# count on the shared config.
df, scalers = load_and_preprocess_data(file_path=cfg.DATA_FILE, download_url=cfg.DATA_URL)
cfg.NUM_FEATURES = len(df.columns)
# def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
#     top_models = []
#     for model_file in model_files:
#         cfg.MODEL_PATH = model_file
#         print(f"\nLoading model from {cfg.MODEL_PATH}")

#         result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
#         if result is None:
#             print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
#             continue

#         model, params = result
#         input_size = params['input_size']
#         output_size = params['output_size']
#         n_past = params['n_past']
#         n_future = params['n_future']

#         # Get the input and target columns
#         input_cols = df.columns[:input_size]
#         target_cols = df.columns[:output_size]

#         try:
#             input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, target_cols)
#         except Exception as e:
#             print(f"Error getting random sample: {str(e)}")
#             continue

#         # Prepare the tensors
#         try:
#             input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
#             target_tensor = torch.tensor(target_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
#         except Exception as e:
#             print(f"Error preparing tensors: {str(e)}")
#             continue

#         try:
#             model.eval()
#             with torch.no_grad():
#                 if isinstance(model, CryptoTransformer):
#                     tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
#                     prediction = model(input_tensor, tgt_input)
#                 elif isinstance(model, CryptoLSTM):
#                     prediction = model(input_tensor)
#                 else:
#                     print(f"Unknown model type for file: {model_file}")
#                     continue
#         except Exception as e:
#             print(f"Failed to make predictions: {e}")
#             continue

#         # Convert tensors to numpy
#         input_np = input_tensor.squeeze(0).cpu().numpy()
#         target_np = target_tensor.squeeze(0).cpu().numpy()
#         prediction_np = prediction.squeeze(0).cpu().numpy()

#         # Convert to DataFrames
#         input_df = pd.DataFrame(input_np, columns=input_cols)
#         target_df = pd.DataFrame(target_np, columns=target_cols)
#         if prediction_np.ndim == 1:
#             prediction_df = pd.DataFrame(prediction_np.reshape(-1, 1), columns=target_cols)
#         else:
#             prediction_df = pd.DataFrame(prediction_np, columns=target_cols)

#         # Inverse transform
#         inv_input, inv_target, inv_pred = get_original_values(df.columns, input_df, target_df, prediction_df, scalers)

#         print("Calculating scores...")
#         model_score = calculate_score(inv_target, inv_pred)

#         # Append and maintain top N
#         top_models.append((model_file, model_score))
#         top_models = sorted(top_models, key=lambda x: x[1])
#         top_models = top_models[:top_n]

#         print(f"Current top {len(top_models)} models:")
#         for m, s in top_models:
#             print(f"  Model: {m}, Score: {s:.2%}")

#     return top_models


# Gather candidate model files under the search root: every .ckpt match
# first, then every .pth match.
model_files = []
for pattern in ("**/*.ckpt", "**/*.pth"):
    model_files.extend(glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True))
print(f"Total model files found: {len(model_files)}")

# Keep only the first 20 files for a quick pass; adjust as needed
model_files = model_files[:20]

# Rank the models and keep the best ten
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Report the ranking
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")

In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.table import Table
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback

# Initialize Rich Console
console = Console()

# Define your CryptoTransformer class as per your implementation
class CryptoTransformer(torch.nn.Module):
    """
    ``torch.nn.Transformer`` with a linear input and a linear output projection.

    Args:
        input_size: Number of features per time step fed to ``input_fc``.
        d_model: Transformer model width.
        nhead: Number of attention heads.
        num_encoder_layers: Number of encoder layers.
        num_decoder_layers: Number of decoder layers.
        dim_feedforward: Width of the feed-forward sublayers.
        dropout: Dropout probability inside the transformer.
        activation: Activation used by the feed-forward sublayers.
        n_future: Stored on the instance as ``n_future``.
        num_outputs: Number of features produced by ``output_fc``.
        max_seq_length: Accepted for interface compatibility; not used here.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
        )
        # Shared by the source and target sequences
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Embed ``src`` and ``tgt``, run the transformer, project to outputs."""
        src = self.input_fc(src)
        tgt = self.input_fc(tgt)
        output = self.transformer(src, tgt)
        output = self.output_fc(output)
        return output

# Define your CryptoLSTM class as per your implementation
class CryptoLSTM(torch.nn.Module):
    """
    Batch-first LSTM with a linear head and an optional LayerNorm in between.

    The ``ln`` attribute exists only when ``has_ln`` is true; ``forward``
    checks for it at call time.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_ln=False):
        super().__init__()
        # Creation order fixes the seeded initialisation and state_dict order
        self.lstm = torch.nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(hidden_size, output_size)
        if has_ln:
            self.ln = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Run the LSTM (and LayerNorm, if present) and apply the linear head."""
        sequence_out, _ = self.lstm(x)
        if hasattr(self, 'ln'):
            sequence_out = self.ln(sequence_out)
        # Anything other than a 3-D output is reduced to its last step first
        if sequence_out.dim() != 3:
            sequence_out = sequence_out[:, -1, :]
        return self.fc(sequence_out)

# Function to inverse transform data, adjusted to handle varying number of features
def get_original_values(df_columns, input_df, target_df, prediction_df, scalers):
    """
    Map scaled input, target and prediction frames back through their scalers.

    A column is converted only when ``scalers`` holds a truthy entry for it:
    the scaler's ``inverse_transform`` is applied, followed by ``np.exp``.
    Other columns are omitted from the output. ``df_columns`` is unused.

    Returns:
        Tuple ``(inv_input, inv_target, inv_pred)`` of DataFrames.
    """
    def _invert(scaled):
        result = pd.DataFrame()
        for column in scaled.columns:
            column_scaler = scalers.get(column)
            if column_scaler:
                raw = column_scaler.inverse_transform(scaled[[column]])
                result[column] = np.exp(raw.flatten())
        return result

    # Evaluated lazily, in the order prediction -> target -> input
    inv_pred, inv_target, inv_input = (
        _invert(scaled) for scaled in (prediction_df, target_df, input_df)
    )
    return inv_input, inv_target, inv_pred

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """
    Return the average per-column mean absolute percentage error.

    For each column of ``inv_target`` the same-named column of ``inv_pred`` is
    compared over the shorter of the two lengths. Ratios that are infinite or
    NaN (from zero targets) are removed before the column mean is taken. The
    mean over all columns is printed as a percentage and returned.
    """
    def _column_mape(name):
        expected = inv_target[name]
        predicted = inv_pred[name]
        overlap = min(len(expected), len(predicted))
        expected = expected[:overlap]
        predicted = predicted[:overlap]
        pct_error = np.abs((expected - predicted) / expected)
        # Zero targets produce inf/NaN; drop them rather than poison the mean
        pct_error = pct_error.replace([np.inf, -np.inf], np.nan).dropna()
        return pct_error.mean()

    scores = {name: _column_mape(name) for name in inv_target.columns}
    overall_score = np.mean(list(scores.values()))
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Suppress specific warnings
import warnings  # imported here: this cell's import list does not include it

warnings.filterwarnings("ignore", message=".*does not have many workers.*")

def load_model(model_file, cfg, device='cpu'):
    """
    Rebuild a CryptoLSTM or CryptoTransformer from a saved checkpoint.

    The model type is recognized from keys prefixed ``model.lstm`` or
    ``model.transformer``. The ``model.`` prefix is stripped before the weights
    are loaded, so they line up with the bare model's parameter names.

    Args:
        model_file: Path to the checkpoint.
        cfg: Config object. Reads ``N_PAST``, ``N_FUTURE``, ``DROPOUT``,
            ``NUM_FEATURES``, ``HIDDEN_SIZE`` and ``NUM_LAYERS``; optional
            ``NHEAD`` and ``DIM_FEEDFORWARD`` override the defaults 8 and 2048.
        device: Device onto which the weights and the model are placed.

    Returns:
        ``(model, params)`` where ``params`` has the keys ``input_size``,
        ``output_size``, ``n_past`` and ``n_future``; or ``None`` when the file
        is missing, unrecognized, or loading fails.
    """
    import re  # local import: this cell's import list does not include it

    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # NOTE: torch.load unpickles the file - only use it on trusted checkpoints.
        state_dict = torch.load(model_file, map_location=device)

        # Check if it's a Lightning checkpoint
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Weights as the bare model names them. Loading the prefixed keys with
        # strict=False would match nothing and leave the model untrained.
        prefix = 'model.'
        model_weights = {
            key[len(prefix):]: value
            for key, value in state_dict.items()
            if key.startswith(prefix)
        }

        def report_key_mismatches(load_result):
            """Print keys that strict=False silently tolerated."""
            if load_result.missing_keys:
                print(f"Missing keys: {load_result.missing_keys}")
            if load_result.unexpected_keys:
                print(f"Unexpected keys: {load_result.unexpected_keys}")

        # Determine the model type based on key prefixes
        if any(key.startswith('model.lstm') for key in state_dict.keys()):
            print(f"Loading {model_file} as CryptoLSTM")

            # One weight_ih_l<k> per layer; reverse-direction copies are not layers
            lstm_weight_ih_keys = [
                k for k in state_dict.keys()
                if k.startswith('model.lstm.weight_ih_l') and not k.endswith('_reverse')
            ]
            num_layers = len(lstm_weight_ih_keys)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            # Only layer 0 sees the raw features, so input_size must come from it
            first_layer_weight = state_dict['model.lstm.weight_ih_l0']
            hidden_size = first_layer_weight.shape[0] // 4  # LSTM gates
            input_size = first_layer_weight.shape[1]

            if 'model.fc.weight' in state_dict:
                output_size = state_dict['model.fc.weight'].shape[0]
            elif 'model.fc.bias' in state_dict:
                output_size = state_dict['model.fc.bias'].shape[0]
            else:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES

            # Infer if LayerNorm is present
            has_ln = any('model.layer_norm' in key or 'model.ln' in key for key in state_dict.keys())
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"input_size={input_size}, output_size={output_size}, LayerNorm={has_ln}")

            # Create the model
            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=cfg.DROPOUT,
                output_size=output_size,
                has_ln=has_ln
            ).to(device)

            # Load state dict
            report_key_mismatches(model.load_state_dict(model_weights, strict=False))

            return model, {'input_size': input_size, 'output_size': output_size,
                          'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE}

        elif any(key.startswith('model.transformer') for key in state_dict.keys()):
            # It's a Transformer model
            print(f"Loading {model_file} as CryptoTransformer")
            # Inference is heuristic; both 'transformer_encoder.layers' and
            # 'transformer.encoder.layers' key layouts are recognized.

            def encoder_layer0_weight(suffix):
                """Return the first layer-0 encoder tensor whose key ends with ``suffix``."""
                pattern = re.compile(r'transformer[._]encoder\.layers\.0\.' + re.escape(suffix) + r'$')
                return next(
                    (value for key, value in state_dict.items() if pattern.search(key)),
                    None,
                )

            def count_layers(kind):
                """Count distinct layer indices for ``kind`` ('encoder' or 'decoder')."""
                pattern = re.compile(r'transformer[._]' + kind + r'\.layers\.(\d+)\.')
                return len({index for key in state_dict for index in pattern.findall(key)})

            # in_proj_weight has shape (3 * d_model, d_model): width is shape[1]
            in_proj_weight = encoder_layer0_weight('self_attn.in_proj_weight')
            d_model = in_proj_weight.shape[1] if in_proj_weight is not None else cfg.HIDDEN_SIZE

            # Fall back to the configured depth rather than building 0 layers
            num_encoder_layers = count_layers('encoder') or cfg.NUM_LAYERS
            num_decoder_layers = count_layers('decoder') or cfg.NUM_LAYERS

            # The head count does not change any weight shape, so it cannot be
            # inferred from the checkpoint; use the configured/default value.
            nhead = cfg.NHEAD if hasattr(cfg, 'NHEAD') else 8

            linear1_weight = encoder_layer0_weight('linear1.weight')
            if linear1_weight is not None:
                dim_feedforward = linear1_weight.shape[0]
            else:
                dim_feedforward = cfg.DIM_FEEDFORWARD if hasattr(cfg, 'DIM_FEEDFORWARD') else 2048
            dropout = cfg.DROPOUT
            activation = 'gelu'  # Defaulting to 'gelu'

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            # Create the model
            model = CryptoTransformer(
                input_size=cfg.NUM_FEATURES,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=cfg.N_FUTURE,
                num_outputs=cfg.NUM_FEATURES,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            # Load state dict
            report_key_mismatches(model.load_state_dict(model_weights, strict=False))

            # Return model and inferred parameters
            return model, {
                'input_size': cfg.NUM_FEATURES,
                'output_size': cfg.NUM_FEATURES,
                'n_past': cfg.N_PAST,
                'n_future': cfg.N_FUTURE
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None

    except Exception as e:
        # Best effort by design: one unreadable file must not abort a sweep
        # over many checkpoints, but the cause is printed for diagnosis.
        print(f"Failed to load model {model_file}: {str(e)}")
        traceback.print_exc()
        return None

# Function to prepare input data
def prepare_input(input_data, device):
    """Turn one sample into a float32 tensor with a leading batch axis on ``device``."""
    sample = torch.tensor(input_data, dtype=torch.float32)
    batched = sample.unsqueeze(0)  # (…) -> (1, …)
    return batched.to(device)

# Function to get a random sample from the DataFrame
def get_random_sample(df, n_past, n_future, input_cols, target_cols):
    """
    Draw one random (history, future) window from ``df``.

    Args:
        df: DataFrame to sample from (rows are consecutive time steps).
        n_past: Number of rows returned as model input.
        n_future: Number of rows, immediately after the input, returned as target.
        input_cols: Columns selected for the input window.
        target_cols: Columns selected for the target window.

    Returns:
        tuple: ``(input_data, target_data)`` NumPy arrays of shape
        ``(n_past, len(input_cols))`` and ``(n_future, len(target_cols))``.

    Raises:
        ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
    """
    # Largest start index whose window still fits inside the frame.
    max_start = len(df) - n_past - n_future
    if max_start < 0:
        raise ValueError("DataFrame is too short for the given n_past and n_future")
    # randint's upper bound is exclusive, so +1 keeps the last valid window reachable.
    start_idx = np.random.randint(0, max_start + 1)
    split_idx = start_idx + n_past
    input_data = df[input_cols].iloc[start_idx:split_idx].values
    target_data = df[target_cols].iloc[split_idx:split_idx + n_future].values
    return input_data, target_data

# Function to convert tensors to numpy arrays
def convert_to_numpy(input_tensor, target, prediction):
    """
    Convert the three tensors to NumPy arrays on the CPU.

    Tensors are detached first so that tensors which still track gradients
    (for example a prediction produced outside ``torch.no_grad()``) convert
    instead of raising.

    Returns:
        tuple: ``(input_np, target_np, prediction_np)``.
    """
    input_np, target_np, prediction_np = (
        tensor.detach().cpu().numpy() for tensor in (input_tensor, target, prediction)
    )
    return input_np, target_np, prediction_np

@dataclass
class Config:
    """Settings shared by the checkpoint-loading and evaluation code in this cell.

    ``num_cpus`` and ``NUM_WORKERS`` have no type annotation, so ``@dataclass``
    treats them as plain class attributes: they are not ``__init__`` parameters
    and do not appear in the generated ``repr``.
    NOTE(review): several fields (batch/epoch/restart settings) are not read by
    the code visible in this cell; presumably carried over from the training
    configuration — confirm before removing.
    """
    VERSION_N: int = 1
    RECORDS_TO_LOAD: int = 1205040  # only the last N rows of the CSV are kept (df.tail)
    # 3 * 12 * 3 = 108 history steps per sample.
    # NOTE(review): the previous "1 week of 10-minute intervals" note does not match 108 steps — confirm the sampling interval.
    N_PAST: int = 3 * 12 * 3
    # 1 * 12 * 2 = 24 forecast steps per sample.
    # NOTE(review): the previous "1 day of 10-minute intervals" note does not match 24 steps — confirm the sampling interval.
    N_FUTURE: int = 1 * 12 * 2
    BATCH_SIZE: int = 5000
    HIDDEN_SIZE: int = 256
    NUM_LAYERS: int = 2
    DROPOUT: float = 0.2  # passed to the model constructors when rebuilding a checkpoint
    NUM_EPOCHS: int = 150
    HOT_RESTART: bool = True
    TRAIN_FIRST: bool = True
    EPOCH_TO_RESTART: int = 50
    BATCH_FACTOR: int = 81
    DEBUG_FREQ: int = 180
    # Evaluated once, when the class body runs.
    num_cpus = multiprocessing.cpu_count()
    # 4 workers on machines with up to 16 cores; otherwise num_cpus // 4 - 4, but never fewer than 4.
    NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
    DEBUG_ON: bool = False
    DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'  # download source when DATA_FILE is missing
    DATA_FILE: str = './data/fill_nan_df.csv'  # local CSV path
    # Overwritten with each candidate path by evaluate_and_get_top_models.
    MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"
    MODEL_SAVE_PATH: str = f'./yay'
    # The default device is chosen once, at class-definition time.
    DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    EPSILON: float = 1e-4
    PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # searched recursively for *.ckpt / *.pth files
    # Additional parameters
    NUM_FEATURES: int = None  # None until the data is loaded; then set to df.shape[1]

# Module-level configuration instance used by the rest of the cell.
cfg = Config()

# Load and preprocess data
def load_and_preprocess_data(file_path: str, download_url: str = None):
    """
    Load a CSV, log-transform every column and min-max scale it.

    If ``file_path`` does not exist and ``download_url`` is given, the file is
    downloaded first. The download is written to ``<file_path>.part`` and only
    renamed into place once complete, so an interrupted download can never be
    mistaken for a valid data file on the next run.

    Args:
        file_path (str): Path to the CSV file. It must contain a ``timestamp``
            column, which becomes the index.
        download_url (str, optional): URL to download the CSV file. Defaults to None.

    Returns:
        pd.DataFrame: The last ``cfg.RECORDS_TO_LOAD`` rows, each column
            replaced by ``MinMaxScaler(log(column))``.
        dict: Mapping of column name to the fitted scaler for that column.

    Raises:
        FileNotFoundError: If the file is missing and no download URL was given.
        requests.exceptions.RequestException: If the download fails.
        ValueError: If a column contains a value <= 0 (log is undefined).
    """
    ic("Starting data loading and preprocessing...")

    # Check if the file exists
    if not os.path.exists(file_path):
        ic(f"File {file_path} does not exist.")
        # dirname is '' for a bare filename, and os.makedirs('') raises.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if download_url:
            ic(f"Downloading file from {download_url}...")
            partial_path = f"{file_path}.part"
            try:
                # The timeout bounds connect time and each socket read, not the whole transfer.
                with requests.get(download_url, stream=True, timeout=60) as response:
                    response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:  # skip keep-alive chunks
                                f.write(chunk)
                os.replace(partial_path, file_path)
                ic(f"File downloaded and saved to {file_path}")
            except requests.exceptions.RequestException as e:
                ic(f"Failed to download the file: {e}")
                raise
            finally:
                # Only present if the download did not complete.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        else:
            ic("Download URL not provided. Cannot download the file.")
            raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

    # Load the DataFrame
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df.set_index('timestamp', inplace=True)
    # Copy so the column assignments below write to an independent frame, not a slice.
    df = df.tail(cfg.RECORDS_TO_LOAD).copy()
    scalers = {}
    start_time_preprocess = time.time()

    for col in df.columns:
        # Ensure no non-positive values before log transform
        if (df[col] <= 0).any():
            raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

        # Apply natural logarithm transformation
        df[col] = np.log(df[col])

        # Fit one scaler per column so each can be inverted independently later
        scaler = MinMaxScaler()
        df[col] = scaler.fit_transform(df[[col]])

        # Save the scaler
        scalers[col] = scaler

    ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
    ic(f"DataFrame shape: {df.shape}")

    return df, scalers

# Load (downloading if necessary) and scale the dataset once, at module level.
df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
# The feature count is only known once the CSV has been read.
cfg.NUM_FEATURES = df.shape[1]

def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
    """
    Score each checkpoint on one random window of ``df`` and keep the best ``top_n``.

    For every path the model is loaded with ``load_model``, run once on a
    random window, the outputs are mapped back with ``get_original_values``
    and scored with ``calculate_score``. A failure at any stage is reported
    and that checkpoint is skipped, so one bad file cannot abort the run.
    Checkpoints whose score is NaN/inf are skipped as well, because
    non-finite keys make the ranking sort meaningless.

    Args:
        model_files: Iterable of checkpoint paths.
        cfg: Config object; ``cfg.MODEL_PATH`` is overwritten with each path
            and ``cfg.DEVICE`` selects the device.
        df: Preprocessed DataFrame to sample windows from.
        scalers: Mapping of column name to fitted scaler.
        top_n: Maximum number of entries kept.

    Returns:
        list: ``(model_file, score)`` tuples sorted by ascending score
        (lowest first), at most ``top_n`` long.
    """
    top_models = []
    for model_file in model_files:
        cfg.MODEL_PATH = model_file
        print(f"\nLoading model from {cfg.MODEL_PATH}")

        result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
        if result is None:
            print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
            continue

        model, params = result
        input_size = params['input_size']
        output_size = params['output_size']
        n_past = params['n_past']
        n_future = params['n_future']

        # Get the input and target columns
        input_cols = df.columns[:input_size]
        target_cols = df.columns[:output_size]

        try:
            input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, target_cols)
        except Exception as e:
            print(f"Error getting random sample: {str(e)}")
            continue

        # Prepare the tensors
        try:
            input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
            target_tensor = torch.tensor(target_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
        except Exception as e:
            print(f"Error preparing tensors: {str(e)}")
            continue

        try:
            model.eval()
            with torch.no_grad():
                if isinstance(model, CryptoTransformer):
                    # The decoder is fed an all-zero target sequence.
                    tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                    prediction = model(input_tensor, tgt_input)
                elif isinstance(model, CryptoLSTM):
                    prediction = model(input_tensor)
                else:
                    print(f"Unknown model type for file: {model_file}")
                    continue
        except Exception as e:
            print(f"Failed to make predictions: {e}")
            continue

        try:
            # Convert tensors to numpy
            input_np = input_tensor.squeeze(0).cpu().numpy()
            target_np = target_tensor.squeeze(0).cpu().numpy()
            prediction_np = prediction.squeeze(0).cpu().numpy()

            # Convert to DataFrames
            input_df = pd.DataFrame(input_np, columns=input_cols)
            target_df = pd.DataFrame(target_np, columns=target_cols)
            if prediction_np.ndim == 1:
                prediction_df = pd.DataFrame(prediction_np.reshape(-1, 1), columns=target_cols)
            else:
                prediction_df = pd.DataFrame(prediction_np, columns=target_cols)

            # Inverse transform
            _inv_input, inv_target, inv_pred = get_original_values(
                df.columns, input_df, target_df, prediction_df, scalers
            )
        except Exception as e:
            print(f"Error during inverse transformation: {str(e)}")
            continue

        print("Calculating scores...")
        try:
            model_score = calculate_score(inv_target, inv_pred)
            score_is_finite = bool(np.isfinite(model_score))
        except Exception as e:
            print(f"Error calculating score: {str(e)}")
            continue

        if not score_is_finite:
            print(f"Score for {model_file} is not finite ({model_score}). Skipping model.")
            continue

        # Append and maintain top N (lower score ranks first)
        top_models.append((model_file, model_score))
        top_models = sorted(top_models, key=lambda x: x[1])[:top_n]

        print(f"Current top {len(top_models)} models:")
        for m, s in top_models:
            print(f"  Model: {m}, Score: {s:.2%}")

    return top_models

# Collect every checkpoint under the search root: all *.ckpt files first, then all *.pth files
model_files = [
    checkpoint_path
    for pattern in ("**/*.ckpt", "**/*.pth")
    for checkpoint_path in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
]
print(f"Total model files found: {len(model_files)}")

# Only the first 20 discovered files are evaluated (adjust the cap as needed)
model_files = model_files[:20]

# Rank the candidates and keep the best `top_n`
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Report the final ranking
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.table import Table
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback
import warnings

# Module-level Rich console.
# NOTE(review): no code visible in this cell references `console` (output goes through print/ic) — confirm it is still needed.
console = Console()

# Define your CryptoTransformer class as per your implementation
class CryptoTransformer(torch.nn.Module):
    """
    Encoder-decoder transformer with linear input and output projections.

    Inputs are batch-first: ``src`` is ``(batch, src_len, input_size)`` and
    ``tgt`` is ``(batch, tgt_len, input_size)``; the output is
    ``(batch, tgt_len, num_outputs)``.

    Args:
        input_size: Number of features per time step.
        d_model, nhead, num_encoder_layers, num_decoder_layers,
        dim_feedforward, dropout, activation: Forwarded to ``torch.nn.Transformer``.
        n_future: Stored on the instance as ``self.n_future``.
        num_outputs: Size of the last dimension of the output.
        max_seq_length: Accepted for signature compatibility; not used here.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            # Callers pass (batch, seq, feature) tensors. With the default
            # sequence-first layout the batch check fails whenever the source
            # and target lengths differ. Parameter names/shapes are unaffected.
            batch_first=True,
        )
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Project both sequences to ``d_model``, run the transformer, project to ``num_outputs``."""
        src = self.input_fc(src)
        tgt = self.input_fc(tgt)  # source and target share the same input projection
        output = self.transformer(src, tgt)
        return self.output_fc(output)

# Define your CryptoLSTM class as per your implementation
class CryptoLSTM(torch.nn.Module):
    """
    LSTM followed by an optional LayerNorm and a linear projection.

    The projection is applied to every time step, so the output has the same
    leading dimensions as the input with ``output_size`` as the last one:
    ``(batch, seq, output_size)`` for batched input and ``(seq, output_size)``
    for unbatched input.

    Args:
        input_size: Features per time step.
        hidden_size: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between LSTM layers (only meaningful when ``num_layers > 1``).
        output_size: Size of the projected output.
        has_ln: When True, a ``LayerNorm(hidden_size)`` is registered as ``self.ln``.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_ln=False):
        super().__init__()
        # nn.LSTM only applies dropout between layers and warns when it is
        # non-zero with a single layer; passing 0.0 there is equivalent.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers,
                                  dropout=inter_layer_dropout, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        # The attribute only exists when requested, so the state_dict keys
        # match checkpoints saved without a LayerNorm.
        if has_ln:
            self.ln = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the per-time-step projection of the LSTM output."""
        out, _ = self.lstm(x)
        if hasattr(self, 'ln'):
            out = self.ln(out)
        # nn.Linear acts on the last dimension, so batched (3-D) and unbatched
        # (2-D) outputs are handled identically. The former 2-D branch indexed
        # the tensor with three indices and always raised.
        return self.fc(out)

# Function to find a suitable number of attention heads
def find_nhead(d_model, max_nhead=16, min_head_dim=8):
    """
    Pick the largest head count (at most ``max_nhead``) that divides ``d_model``
    evenly while leaving every head at least ``min_head_dim`` wide.

    Returns 1 when no head count qualifies.
    """
    qualifying = (
        heads
        for heads in reversed(range(1, max_nhead + 1))
        if d_model % heads == 0 and d_model // heads >= min_head_dim
    )
    # Candidates are visited from largest to smallest, so the first hit is the answer.
    return next(qualifying, 1)

# Function to inverse transform data, adjusted to handle varying number of features
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers, n_future, num_features):
    """
    Map scaled inputs, targets and predictions back to original units.

    For each column the inverse applied is ``exp(scaler.inverse_transform(x))``,
    using that column's entry in ``scalers``. Columns without a scaler are
    omitted from the results.

    Args:
        df_columns: Column names; position ``j`` names feature ``j``.
        input_np: History window with features on the last axis,
            e.g. ``(n_past, num_features)``.
        target_np: Targets holding ``n_future * num_features`` values.
        prediction_np: Predictions holding ``n_future * num_features`` values.
        scalers: Mapping of column name to an object with ``inverse_transform``.
        n_future: Number of forecast steps.
        num_features: Number of features per step.

    Returns:
        tuple: ``(inv_input, inv_target, inv_pred)`` DataFrames with one column
        per feature (named after the feature) and one row per time step:
        ``n_past`` rows for the input, ``n_future`` rows for target/prediction.

    Raises:
        ValueError: If target or prediction cannot be reshaped to
            ``(n_future, num_features)``.
    """
    prediction_2d = np.asarray(prediction_np).reshape(n_future, num_features)
    target_2d = np.asarray(target_np).reshape(n_future, num_features)

    history = np.asarray(input_np)
    if history.ndim > 1:
        # Fold any leading batch axis into the time axis; features stay last.
        history = history.reshape(-1, history.shape[-1])
    else:
        history = history.reshape(-1, 1)

    def _restore(scaler, column_values):
        # One vectorised call per column instead of one call per scalar.
        return np.exp(scaler.inverse_transform(column_values.reshape(-1, 1)).ravel())

    input_columns, target_columns, pred_columns = {}, {}, {}
    for j, col in enumerate(list(df_columns)[:num_features]):
        scaler = scalers.get(col)
        if scaler is None:
            continue
        pred_columns[col] = _restore(scaler, prediction_2d[:, j])
        target_columns[col] = _restore(scaler, target_2d[:, j])
        # The history may carry fewer features than the forecast.
        if j < history.shape[1]:
            input_columns[col] = _restore(scaler, history[:, j])

    return pd.DataFrame(input_columns), pd.DataFrame(target_columns), pd.DataFrame(pred_columns)

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """
    Compute the mean absolute percentage error (MAPE) averaged over columns.

    For each column of ``inv_target`` the absolute percentage error against
    the same column of ``inv_pred`` is averaged, ignoring entries where it is
    undefined (division by zero). The overall score is the mean of the
    per-column values. Columns whose MAPE is itself undefined are left out, so
    one degenerate column does not turn the whole score into NaN.

    Args:
        inv_target: DataFrame of true values.
        inv_pred: DataFrame of predictions with the same column names.

    Returns:
        float: Overall score as a fraction (0.05 means 5%); NaN when no column
        yields a defined MAPE.
    """
    scores = {}
    for col in inv_target.columns:
        # Ensure lengths match
        min_len = min(len(inv_target[col]), len(inv_pred[col]))
        target_col = inv_target[col][:min_len]
        pred_col = inv_pred[col][:min_len]
        ape = np.abs((target_col - pred_col) / target_col)
        # x/0 gives +/-inf and 0/0 gives NaN; drop both
        ape = ape.replace([np.inf, -np.inf], np.nan).dropna()
        scores[col] = ape.mean()
    defined_scores = [value for value in scores.values() if np.isfinite(value)]
    overall_score = np.mean(defined_scores) if defined_scores else np.nan
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Silence warnings whose message matches "does not have many workers".
# NOTE(review): presumably the PyTorch Lightning DataLoader num_workers hint — confirm the source.
warnings.filterwarnings("ignore", message=".*does not have many workers.*")

def _transformer_layer_count(state_dict, stack):
    """Count distinct layer indices stored for one transformer stack ('encoder' or 'decoder').

    Both key spellings are recognised: ``transformer_<stack>.layers.N.`` and
    ``transformer.<stack>.layers.N.`` (the latter is what ``torch.nn.Transformer``
    registered as ``self.transformer`` produces).
    """
    markers = (f"transformer_{stack}.layers.", f"transformer.{stack}.layers.")
    layer_indices = set()
    for key in state_dict:
        for marker in markers:
            pos = key.find(marker)
            if pos == -1:
                continue
            # The token right after "layers." is the layer index.
            layer_token = key[pos + len(marker):].split('.', 1)[0]
            if layer_token.isdigit():
                layer_indices.add(int(layer_token))
            break
    return len(layer_indices)


def load_model(model_file, cfg, device='cpu'):
    """
    Rebuild a CryptoLSTM or CryptoTransformer from a checkpoint file.

    The architecture is inferred heuristically from the state-dict keys and
    tensor shapes. Lightning checkpoints (a dict with a ``'state_dict'`` entry)
    and a ``model.`` key prefix are handled. Weights are loaded with
    ``strict=False``; missing and unexpected keys are printed.

    Args:
        model_file: Path to a ``.ckpt`` / ``.pth`` file.
        cfg: Config providing ``DROPOUT``, ``HIDDEN_SIZE``, ``N_PAST``,
            ``N_FUTURE``, ``NUM_FEATURES`` and optionally ``DIM_FEEDFORWARD``.
        device: Device for ``map_location`` and the rebuilt model.

    Returns:
        tuple | None: ``(model, params)`` where ``params`` has the keys
        ``input_size``, ``output_size``, ``n_past`` and ``n_future``; ``None``
        if the file is missing, unrecognised, incompatible with the data, or
        any exception occurs while loading (the traceback is printed).
    """
    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only load checkpoints from trusted locations.
        state_dict = torch.load(model_file, map_location=device)

        # Check if it's a Lightning checkpoint
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Strip 'model.' prefix if present
        if any(key.startswith('model.') for key in state_dict.keys()):
            prefix_len = len('model.')
            state_dict = {
                (k[prefix_len:] if k.startswith('model.') else k): v
                for k, v in state_dict.items()
            }

        # Determine the model type based on key prefixes
        if any(key.startswith('lstm') for key in state_dict.keys()):
            print(f"Loading {model_file} as CryptoLSTM")

            # One weight_ih tensor exists per stacked layer
            lstm_weight_ih_keys = [k for k in state_dict.keys() if k.startswith('lstm.weight_ih_l')]
            num_layers = len(lstm_weight_ih_keys)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            sample_weight_ih = state_dict[lstm_weight_ih_keys[0]]
            hidden_size = sample_weight_ih.shape[0] // 4  # four LSTM gates are stacked along dim 0
            input_size = sample_weight_ih.shape[1]

            if 'fc.weight' in state_dict:
                output_size = state_dict['fc.weight'].shape[0]
            elif 'fc.bias' in state_dict:
                output_size = state_dict['fc.bias'].shape[0]
            else:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES

            # Infer if LayerNorm is present
            has_ln = any('layer_norm' in key or 'ln' in key for key in state_dict.keys())
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"input_size={input_size}, output_size={output_size}, LayerNorm={has_ln}")

            # Ensure output_size aligns with the data
            expected_output_size = cfg.N_FUTURE * cfg.NUM_FEATURES
            if output_size != expected_output_size:
                print(f"Model's output_size ({output_size}) does not match expected ({expected_output_size}). Skipping model.")
                return None

            # Create the model
            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=cfg.DROPOUT,
                output_size=output_size,
                has_ln=has_ln
            ).to(device)

            # Load state dict
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {'input_size': input_size, 'output_size': output_size,
                          'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE}

        elif any(key.startswith('transformer') for key in state_dict.keys()):
            print(f"Loading {model_file} as CryptoTransformer")
            # The inference below is heuristic and may need adjusting to the
            # architecture the checkpoint was actually trained with.

            first_layer_prefixes = ('transformer_encoder.layers.0.', 'transformer.encoder.layers.0.')

            def _first_encoder_tensor(suffix):
                # First tensor whose key contains "<prefix><suffix>", or None.
                for key, tensor in state_dict.items():
                    if any(f"{prefix}{suffix}" in key for prefix in first_layer_prefixes):
                        return tensor
                return None

            # in_proj_weight is (3 * d_model, d_model)
            in_proj_weight = _first_encoder_tensor('self_attn.in_proj_weight')
            d_model = in_proj_weight.shape[1] if in_proj_weight is not None else cfg.HIDDEN_SIZE

            # Find a suitable nhead
            nhead = find_nhead(d_model)
            if nhead == 1 and d_model < 8:
                print(f"Could not find a suitable nhead for d_model={d_model}. Skipping model.")
                return None

            # Count layers by the index that follows "layers." in the keys
            num_encoder_layers = _transformer_layer_count(state_dict, 'encoder')
            num_decoder_layers = _transformer_layer_count(state_dict, 'decoder')

            # linear1.weight is (dim_feedforward, d_model); prefer the checkpoint's
            # value because a mismatching size cannot be loaded anyway.
            linear1_weight = _first_encoder_tensor('linear1.weight')
            if linear1_weight is not None:
                dim_feedforward = linear1_weight.shape[0]
            else:
                dim_feedforward = getattr(cfg, 'DIM_FEEDFORWARD', 2048)
            dropout = cfg.DROPOUT
            activation = 'gelu'  # Defaulting to 'gelu'

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            # Create the model
            model = CryptoTransformer(
                input_size=cfg.NUM_FEATURES,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=cfg.N_FUTURE,
                num_outputs=cfg.N_FUTURE * cfg.NUM_FEATURES,  # Adjusted for multi-step prediction
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            # Load state dict
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {
                'input_size': cfg.NUM_FEATURES,
                'output_size': cfg.N_FUTURE * cfg.NUM_FEATURES,
                'n_past': cfg.N_PAST,
                'n_future': cfg.N_FUTURE
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None

    except Exception as e:
        # Best-effort loader: report the failure and let the caller skip this file.
        print(f"Failed to load model {model_file}: {str(e)}")
        traceback.print_exc()
        return None

# Function to prepare input data
def prepare_input(input_data, device):
    """Build a float32 tensor from ``input_data``, prepend a batch axis and move it to ``device``."""
    as_float = torch.tensor(input_data, dtype=torch.float32)
    return torch.unsqueeze(as_float, 0).to(device)

# Function to get a random sample from the DataFrame
def get_random_sample(df, n_past, n_future, input_cols, target_cols):
    """
    Draw one random (history, future) window from ``df``.

    Args:
        df: DataFrame to sample from (rows are consecutive time steps).
        n_past: Number of rows returned as model input.
        n_future: Number of rows, immediately after the input, returned as target.
        input_cols: Columns selected for the input window.
        target_cols: Columns selected for the target window.

    Returns:
        tuple: ``(input_data, target_data)`` NumPy arrays of shape
        ``(n_past, len(input_cols))`` and ``(n_future, len(target_cols))``.

    Raises:
        ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
    """
    # Largest start index whose window still fits inside the frame.
    max_start = len(df) - n_past - n_future
    if max_start < 0:
        raise ValueError("DataFrame is too short for the given n_past and n_future")
    # randint's upper bound is exclusive, so +1 keeps the last valid window reachable.
    start_idx = np.random.randint(0, max_start + 1)
    split_idx = start_idx + n_past
    input_data = df[input_cols].iloc[start_idx:split_idx].values
    # Targets come from target_cols (previously input_cols was used and target_cols ignored).
    target_data = df[target_cols].iloc[split_idx:split_idx + n_future].values
    return input_data, target_data

# Function to convert tensors to numpy arrays
def convert_to_numpy(input_tensor, target, prediction):
    """
    Convert the three tensors to NumPy arrays on the CPU.

    Tensors are detached first so that tensors which still track gradients
    (for example a prediction produced outside ``torch.no_grad()``) convert
    instead of raising.

    Returns:
        tuple: ``(input_np, target_np, prediction_np)``.
    """
    input_np, target_np, prediction_np = (
        tensor.detach().cpu().numpy() for tensor in (input_tensor, target, prediction)
    )
    return input_np, target_np, prediction_np

@dataclass
class Config:
    """Settings shared by the checkpoint-loading and evaluation code in this cell.

    ``num_cpus`` and ``NUM_WORKERS`` have no type annotation, so ``@dataclass``
    treats them as plain class attributes: they are not ``__init__`` parameters
    and do not appear in the generated ``repr``.
    NOTE(review): several fields (batch/epoch/restart settings) are not read by
    the code visible in this cell; presumably carried over from the training
    configuration — confirm before removing.
    """
    VERSION_N: int = 1
    RECORDS_TO_LOAD: int = 1205040  # only the last N rows of the CSV are kept (df.tail)
    # 3 * 12 * 3 = 108 history steps per sample.
    # NOTE(review): the previous "1 week of 10-minute intervals" note does not match 108 steps — confirm the sampling interval.
    N_PAST: int = 3 * 12 * 3
    # 1 * 12 * 2 = 24 forecast steps per sample.
    # NOTE(review): the previous "1 day of 10-minute intervals" note does not match 24 steps — confirm the sampling interval.
    N_FUTURE: int = 1 * 12 * 2
    BATCH_SIZE: int = 5000
    HIDDEN_SIZE: int = 256  # fallback d_model when it cannot be read from a transformer checkpoint
    NUM_LAYERS: int = 2
    DROPOUT: float = 0.2  # passed to the model constructors when rebuilding a checkpoint
    NUM_EPOCHS: int = 150
    HOT_RESTART: bool = True
    TRAIN_FIRST: bool = True
    EPOCH_TO_RESTART: int = 50
    BATCH_FACTOR: int = 81
    DEBUG_FREQ: int = 180
    # Evaluated once, when the class body runs.
    num_cpus = multiprocessing.cpu_count()
    # 4 workers on machines with up to 16 cores; otherwise num_cpus // 4 - 4, but never fewer than 4.
    NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
    DEBUG_ON: bool = False
    DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'  # download source when DATA_FILE is missing
    DATA_FILE: str = './data/fill_nan_df.csv'  # local CSV path
    # Overwritten with each candidate path by evaluate_and_get_top_models.
    MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"
    MODEL_SAVE_PATH: str = f'./yay'
    # The default device is chosen once, at class-definition time.
    DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    EPSILON: float = 1e-4
    PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # searched recursively for *.ckpt / *.pth files
    # Additional parameters
    NUM_FEATURES: int = None  # None until the data is loaded; then set to df.shape[1]

# Module-level configuration instance used by the rest of the cell.
cfg = Config()

# Load and preprocess data
def load_and_preprocess_data(file_path: str, download_url: str = None):
    """
    Load a CSV, log-transform every column and min-max scale it.

    If ``file_path`` does not exist and ``download_url`` is given, the file is
    downloaded first. The download is written to ``<file_path>.part`` and only
    renamed into place once complete, so an interrupted download can never be
    mistaken for a valid data file on the next run.

    Args:
        file_path (str): Path to the CSV file. It must contain a ``timestamp``
            column, which becomes the index.
        download_url (str, optional): URL to download the CSV file. Defaults to None.

    Returns:
        pd.DataFrame: The last ``cfg.RECORDS_TO_LOAD`` rows, each column
            replaced by ``MinMaxScaler(log(column))``.
        dict: Mapping of column name to the fitted scaler for that column.

    Raises:
        FileNotFoundError: If the file is missing and no download URL was given.
        requests.exceptions.RequestException: If the download fails.
        ValueError: If a column contains a value <= 0 (log is undefined).
    """
    ic("Starting data loading and preprocessing...")

    # Check if the file exists
    if not os.path.exists(file_path):
        ic(f"File {file_path} does not exist.")
        # dirname is '' for a bare filename, and os.makedirs('') raises.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if download_url:
            ic(f"Downloading file from {download_url}...")
            partial_path = f"{file_path}.part"
            try:
                # The timeout bounds connect time and each socket read, not the whole transfer.
                with requests.get(download_url, stream=True, timeout=60) as response:
                    response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:  # skip keep-alive chunks
                                f.write(chunk)
                os.replace(partial_path, file_path)
                ic(f"File downloaded and saved to {file_path}")
            except requests.exceptions.RequestException as e:
                ic(f"Failed to download the file: {e}")
                raise
            finally:
                # Only present if the download did not complete.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        else:
            ic("Download URL not provided. Cannot download the file.")
            raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

    # Load the DataFrame
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df.set_index('timestamp', inplace=True)
    # Copy so the column assignments below write to an independent frame, not a slice.
    df = df.tail(cfg.RECORDS_TO_LOAD).copy()
    scalers = {}
    start_time_preprocess = time.time()

    for col in df.columns:
        # Ensure no non-positive values before log transform
        if (df[col] <= 0).any():
            raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

        # Apply natural logarithm transformation
        df[col] = np.log(df[col])

        # Fit one scaler per column so each can be inverted independently later
        scaler = MinMaxScaler()
        df[col] = scaler.fit_transform(df[[col]])

        # Save the scaler
        scalers[col] = scaler

    ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
    ic(f"DataFrame shape: {df.shape}")

    return df, scalers

# Load (downloading if necessary) and scale the dataset once, at module level.
df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
# The feature count is only known once the CSV has been read.
cfg.NUM_FEATURES = df.shape[1]

def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
    """
    Score each checkpoint on one random window of ``df`` and keep the best ``top_n``.

    For every path the model is loaded with ``load_model``, run once on a
    random window, the outputs are mapped back with ``get_original_values``
    and scored with ``calculate_score``. A failure at any stage is reported
    and that checkpoint is skipped, so one bad file cannot abort the run.
    Checkpoints whose prediction does not hold exactly
    ``n_future * num_features`` values, or whose score is NaN/inf, are skipped.

    Args:
        model_files: Iterable of checkpoint paths.
        cfg: Config object; ``cfg.MODEL_PATH`` is overwritten with each path,
            ``cfg.DEVICE`` selects the device and ``cfg.NUM_FEATURES`` gives
            the number of features per step.
        df: Preprocessed DataFrame to sample windows from.
        scalers: Mapping of column name to fitted scaler.
        top_n: Maximum number of entries kept.

    Returns:
        list: ``(model_file, score)`` tuples sorted by ascending score
        (lowest first), at most ``top_n`` long.
    """
    top_models = []
    for model_file in model_files:
        cfg.MODEL_PATH = model_file
        print(f"\nLoading model from {cfg.MODEL_PATH}")

        result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
        if result is None:
            print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
            continue

        model, params = result
        input_size = params['input_size']
        output_size = params['output_size']
        n_past = params['n_past']
        n_future = params['n_future']
        num_features = cfg.NUM_FEATURES

        # Inputs and targets are drawn from the same columns
        input_cols = df.columns[:input_size]

        try:
            input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, input_cols)
        except Exception as e:
            print(f"Error getting random sample: {str(e)}")
            continue

        # Prepare the tensors (only the input gets a batch axis)
        try:
            input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
            target_tensor = torch.tensor(target_data, dtype=torch.float32).to(cfg.DEVICE)
        except Exception as e:
            print(f"Error preparing tensors: {str(e)}")
            continue

        try:
            model.eval()
            with torch.no_grad():
                if isinstance(model, CryptoTransformer):
                    # For Transformer, prepare tgt_input with zeros
                    tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                    prediction = model(input_tensor, tgt_input)
                elif isinstance(model, CryptoLSTM):
                    prediction = model(input_tensor)
                else:
                    print(f"Unknown model type for file: {model_file}")
                    continue
        except Exception as e:
            print(f"Failed to make predictions: {e}")
            continue

        # Convert tensors to numpy
        input_np = input_tensor.squeeze(0).cpu().numpy()
        target_np = target_tensor.cpu().numpy()
        prediction_np = prediction.cpu().numpy()

        expected_values = n_future * num_features
        if isinstance(model, CryptoTransformer) or output_size > num_features:
            # Multi-step models must emit exactly n_future * num_features values
            if output_size != expected_values:
                print(f"Model's output_size ({output_size}) does not match n_future * num_features ({n_future * num_features}). Skipping model.")
                continue

        # Check the element count before reshaping so a mismatch skips this
        # checkpoint instead of raising out of the loop.
        if prediction_np.size != expected_values:
            print(f"Prediction has {prediction_np.size} values but n_future * num_features is {expected_values}. Skipping model.")
            continue
        prediction_np = prediction_np.reshape(n_future, num_features)

        # Inverse transform
        try:
            _inv_input, inv_target, inv_pred = get_original_values(
                df.columns, input_np, target_np, prediction_np, scalers, n_future, num_features
            )
        except Exception as e:
            print(f"Error during inverse transformation: {str(e)}")
            continue

        print("Calculating scores...")
        try:
            model_score = calculate_score(inv_target, inv_pred)
            score_is_finite = bool(np.isfinite(model_score))
        except Exception as e:
            print(f"Error calculating score: {str(e)}")
            continue

        # NaN/inf keys would make the ranking sort below meaningless.
        if not score_is_finite:
            print(f"Score for {model_file} is not finite ({model_score}). Skipping model.")
            continue

        # Append and maintain top N (lower score ranks first)
        top_models.append((model_file, model_score))
        top_models = sorted(top_models, key=lambda x: x[1])[:top_n]

        print(f"Current top {len(top_models)} models:")
        for m, s in top_models:
            print(f"  Model: {m}, Score: {s:.2%}")

    return top_models

# Collect every checkpoint under the search root: all *.ckpt files first, then all *.pth files
model_files = [
    checkpoint_path
    for pattern in ("**/*.ckpt", "**/*.pth")
    for checkpoint_path in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
]
print(f"Total model files found: {len(model_files)}")

# Only the first 20 discovered files are evaluated (adjust the cap as needed)
model_files = model_files[:20]

# Rank the candidates and keep the best `top_n`
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Report the final ranking
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.table import Table
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback
import warnings

# Module-level Rich console.
# NOTE(review): not referenced in the part of this cell reviewed here — confirm it is used further down.
console = Console()

# Define your CryptoTransformer class as per your implementation
class CryptoTransformer(torch.nn.Module):
    """
    Encoder-decoder transformer with linear input and output projections.

    Inputs are batch-first: ``src`` is ``(batch, src_len, input_size)`` and
    ``tgt`` is ``(batch, tgt_len, input_size)``; the output is
    ``(batch, tgt_len, num_outputs)``.

    Args:
        input_size: Number of features per time step.
        d_model, nhead, num_encoder_layers, num_decoder_layers,
        dim_feedforward, dropout, activation: Forwarded to ``torch.nn.Transformer``.
        n_future: Stored on the instance as ``self.n_future``.
        num_outputs: Size of the last dimension of the output.
        max_seq_length: Accepted for signature compatibility; not used here.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            # Callers pass (batch, seq, feature) tensors. With the default
            # sequence-first layout the batch check fails whenever the source
            # and target lengths differ. Parameter names/shapes are unaffected.
            batch_first=True,
        )
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Project both sequences to ``d_model``, run the transformer, project to ``num_outputs``."""
        src = self.input_fc(src)
        tgt = self.input_fc(tgt)  # source and target share the same input projection
        output = self.transformer(src, tgt)
        return self.output_fc(output)

# Define your CryptoLSTM class as per your implementation
class CryptoLSTM(torch.nn.Module):
    """
    LSTM followed by an optional LayerNorm and a linear projection.

    The projection is applied to every time step, so the output has the same
    leading dimensions as the input with ``output_size`` as the last one:
    ``(batch, seq, output_size)`` for batched input and ``(seq, output_size)``
    for unbatched input.

    Args:
        input_size: Features per time step.
        hidden_size: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between LSTM layers (only meaningful when ``num_layers > 1``).
        output_size: Size of the projected output.
        has_ln: When True, a ``LayerNorm(hidden_size)`` is registered as
            ``self.layer_norm``.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_ln=False):
        super().__init__()
        # nn.LSTM only applies dropout between layers and warns when it is
        # non-zero with a single layer; passing 0.0 there is equivalent.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers,
                                  dropout=inter_layer_dropout, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        # The attribute only exists when requested, so the state_dict keys
        # match checkpoints saved without a LayerNorm.
        if has_ln:
            self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the per-time-step projection of the LSTM output."""
        out, _ = self.lstm(x)
        if hasattr(self, 'layer_norm'):
            out = self.layer_norm(out)
        # nn.Linear acts on the last dimension, so batched (3-D) and unbatched
        # (2-D) outputs are handled identically. The former 2-D branch indexed
        # the tensor with three indices and always raised.
        return self.fc(out)

# Function to find a suitable number of attention heads
def find_nhead(d_model, max_nhead=16, min_head_dim=8):
    """
    Pick the largest head count not exceeding max_nhead that divides d_model
    evenly while leaving at least min_head_dim dimensions per head.
    Returns 1 when no candidate qualifies.
    """
    usable = (
        heads
        for heads in reversed(range(1, max_nhead + 1))
        if d_model % heads == 0 and d_model // heads >= min_head_dim
    )
    # The generator is ordered from largest to smallest, so the first hit wins.
    return next(usable, 1)

# Function to inverse transform data, adjusted to handle varying number of features
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers, n_future, num_features):
    """Undo the per-column scaling and log transform on inputs, targets and predictions.

    Each value is passed through ``scalers[col].inverse_transform`` and then
    ``np.exp``. Columns that have no entry in ``scalers`` are skipped.

    Args:
        df_columns: Column names; position ``j`` maps to feature index ``j``.
        input_np: Scaled inputs, ``(rows, num_features)``; other ranks are
            reshaped to that layout.
        target_np: Scaled targets.
        prediction_np: Scaled predictions. Accepted layouts are
            ``(batch, num_features)`` (single step),
            ``(batch, n_future * num_features)`` and
            ``(batch, n_future, num_features)`` (multi step).
        scalers: Mapping of column name to a fitted scaler.
        n_future: Number of predicted steps.
        num_features: Number of features per step.

    Returns:
        ``(inv_input, inv_target, inv_pred)`` DataFrames. Single-step columns
        are named ``col``; multi-step columns are named ``{col}_step_{i}``.

    Raises:
        ValueError: If the prediction layout is none of the accepted ones.
    """
    prediction_np = np.asarray(prediction_np)
    target_np = np.asarray(target_np)
    input_np = np.asarray(input_np)
    if input_np.ndim != 2:
        input_np = input_np.reshape(-1, num_features)

    def _restore(col, scaled_values):
        column = np.asarray(scaled_values).reshape(-1, 1)
        return np.exp(scalers[col].inverse_transform(column).flatten())

    scaled_cols = [(idx, col) for idx, col in enumerate(df_columns)
                   if scalers.get(col) is not None]

    # A prediction that is already (batch, n_future, num_features) also has
    # shape[-1] == num_features; it must not be mistaken for single-step
    # output, which would index the step axis with feature indices.
    already_stepped = (prediction_np.ndim == 3
                       and prediction_np.shape[1:] == (n_future, num_features))
    width = prediction_np.shape[-1]

    # Collect columns in dicts and build each frame once; inserting hundreds
    # of columns one at a time fragments the DataFrame.
    input_cols, target_cols, pred_cols = {}, {}, {}

    if width == num_features and not already_stepped:
        # Single-step prediction
        for idx, col in scaled_cols:
            pred_cols[col] = _restore(col, prediction_np[:, idx])
            target_cols[col] = _restore(col, target_np[:, idx])
            input_cols[col] = _restore(col, input_np[:, idx])
    elif already_stepped or width == n_future * num_features:
        # Multi-step prediction
        prediction_np = prediction_np.reshape(-1, n_future, num_features)
        target_np = target_np.reshape(-1, n_future, num_features)
        for step in range(n_future):
            for idx, col in scaled_cols:
                name = f"{col}_step_{step}"
                pred_cols[name] = _restore(col, prediction_np[:, step, idx])
                target_cols[name] = _restore(col, target_np[:, step, idx])
                # The input history is repeated under every step name so the
                # three frames keep matching column labels.
                input_cols[name] = _restore(col, input_np[:, idx])
    else:
        raise ValueError(f"Unexpected prediction output shape: {prediction_np.shape}")

    return pd.DataFrame(input_cols), pd.DataFrame(target_cols), pd.DataFrame(pred_cols)

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """Print and return the mean of the per-column mean absolute percentage errors.

    Both frames are read column by column using the columns of ``inv_target``;
    the longer series of each pair is truncated to the shorter one, and
    infinite or NaN errors (e.g. from a zero target) are dropped before
    averaging.
    """
    per_column = {}
    for name in inv_target.columns:
        actual = inv_target[name]
        predicted = inv_pred[name]
        # Compare only the overlapping leading part of the two series.
        overlap = min(len(actual), len(predicted))
        actual, predicted = actual[:overlap], predicted[:overlap]
        relative_error = ((actual - predicted) / actual).abs()
        # Division by a zero target produces +/-inf; treat those as missing.
        finite_error = relative_error.replace([np.inf, -np.inf], np.nan).dropna()
        per_column[name] = finite_error.mean()
    # Overall score is the unweighted mean over columns.
    overall_score = np.mean(list(per_column.values()))
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Silence any warning whose text contains "does not have many workers".
# filterwarnings matches `message` as a regex anchored at the start of the
# warning text, hence the leading ".*".
warnings.filterwarnings("ignore", message=".*does not have many workers.*")

def _strip_key_prefix(state_dict, prefix='model.'):
    """Return ``state_dict`` with ``prefix`` removed from every key that starts with it."""
    if not any(key.startswith(prefix) for key in state_dict):
        return state_dict
    cut = len(prefix)
    return {(key[cut:] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()}


def _split_transformer_layer_key(key, stack):
    """Return ``(layer_index, rest_of_key)`` when ``key`` belongs to a layer of ``stack``.

    ``stack`` is ``'encoder'`` or ``'decoder'``. Both the
    ``transformer_<stack>.layers.N.*`` and ``transformer.<stack>.layers.N.*``
    spellings are recognised. Returns ``None`` for any other key.
    """
    for sep in ('_', '.'):
        marker = f'transformer{sep}{stack}.layers.'
        pos = key.find(marker)
        if pos == -1:
            continue
        index, _, rest = key[pos + len(marker):].partition('.')
        if index.isdigit():
            return int(index), rest
    return None


def _count_transformer_layers(state_dict, stack):
    """Count the distinct layer indices stored for the encoder or decoder stack."""
    hits = (_split_transformer_layer_key(key, stack) for key in state_dict)
    return len({hit[0] for hit in hits if hit is not None})


def _find_layer0_tensor(state_dict, stack, suffix):
    """Return the tensor stored for layer 0 of ``stack`` under ``suffix``, or ``None``."""
    for key, value in state_dict.items():
        if _split_transformer_layer_key(key, stack) == (0, suffix):
            return value
    return None


def _load_lstm(state_dict, model_file, cfg, device):
    """Build a CryptoLSTM sized from ``state_dict`` and load the weights into it."""
    print(f"Loading {model_file} as CryptoLSTM")

    # One forward-direction input weight exists per stacked layer.
    weight_ih_keys = [k for k in state_dict
                      if k.startswith('lstm.weight_ih_l') and not k.endswith('_reverse')]
    num_layers = len(weight_ih_keys)
    if num_layers == 0:
        print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
        return None

    # Only layer 0 sees the raw features; deeper layers take hidden_size
    # inputs, so read the sizes from layer 0 rather than from whichever key
    # happens to come first.
    first_key = 'lstm.weight_ih_l0' if 'lstm.weight_ih_l0' in state_dict else weight_ih_keys[0]
    first_weight = state_dict[first_key]
    hidden_size = first_weight.shape[0] // 4  # LSTM gates
    input_size = first_weight.shape[1]

    if 'fc.weight' in state_dict:
        output_size = state_dict['fc.weight'].shape[0]
    elif 'fc.bias' in state_dict:
        output_size = state_dict['fc.bias'].shape[0]
    else:
        print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
        output_size = cfg.NUM_FEATURES

    # Infer if LayerNorm is present
    has_ln = any('layer_norm' in key or 'ln' in key for key in state_dict)
    print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
          f"input_size={input_size}, output_size={output_size}, LayerNorm={has_ln}")

    # Only models that predict every feature for every future step are kept.
    expected_output_size = cfg.N_FUTURE * cfg.NUM_FEATURES
    if output_size != expected_output_size:
        print(f"Model's output_size ({output_size}) does not match expected ({expected_output_size}). Skipping model.")
        return None

    model = CryptoLSTM(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=cfg.DROPOUT,
        output_size=output_size,
        has_ln=has_ln
    ).to(device)

    # strict=False tolerates missing/unexpected names (reported below) but
    # still raises on shape mismatches.
    missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
    if missing_keys:
        print(f"Missing keys: {missing_keys}")
    if unexpected_keys:
        print(f"Unexpected keys: {unexpected_keys}")

    return model, {'input_size': input_size, 'output_size': output_size,
                   'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE}


def _load_transformer(state_dict, model_file, cfg, device):
    """Build a CryptoTransformer sized heuristically from ``state_dict`` and load the weights."""
    print(f"Loading {model_file} as CryptoTransformer")

    # in_proj_weight is (3 * d_model, d_model); fall back to the config value.
    in_proj = _find_layer0_tensor(state_dict, 'encoder', 'self_attn.in_proj_weight')
    d_model = in_proj.shape[1] if in_proj is not None else cfg.HIDDEN_SIZE

    # The head count is not recoverable from the weights, so pick a plausible one.
    nhead = find_nhead(d_model)
    if nhead == 1 and d_model < 8:
        print(f"Could not find a suitable nhead for d_model={d_model}. Skipping model.")
        return None

    num_encoder_layers = _count_transformer_layers(state_dict, 'encoder')
    num_decoder_layers = _count_transformer_layers(state_dict, 'decoder')

    # linear1 is Linear(d_model, dim_feedforward), so its weight has
    # dim_feedforward rows; otherwise use the configured/default width.
    linear1 = _find_layer0_tensor(state_dict, 'encoder', 'linear1.weight')
    if linear1 is not None:
        dim_feedforward = linear1.shape[0]
    else:
        dim_feedforward = getattr(cfg, 'DIM_FEEDFORWARD', 2048)
    dropout = cfg.DROPOUT
    activation = 'gelu'  # Defaulting to 'gelu'

    print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
          f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
          f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

    if 'output_fc.weight' in state_dict:
        num_outputs = state_dict['output_fc.weight'].shape[0]
    else:
        num_outputs = cfg.N_FUTURE * cfg.NUM_FEATURES  # Default assumption

    # A head width that is a multiple of the feature count is read as
    # n_future steps of num_features values; anything else as single-step.
    if num_outputs % cfg.NUM_FEATURES == 0:
        n_future_model = num_outputs // cfg.NUM_FEATURES
    else:
        n_future_model = 1

    model = CryptoTransformer(
        input_size=cfg.NUM_FEATURES,
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        dim_feedforward=dim_feedforward,
        dropout=dropout,
        activation=activation,
        n_future=n_future_model,
        num_outputs=num_outputs,
        max_seq_length=cfg.N_PAST + cfg.N_FUTURE
    ).to(device)

    # NOTE(review): key names that differ from CryptoTransformer's own
    # parameter names are only reported here, not remapped, so those weights
    # stay at their random initial values.
    missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
    if missing_keys:
        print(f"Missing keys: {missing_keys}")
    if unexpected_keys:
        print(f"Unexpected keys: {unexpected_keys}")

    return model, {
        'input_size': cfg.NUM_FEATURES,
        'output_size': num_outputs,
        'n_past': cfg.N_PAST,
        'n_future': n_future_model
    }


def load_model(model_file, cfg, device='cpu'):
    """Load a checkpoint and rebuild a matching CryptoLSTM or CryptoTransformer.

    The file may be a plain state dict or a dict with a ``'state_dict'``
    entry; a leading ``'model.'`` is stripped from keys. The architecture is
    chosen from the key prefixes (``lstm*`` or ``transformer*``) and sized
    from the stored tensor shapes, falling back to ``cfg`` values.

    Args:
        model_file: Path of the checkpoint file.
        cfg: Config object providing NUM_FEATURES, N_PAST, N_FUTURE, DROPOUT,
            HIDDEN_SIZE and optionally DIM_FEEDFORWARD.
        device: Device passed to ``torch.load`` and used for the model.

    Returns:
        ``(model, params)`` where ``params`` has the keys ``input_size``,
        ``output_size``, ``n_past`` and ``n_future``; or ``None`` when the file
        is missing, unrecognised, unsuitable, or loading raised.
    """
    import traceback  # local import so the except branch never hits a NameError

    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from sources you trust.
        checkpoint = torch.load(model_file, map_location=device)

        # Checkpoints that wrap the weights keep them under 'state_dict'.
        if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
            checkpoint = checkpoint['state_dict']

        state_dict = _strip_key_prefix(checkpoint)

        if any(key.startswith('lstm') for key in state_dict):
            return _load_lstm(state_dict, model_file, cfg, device)
        if any(key.startswith('transformer') for key in state_dict):
            return _load_transformer(state_dict, model_file, cfg, device)

        print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
        return None

    except Exception as e:
        # Best-effort loader: one bad checkpoint must not abort a scan.
        print(f"Failed to load model {model_file}: {str(e)}")
        traceback.print_exc()
        return None

# Function to prepare input data
def prepare_input(input_data, device):
    """Convert ``input_data`` to a float32 tensor with a leading batch axis of size 1 on ``device``."""
    as_float = torch.tensor(input_data, dtype=torch.float32)
    batched = torch.unsqueeze(as_float, 0)
    return batched.to(device)

# Function to get a random sample from the DataFrame
def get_random_sample(df, n_past, n_future, input_cols, target_cols):
    """Draw one random window of ``n_past`` input rows followed by ``n_future`` target rows.

    Args:
        df: Source DataFrame.
        n_past: Number of consecutive rows returned as input.
        n_future: Number of rows immediately after the input returned as target.
        input_cols: Columns selected for the input array.
        target_cols: Columns selected for the target array.

    Returns:
        ``(input_data, target_data)`` as NumPy arrays.

    Raises:
        ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
    """
    # Largest start index that still leaves room for a full window.
    last_start = len(df) - n_past - n_future
    if last_start < 0:
        raise ValueError("DataFrame is too short for the given n_past and n_future")
    # randint's upper bound is exclusive, so +1 keeps the final window
    # reachable (and accepts a frame of exactly n_past + n_future rows).
    start_idx = np.random.randint(0, last_start + 1)
    split = start_idx + n_past
    input_data = df[input_cols].iloc[start_idx:split].values
    target_data = df[target_cols].iloc[split:split + n_future].values
    return input_data, target_data

# Function to convert tensors to numpy arrays
def convert_to_numpy(input_tensor, target, prediction):
    """Return the three tensors as CPU NumPy arrays, in the order given.

    Each tensor is detached first, so tensors that are part of an autograd
    graph are accepted too (``Tensor.numpy()`` refuses those).
    """
    input_np, target_np, prediction_np = (
        tensor.detach().cpu().numpy()
        for tensor in (input_tensor, target, prediction)
    )
    return input_np, target_np, prediction_np

@dataclass
class Config:
    """Settings shared by the model-evaluation code in this cell.

    The visible code of this cell reads RECORDS_TO_LOAD, N_PAST, N_FUTURE,
    HIDDEN_SIZE, DROPOUT, DATA_URL, DATA_FILE, MODEL_PATH, DEVICE,
    PATH_TO_SEARCH and NUM_FEATURES; the remaining fields are not referenced
    in the visible part of the cell.
    """
    VERSION_N: int = 1
    RECORDS_TO_LOAD: int = 1205040  # number of trailing CSV rows kept after loading
    # NOTE(review): 3 * 12 * 3 = 108 steps. The earlier comment said "1 week of
    # 10-minute intervals", which 108 ten-minute steps (18 hours) is not -
    # confirm the intended window and sampling interval.
    N_PAST: int = 3 * 12 * 3
    # NOTE(review): 1 * 12 * 2 = 24 steps. The earlier comment said "1 day of
    # 10-minute intervals"; 24 ten-minute steps are 4 hours - confirm.
    N_FUTURE: int = 1 * 12 * 2
    BATCH_SIZE: int = 5000
    HIDDEN_SIZE: int = 512  # Adjusted to match inferred d_model=512
    NUM_LAYERS: int = 2
    DROPOUT: float = 0.2
    NUM_EPOCHS: int = 150
    HOT_RESTART: bool = True
    TRAIN_FIRST: bool = True
    EPOCH_TO_RESTART: int = 50
    BATCH_FACTOR: int = 81
    DEBUG_FREQ: int = 180
    # The next two names carry no annotation, so @dataclass treats them as
    # plain class attributes rather than fields (they are not __init__ args).
    num_cpus = multiprocessing.cpu_count()
    NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
    DEBUG_ON: bool = False
    DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'
    DATA_FILE: str = './data/fill_nan_df.csv'
    # Overwritten with each candidate file by evaluate_and_get_top_models.
    MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"
    MODEL_SAVE_PATH: str = f'./yay'
    # Evaluated once, when the class body runs.
    DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    EPSILON: float = 1e-4
    PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # root directory scanned for *.ckpt / *.pth files
    # Additional parameters
    NUM_FEATURES: int = None  # None until set from df.shape[1] after the data is loaded

# Single shared configuration instance used by the rest of the cell.
cfg = Config()

# Load and preprocess data
def _download_file(download_url, file_path, chunk_size=8192, timeout=60):
    """Stream ``download_url`` into ``file_path`` by way of a temporary ``.part`` file."""
    target_dir = os.path.dirname(file_path)
    # dirname is '' for a bare filename, and os.makedirs('') raises.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    partial_path = file_path + '.part'
    ic(f"Downloading file from {download_url}...")
    try:
        # The timeout stops a stalled server from blocking forever; the
        # context manager releases the streamed connection.
        with requests.get(download_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        # Publish the file only once it is complete, so an interrupted
        # download is never mistaken for a valid dataset on the next run.
        os.replace(partial_path, file_path)
        ic(f"File downloaded and saved to {file_path}")
    except requests.exceptions.RequestException as e:
        ic(f"Failed to download the file: {e}")
        raise
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


def load_and_preprocess_data(file_path: str, download_url: str = None):
    """
    Load and preprocess data from a CSV file. If the file does not exist, download it.

    The CSV must have a 'timestamp' column, which becomes the index. Only the
    last ``cfg.RECORDS_TO_LOAD`` rows are kept. Every remaining column is
    replaced by its natural logarithm and then scaled with its own
    MinMaxScaler.

    Args:
        file_path (str): Path to the CSV file.
        download_url (str, optional): URL to download the CSV file. Defaults to None.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
        dict: Dictionary of scalers used for each column.

    Raises:
        FileNotFoundError: If the file is missing and no download URL is given.
        requests.exceptions.RequestException: If the download fails.
        ValueError: If a column contains non-positive values.
    """
    ic("Starting data loading and preprocessing...")

    if not os.path.exists(file_path):
        ic(f"File {file_path} does not exist.")
        if not download_url:
            ic("Download URL not provided. Cannot download the file.")
            raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")
        _download_file(download_url, file_path)

    # Load the DataFrame
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df.set_index('timestamp', inplace=True)
    # Copy the tail so the column assignments below write to an independent
    # frame rather than to a slice of the full one.
    df = df.tail(cfg.RECORDS_TO_LOAD).copy()
    scalers = {}
    start_time_preprocess = time.time()

    for col in df.columns:
        # The log transform is only defined for strictly positive values.
        if (df[col] <= 0).any():
            raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

        df[col] = np.log(df[col])

        # One scaler per column, kept so predictions can be mapped back.
        scaler = MinMaxScaler()
        df[col] = scaler.fit_transform(df[[col]])
        scalers[col] = scaler

    ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
    ic(f"DataFrame shape: {df.shape}")

    return df, scalers

# Load the dataset when the cell runs (downloading it first if the local file
# is missing) and record its column count on the shared config, which
# load_model reads as cfg.NUM_FEATURES.
df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
cfg.NUM_FEATURES = df.shape[1]

def _predict_scaled(model, input_tensor, n_future, input_size, device):
    """Run one forward pass and return the still-scaled prediction tensor.

    Returns ``None`` when ``model`` is neither a CryptoTransformer nor a CryptoLSTM.
    """
    model.eval()
    with torch.no_grad():
        if isinstance(model, CryptoTransformer):
            # The decoder is fed an all-zero target of the requested length.
            tgt_input = torch.zeros((1, n_future, input_size), device=device)
            return model(input_tensor, tgt_input)
        if isinstance(model, CryptoLSTM):
            prediction = model(input_tensor)
            # CryptoLSTM returns one output per input step for batched input;
            # only the output after the last step has seen the whole history.
            if prediction.dim() == 3:
                prediction = prediction[:, -1, :]
            return prediction
    return None


def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
    """Score each checkpoint on one random window and keep the ``top_n`` lowest scores.

    For every file the model is rebuilt with ``load_model``, run on a window
    drawn with ``get_random_sample``, mapped back to original units with
    ``get_original_values`` and scored with ``calculate_score``. A file that
    fails at any stage is reported and skipped.

    NOTE(review): every model is scored on its own single random window, so
    scores of different models are not measured on the same data.

    Args:
        model_files: Iterable of checkpoint paths.
        cfg: Config object; ``cfg.MODEL_PATH`` is overwritten with each path.
        df: Preprocessed DataFrame to sample from.
        scalers: Mapping of column name to fitted scaler.
        top_n: Maximum number of entries to keep.

    Returns:
        List of ``(model_file, score)`` tuples sorted by ascending score.
    """
    top_models = []
    for model_file in model_files:
        cfg.MODEL_PATH = model_file
        print(f"\nLoading model from {cfg.MODEL_PATH}")

        result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
        if result is None:
            print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
            continue

        model, params = result
        input_size = params['input_size']
        output_size = params['output_size']
        n_past = params['n_past']
        n_future = params['n_future']
        num_features = cfg.NUM_FEATURES

        # The model consumes the first input_size columns of the frame.
        input_cols = df.columns[:input_size]

        try:
            input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, input_cols)
        except Exception as e:
            print(f"Error getting random sample: {str(e)}")
            continue

        try:
            input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
        except Exception as e:
            print(f"Error preparing tensors: {str(e)}")
            continue

        try:
            prediction = _predict_scaled(model, input_tensor, n_future, input_size, cfg.DEVICE)
        except Exception as e:
            print(f"Failed to make predictions: {e}")
            continue
        if prediction is None:
            print(f"Unknown model type for file: {model_file}")
            continue

        # Decide how wide one prediction row is.
        expected_width = n_future * num_features
        if isinstance(model, CryptoTransformer) or output_size == expected_width:
            if output_size != expected_width:
                print(f"Model's output_size ({output_size}) does not match n_future * num_features ({expected_width}). Skipping model.")
                continue
            row_width = expected_width
        elif output_size == num_features:
            row_width = num_features
        else:
            print(f"Unexpected output_size ({output_size}) for model {model_file}. Skipping model.")
            continue

        # Hand 2-D arrays to get_original_values: a (batch, n_future,
        # num_features) array also has a last dimension of num_features and
        # would be taken for single-step output there.
        try:
            prediction_np = prediction.cpu().numpy().reshape(-1, row_width)
            target_np = np.asarray(target_data, dtype=np.float32).reshape(-1, row_width)
            input_np = input_tensor.squeeze(0).cpu().numpy().reshape(-1, num_features)
        except ValueError as ve:
            print(f"ValueError during reshaping: {ve}. Skipping model.")
            continue

        try:
            inv_input, inv_target, inv_pred = get_original_values(
                df.columns, input_np, target_np, prediction_np, scalers, n_future, num_features
            )
        except Exception as e:
            print(f"Error during inverse transformation: {str(e)}")
            continue

        print("Calculating scores...")
        try:
            model_score = calculate_score(inv_target, inv_pred)
        except Exception as e:
            print(f"Error calculating score: {str(e)}")
            continue

        # NaN compares false against everything and would leave the ranking
        # below in an arbitrary order.
        if not np.isfinite(model_score):
            print(f"Non-finite score for model {model_file}. Skipping model.")
            continue

        # Lower score (mean absolute percentage error) is better.
        top_models.append((model_file, model_score))
        top_models = sorted(top_models, key=lambda entry: entry[1])[:top_n]

        print(f"Current top {len(top_models)} models:")
        for m, s in top_models:
            print(f"  Model: {m}, Score: {s:.2%}")

    return top_models

# Collect every *.ckpt and *.pth file below cfg.PATH_TO_SEARCH (recursive).
model_files = glob.glob(os.path.join(cfg.PATH_TO_SEARCH, "**/*.ckpt"), recursive=True) + \
              glob.glob(os.path.join(cfg.PATH_TO_SEARCH, "**/*.pth"), recursive=True)
print(f"Total model files found: {len(model_files)}")

# Optionally, truncate the list for testing.
# NOTE: while this slice is in place only the first 20 files found are
# evaluated, so the "top N" below is the top N of those 20.
model_files = model_files[:20]  # Adjust as needed

# Evaluate models and get top N (lowest score first)
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Display Top N models
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.table import Table
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback
import warnings

# Rich console object for formatted terminal output in this cell.
console = Console()

# Define your CryptoTransformer class as per your implementation
class CryptoTransformer(torch.nn.Module):
    """Encoder-decoder Transformer wrapped between two linear projections.

    Args:
        input_size: Number of features per time step of ``src`` and ``tgt``.
        d_model: Embedding width used inside the Transformer.
        nhead: Number of attention heads.
        num_encoder_layers: Number of encoder layers.
        num_decoder_layers: Number of decoder layers.
        dim_feedforward: Hidden width of each layer's feed-forward block.
        dropout: Dropout probability handed to ``torch.nn.Transformer``.
        activation: Activation (name or callable) handed to ``torch.nn.Transformer``.
        n_future: Stored as ``self.n_future``; ``forward`` does not read it.
        num_outputs: Output width of the final linear layer.
        max_seq_length: Accepted for signature compatibility; currently unused.
        batch_first: When True (default) tensors are ``(batch, seq, feature)``,
            matching ``CryptoLSTM`` in this cell. Pass False for the
            ``(seq, batch, feature)`` layout.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length,
                 batch_first=True):
        super().__init__()
        # batch_first does not add or rename parameters, so existing
        # state dicts load exactly as before.
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            batch_first=batch_first,
        )
        # The same projection embeds both the source and the target sequence.
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Project ``src``/``tgt`` to ``d_model``, run the Transformer, project to ``num_outputs``.

        Returns a tensor with the same leading dimensions as ``tgt`` and a
        last dimension of ``num_outputs``.
        """
        memory_input = self.input_fc(src)
        decoder_input = self.input_fc(tgt)
        decoded = self.transformer(memory_input, decoder_input)
        return self.output_fc(decoded)

# Define your CryptoLSTM class with corrected LayerNorm naming
class CryptoLSTM(torch.nn.Module):
    """Batch-first LSTM followed by an optional LayerNorm and a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden width.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between stacked LSTM layers.
        output_size: Output width of the linear head.
        has_layer_norm: When True, a ``LayerNorm(hidden_size)`` is applied to
            the LSTM output before the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_layer_norm=False):
        super().__init__()
        # nn.LSTM only applies dropout between stacked layers and emits a
        # warning when a non-zero value is given for a single layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers,
                                  dropout=inter_layer_dropout, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        # The attribute only exists when requested; forward() checks for it.
        if has_layer_norm:
            self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the head output.

        Batched input ``(batch, seq, input_size)`` yields one output per time
        step, ``(batch, seq, output_size)``. Unbatched input
        ``(seq, input_size)`` yields the output for the last time step only,
        ``(output_size,)``.
        """
        out, _ = self.lstm(x)
        if hasattr(self, 'layer_norm'):
            out = self.layer_norm(out)
        if out.dim() == 3:
            return self.fc(out)
        # Unbatched path: the previous three-index slice raised IndexError on
        # a 2-D tensor; the ellipsis form selects the last time step.
        return self.fc(out[..., -1, :])

# Function to find a suitable number of attention heads
def find_nhead(d_model, max_nhead=16, min_head_dim=8):
    """
    Pick the largest head count not exceeding max_nhead that divides d_model
    evenly while leaving at least min_head_dim dimensions per head.
    Returns 1 when no candidate qualifies.
    """
    usable = (
        heads
        for heads in reversed(range(1, max_nhead + 1))
        if d_model % heads == 0 and d_model // heads >= min_head_dim
    )
    # The generator is ordered from largest to smallest, so the first hit wins.
    return next(usable, 1)

# Function to inverse transform data, adjusted to handle varying number of features
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers, n_future, num_features):
    """Undo the per-column scaling and log transform on inputs, targets and predictions.

    Each value is passed through ``scalers[col].inverse_transform`` and then
    ``np.exp``. Columns that have no entry in ``scalers`` are skipped.

    Args:
        df_columns: Column names; position ``j`` maps to feature index ``j``.
        input_np: Scaled inputs, ``(rows, num_features)``; other ranks are
            reshaped to that layout.
        target_np: Scaled targets.
        prediction_np: Scaled predictions. Accepted layouts are
            ``(batch, num_features)`` (single step),
            ``(batch, n_future * num_features)`` and
            ``(batch, n_future, num_features)`` (multi step).
        scalers: Mapping of column name to a fitted scaler.
        n_future: Number of predicted steps.
        num_features: Number of features per step.

    Returns:
        ``(inv_input, inv_target, inv_pred)`` DataFrames. Single-step columns
        are named ``col``; multi-step columns are named ``{col}_step_{i}``.

    Raises:
        ValueError: If the prediction layout is none of the accepted ones.
    """
    prediction_np = np.asarray(prediction_np)
    target_np = np.asarray(target_np)
    input_np = np.asarray(input_np)
    if input_np.ndim != 2:
        input_np = input_np.reshape(-1, num_features)

    def _restore(col, scaled_values):
        column = np.asarray(scaled_values).reshape(-1, 1)
        return np.exp(scalers[col].inverse_transform(column).flatten())

    scaled_cols = [(idx, col) for idx, col in enumerate(df_columns)
                   if scalers.get(col) is not None]

    # A prediction that is already (batch, n_future, num_features) also has
    # shape[-1] == num_features; it must not be mistaken for single-step
    # output, which would index the step axis with feature indices.
    already_stepped = (prediction_np.ndim == 3
                       and prediction_np.shape[1:] == (n_future, num_features))
    width = prediction_np.shape[-1]

    # Collect columns in dicts and build each frame once; inserting hundreds
    # of columns one at a time fragments the DataFrame.
    input_cols, target_cols, pred_cols = {}, {}, {}

    if width == num_features and not already_stepped:
        # Single-step prediction
        for idx, col in scaled_cols:
            pred_cols[col] = _restore(col, prediction_np[:, idx])
            target_cols[col] = _restore(col, target_np[:, idx])
            input_cols[col] = _restore(col, input_np[:, idx])
    elif already_stepped or width == n_future * num_features:
        # Multi-step prediction
        prediction_np = prediction_np.reshape(-1, n_future, num_features)
        target_np = target_np.reshape(-1, n_future, num_features)
        for step in range(n_future):
            for idx, col in scaled_cols:
                name = f"{col}_step_{step}"
                pred_cols[name] = _restore(col, prediction_np[:, step, idx])
                target_cols[name] = _restore(col, target_np[:, step, idx])
                # The input history is repeated under every step name so the
                # three frames keep matching column labels.
                input_cols[name] = _restore(col, input_np[:, idx])
    else:
        raise ValueError(f"Unexpected prediction output shape: {prediction_np.shape}")

    return pd.DataFrame(input_cols), pd.DataFrame(target_cols), pd.DataFrame(pred_cols)

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """Print and return the mean of the per-column mean absolute percentage errors.

    Both frames are read column by column using the columns of ``inv_target``;
    the longer series of each pair is truncated to the shorter one, and
    infinite or NaN errors (e.g. from a zero target) are dropped before
    averaging.
    """
    per_column = {}
    for name in inv_target.columns:
        actual = inv_target[name]
        predicted = inv_pred[name]
        # Compare only the overlapping leading part of the two series.
        overlap = min(len(actual), len(predicted))
        actual, predicted = actual[:overlap], predicted[:overlap]
        relative_error = ((actual - predicted) / actual).abs()
        # Division by a zero target produces +/-inf; treat those as missing.
        finite_error = relative_error.replace([np.inf, -np.inf], np.nan).dropna()
        per_column[name] = finite_error.mean()
    # Overall score is the unweighted mean over columns.
    overall_score = np.mean(list(per_column.values()))
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Silence two kinds of warnings. filterwarnings matches `message` as a regex
# anchored at the start of the warning text, hence the leading ".*".
#  - any warning whose text contains "does not have many workers"
#  - FutureWarnings whose text mentions "torch.load" (load_model calls torch.load)
warnings.filterwarnings("ignore", message=".*does not have many workers.*")
warnings.filterwarnings("ignore", category=FutureWarning, message=".*torch.load.*")

def load_model(model_file, cfg, device='cpu'):
    """Rebuild a CryptoLSTM or CryptoTransformer from a saved checkpoint.

    The architecture is inferred from the parameter names and tensor shapes
    stored in the checkpoint; values that cannot be inferred fall back to
    ``cfg``.

    Args:
        model_file: Path to a file holding either a raw state dict or a dict
            with a ``'state_dict'`` entry.
        cfg: Configuration object providing ``NUM_FEATURES``, ``N_PAST``,
            ``N_FUTURE``, ``DROPOUT``, ``HIDDEN_SIZE`` and, optionally,
            ``DIM_FEEDFORWARD``.
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        ``(model, params)`` where ``params`` has the keys ``'input_size'``,
        ``'output_size'``, ``'n_past'`` and ``'n_future'``; or ``None`` when
        the file is missing, the key layout is not recognised, the output
        width does not fit the configuration, or any exception is raised.
    """
    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # SECURITY NOTE: torch.load unpickles the file, which can execute
        # arbitrary code. Only point this at checkpoints you trust.
        state_dict = torch.load(model_file, map_location=device)

        # Unwrap checkpoints that nest the weights under 'state_dict'.
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Strip a leading 'model.' wrapper prefix from parameter names.
        prefix = 'model.'
        if any(key.startswith(prefix) for key in state_dict):
            state_dict = {
                (key[len(prefix):] if key.startswith(prefix) else key): value
                for key, value in state_dict.items()
            }

        def _count_layers(marker):
            """Count the distinct integer indices that directly follow ``marker``."""
            indices = set()
            for key in state_dict:
                _, found, rest = key.partition(marker)
                if found:
                    index = rest.split('.', 1)[0]
                    if index.isdigit():
                        indices.add(index)
            return len(indices)

        def _first_tensor(fragment):
            """Return the first tensor whose key contains ``fragment``, else None."""
            for key, value in state_dict.items():
                if fragment in key:
                    return value
            return None

        # Determine the model type based on key prefixes
        if any(key.startswith('lstm') for key in state_dict):
            print(f"Loading {model_file} as CryptoLSTM")

            # One input-hidden weight matrix exists per stacked layer.
            lstm_weight_ih_keys = [k for k in state_dict if k.startswith('lstm.weight_ih_l')]
            num_layers = len(lstm_weight_ih_keys)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            # Prefer layer 0 explicitly: only its second dimension equals the
            # model's input width, so do not rely on dictionary order.
            if 'lstm.weight_ih_l0' in state_dict:
                first_layer_key = 'lstm.weight_ih_l0'
            else:
                first_layer_key = lstm_weight_ih_keys[0]
            sample_weight_ih = state_dict[first_layer_key]
            hidden_size = sample_weight_ih.shape[0] // 4  # four LSTM gates stacked row-wise
            input_size = sample_weight_ih.shape[1]

            if 'fc.weight' in state_dict:
                output_size = state_dict['fc.weight'].shape[0]
            elif 'fc.bias' in state_dict:
                output_size = state_dict['fc.bias'].shape[0]
            else:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES

            # Heuristic: any key mentioning 'layer_norm' or 'ln' enables LayerNorm.
            # NOTE(review): 'ln' is a very loose substring match - confirm against
            # the key names your checkpoints actually use.
            has_layer_norm = any('layer_norm' in key or 'ln' in key for key in state_dict)
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"input_size={input_size}, output_size={output_size}, LayerNorm={has_layer_norm}")

            # The head must emit the whole forecast horizon at once.
            expected_output_size = cfg.N_FUTURE * cfg.NUM_FEATURES
            if output_size != expected_output_size:
                print(f"Model's output_size ({output_size}) does not match expected ({expected_output_size}). Skipping model.")
                return None

            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=cfg.DROPOUT,
                output_size=output_size,
                has_layer_norm=has_layer_norm
            ).to(device)

            # strict=False tolerates naming differences; report them instead.
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {'input_size': input_size, 'output_size': output_size,
                          'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE}

        elif any(key.startswith('transformer') for key in state_dict):
            print(f"Loading {model_file} as CryptoTransformer")

            # The fragments below match both 'transformer_encoder.layers.N...'
            # and 'transformer.encoder.layers.N...' style parameter names.
            in_proj = _first_tensor('encoder.layers.0.self_attn.in_proj_weight')
            # in_proj_weight is (3 * d_model, d_model).
            d_model = in_proj.shape[1] if in_proj is not None else cfg.HIDDEN_SIZE

            # The head count is not recoverable from the weights; pick a divisor.
            nhead = find_nhead(d_model)
            if nhead == 1 and d_model < 8:
                print(f"Could not find a suitable nhead for d_model={d_model}. Skipping model.")
                return None

            # Count layers by the index that follows 'layers.' rather than by a
            # fixed position in the dotted key.
            num_encoder_layers = _count_layers('encoder.layers.')
            num_decoder_layers = _count_layers('decoder.layers.')

            # linear1 is Linear(d_model, dim_feedforward), so its weight has
            # dim_feedforward rows; fall back to the config, then to 2048.
            linear1 = _first_tensor('encoder.layers.0.linear1.weight')
            if linear1 is not None:
                dim_feedforward = linear1.shape[0]
            elif hasattr(cfg, 'DIM_FEEDFORWARD'):
                dim_feedforward = cfg.DIM_FEEDFORWARD
            else:
                dim_feedforward = 2048
            dropout = cfg.DROPOUT
            activation = 'gelu'  # not stored in the checkpoint; assumed

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            if 'output_fc.weight' in state_dict:
                num_outputs = state_dict['output_fc.weight'].shape[0]
            else:
                num_outputs = cfg.N_FUTURE * cfg.NUM_FEATURES  # Default assumption

            # The output width must be a whole number of feature vectors; a
            # width equal to NUM_FEATURES gives a single-step model.
            if num_outputs % cfg.NUM_FEATURES != 0:
                print(f"Unexpected num_outputs ({num_outputs}) for model {model_file}. Skipping model.")
                return None
            n_future_model = num_outputs // cfg.NUM_FEATURES

            # Only data sets with exactly 13 features are supported here.
            if cfg.NUM_FEATURES != 13:
                print(f"Model {model_file} uses {cfg.NUM_FEATURES} features instead of 13. Skipping model.")
                return None

            model = CryptoTransformer(
                input_size=cfg.NUM_FEATURES,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=n_future_model,
                num_outputs=num_outputs,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            # strict=False tolerates naming differences; report them instead.
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {
                'input_size': cfg.NUM_FEATURES,
                'output_size': num_outputs,
                'n_past': cfg.N_PAST,
                'n_future': n_future_model
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None
    except Exception as e:
        # Local import so the handler itself cannot fail on a missing
        # module-level import.
        import traceback
        print(f"Error loading model {model_file}: {e}")
        traceback.print_exc()
        return None
    # NOTE(review): every path above returns, so any statements that follow at
    # this indentation inside load_model are unreachable.
    # Function to prepare input data
    def prepare_input(input_data, device):
        """Turn ``input_data`` into a float32 tensor with a leading batch axis on ``device``."""
        as_tensor = torch.tensor(input_data, dtype=torch.float32)
        batched = as_tensor.unsqueeze(0)
        return batched.to(device)

    # Function to get a random sample from the DataFrame
    def get_random_sample(df, n_past, n_future, input_cols, target_cols):
        """Draw one random (history, future) window pair from ``df``.

        Args:
            df: DataFrame to sample from.
            n_past: Number of consecutive rows returned as the input window.
            n_future: Number of rows immediately after the input window that
                are returned as the target window.
            input_cols: Columns selected for the input window.
            target_cols: Columns selected for the target window.

        Returns:
            ``(input_data, target_data)`` as NumPy arrays with ``n_past`` and
            ``n_future`` rows respectively.

        Raises:
            ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
        """
        # Largest start index whose window still fits. Starts 0..max_start are
        # all valid, so a frame of exactly n_past + n_future rows has one window.
        max_start = len(df) - n_past - n_future
        if max_start < 0:
            raise ValueError("DataFrame is too short for the given n_past and n_future")
        # randint's upper bound is exclusive, hence the +1.
        start_idx = np.random.randint(0, max_start + 1)
        split = start_idx + n_past
        input_data = df[input_cols].iloc[start_idx:split].values
        target_data = df[target_cols].iloc[split:split + n_future].values
        return input_data, target_data

    # Function to convert tensors to numpy arrays
    def convert_to_numpy(input_tensor, target, prediction):
        """Return the three tensors as NumPy arrays, moving each to the CPU first."""
        input_np, target_np, prediction_np = (
            tensor.cpu().numpy() for tensor in (input_tensor, target, prediction)
        )
        return input_np, target_np, prediction_np

    @dataclass
    class Config:
        """Settings bundle for loading data and evaluating saved checkpoints.

        Only annotated names are dataclass fields (and therefore constructor
        arguments); ``num_cpus`` and ``NUM_WORKERS`` carry no annotation and
        are plain class attributes computed once when the class is defined.
        """
        VERSION_N: int = 1
        RECORDS_TO_LOAD: int = 1205040  # number of trailing CSV rows kept by load_and_preprocess_data
        # 3 * 12 * 3 = 108 past time steps fed to the model.
        # NOTE(review): the earlier comment called this "1 week of 10-minute
        # intervals", which does not equal 108 steps - confirm the sampling interval.
        N_PAST: int = 3 * 12 * 3
        # 1 * 12 * 2 = 24 future time steps to predict.
        # NOTE(review): the earlier comment called this "1 day of 10-minute
        # intervals", which does not equal 24 steps - confirm the sampling interval.
        N_FUTURE: int = 1 * 12 * 2
        BATCH_SIZE: int = 5000
        HIDDEN_SIZE: int = 512  # Fallback d_model when it cannot be read from a checkpoint
        NUM_LAYERS: int = 2
        DROPOUT: float = 0.2
        NUM_EPOCHS: int = 150
        HOT_RESTART: bool = True
        TRAIN_FIRST: bool = True
        EPOCH_TO_RESTART: int = 50
        BATCH_FACTOR: int = 81
        DEBUG_FREQ: int = 180
        # Not annotated, so these two are class attributes rather than fields.
        num_cpus = multiprocessing.cpu_count()
        # Above 16 cores: a quarter of the cores minus 4, but never below 4; otherwise 4.
        NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
        DEBUG_ON: bool = False
        DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'  # download source when DATA_FILE is missing
        DATA_FILE: str = './data/fill_nan_df.csv'  # local CSV path
        MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"
        MODEL_SAVE_PATH: str = f'./yay'
        # Evaluated once at class-definition time: CUDA if available, else CPU.
        DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        EPSILON: float = 1e-4
        PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # root directory scanned for checkpoints
        # Additional parameters
        NUM_FEATURES: int = None  # To be set after loading data (None until then)

    # Instance used by the code that follows.
    cfg = Config()

    # Load and preprocess data
    def load_and_preprocess_data(file_path: str, download_url: str = None):
        """
        Load a CSV file, log-transform every column and min-max scale it.

        If ``file_path`` does not exist it is downloaded from ``download_url``
        first. Only the last ``cfg.RECORDS_TO_LOAD`` rows are kept.

        Args:
            file_path (str): Path to the CSV file; it must contain a
                ``timestamp`` column, which becomes the index.
            download_url (str, optional): URL to download the CSV file from
                when it is missing locally. Defaults to None.

        Returns:
            pd.DataFrame: Preprocessed DataFrame in which each column holds
                the min-max scaled natural logarithm of the original values.
            dict: Fitted ``MinMaxScaler`` for each column, keyed by column name.

        Raises:
            FileNotFoundError: If the file is missing and no URL was provided.
            requests.exceptions.RequestException: If the download fails.
            ValueError: If a column contains a non-positive value.
        """
        ic("Starting data loading and preprocessing...")

        # Check if the file exists
        if not os.path.exists(file_path):
            ic(f"File {file_path} does not exist.")
            # dirname is '' for a bare filename, and makedirs('') raises.
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            if download_url:
                ic(f"Downloading file from {download_url}...")
                # Stream into a side file and rename on success so that an
                # interrupted download is never mistaken for the real data.
                partial_path = f"{file_path}.part"
                try:
                    # The timeout stops a stalled server from hanging the run.
                    with requests.get(download_url, stream=True, timeout=30) as response:
                        response.raise_for_status()
                        with open(partial_path, 'wb') as f:
                            for chunk in response.iter_content(chunk_size=8192):
                                f.write(chunk)
                    os.replace(partial_path, file_path)
                    ic(f"File downloaded and saved to {file_path}")
                except requests.exceptions.RequestException as e:
                    ic(f"Failed to download the file: {e}")
                    raise
                finally:
                    # After a successful rename this is a no-op.
                    if os.path.exists(partial_path):
                        os.remove(partial_path)
            else:
                ic("Download URL not provided. Cannot download the file.")
                raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

        # Load the DataFrame
        df = pd.read_csv(file_path, parse_dates=['timestamp'])
        df.set_index('timestamp', inplace=True)
        # Copy so the column assignments below act on an independent frame
        # rather than on a slice of the full one.
        df = df.tail(cfg.RECORDS_TO_LOAD).copy()
        scalers = {}
        start_time_preprocess = time.time()

        for col in df.columns:
            # The logarithm is only defined for strictly positive values.
            if (df[col] <= 0).any():
                raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

            # Apply natural logarithm transformation
            df[col] = np.log(df[col])

            # Fit one scaler per column so each can be inverted independently.
            scaler = MinMaxScaler()
            df[col] = scaler.fit_transform(df[[col]])

            # Save the scaler
            scalers[col] = scaler

        ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
        ic(f"DataFrame shape: {df.shape}")

        return df, scalers

    # Load the scaled frame plus its scalers, then record the column count on cfg.
    df, scalers = load_and_preprocess_data(file_path=cfg.DATA_FILE, download_url=cfg.DATA_URL)
    cfg.NUM_FEATURES = len(df.columns)

    def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
        """Score each checkpoint on one random window and keep the best ``top_n``.

        For every path in ``model_files`` the model is loaded with
        ``load_model``, a single random (input, target) window is drawn from
        ``df`` with ``get_random_sample``, a forward pass is run, the result is
        mapped back to original units with ``get_original_values`` and scored
        with ``calculate_score``. A failure at any step prints a message and
        skips that file.

        Args:
            model_files: Iterable of checkpoint paths.
            cfg: Configuration object; ``cfg.MODEL_PATH`` is overwritten with
                each path, and ``cfg.DEVICE`` / ``cfg.NUM_FEATURES`` are read.
            df: Scaled DataFrame to sample windows from.
            scalers: Per-column scalers passed on to ``get_original_values``.
            top_n: Maximum number of entries kept in the result.

        Returns:
            List of ``(model_file, score)`` tuples sorted by ascending score,
            holding at most ``top_n`` entries.
        """
        # NOTE(review): each model is scored on a different random window, so
        # rankings can change from run to run.
        top_models = []
        for model_file in model_files:
            cfg.MODEL_PATH = model_file
            print(f"\nLoading model from {cfg.MODEL_PATH}")

            result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
            if result is None:
                print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
                continue

            model, params = result
            input_size = params['input_size']
            output_size = params['output_size']
            n_past = params['n_past']
            n_future = params['n_future']
            num_features = cfg.NUM_FEATURES

            # The model consumes the first `input_size` columns of df
            input_cols = df.columns[:input_size]

            # Input and target windows both use the same columns
            try:
                input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, input_cols)
            except Exception as e:
                print(f"Error getting random sample: {str(e)}")
                continue

            # Prepare the tensors (only the input gets a leading batch axis)
            try:
                input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
                target_tensor = torch.tensor(target_data, dtype=torch.float32).to(cfg.DEVICE)
            except Exception as e:
                print(f"Error preparing tensors: {str(e)}")
                continue

            try:
                model.eval()
                with torch.no_grad():
                    if isinstance(model, CryptoTransformer):
                        # For Transformer, prepare tgt_input with zeros
                        tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                        prediction = model(input_tensor, tgt_input)
                    elif isinstance(model, CryptoLSTM):
                        prediction = model(input_tensor)
                    else:
                        print(f"Unknown model type for file: {model_file}")
                        continue
            except Exception as e:
                print(f"Failed to make predictions: {e}")
                continue

            # Convert tensors to numpy
            input_np = input_tensor.squeeze(0).cpu().numpy()  # (n_past, input_size)
            target_np = target_tensor.cpu().numpy()  # (n_future, input_size)
            prediction_np = prediction.cpu().numpy()  # shape depends on the model's forward()

            # Bring all three arrays into the layout expected for this output width
            try:
                if isinstance(model, CryptoTransformer) or output_size == n_future * num_features:
                    # Multi-step prediction
                    if output_size != n_future * num_features:
                        print(f"Model's output_size ({output_size}) does not match n_future * num_features ({n_future * num_features}). Skipping model.")
                        continue
                    prediction_np = prediction_np.reshape(-1, n_future, num_features)
                    target_np = target_np.reshape(-1, n_future, num_features)
                    input_np = input_np.reshape(-1, num_features)
                elif output_size == num_features:
                    # Single-step prediction
                    prediction_np = prediction_np.reshape(-1, num_features)
                    target_np = target_np.reshape(-1, num_features)
                    input_np = input_np.reshape(-1, num_features)
                else:
                    print(f"Unexpected output_size ({output_size}) for model {model_file}. Skipping model.")
                    continue
            except ValueError as ve:
                print(f"ValueError during reshaping: {ve}. Skipping model.")
                continue

            # Inverse transform back to original units
            try:
                inv_input, inv_target, inv_pred = get_original_values(
                    df.columns, input_np, target_np, prediction_np, scalers, n_future, num_features
                )
            except Exception as e:
                print(f"Error during inverse transformation: {str(e)}")
                continue

            print("Calculating scores...")
            try:
                model_score = calculate_score(inv_target, inv_pred)
            except Exception as e:
                print(f"Error calculating score: {e}")
                continue

            # Append and maintain top N (lowest score first)
            top_models.append((model_file, model_score))
            top_models = sorted(top_models, key=lambda x: x[1])
            top_models = top_models[:top_n]

            print(f"Current top {len(top_models)} models:")
            for m, s in top_models:
                print(f"  Model: {m}, Score: {s:.2%}")

        return top_models

    # Collect checkpoint paths below the search root: every *.ckpt match
    # first, followed by every *.pth match.
    model_files = [
        path
        for pattern in ("**/*.ckpt", "**/*.pth")
        for path in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
    ]
    print(f"Total model files found: {len(model_files)}")

    # Keep only the first 20 files to bound the run; adjust as needed.
    model_files = model_files[:20]

    # Rank the candidates and keep the best top_n.
    top_n = 10
    top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

    # Report the final ranking.
    print(f"\nTop {len(top_models)} models:")
    for model_file, score in top_models:
        print(f"Model: {model_file}, Score: {score:.2%}")

# Collect checkpoint paths below the search root: every *.ckpt match first,
# followed by every *.pth match.
model_files = [
    path
    for pattern in ("**/*.ckpt", "**/*.pth")
    for path in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
]
print(f"Total model files found: {len(model_files)}")

# Keep only the first 20 files to bound the run; adjust as needed.
model_files = model_files[:20]

# Rank the candidates and keep the best top_n.
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Report the final ranking.
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.table import Table
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback
import warnings

# Create a Rich Console and bind it to the module-level name `console`
console = Console()

# Define your CryptoTransformer class as per your implementation
class CryptoTransformer(torch.nn.Module):
    """Encoder-decoder Transformer placed between two linear projections.

    ``input_fc`` lifts both the source and the target sequence from
    ``input_size`` to ``d_model``; ``output_fc`` maps every decoder position
    to ``num_outputs`` values. Tensors are batch-first, i.e.
    ``(batch, seq, feature)``, the same layout ``CryptoLSTM`` uses.

    Args:
        input_size: Number of features per time step of ``src`` and ``tgt``.
        d_model: Width of the Transformer.
        nhead: Number of attention heads.
        num_encoder_layers: Number of encoder layers.
        num_decoder_layers: Number of decoder layers.
        dim_feedforward: Width of the feed-forward sub-layers.
        dropout: Dropout probability passed to ``torch.nn.Transformer``.
        activation: Activation passed to ``torch.nn.Transformer``.
        n_future: Stored as ``self.n_future``; not used by ``forward``.
        num_outputs: Output width of ``output_fc``.
        max_seq_length: Accepted for signature compatibility; unused here.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            # Inputs arrive as (batch, seq, feature). With the default
            # seq-first layout the batch axis would be read as time, and a
            # source/target pair of different lengths would be rejected as a
            # batch-size mismatch. Parameter names and shapes are unaffected.
            batch_first=True,
        )
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Run the encoder-decoder.

        Args:
            src: Source tensor of shape ``(batch, src_len, input_size)``.
            tgt: Target tensor of shape ``(batch, tgt_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, tgt_len, num_outputs)``.
        """
        # Source and target share one input projection.
        src = self.input_fc(src)
        tgt = self.input_fc(tgt)
        output = self.transformer(src, tgt)
        return self.output_fc(output)

# Define your CryptoLSTM class with corrected LayerNorm naming
class CryptoLSTM(torch.nn.Module):
    """LSTM forecaster: stacked LSTM, optional LayerNorm, linear head.

    The head is applied to the LSTM output of the last time step only, so an
    input of shape ``(batch, seq, input_size)`` yields
    ``(batch, output_size)`` and an unbatched ``(seq, input_size)`` input
    yields ``(output_size,)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked layers.
        output_size: Width of the linear head.
        has_layer_norm: When True, a ``layer_norm`` module is created and
            applied before the head.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_layer_norm=False):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers and warns when a
        # non-zero value is combined with a single layer.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers,
                                  dropout=lstm_dropout, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        # Optionally include LayerNorm if your models use it
        if has_layer_norm:
            self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the head's output for the final time step of ``x``."""
        out, _ = self.lstm(x)
        # Time is the second-to-last axis for both batched (batch, seq, hidden)
        # and unbatched (seq, hidden) outputs.
        last_step = out[..., -1, :]
        if hasattr(self, 'layer_norm'):
            last_step = self.layer_norm(last_step)
        return self.fc(last_step)

# Function to find a suitable number of attention heads
def find_nhead(d_model, max_nhead=16, min_head_dim=8):
    """
    Return the largest head count in ``[1, max_nhead]`` that divides
    ``d_model`` while leaving at least ``min_head_dim`` dimensions per head.

    Falls back to 1 when no head count satisfies both conditions.
    """
    candidates = (
        heads
        for heads in reversed(range(1, max_nhead + 1))
        if d_model % heads == 0 and d_model // heads >= min_head_dim
    )
    return next(candidates, 1)

# Function to inverse transform data, adjusted to handle varying number of features
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers, n_future, num_features):
    """Undo the min-max scaling and log transform on input, target and prediction.

    Every value is mapped back with ``np.exp(scaler.inverse_transform(...))``
    using the scaler registered for its column. Columns without an entry in
    ``scalers`` are skipped.

    Args:
        df_columns: Column names; position ``j`` names feature ``j``. Only the
            first ``num_features`` names are used.
        input_np: Scaled model input, reshapeable to ``(-1, num_features)``.
        target_np: Scaled targets, laid out like ``prediction_np``.
        prediction_np: Scaled predictions. A 1-D/2-D array whose last
            dimension is ``num_features`` is treated as single-step. An array
            whose last dimension is ``n_future * num_features``, or a 3-D
            array shaped ``(rows, n_future, num_features)``, is treated as
            multi-step.
        scalers: Mapping from column name to a fitted scaler exposing
            ``inverse_transform``.
        n_future: Number of forecast steps of a multi-step prediction.
        num_features: Number of features per time step.

    Returns:
        ``(inv_input, inv_target, inv_pred)`` DataFrames. Single-step results
        have one column per feature; multi-step results have one column per
        step and feature named ``f"{col}_step_{i}"``, ordered step by step.

    Raises:
        ValueError: If the prediction shape matches neither layout.
    """
    input_np = np.asarray(input_np)
    target_np = np.asarray(target_np)
    prediction_np = np.asarray(prediction_np)

    def _restore(scaler, values):
        # Scalers expect a 2-D column vector; exp undoes the log transform.
        return np.exp(scaler.inverse_transform(values.reshape(-1, 1)).flatten())

    # (feature index, column name, scaler) for every column that has a scaler.
    usable = [
        (j, col, scalers.get(col))
        for j, col in zip(range(num_features), df_columns)
    ]
    usable = [entry for entry in usable if entry[2] is not None]

    last_dim = prediction_np.shape[-1]
    is_single_step = prediction_np.ndim <= 2 and last_dim == num_features
    # A prediction already shaped (rows, n_future, num_features) also has
    # last_dim == num_features, so it must be recognised by its rank.
    is_multi_step = last_dim == n_future * num_features or (
        prediction_np.ndim == 3 and prediction_np.shape[1:] == (n_future, num_features)
    )

    inv_pred, inv_target, inv_input = {}, {}, {}

    if is_single_step:
        pred_2d = prediction_np.reshape(-1, num_features)
        target_2d = target_np.reshape(-1, num_features)
        input_2d = input_np.reshape(-1, num_features)
        for j, col, scaler in usable:
            inv_pred[col] = _restore(scaler, pred_2d[:, j])
            inv_target[col] = _restore(scaler, target_2d[:, j])
            inv_input[col] = _restore(scaler, input_2d[:, j])
    elif is_multi_step:
        pred_3d = prediction_np.reshape(-1, n_future, num_features)
        target_3d = target_np.reshape(-1, n_future, num_features)
        input_2d = input_np.reshape(-1, num_features)
        # The input history is the same for every step; restore it once.
        restored_input = {col: _restore(scaler, input_2d[:, j]) for j, col, scaler in usable}
        for i in range(n_future):
            for j, col, scaler in usable:
                key = f"{col}_step_{i}"
                inv_pred[key] = _restore(scaler, pred_3d[:, i, j])
                inv_target[key] = _restore(scaler, target_3d[:, i, j])
                inv_input[key] = restored_input[col]
    else:
        raise ValueError(f"Unexpected prediction output shape: {prediction_np.shape}")

    # Build each frame in one go instead of inserting columns one at a time.
    return pd.DataFrame(inv_input), pd.DataFrame(inv_target), pd.DataFrame(inv_pred)

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """Compute, print and return the mean of the per-column MAPE values.

    Each target column is compared with the prediction column of the same
    name over the shorter of their two lengths. Infinite and NaN percentage
    errors (for example from a zero target) are removed before averaging.

    Args:
        inv_target: DataFrame holding the ground-truth values.
        inv_pred: DataFrame holding the predicted values under the same
            column names.

    Returns:
        The unweighted average of the per-column mean absolute percentage
        errors.
    """
    scores = {}
    for column in inv_target.columns:
        truth = inv_target[column]
        guess = inv_pred[column]
        # Restrict both series to their common length.
        overlap = min(len(truth), len(guess))
        truth = truth[:overlap]
        guess = guess[:overlap]
        percentage_error = ((truth - guess) / truth).abs()
        # Zero targets produce inf/NaN entries; leave them out of the mean.
        finite_error = percentage_error.replace([np.inf, -np.inf], np.nan).dropna()
        scores[column] = finite_error.mean()
    # Every column carries the same weight in the overall figure.
    overall_score = np.mean(list(scores.values()))
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Silence two known-noisy warnings: any warning whose text matches
# "does not have many workers", and FutureWarnings that mention torch.load.
warnings.filterwarnings(action="ignore", message=".*does not have many workers.*")
warnings.filterwarnings(action="ignore", message=".*torch.load.*", category=FutureWarning)

def load_model(model_file, cfg, device='cpu'):
    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # Load the state dict
        state_dict = torch.load(model_file, map_location=device)

        # Check if it's a Lightning checkpoint
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Strip 'model.' prefix if present
        if any(key.startswith('model.') for key in state_dict.keys()):
            new_state_dict = {}
            for k, v in state_dict.items():
                if k.startswith('model.'):
                    new_k = k[6:]  # Remove 'model.' prefix
                else:
                    new_k = k
                new_state_dict[new_k] = v
            state_dict = new_state_dict

        # Determine the model type based on key prefixes
        if any(key.startswith('lstm') for key in state_dict.keys()):
            print(f"Loading {model_file} as CryptoLSTM")

            # Infer LSTM parameters
            lstm_weight_ih_keys = [k for k in state_dict.keys() if k.startswith('lstm.weight_ih_l')]
            num_layers = len(lstm_weight_ih_keys)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            sample_weight_ih = state_dict[lstm_weight_ih_keys[0]]
            hidden_size = sample_weight_ih.shape[0] // 4  # LSTM gates
            input_size = sample_weight_ih.shape[1]

            if 'fc.weight' in state_dict:
                output_size = state_dict['fc.weight'].shape[0]
            elif 'fc.bias' in state_dict:
                output_size = state_dict['fc.bias'].shape[0]
            else:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES

            # Infer if LayerNorm is present
            has_layer_norm = any('layer_norm' in key or 'ln' in key for key in state_dict.keys())
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"input_size={input_size}, output_size={output_size}, LayerNorm={has_layer_norm}")

            # Determine expected output_size based on n_future and num_features
            expected_output_size = cfg.N_FUTURE * cfg.NUM_FEATURES
            if output_size != expected_output_size:
                print(f"Model's output_size ({output_size}) does not match expected ({expected_output_size}). Skipping model.")
                return None

            # Create the model
            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=cfg.DROPOUT,
                output_size=output_size,
                has_layer_norm=has_layer_norm
            ).to(device)

            # Load state dict
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {'input_size': input_size, 'output_size': output_size,
                          'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE}

        elif any(key.startswith('transformer') for key in state_dict.keys()):
            # It's a Transformer model
            print(f"Loading {model_file} as CryptoTransformer")
            # Attempt to infer Transformer parameters from state_dict
            # This is heuristic and may need adjustments based on actual model architecture

            # Infer d_model from the first transformer's in_proj_weight
            d_model = cfg.HIDDEN_SIZE  # Default value
            for key in state_dict.keys():
                if 'transformer_encoder.layers.0.self_attn.in_proj_weight' in key:
                    weight = state_dict[key]
                    d_model = weight.shape[1]  # Typically (3*d_model, d_model)
                    break

            # Find a suitable nhead
            nhead = find_nhead(d_model)
            if nhead == 1 and d_model < 8:
                print(f"Could not find a suitable nhead for d_model={d_model}. Skipping model.")
                return None

            # Infer number of encoder and decoder layers
            transformer_encoder_keys = [k for k in state_dict.keys() if 'transformer_encoder.layers.' in k]
            num_encoder_layers = len(set(k.split('.')[3] for k in transformer_encoder_keys))
            transformer_decoder_keys = [k for k in state_dict.keys() if 'transformer_decoder.layers.' in k]
            num_decoder_layers = len(set(k.split('.')[3] for k in transformer_decoder_keys))

            # Infer dim_feedforward from the first linear layer
            dim_feedforward = cfg.DIM_FEEDFORWARD if hasattr(cfg, 'DIM_FEEDFORWARD') else 2048
            dropout = cfg.DROPOUT
            activation = 'gelu'  # Defaulting to 'gelu'

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            # Infer num_outputs based on state_dict
            if 'output_fc.weight' in state_dict:
                num_outputs = state_dict['output_fc.weight'].shape[0]
            else:
                num_outputs = cfg.N_FUTURE * cfg.NUM_FEATURES  # Default assumption

            # Check if num_outputs is divisible by num_features to determine n_future
            if num_outputs % cfg.NUM_FEATURES == 0:
                n_future_model = num_outputs // cfg.NUM_FEATURES
            elif num_outputs == cfg.NUM_FEATURES:
                n_future_model = 1  # Single-step prediction
            else:
                print(f"Unexpected num_outputs ({num_outputs}) for model {model_file}. Skipping model.")
                return None

            # Ensure that we are only processing models trained with 13 features
            if cfg.NUM_FEATURES != 13:
                print(f"Model {model_file} uses {cfg.NUM_FEATURES} features instead of 13. Skipping model.")
                return None

            # Create the model
            model = CryptoTransformer(
                input_size=cfg.NUM_FEATURES,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=n_future_model,
                num_outputs=num_outputs,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            # Load state dict
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {
                'input_size': cfg.NUM_FEATURES,
                'output_size': num_outputs,
                'n_past': cfg.N_PAST,
                'n_future': n_future_model
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None
    except Exception as e:
        print(f"Error loading model {model_file}: {e}")
        return None
    # Function to prepare input data
    def prepare_input(input_data, device):
        """Turn one sample into a float32 tensor with a leading batch axis on `device`."""
        sample = torch.tensor(input_data, dtype=torch.float32)
        batched = sample.unsqueeze(0)
        return batched.to(device)

    # Function to get a random sample from the DataFrame
    def get_random_sample(df, n_past, n_future, input_cols, target_cols):
        """Draw one random (history, future) window pair from `df`.

        Args:
            df: DataFrame to sample rows from.
            n_past: Number of consecutive rows returned as the input window.
            n_future: Number of rows, immediately after the input window,
                returned as the target window.
            input_cols: Columns selected for the input window.
            target_cols: Columns selected for the target window.

        Returns:
            Tuple ``(input_data, target_data)`` of NumPy arrays with
            ``n_past`` and ``n_future`` rows respectively.

        Raises:
            ValueError: If `df` has fewer than ``n_past + n_future`` rows.
        """
        # Largest start index that still leaves room for both windows.
        last_start = len(df) - n_past - n_future
        if last_start < 0:
            raise ValueError("DataFrame is too short for the given n_past and n_future")
        # randint's upper bound is exclusive, so +1 keeps the final window reachable
        # (and lets a frame holding exactly one window be sampled).
        start_idx = np.random.randint(0, last_start + 1)
        split = start_idx + n_past
        input_data = df[input_cols].iloc[start_idx:split].values
        target_data = df[target_cols].iloc[split:split + n_future].values
        return input_data, target_data

    # Function to convert tensors to numpy arrays
    def convert_to_numpy(input_tensor, target, prediction):
        """Move the three tensors to the CPU and return them as NumPy arrays, in argument order."""
        arrays = [tensor.cpu().numpy() for tensor in (input_tensor, target, prediction)]
        return tuple(arrays)

    @dataclass
    class Config:
        """Settings for the checkpoint-evaluation code in this cell.

        Only annotated attributes become dataclass fields. ``num_cpus`` and
        ``NUM_WORKERS`` carry no annotation, so they are plain class attributes
        computed once when the class body runs.
        """
        VERSION_N: int = 1
        RECORDS_TO_LOAD: int = 1205040  # trailing CSV rows kept by load_and_preprocess_data
        # 108 input time steps. NOTE(review): the previous comment called this "1 week of
        # 10-minute intervals", but 108 ten-minute steps is 18 hours - confirm the sampling interval.
        N_PAST: int = 3 * 12 * 3
        # 24 predicted time steps. NOTE(review): previously described as "1 day of 10-minute
        # intervals", but 24 ten-minute steps is 4 hours - confirm the sampling interval.
        N_FUTURE: int = 1 * 12 * 2
        BATCH_SIZE: int = 5000
        HIDDEN_SIZE: int = 512  # fallback d_model in load_model when it cannot be read from the checkpoint
        NUM_LAYERS: int = 2
        DROPOUT: float = 0.2  # dropout passed to the models rebuilt by load_model
        NUM_EPOCHS: int = 150
        HOT_RESTART: bool = True
        TRAIN_FIRST: bool = True
        EPOCH_TO_RESTART: int = 50
        BATCH_FACTOR: int = 81
        DEBUG_FREQ: int = 180
        # Unannotated on purpose or by accident: these two are NOT dataclass fields.
        num_cpus = multiprocessing.cpu_count()
        NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
        DEBUG_ON: bool = False
        DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'  # download source when DATA_FILE is missing
        DATA_FILE: str = './data/fill_nan_df.csv'  # local CSV path read by load_and_preprocess_data
        MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"  # overwritten per file by evaluate_and_get_top_models
        MODEL_SAVE_PATH: str = f'./yay'
        # Evaluated once, when the class body executes.
        DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        EPSILON: float = 1e-4
        PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # root directory globbed recursively for *.ckpt / *.pth
        # Additional parameters
        NUM_FEATURES: int = None  # None until set to df.shape[1] after the data is loaded

    # Single shared settings instance used by the functions below.
    cfg = Config()

    # Load and preprocess data
    def load_and_preprocess_data(file_path: str, download_url: str = None):
        """
        Load a CSV, then log-transform and min-max scale every column.

        If `file_path` does not exist it is downloaded from `download_url` first.
        The download is written to a temporary ``.part`` file and only renamed to
        `file_path` once complete, so an interrupted transfer never leaves a
        truncated CSV that a later run would mistake for valid data.

        Reads the module-level ``cfg.RECORDS_TO_LOAD`` to decide how many
        trailing rows to keep.

        Args:
            file_path (str): Path to the CSV file. Must contain a ``timestamp`` column.
            download_url (str, optional): URL to download the CSV file. Defaults to None.

        Returns:
            pd.DataFrame: Preprocessed DataFrame indexed by ``timestamp``.
            dict: Fitted ``MinMaxScaler`` for each column, keyed by column name.

        Raises:
            FileNotFoundError: If the file is missing and no `download_url` was given.
            requests.exceptions.RequestException: If the download fails.
            ValueError: If any column contains non-positive values.
        """
        ic("Starting data loading and preprocessing...")

        # Check if the file exists
        if not os.path.exists(file_path):
            ic(f"File {file_path} does not exist.")
            # dirname is '' for a bare filename, and os.makedirs('') raises.
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            if download_url:
                ic(f"Downloading file from {download_url}...")
                partial_path = file_path + ".part"
                try:
                    # The timeout bounds each connect/read wait, not the whole transfer.
                    with requests.get(download_url, stream=True, timeout=30) as response:
                        response.raise_for_status()
                        with open(partial_path, 'wb') as f:
                            for chunk in response.iter_content(chunk_size=8192):
                                f.write(chunk)
                    os.replace(partial_path, file_path)
                    ic(f"File downloaded and saved to {file_path}")
                except requests.exceptions.RequestException as e:
                    ic(f"Failed to download the file: {e}")
                    raise
                finally:
                    # Only present if the transfer did not complete.
                    if os.path.exists(partial_path):
                        os.remove(partial_path)
            else:
                ic("Download URL not provided. Cannot download the file.")
                raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

        # Load the DataFrame
        df = pd.read_csv(file_path, parse_dates=['timestamp'])
        df.set_index('timestamp', inplace=True)
        # Copy so the column assignments below write to an independent frame.
        df = df.tail(cfg.RECORDS_TO_LOAD).copy()
        scalers = {}
        start_time_preprocess = time.time()

        for col in df.columns:
            # Ensure no non-positive values before log transform
            if (df[col] <= 0).any():
                raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

            # Apply natural logarithm transformation
            df[col] = np.log(df[col])

            # Fit one scaler per column so each can be inverted independently later
            scaler = MinMaxScaler()
            df[col] = scaler.fit_transform(df[[col]])
            scalers[col] = scaler

        ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
        ic(f"DataFrame shape: {df.shape}")

        return df, scalers

    # Load the scaled data once; the feature count is simply the number of CSV columns.
    df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
    cfg.NUM_FEATURES = df.shape[1]

    def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
        """Score each checkpoint on one random window and keep the `top_n` best.

        For every path in `model_files` the model is rebuilt with `load_model`,
        run on a single window drawn by `get_random_sample`, mapped back to the
        original scale with `get_original_values` and scored with
        `calculate_score` (lower is better). Any model that fails a step is
        reported and skipped rather than aborting the run.

        Args:
            model_files: Iterable of checkpoint paths.
            cfg: Settings object; reads ``DEVICE`` and ``NUM_FEATURES`` and
                overwrites ``MODEL_PATH`` with the file being evaluated.
            df: Preprocessed DataFrame to sample windows from.
            scalers: Per-column scalers used for the inverse transform.
            top_n: Maximum number of entries to keep.

        Returns:
            List of ``(model_file, score)`` tuples sorted by ascending score.
        """
        top_models = []
        for model_file in model_files:
            cfg.MODEL_PATH = model_file
            print(f"\nLoading model from {cfg.MODEL_PATH}")

            result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
            if result is None:
                print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
                continue

            model, params = result
            input_size = params['input_size']
            output_size = params['output_size']
            n_past = params['n_past']
            n_future = params['n_future']
            num_features = cfg.NUM_FEATURES

            # Get the input columns
            input_cols = df.columns[:input_size]

            try:
                input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, input_cols)
            except Exception as e:
                print(f"Error getting random sample: {str(e)}")
                continue

            # Prepare the tensors
            try:
                input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
                target_tensor = torch.tensor(target_data, dtype=torch.float32).to(cfg.DEVICE)
            except Exception as e:
                print(f"Error preparing tensors: {str(e)}")
                continue

            try:
                model.eval()
                with torch.no_grad():
                    if isinstance(model, CryptoTransformer):
                        # For Transformer, prepare tgt_input with zeros
                        tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                        prediction = model(input_tensor, tgt_input)
                    elif isinstance(model, CryptoLSTM):
                        # Roll the LSTM forward one step at a time, feeding each
                        # prediction back in as the newest row of the window.
                        steps = []
                        window = input_tensor.clone()
                        for _ in range(n_future):
                            pred = model(window)
                            # A model that returns the whole sequence yields a 3-D
                            # tensor; only its final time step is the next-step forecast.
                            if pred.dim() == 3:
                                pred = pred[:, -1, :]
                            steps.append(pred)
                            window = torch.cat((window[:, 1:, :], pred.unsqueeze(1)), dim=1)
                        prediction = torch.stack(steps, dim=1)  # (1, n_future, output_size)
                    else:
                        print(f"Unknown model type for file: {model_file}")
                        continue
            except Exception as e:
                print(f"Failed to make predictions: {e}")
                continue

            # Convert tensors to numpy
            input_np = input_tensor.squeeze(0).cpu().numpy()  # (n_past, input_size)
            target_np = target_tensor.cpu().numpy()  # (n_future, input_size)
            prediction_np = prediction.cpu().numpy()

            # Each model family has a different expected output width.
            if isinstance(model, CryptoTransformer):
                if output_size != n_future * num_features:
                    print(f"Model's output_size ({output_size}) does not match n_future * num_features ({n_future * num_features}). Skipping model.")
                    continue
            else:
                if output_size != num_features:
                    print(f"LSTM model's output_size ({output_size}) does not match num_features ({num_features}). Skipping model.")
                    continue

            try:
                prediction_np = prediction_np.reshape(-1, n_future, num_features)
                target_np = target_np.reshape(-1, n_future, num_features)
                input_np = input_np.reshape(-1, num_features)
            except ValueError as ve:
                print(f"ValueError during reshaping: {ve}. Skipping model.")
                continue

            # Inverse transform
            try:
                inv_input, inv_target, inv_pred = get_original_values(
                    df.columns, input_np, target_np, prediction_np, scalers, n_future, num_features
                )
            except Exception as e:
                print(f"Error during inverse transformation: {str(e)}")
                continue

            print("Calculating scores...")
            try:
                model_score = calculate_score(inv_target, inv_pred)
            except Exception as e:
                print(f"Error calculating score: {e}")
                continue

            # NaN/inf compare unpredictably and would corrupt the sorted ranking.
            if not np.isfinite(model_score):
                print(f"Non-finite score ({model_score}) for {model_file}. Skipping model.")
                continue

            # Append and maintain top N (ascending: smallest error first)
            top_models.append((model_file, model_score))
            top_models.sort(key=lambda entry: entry[1])
            del top_models[top_n:]

            print(f"Current top {len(top_models)} models:")
            for m, s in top_models:
                print(f"  Model: {m}, Score: {s:.2%}")

        return top_models

    # Collect checkpoints anywhere below the search root: .ckpt matches first, then .pth
    model_files = [
        found
        for pattern in ("**/*.ckpt", "**/*.pth")
        for found in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
    ]
    print(f"Total model files found: {len(model_files)}")

    # Cap the run at the first 20 files while testing (raise or remove as needed)
    del model_files[20:]

    # Rank the candidates and keep the best ten
    top_n = 10
    top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

    # Report the final ranking
    print(f"\nTop {len(top_models)} models:")
    for model_file, score in top_models:
        print(f"Model: {model_file}, Score: {score:.2%}")


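# NOTE: load_model() contains the guard below, so evaluation only proceeds when the
# loaded data has exactly 13 feature columns:
# # Ensure that we are only processing models trained with 13 features
# if cfg.NUM_FEATURES != 13:
#     print(f"Model {model_file} uses {cfg.NUM_FEATURES} features instead of 13. Skipping model.")
#     return None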

# Collect checkpoints anywhere below the search root: .ckpt matches first, then .pth
model_files = [
    found
    for pattern in ("**/*.ckpt", "**/*.pth")
    for found in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
]
print(f"Total model files found: {len(model_files)}")

# Cap the run at the first 20 files while testing (raise or remove as needed)
del model_files[20:]

# Rank the candidates and keep the best ten
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Report the final ranking
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.table import Table
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback
import warnings

# Module-level Rich console instance for this cell.
console = Console()

# Define your CryptoTransformer class as per your implementation
class CryptoTransformer(torch.nn.Module):
    """Encoder-decoder Transformer over feature sequences.

    Both source and target sequences are projected from `input_size` to
    `d_model` by the same linear layer, passed through ``torch.nn.Transformer``
    and projected to `num_outputs` values per target position.

    Args:
        input_size: Number of features per time step in `src` and `tgt`.
        d_model, nhead, num_encoder_layers, num_decoder_layers,
        dim_feedforward, dropout, activation: Forwarded to ``torch.nn.Transformer``.
        n_future: Stored on the instance as ``n_future``; not used by `forward`.
        num_outputs: Width of the final linear projection.
        max_seq_length: Accepted for signature compatibility and stored; this
            implementation has no positional encoding, so it is otherwise unused.
        batch_first: Tensor layout expected by `forward`. Defaults to True
            because the evaluation code in this file builds tensors shaped
            ``(batch, seq, features)``; with the sequence-first layout those
            inputs are read as sequences of length 1 with mismatched batch
            sizes and ``torch.nn.Transformer`` rejects them.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length,
                 batch_first=True):
        super().__init__()
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            batch_first=batch_first,
        )
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future
        self.max_seq_length = max_seq_length

    def forward(self, src, tgt):
        """Return ``output_fc(transformer(input_fc(src), input_fc(tgt)))``.

        With ``batch_first=True`` the result has shape
        ``(batch, tgt_len, num_outputs)``.
        """
        src = self.input_fc(src)
        tgt = self.input_fc(tgt)
        output = self.transformer(src, tgt)
        return self.output_fc(output)

# Define your CryptoLSTM class with corrected LayerNorm naming
class CryptoLSTM(torch.nn.Module):
    """LSTM followed by a linear head applied to the final time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden width.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between stacked LSTM layers.
        output_size: Width of the linear head.
        has_layer_norm: When True, a ``LayerNorm(hidden_size)`` named
            ``layer_norm`` is applied to the LSTM output before the head.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_layer_norm=False):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers and warns when a
        # non-zero value is requested for a single layer, so pass 0 in that case.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers,
                                  dropout=lstm_dropout, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        # Optionally include LayerNorm if your models use it
        if has_layer_norm:
            self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the head's output for the last time step of `x`.

        Accepts ``(batch, seq, input_size)`` and returns ``(batch, output_size)``;
        an unbatched ``(seq, input_size)`` input returns ``(output_size,)``.
        The evaluation rollout in this file appends this result to its input
        window as the next row, which requires a per-sample vector rather than
        the whole output sequence.
        """
        out, _ = self.lstm(x)
        if hasattr(self, 'layer_norm'):
            out = self.layer_norm(out)
        # The ellipsis makes the last-step selection work with or without a batch axis.
        return self.fc(out[..., -1, :])

# Function to find a suitable number of attention heads
def find_nhead(d_model, max_nhead=16, min_head_dim=8):
    """
    Pick the number of attention heads for a given model width.

    Candidates are tried from `max_nhead` down to 1; the first one that divides
    `d_model` evenly while leaving at least `min_head_dim` dimensions per head
    is returned. If none qualifies, 1 is returned.
    """
    candidates = (
        heads
        for heads in reversed(range(1, max_nhead + 1))
        if d_model % heads == 0 and d_model // heads >= min_head_dim
    )
    # Default of 1 is the single-head fallback
    return next(candidates, 1)

# Function to inverse transform data, adjusted to handle varying number of features
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers, n_future, num_features):
    """Undo the scaling and log transform on input, target and prediction arrays.

    Each column is inverted with its own scaler from `scalers`, followed by
    ``np.exp``. Columns without a scaler are skipped.

    Two prediction layouts are recognised by the size of the last axis:

    * ``num_features``: the last axis is the feature axis. Output frames have
      one column per feature, holding every value of that feature flattened.
      The feature is selected on the last axis, so arrays with extra leading
      axes such as ``(batch, n_future, num_features)`` are handled correctly.
    * ``n_future * num_features``: a flat multi-step vector. Output columns are
      named ``"{col}_step_{i}"``; the input history of each feature is repeated
      under every step name.

    Args:
        df_columns: Column names, in the order of the feature axis.
        input_np, target_np, prediction_np: Scaled arrays.
        scalers: Mapping from column name to an object with ``inverse_transform``.
        n_future: Number of predicted steps.
        num_features: Number of features per step.

    Returns:
        Tuple ``(inv_input, inv_target, inv_pred)`` of DataFrames.

    Raises:
        ValueError: If the last axis of `prediction_np` matches neither layout.
    """
    def _restore(scaler, values):
        # The scaler expects a 2-D single-column array.
        return np.exp(scaler.inverse_transform(values.reshape(-1, 1)).flatten())

    # Collect columns in dicts and build each frame once; inserting hundreds of
    # columns one at a time fragments a DataFrame.
    pred_cols, target_cols, input_cols = {}, {}, {}
    out_width = prediction_np.shape[-1]

    if out_width == num_features:
        for j, col in enumerate(df_columns):
            scaler = scalers.get(col)
            if scaler is None:
                continue
            pred_cols[col] = _restore(scaler, prediction_np[..., j])
            target_cols[col] = _restore(scaler, target_np[..., j])
            input_cols[col] = _restore(scaler, input_np[..., j])
    elif out_width == n_future * num_features:
        # Multi-step prediction (Transformer)
        prediction_np = prediction_np.reshape(-1, n_future, num_features)
        target_np = target_np.reshape(-1, n_future, num_features)
        input_np = input_np.reshape(-1, num_features)
        for step in range(n_future):
            for j, col in enumerate(df_columns):
                scaler = scalers.get(col)
                if scaler is None:
                    continue
                name = f"{col}_step_{step}"
                pred_cols[name] = _restore(scaler, prediction_np[:, step, j])
                target_cols[name] = _restore(scaler, target_np[:, step, j])
                input_cols[name] = _restore(scaler, input_np[:, j])
    else:
        raise ValueError(f"Unexpected prediction output shape: {prediction_np.shape}")

    return pd.DataFrame(input_cols), pd.DataFrame(target_cols), pd.DataFrame(pred_cols)

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """Return the mean absolute percentage error averaged over columns.

    For each column of `inv_target` the absolute percentage error against the
    same column of `inv_pred` is computed over the rows both frames share.
    Rows whose error is infinite or NaN (for example a zero target) are
    dropped. A column left with no usable rows is excluded from the average,
    so it cannot turn the overall score into NaN.

    The overall score is also printed.

    Args:
        inv_target: DataFrame of true values.
        inv_pred: DataFrame of predicted values with the same column names.

    Returns:
        The average of the per-column MAPE values as a fraction (0.1 == 10%),
        or NaN when no column has any usable row.
    """
    scores = {}
    for col in inv_target.columns:
        # Ensure lengths match
        min_len = min(len(inv_target[col]), len(inv_pred[col]))
        target_col = inv_target[col][:min_len]
        pred_col = inv_pred[col][:min_len]
        ape = np.abs((target_col - pred_col) / target_col)
        # Division by zero yields inf (or NaN for 0/0); neither is a usable error value
        ape = ape.replace([np.inf, -np.inf], np.nan).dropna()
        if ape.empty:
            continue
        scores[col] = ape.mean()
    # Average of the individual column scores
    overall_score = np.mean(list(scores.values())) if scores else np.float64(np.nan)
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Suppress specific warnings:
# - any warning whose message matches "does not have many workers"
# - FutureWarnings whose message mentions torch.load
warnings.filterwarnings("ignore", message=".*does not have many workers.*")
warnings.filterwarnings("ignore", category=FutureWarning, message=".*torch.load.*")

def _count_indexed_layers(keys, markers):
    """Count the distinct layer indices that directly follow any of `markers` in `keys`."""
    seen = set()
    for key in keys:
        for marker in markers:
            _, hit, tail = key.partition(marker)
            if hit:
                # tail starts with "<index>.<submodule>..."
                seen.add(tail.split('.', 1)[0])
                break
    return len(seen)


def load_model(model_file, cfg, device='cpu'):
    """Rebuild a CryptoLSTM or CryptoTransformer from a checkpoint file.

    The checkpoint may be a bare state dict or a dict with a ``'state_dict'``
    entry; a leading ``'model.'`` is stripped from parameter names. The model
    family is chosen from the key prefixes (``lstm`` / ``transformer``) and the
    architecture sizes are inferred from tensor shapes where possible, falling
    back to values on `cfg`.

    Args:
        model_file: Path to the checkpoint.
        cfg: Settings object; reads ``NUM_FEATURES``, ``N_PAST``, ``N_FUTURE``,
            ``DROPOUT``, ``HIDDEN_SIZE`` and, if present, ``DIM_FEEDFORWARD``.
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        ``(model, params)`` where `params` holds ``input_size``,
        ``output_size``, ``n_past``, ``n_future`` and ``model_type``; or
        ``None`` when the file is missing, unrecognised, incompatible with
        `cfg`, or loading raises (the error is printed with a traceback).
    """
    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # NOTE(security): torch.load unpickles the file. Only point this at
        # checkpoints from a trusted source.
        state_dict = torch.load(model_file, map_location=device)

        # Check if it's a Lightning checkpoint
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Strip 'model.' prefix if present
        prefix = 'model.'
        if any(key.startswith(prefix) for key in state_dict):
            state_dict = {
                (k[len(prefix):] if k.startswith(prefix) else k): v
                for k, v in state_dict.items()
            }

        # Determine the model type based on key prefixes
        if any(key.startswith('lstm') for key in state_dict):
            print(f"Loading {model_file} as CryptoLSTM")

            # One weight_ih tensor exists per stacked layer
            lstm_weight_ih_keys = [k for k in state_dict if k.startswith('lstm.weight_ih_l')]
            num_layers = len(lstm_weight_ih_keys)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            sample_weight_ih = state_dict[lstm_weight_ih_keys[0]]
            hidden_size = sample_weight_ih.shape[0] // 4  # LSTM gates
            input_size = sample_weight_ih.shape[1]

            if 'fc.weight' in state_dict:
                output_size = state_dict['fc.weight'].shape[0]
            elif 'fc.bias' in state_dict:
                output_size = state_dict['fc.bias'].shape[0]
            else:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES

            # Infer if LayerNorm is present
            has_layer_norm = any('layer_norm' in key or 'ln' in key for key in state_dict)
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"input_size={input_size}, output_size={output_size}, LayerNorm={has_layer_norm}")

            if output_size == cfg.NUM_FEATURES:
                # Single-step LSTM model (requires iterative predictions)
                model_type = 'single_step_lstm'
            else:
                # Anything else must emit every future step at once
                model_type = 'multi_step_lstm'
                expected_output_size = cfg.N_FUTURE * cfg.NUM_FEATURES
                if output_size != expected_output_size:
                    print(f"Model's output_size ({output_size}) does not match expected ({expected_output_size}). Skipping model.")
                    return None

            # Create the model
            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=cfg.DROPOUT,
                output_size=output_size,
                has_layer_norm=has_layer_norm
            ).to(device)

            # strict=False tolerates missing/unexpected names; report them instead
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {'input_size': input_size, 'output_size': output_size,
                          'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE, 'model_type': model_type}

        elif any(key.startswith('transformer') for key in state_dict):
            # It's a Transformer model
            print(f"Loading {model_file} as CryptoTransformer")
            # Parameter inference is heuristic and may need adjusting to the
            # actual architecture. Two key layouts are recognised:
            # "transformer_encoder.layers.N..." and the
            # "transformer.encoder.layers.N..." names produced by a
            # torch.nn.Transformer stored as `self.transformer`.
            encoder_markers = ('transformer_encoder.layers.', 'transformer.encoder.layers.')
            decoder_markers = ('transformer_decoder.layers.', 'transformer.decoder.layers.')

            def _first_encoder_tensor(suffix):
                for key, weight in state_dict.items():
                    if any(marker + suffix in key for marker in encoder_markers):
                        return weight
                return None

            # in_proj_weight is typically (3*d_model, d_model)
            in_proj = _first_encoder_tensor('0.self_attn.in_proj_weight')
            d_model = in_proj.shape[1] if in_proj is not None else cfg.HIDDEN_SIZE

            # Find a suitable nhead
            nhead = find_nhead(d_model)
            if nhead == 1 and d_model < 8:
                print(f"Could not find a suitable nhead for d_model={d_model}. Skipping model.")
                return None

            # The layer index is the component right after "layers."
            num_encoder_layers = _count_indexed_layers(state_dict, encoder_markers)
            num_decoder_layers = _count_indexed_layers(state_dict, decoder_markers)

            # linear1.weight is (dim_feedforward, d_model); prefer the checkpoint's own width
            linear1 = _first_encoder_tensor('0.linear1.weight')
            if linear1 is not None:
                dim_feedforward = linear1.shape[0]
            else:
                dim_feedforward = getattr(cfg, 'DIM_FEEDFORWARD', 2048)
            dropout = cfg.DROPOUT
            activation = 'gelu'  # Defaulting to 'gelu'

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            # Infer num_outputs based on state_dict
            if 'output_fc.weight' in state_dict:
                num_outputs = state_dict['output_fc.weight'].shape[0]
            else:
                num_outputs = cfg.N_FUTURE * cfg.NUM_FEATURES  # Default assumption

            # num_outputs must be a whole number of feature vectors
            # (this also covers the single-step case num_outputs == NUM_FEATURES)
            if num_outputs % cfg.NUM_FEATURES != 0:
                print(f"Unexpected num_outputs ({num_outputs}) for model {model_file}. Skipping model.")
                return None
            n_future_model = num_outputs // cfg.NUM_FEATURES

            # Ensure that we are only processing models trained with 13 features
            if cfg.NUM_FEATURES != 13:
                print(f"Model {model_file} uses {cfg.NUM_FEATURES} features instead of 13. Skipping model.")
                return None

            # Expected output_size for Transformer is n_future * num_features
            expected_output_size = cfg.N_FUTURE * cfg.NUM_FEATURES
            if num_outputs != expected_output_size:
                print(f"Model's output_size ({num_outputs}) does not match expected ({expected_output_size}). Skipping model.")
                return None

            # Create the model
            model = CryptoTransformer(
                input_size=cfg.NUM_FEATURES,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=n_future_model,
                num_outputs=num_outputs,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            # strict=False tolerates missing/unexpected names; report them instead
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {
                'input_size': cfg.NUM_FEATURES,
                'output_size': num_outputs,
                'n_past': cfg.N_PAST,
                'n_future': n_future_model,
                'model_type': 'transformer'
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None
    except Exception as e:
        # Boundary handler: one bad checkpoint must not abort a batch evaluation.
        print(f"Error loading model {model_file}: {e}")
        traceback.print_exc()
        return None
    # Function to prepare input data
    def prepare_input(input_data, device):
        """Turn one sample into a float32 tensor with a leading batch axis on `device`."""
        sample = torch.tensor(input_data, dtype=torch.float32)
        batched = sample.unsqueeze(0)
        return batched.to(device)

    # Function to get a random sample from the DataFrame
    def get_random_sample(df, n_past, n_future, input_cols, target_cols):
        """Draw one random (history, future) window pair from `df`.

        Args:
            df: DataFrame to sample rows from.
            n_past: Number of consecutive rows returned as the input window.
            n_future: Number of rows, immediately after the input window,
                returned as the target window.
            input_cols: Columns selected for the input window.
            target_cols: Columns selected for the target window.

        Returns:
            Tuple ``(input_data, target_data)`` of NumPy arrays with
            ``n_past`` and ``n_future`` rows respectively.

        Raises:
            ValueError: If `df` has fewer than ``n_past + n_future`` rows.
        """
        # Largest start index that still leaves room for both windows.
        last_start = len(df) - n_past - n_future
        if last_start < 0:
            raise ValueError("DataFrame is too short for the given n_past and n_future")
        # randint's upper bound is exclusive, so +1 keeps the final window reachable
        # (and lets a frame holding exactly one window be sampled).
        start_idx = np.random.randint(0, last_start + 1)
        split = start_idx + n_past
        input_data = df[input_cols].iloc[start_idx:split].values
        target_data = df[target_cols].iloc[split:split + n_future].values
        return input_data, target_data

    # Function to convert tensors to numpy arrays
    def convert_to_numpy(input_tensor, target, prediction):
        """Move the three tensors to the CPU and return them as NumPy arrays, in argument order."""
        arrays = [tensor.cpu().numpy() for tensor in (input_tensor, target, prediction)]
        return tuple(arrays)

    @dataclass
    class Config:
        """Settings for the checkpoint-evaluation code in this cell.

        Only annotated attributes become dataclass fields. ``num_cpus`` and
        ``NUM_WORKERS`` carry no annotation, so they are plain class attributes
        computed once when the class body runs.
        """
        VERSION_N: int = 1
        RECORDS_TO_LOAD: int = 1205040  # trailing CSV rows kept by load_and_preprocess_data
        # 108 input time steps. NOTE(review): the previous comment called this "1 week of
        # 10-minute intervals", but 108 ten-minute steps is 18 hours - confirm the sampling interval.
        N_PAST: int = 3 * 12 * 3
        # 24 predicted time steps. NOTE(review): previously described as "1 day of 10-minute
        # intervals", but 24 ten-minute steps is 4 hours - confirm the sampling interval.
        N_FUTURE: int = 1 * 12 * 2
        BATCH_SIZE: int = 5000
        HIDDEN_SIZE: int = 512  # fallback d_model in load_model when it cannot be read from the checkpoint
        NUM_LAYERS: int = 2
        DROPOUT: float = 0.2  # dropout passed to the models rebuilt by load_model
        NUM_EPOCHS: int = 150
        HOT_RESTART: bool = True
        TRAIN_FIRST: bool = True
        EPOCH_TO_RESTART: int = 50
        BATCH_FACTOR: int = 81
        DEBUG_FREQ: int = 180
        # Unannotated on purpose or by accident: these two are NOT dataclass fields.
        num_cpus = multiprocessing.cpu_count()
        NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
        DEBUG_ON: bool = False
        DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'  # download source when DATA_FILE is missing
        DATA_FILE: str = './data/fill_nan_df.csv'  # local CSV path read by load_and_preprocess_data
        MODEL_PATH: str = "/teamspace/studios/this_studio/models/TransformerModel355/model-355-epoch=40-val_loss=0.62.ckpt"  # overwritten per file by evaluate_and_get_top_models
        MODEL_SAVE_PATH: str = f'./yay'
        # Evaluated once, when the class body executes.
        DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        EPSILON: float = 1e-4
        PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # root directory globbed recursively for *.ckpt / *.pth
        # Additional parameters
        NUM_FEATURES: int = None  # None until set to df.shape[1] after the data is loaded

    # Single shared settings instance used by the functions below.
    cfg = Config()

    # Load and preprocess data
    def load_and_preprocess_data(file_path: str, download_url: str = None):
        """
        Load a CSV, then log-transform and min-max scale every column.

        If `file_path` does not exist it is downloaded from `download_url` first.
        The download is written to a temporary ``.part`` file and only renamed to
        `file_path` once complete, so an interrupted transfer never leaves a
        truncated CSV that a later run would mistake for valid data.

        Reads the module-level ``cfg.RECORDS_TO_LOAD`` to decide how many
        trailing rows to keep.

        Args:
            file_path (str): Path to the CSV file. Must contain a ``timestamp`` column.
            download_url (str, optional): URL to download the CSV file. Defaults to None.

        Returns:
            pd.DataFrame: Preprocessed DataFrame indexed by ``timestamp``.
            dict: Fitted ``MinMaxScaler`` for each column, keyed by column name.

        Raises:
            FileNotFoundError: If the file is missing and no `download_url` was given.
            requests.exceptions.RequestException: If the download fails.
            ValueError: If any column contains non-positive values.
        """
        ic("Starting data loading and preprocessing...")

        # Check if the file exists
        if not os.path.exists(file_path):
            ic(f"File {file_path} does not exist.")
            # dirname is '' for a bare filename, and os.makedirs('') raises.
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            if download_url:
                ic(f"Downloading file from {download_url}...")
                partial_path = file_path + ".part"
                try:
                    # The timeout bounds each connect/read wait, not the whole transfer.
                    with requests.get(download_url, stream=True, timeout=30) as response:
                        response.raise_for_status()
                        with open(partial_path, 'wb') as f:
                            for chunk in response.iter_content(chunk_size=8192):
                                f.write(chunk)
                    os.replace(partial_path, file_path)
                    ic(f"File downloaded and saved to {file_path}")
                except requests.exceptions.RequestException as e:
                    ic(f"Failed to download the file: {e}")
                    raise
                finally:
                    # Only present if the transfer did not complete.
                    if os.path.exists(partial_path):
                        os.remove(partial_path)
            else:
                ic("Download URL not provided. Cannot download the file.")
                raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

        # Load the DataFrame
        df = pd.read_csv(file_path, parse_dates=['timestamp'])
        df.set_index('timestamp', inplace=True)
        # Copy so the column assignments below write to an independent frame.
        df = df.tail(cfg.RECORDS_TO_LOAD).copy()
        scalers = {}
        start_time_preprocess = time.time()

        for col in df.columns:
            # Ensure no non-positive values before log transform
            if (df[col] <= 0).any():
                raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

            # Apply natural logarithm transformation
            df[col] = np.log(df[col])

            # Fit one scaler per column so each can be inverted independently later
            scaler = MinMaxScaler()
            df[col] = scaler.fit_transform(df[[col]])
            scalers[col] = scaler

        ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
        ic(f"DataFrame shape: {df.shape}")

        return df, scalers

    # Load the scaled dataset once, then record its column count on cfg;
    # evaluate_and_get_top_models below reads cfg.NUM_FEATURES when reshaping.
    df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
    cfg.NUM_FEATURES = df.shape[1]

    def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
        """Score each checkpoint on a sampled window and keep the best ``top_n``.

        For every path in ``model_files`` the model is loaded with ``load_model``,
        fed one window obtained from ``get_random_sample``, and scored with
        ``calculate_score`` on the inverse-transformed target and prediction.
        Any failing stage prints a message and skips that model.

        Args:
            model_files: Iterable of checkpoint paths.
            cfg: Configuration object; ``DEVICE`` and ``NUM_FEATURES`` are read and
                ``MODEL_PATH`` is overwritten on every iteration (side effect).
            df: Preprocessed DataFrame the windows are drawn from.
            scalers: Per-column scalers forwarded to ``get_original_values``.
            top_n: Maximum number of entries kept in the result.

        Returns:
            list[tuple]: ``(model_file, score)`` pairs sorted by ascending score,
            at most ``top_n`` long.

        NOTE(review): ``get_random_sample`` is called once per model, so models
        are presumably scored on different windows — confirm this is intended.
        NOTE(review): only the model types 'transformer' and 'single_step_lstm'
        are handled; confirm the ``load_model`` in scope returns those values.
        """
        top_models = []
        for model_file in model_files:
            cfg.MODEL_PATH = model_file
            print(f"\nLoading model from {cfg.MODEL_PATH}")

            # load_model signals failure by returning None
            result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
            if result is None:
                print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
                continue

            model, params = result
            input_size = params['input_size']
            output_size = params['output_size']
            n_past = params['n_past']
            n_future = params['n_future']
            num_features = cfg.NUM_FEATURES
            model_type = params.get('model_type', 'unknown')

            # Get the input columns (the first input_size columns of df)
            input_cols = df.columns[:input_size]

            try:
                # The same columns are used for both the input and the target window
                input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, input_cols)
            except Exception as e:
                print(f"Error getting random sample: {str(e)}")
                continue

            # Prepare the tensors (only the input gets a leading batch dimension)
            try:
                input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
                target_tensor = torch.tensor(target_data, dtype=torch.float32).to(cfg.DEVICE)
            except Exception as e:
                print(f"Error preparing tensors: {str(e)}")
                continue

            try:
                model.eval()
                with torch.no_grad():
                    if model_type == 'transformer':
                        # For Transformer, prepare tgt_input with zeros
                        tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                        prediction = model(input_tensor, tgt_input)
                    elif model_type == 'single_step_lstm':
                        # For single-step LSTM, perform iterative predictions
                        prediction = []
                        current_input = input_tensor.clone()
                        for _ in range(n_future):
                            pred = model(current_input)  # pred shape: (1, 13)
                            prediction.append(pred)
                            # Append the prediction to current_input and remove the first timestep
                            pred_unsqueezed = pred.unsqueeze(1)  # (1, 1, 13)
                            current_input = torch.cat((current_input[:, 1:, :], pred_unsqueezed), dim=1)  # (1, n_past, 13)
                        # NOTE(review): if each pred is 2-D as the comment above says, cat on
                        # dim=1 yields (1, n_future * 13) rather than 3-D; the later reshape
                        # restores (1, n_future, num_features).
                        prediction = torch.cat(prediction, dim=1)  # (1, n_future, 13)
                    else:
                        print(f"Unknown model type for file: {model_file}")
                        continue
            except Exception as e:
                print(f"Failed to make predictions: {e}")
                continue

            # Convert tensors to numpy
            input_np = input_tensor.squeeze(0).cpu().numpy()  # (n_past, 13)
            target_np = target_tensor.cpu().numpy()  # (n_future, 13)
            prediction_np = prediction.cpu().numpy()  # (1, n_future, 13)

            # Handle multi-step predictions
            try:
                if model_type == 'transformer':
                    # Transformer: output_size should be n_future * num_features
                    if output_size != n_future * num_features:
                        print(f"Model's output_size ({output_size}) does not match n_future * num_features ({n_future * num_features}). Skipping model.")
                        continue
                    prediction_np = prediction_np.reshape(-1, n_future, num_features)
                    target_np = target_np.reshape(-1, n_future, num_features)
                    input_np = input_np.reshape(-1, num_features)
                elif model_type == 'single_step_lstm':
                    # LSTM: prediction_np is already (1, n_future, 13)
                    if output_size != num_features:
                        print(f"LSTM model's output_size ({output_size}) does not match num_features ({num_features}). Skipping model.")
                        continue
                    # Reshape to (-1, n_future, num_features)
                    prediction_np = prediction_np.reshape(-1, n_future, num_features)
                    target_np = target_np.reshape(-1, n_future, num_features)
                    input_np = input_np.reshape(-1, num_features)
                else:
                    print(f"Unexpected model type for {model_file}. Skipping model.")
                    continue
            except ValueError as ve:
                print(f"ValueError during reshaping: {ve}. Skipping model.")
                continue

            # Inverse transform
            # NOTE(review): seven arguments are passed here; confirm the
            # get_original_values in scope accepts n_future and num_features.
            try:
                inv_input, inv_target, inv_pred = get_original_values(
                    df.columns, input_np, target_np, prediction_np, scalers, n_future, num_features
                )
            except Exception as e:
                print(f"Error during inverse transformation: {str(e)}")
                continue

            print("Calculating scores...")
            try:
                model_score = calculate_score(inv_target, inv_pred)
            except Exception as e:
                print(f"Error calculating score: {e}")
                continue

            # Append and maintain top N (ascending sort, so lower scores rank first)
            top_models.append((model_file, model_score))
            top_models = sorted(top_models, key=lambda x: x[1])
            top_models = top_models[:top_n]

            print(f"Current top {len(top_models)} models:")
            for m, s in top_models:
                print(f"  Model: {m}, Score: {s:.2%}")

        return top_models

    # Collect checkpoint paths under the search root: all *.ckpt matches first, then *.pth
    model_files = []
    for pattern in ("**/*.ckpt", "**/*.pth"):
        model_files.extend(glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True))
    print(f"Total model files found: {len(model_files)}")

    # Keep only the first 20 paths (testing cap; adjust as needed)
    model_files = model_files[:20]

    # Rank the checkpoints and keep the 10 best
    top_n = 10
    top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

    # Final report of the surviving models
    print(f"\nTop {len(top_models)} models:")
    for model_file, score in top_models:
        print(f"Model: {model_file}, Score: {score:.2%}")





# Collect checkpoint paths under the search root: all *.ckpt matches first, then *.pth
model_files = []
for pattern in ("**/*.ckpt", "**/*.pth"):
    model_files.extend(glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True))
print(f"Total model files found: {len(model_files)}")

# Keep only the first 20 paths (testing cap; adjust as needed)
model_files = model_files[:20]

# Rank the checkpoints and keep the 10 best
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Final report of the surviving models
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback
import warnings

# Module-level Rich console. The code visible in this cell reports through
# print() and ic() and does not write to this object.
console = Console()

# Define your CryptoTransformer class
class CryptoTransformer(torch.nn.Module):
    """Encoder-decoder Transformer over batch-first feature sequences.

    Source and target share one input projection (``input_fc``); the decoder
    output is mapped to ``num_outputs`` values per timestep by ``output_fc``.

    Args:
        input_size: Number of features per timestep in ``src`` and ``tgt``.
        d_model: Width of the Transformer embeddings.
        nhead: Number of attention heads (must divide ``d_model``).
        num_encoder_layers: Encoder depth.
        num_decoder_layers: Decoder depth.
        dim_feedforward: Hidden width of the per-layer feed-forward network.
        dropout: Dropout probability inside the Transformer.
        activation: Activation name passed to ``torch.nn.Transformer``.
        n_future: Forecast horizon; stored on the instance, not used by ``forward``.
        num_outputs: Number of values produced per target timestep.
        max_seq_length: Accepted for signature compatibility; unused.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        # Sub-modules are registered in the original order so state_dict keys
        # (transformer.*, input_fc.*, output_fc.*) are unchanged.
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            # Callers in this file build (batch, seq, features) tensors. With the
            # default batch_first=False the differing src/tgt sequence lengths were
            # read as mismatched batch sizes and forward() raised.
            batch_first=True,
        )
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Run the model.

        Args:
            src: Tensor of shape (batch, src_len, input_size).
            tgt: Tensor of shape (batch, tgt_len, input_size).

        Returns:
            Tensor of shape (batch, tgt_len, num_outputs).
        """
        src = self.input_fc(src)
        tgt = self.input_fc(tgt)
        output = self.transformer(src, tgt)
        return self.output_fc(output)

# Define your CryptoLSTM class with corrected LayerNorm naming
class CryptoLSTM(torch.nn.Module):
    """Stacked batch-first LSTM followed by a per-timestep linear head.

    When ``has_layer_norm`` is true a ``LayerNorm`` over the hidden size is
    applied to the LSTM output before the head; otherwise no ``layer_norm``
    attribute exists on the instance.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_layer_norm=False):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = torch.nn.Linear(in_features=hidden_size, out_features=output_size)
        if has_layer_norm:
            self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the head output for every timestep of ``x``."""
        hidden_seq, _state = self.lstm(x)
        # The normalisation layer only exists when it was requested at construction
        norm = getattr(self, 'layer_norm', None)
        if norm is not None:
            hidden_seq = norm(hidden_seq)
        return self.fc(hidden_seq)

# Function to find a suitable number of attention heads
def find_nhead(d_model, max_nhead=16, min_head_dim=8):
    """Pick the largest head count that divides ``d_model`` evenly.

    Candidates are tried from ``max_nhead`` down to 1; the first one whose
    per-head width ``d_model // nhead`` is at least ``min_head_dim`` wins.
    Returns 1 when no candidate qualifies.
    """
    candidates = (
        heads
        for heads in reversed(range(1, max_nhead + 1))
        if d_model % heads == 0 and d_model // heads >= min_head_dim
    )
    return next(candidates, 1)  # single head is the fallback

# Function to inverse transform data
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers):
    """Undo the scaling and log transform for input, target and prediction.

    Column ``i`` of each array is passed through the matching scaler's
    ``inverse_transform`` and then ``np.exp``. Columns without a (truthy)
    entry in ``scalers`` are left out of all three frames.

    Args:
        df_columns: Column names, in the order of the arrays' last axis.
        input_np: Array indexed as [batch, timestep, feature]; only the last
            timestep is converted.
        target_np: Array indexed as [batch, timestep, feature]; flattened per column.
        prediction_np: Array indexed as [batch, timestep, feature]; flattened per column.
        scalers: Mapping of column name to a fitted scaler.

    Returns:
        tuple: ``(inv_input, inv_target, inv_pred)`` DataFrames.
    """
    inv_input, inv_target, inv_pred = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    for idx, name in enumerate(df_columns):
        col_scaler = scalers.get(name)
        if not col_scaler:
            continue

        def _restore(values, _scaler=col_scaler):
            # scaler expects a 2-D column vector; exp reverses the log transform
            return np.exp(_scaler.inverse_transform(values.reshape(-1, 1)).flatten())

        # Predictions and targets: every timestep, flattened across the batch
        inv_pred[name] = _restore(prediction_np[:, :, idx].flatten())
        inv_target[name] = _restore(target_np[:, :, idx].flatten())
        # Input: last timestep only
        inv_input[name] = _restore(input_np[:, -1, idx])

    return inv_input, inv_target, inv_pred

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """Return the mean over columns of the mean absolute percentage error.

    For each column of ``inv_target`` the two series are cut to their common
    length, the absolute relative error is computed, infinite/NaN entries
    (e.g. from a zero target) are discarded, and the remainder is averaged.
    The per-column averages are then averaged, printed and returned.
    """

    def _column_mape(name):
        actual = inv_target[name]
        forecast = inv_pred[name]
        common = min(len(actual), len(forecast))
        rel_err = np.abs((actual[:common] - forecast[:common]) / actual[:common])
        # Division by zero yields +/-inf; drop those together with NaN
        rel_err = rel_err.replace([np.inf, -np.inf], np.nan).dropna()
        return rel_err.mean()

    scores = {name: _column_mape(name) for name in inv_target.columns}
    overall_score = np.mean(list(scores.values()))
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Suppress specific warnings. Both filters match the warning text by regex:
#   - any warning whose message contains "does not have many workers"
#   - FutureWarning messages that mention torch.load
warnings.filterwarnings("ignore", message=".*does not have many workers.*")
warnings.filterwarnings("ignore", category=FutureWarning, message=".*torch.load.*")

def load_model(model_file, cfg, device='cpu'):
    """Rebuild a CryptoLSTM or CryptoTransformer from a checkpoint file.

    The architecture is inferred from the checkpoint's parameter names and
    shapes. Keys starting with ``lstm`` select the LSTM path; keys starting
    with ``transformer`` select the Transformer path. A Lightning-style
    ``{'state_dict': ...}`` wrapper and a leading ``model.`` key prefix are
    both unwrapped first.

    Args:
        model_file: Path to a ``.ckpt`` / ``.pth`` file.
        cfg: Configuration object. ``DROPOUT``, ``N_PAST``, ``N_FUTURE`` and
            ``NUM_FEATURES`` are read; ``HIDDEN_SIZE`` and (optionally)
            ``DIM_FEEDFORWARD`` serve as fallbacks for the Transformer.
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        ``(model, params)`` on success, where ``params`` has the keys
        ``input_size``, ``output_size``, ``n_past``, ``n_future`` and
        ``model_type`` ('lstm' or 'transformer'); ``None`` on any failure,
        including a checkpoint whose keys match none of the model's parameters.
    """
    import re  # function-scope import: only needed for state-dict key parsing

    def _count_layers(keys, marker):
        """Count distinct N over keys containing '<marker>.layers.N.'."""
        pattern = re.compile(re.escape(marker) + r'\.layers\.(\d+)\.')
        indices = set()
        for key in keys:
            hit = pattern.search(key)
            if hit:
                indices.add(int(hit.group(1)))
        return len(indices)

    def _first_with_suffix(weights, suffix):
        """Return the first tensor whose key ends with ``suffix``, else None."""
        return next((value for key, value in weights.items() if key.endswith(suffix)), None)

    def _load_weights(model, weights):
        """Copy weights non-strictly; return False when nothing matched."""
        missing_keys, unexpected_keys = model.load_state_dict(weights, strict=False)
        if missing_keys:
            print(f"Missing keys: {missing_keys}")
        if unexpected_keys:
            print(f"Unexpected keys: {unexpected_keys}")
        # strict=False tolerates partial matches, but if every parameter is
        # missing the model is still randomly initialised and scoring it would
        # be meaningless.
        if len(missing_keys) >= len(model.state_dict()):
            print(f"No parameters in {model_file} match the model definition. Skipping model.")
            return False
        return True

    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # SECURITY NOTE: torch.load unpickles the file, which can execute
        # arbitrary code. Only point this at checkpoints from a trusted source.
        state_dict = torch.load(model_file, map_location=device)

        # Lightning checkpoints keep the weights under 'state_dict'
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Strip one leading 'model.' prefix if any key carries it
        prefix = 'model.'
        if any(key.startswith(prefix) for key in state_dict):
            state_dict = {
                (key[len(prefix):] if key.startswith(prefix) else key): value
                for key, value in state_dict.items()
            }

        # Determine the model type based on key prefixes
        if any(key.startswith('lstm') for key in state_dict):
            print(f"Loading {model_file} as CryptoLSTM")

            # One forward-direction 'lstm.weight_ih_l<N>' tensor exists per layer
            layer_ids = set()
            for key in state_dict:
                hit = re.fullmatch(r'lstm\.weight_ih_l(\d+)', key)
                if hit:
                    layer_ids.add(int(hit.group(1)))
            num_layers = len(layer_ids)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            # Only the lowest layer's weight has the true input width in
            # shape[1]; deeper layers take hidden_size inputs.
            first_weight_ih = state_dict[f'lstm.weight_ih_l{min(layer_ids)}']
            hidden_size = first_weight_ih.shape[0] // 4  # four stacked LSTM gates
            input_size = first_weight_ih.shape[1]

            if 'fc.weight' in state_dict:
                output_size = state_dict['fc.weight'].shape[0]
            elif 'fc.bias' in state_dict:
                output_size = state_dict['fc.bias'].shape[0]
            else:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES

            # Infer if LayerNorm is present
            has_layer_norm = any('layer_norm' in key or 'ln' in key for key in state_dict)
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"input_size={input_size}, output_size={output_size}, LayerNorm={has_layer_norm}")

            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                # Inter-layer dropout does nothing for a single layer and only
                # triggers a torch warning, so it is disabled in that case.
                dropout=cfg.DROPOUT if num_layers > 1 else 0.0,
                output_size=output_size,
                has_layer_norm=has_layer_norm
            ).to(device)

            if not _load_weights(model, state_dict):
                return None

            return model, {'input_size': input_size, 'output_size': output_size,
                          'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE, 'model_type': 'lstm'}

        elif any(key.startswith('transformer') for key in state_dict):
            print(f"Loading {model_file} as CryptoTransformer")

            # d_model: the attention in-projection is (3*d_model, d_model).
            # Matching on the suffix covers both 'transformer_encoder.layers.0...'
            # and 'transformer.encoder.layers.0...' key layouts.
            in_proj = _first_with_suffix(state_dict, 'encoder.layers.0.self_attn.in_proj_weight')
            if in_proj is not None:
                d_model = in_proj.shape[1]
            elif 'output_fc.weight' in state_dict:
                d_model = state_dict['output_fc.weight'].shape[1]  # Linear(d_model, outputs)
            else:
                d_model = cfg.HIDDEN_SIZE  # Default value

            # nhead cannot be recovered from weight shapes; this is a guess
            nhead = find_nhead(d_model)
            if nhead == 1 and d_model < 8:
                print(f"Could not find a suitable nhead for d_model={d_model}. Skipping model.")
                return None

            # Count distinct layer indices (the number after '.layers.')
            num_encoder_layers = _count_layers(state_dict, 'encoder')
            num_decoder_layers = _count_layers(state_dict, 'decoder')

            # dim_feedforward: rows of the first feed-forward projection, if present
            linear1 = _first_with_suffix(state_dict, 'encoder.layers.0.linear1.weight')
            if linear1 is not None:
                dim_feedforward = linear1.shape[0]
            else:
                dim_feedforward = getattr(cfg, 'DIM_FEEDFORWARD', 2048)
            dropout = cfg.DROPOUT
            activation = 'gelu'  # Not stored in the checkpoint; defaulting to 'gelu'

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            if 'output_fc.weight' in state_dict:
                output_size = state_dict['output_fc.weight'].shape[0]
            else:
                output_size = cfg.NUM_FEATURES

            if 'input_fc.weight' in state_dict:
                input_size = state_dict['input_fc.weight'].shape[1]
            else:
                input_size = cfg.NUM_FEATURES

            model = CryptoTransformer(
                input_size=input_size,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=cfg.N_FUTURE,
                num_outputs=output_size,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            if not _load_weights(model, state_dict):
                return None

            return model, {
                'input_size': input_size,
                'output_size': output_size,
                'n_past': cfg.N_PAST,
                'n_future': cfg.N_FUTURE,
                'model_type': 'transformer'
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None

    except Exception as e:
        # Top-level boundary for one checkpoint: report and let the caller skip it
        print(f"Failed to load model {model_file}: {str(e)}")
        traceback.print_exc()
        return None

# Function to prepare input data
def prepare_input(input_data, device):
    """Convert ``input_data`` to a float32 tensor with a leading batch axis on ``device``."""
    as_float = torch.tensor(input_data, dtype=torch.float32)
    return as_float.unsqueeze(0).to(device)

# Function to get a random sample from the DataFrame
def get_random_sample(df, n_past, n_future, input_cols, target_cols):
    """Draw one random (input, target) window pair from ``df``.

    The input window is ``n_past`` consecutive rows of ``input_cols``; the
    target window is the ``n_future`` rows of ``target_cols`` that follow it.

    Args:
        df: DataFrame to sample from.
        n_past: Number of rows in the input window.
        n_future: Number of rows in the target window.
        input_cols: Columns selected for the input window.
        target_cols: Columns selected for the target window.

    Returns:
        tuple: ``(input_data, target_data)`` as NumPy arrays.

    Raises:
        ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
    """
    last_start = len(df) - n_past - n_future
    if last_start < 0:
        raise ValueError("DataFrame is too short for the given n_past and n_future")
    # np.random.randint excludes its upper bound, so +1 keeps the final window
    # reachable and lets a frame of exactly n_past + n_future rows be sampled.
    start_idx = np.random.randint(0, last_start + 1)
    split_idx = start_idx + n_past
    input_data = df[input_cols].iloc[start_idx:split_idx].values
    target_data = df[target_cols].iloc[split_idx:split_idx + n_future].values
    return input_data, target_data

# Function to convert tensors to numpy arrays
def convert_to_numpy(input_tensor, target, prediction):
    """Move the three tensors to the CPU and return them as NumPy arrays, in argument order."""
    input_np, target_np, prediction_np = (
        tensor.cpu().numpy() for tensor in (input_tensor, target, prediction)
    )
    return input_np, target_np, prediction_np

@dataclass
class Config:
    """Settings shared by the data-loading and checkpoint-evaluation code in this cell.

    Only annotated names are dataclass fields. ``num_cpus`` and ``NUM_WORKERS``
    have no annotation, so they are plain class attributes computed once when
    the class body executes and are not parameters of the generated ``__init__``.
    """
    VERSION_N: int = 1
    RECORDS_TO_LOAD: int = 1205040  # load_and_preprocess_data keeps this many trailing rows
    # NOTE(review): the earlier comments described N_PAST as "1 week" and N_FUTURE as
    # "1 day" of 10-minute intervals, but 108 and 24 ten-minute steps are 18 hours and
    # 4 hours respectively — confirm the sampling interval and the intended windows.
    N_PAST: int = 3 * 12 * 3  # 108 input timesteps
    N_FUTURE: int = 1 * 12 * 2  # 24 forecast timesteps
    BATCH_SIZE: int = 5000
    HIDDEN_SIZE: int = 512  # Adjusted to match inferred d_model=512
    NUM_LAYERS: int = 2
    DROPOUT: float = 0.2
    NUM_EPOCHS: int = 150
    HOT_RESTART: bool = True
    TRAIN_FIRST: bool = True
    EPOCH_TO_RESTART: int = 50
    BATCH_FACTOR: int = 81
    DEBUG_FREQ: int = 180
    # Unannotated on purpose or by accident: class attributes, not dataclass fields.
    num_cpus = multiprocessing.cpu_count()
    # More than 16 cores: a quarter of the cores minus four, but never below 4; otherwise 4.
    NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
    DEBUG_ON: bool = False
    DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'
    DATA_FILE: str = './data/fill_nan_df.csv'
    MODEL_PATH: str = ""  # overwritten per checkpoint by evaluate_and_get_top_models
    MODEL_SAVE_PATH: str = f'./yay'
    # Chosen once at class-definition time: CUDA when available, else CPU.
    DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    EPSILON: float = 1e-4
    PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"  # root searched for *.ckpt / *.pth
    # Additional parameters
    NUM_FEATURES: int = None  # To be set after loading data

# Single shared configuration instance used by the rest of this cell.
cfg = Config()

# Load and preprocess data
def load_and_preprocess_data(file_path: str, download_url: str = None):
    """Load the CSV at ``file_path`` and scale every column to [0, 1] in log space.

    If the file is missing it is downloaded from ``download_url`` first. The
    download is written to a temporary ``.part`` file and moved into place only
    once complete, so an interrupted transfer never leaves a truncated file
    that later runs would mistake for valid data.

    After loading, the last ``cfg.RECORDS_TO_LOAD`` rows are kept (``cfg`` is
    the module-level configuration), each column is log-transformed and then
    fitted with its own ``MinMaxScaler``.

    Args:
        file_path: Location of the CSV; must contain a ``timestamp`` column.
        download_url: Optional URL to fetch the file from when it is missing.

    Returns:
        tuple: ``(df, scalers)`` where ``df`` is the transformed DataFrame
        indexed by timestamp and ``scalers`` maps column name to fitted scaler.

    Raises:
        FileNotFoundError: File is missing and no ``download_url`` was given.
        requests.exceptions.RequestException: The download failed.
        ValueError: A column contains non-positive values.
    """
    ic("Starting data loading and preprocessing...")

    # Check if the file exists
    if not os.path.exists(file_path):
        ic(f"File {file_path} does not exist.")
        parent_dir = os.path.dirname(file_path)
        if parent_dir:  # os.makedirs('') raises for a bare filename
            os.makedirs(parent_dir, exist_ok=True)

        if not download_url:
            ic("Download URL not provided. Cannot download the file.")
            raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

        ic(f"Downloading file from {download_url}...")
        partial_path = f"{file_path}.part"
        try:
            # timeout bounds connect and each socket read, not the whole transfer
            with requests.get(download_url, stream=True, timeout=30) as response:
                response.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(partial_path, file_path)
            ic(f"File downloaded and saved to {file_path}")
        except requests.exceptions.RequestException as e:
            ic(f"Failed to download the file: {e}")
            raise
        finally:
            # Present only when the transfer did not complete
            if os.path.exists(partial_path):
                os.remove(partial_path)

    # Load the DataFrame
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df.set_index('timestamp', inplace=True)
    # copy() so the column assignments below write to an independent frame
    df = df.tail(cfg.RECORDS_TO_LOAD).copy()
    scalers = {}
    start_time_preprocess = time.time()

    for col in df.columns:
        # Ensure no non-positive values before log transform
        if (df[col] <= 0).any():
            raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

        # Apply natural logarithm transformation
        df[col] = np.log(df[col])

        # Fit one scaler per column so each can be inverted independently later
        scaler = MinMaxScaler()
        df[col] = scaler.fit_transform(df[[col]])
        scalers[col] = scaler

    ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
    ic(f"DataFrame shape: {df.shape}")

    return df, scalers

# Load the scaled dataset once, then record its column count on cfg;
# load_model and evaluate_and_get_top_models read cfg.NUM_FEATURES.
df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
cfg.NUM_FEATURES = df.shape[1]

def _rollout_last_step(model, input_tensor, n_future):
    """Forecast ``n_future`` steps by feeding each prediction back as input.

    The model's output for the final timestep is taken as the next-step
    prediction (a 2-D output is used as-is), appended to the window, and the
    oldest timestep is dropped. Returns a (batch, n_future, features) tensor.
    """
    window = input_tensor.clone()
    steps = []
    for _ in range(n_future):
        out = model(window)
        next_step = out[:, -1, :] if out.dim() == 3 else out
        steps.append(next_step.unsqueeze(1))
        # Raises if the output width differs from the input width; the caller
        # treats that as a failed prediction.
        window = torch.cat((window[:, 1:, :], steps[-1]), dim=1)
    return torch.cat(steps, dim=1)


def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
    """Score each checkpoint on a sampled window and keep the best ``top_n``.

    Every model is loaded with ``load_model``, run on one input window and
    scored with ``calculate_score`` (lower is better) on the inverse-transformed
    target and prediction. A failure at any stage prints a message and skips
    that model.

    Models that share ``(n_past, n_future, input_size)`` are evaluated on the
    same random window, so their scores are directly comparable.

    LSTM models are rolled forward autoregressively for ``n_future`` steps.
    CryptoLSTM returns one output per input timestep, so its raw output has
    ``n_past`` steps and could never pass the ``n_future`` shape check.
    NOTE(review): this assumes the LSTM checkpoints were trained for next-step
    prediction — confirm against the training code.

    Args:
        model_files: Iterable of checkpoint paths.
        cfg: Configuration object; ``DEVICE`` and ``NUM_FEATURES`` are read and
            ``MODEL_PATH`` is overwritten on every iteration (side effect).
        df: Preprocessed DataFrame the windows are drawn from.
        scalers: Per-column scalers used to undo the preprocessing.
        top_n: Maximum number of entries kept in the result.

    Returns:
        list[tuple]: ``(model_file, score)`` pairs sorted by ascending score,
        at most ``top_n`` long.
    """
    top_models = []
    shared_samples = {}  # (n_past, n_future, input_size) -> (input_data, target_data)
    for model_file in model_files:
        cfg.MODEL_PATH = model_file
        print(f"\nLoading model from {cfg.MODEL_PATH}")

        result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
        if result is None:
            print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
            continue

        model, params = result
        input_size = params['input_size']
        n_past = params['n_past']
        n_future = params['n_future']
        num_features = cfg.NUM_FEATURES
        model_type = params.get('model_type', 'unknown')

        if model_type not in ('transformer', 'lstm'):
            print(f"Unknown model type for file: {model_file}")
            continue

        # Get the input columns (the first input_size columns of df)
        input_cols = df.columns[:input_size]

        sample_key = (n_past, n_future, input_size)
        try:
            if sample_key not in shared_samples:
                shared_samples[sample_key] = get_random_sample(
                    df, n_past, n_future, input_cols, input_cols
                )
            input_data, target_data = shared_samples[sample_key]
        except Exception as e:
            print(f"Error getting random sample: {str(e)}")
            continue

        # Prepare the tensors, each with a leading batch dimension of 1
        try:
            input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
            target_tensor = torch.tensor(target_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
        except Exception as e:
            print(f"Error preparing tensors: {str(e)}")
            continue

        try:
            model.eval()
            with torch.no_grad():
                if model_type == 'transformer':
                    # For Transformer, prepare tgt_input with zeros
                    tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                    prediction = model(input_tensor, tgt_input)
                else:
                    prediction = _rollout_last_step(model, input_tensor, n_future)
        except Exception as e:
            print(f"Failed to make predictions: {e}")
            continue

        # Convert tensors to numpy
        input_np = input_tensor.cpu().numpy()  # (1, n_past, num_features)
        target_np = target_tensor.cpu().numpy()  # (1, n_future, num_features)
        prediction_np = prediction.cpu().numpy()  # Expected to be (1, n_future, num_features)

        # Handle predictions
        try:
            if prediction_np.ndim < 2 or prediction_np.shape[1] != n_future:
                print(f"Prediction output has unexpected number of timesteps: {prediction_np.shape[1:2]}")
                continue
            # Ensure shapes are correct
            input_np = input_np.reshape(-1, n_past, num_features)
            target_np = target_np.reshape(-1, n_future, num_features)
            prediction_np = prediction_np.reshape(-1, n_future, num_features)
        except ValueError as ve:
            print(f"ValueError during reshaping: {ve}. Skipping model.")
            continue

        # Inverse transform
        try:
            inv_input, inv_target, inv_pred = get_original_values(
                df.columns, input_np, target_np, prediction_np, scalers
            )
        except Exception as e:
            print(f"Error during inverse transformation: {str(e)}")
            continue

        print("Calculating scores...")
        try:
            model_score = calculate_score(inv_target, inv_pred)
        except Exception as e:
            print(f"Error calculating score: {e}")
            continue

        # NaN/inf compares inconsistently and would corrupt the sort below
        if not np.isfinite(model_score):
            print(f"Score for {model_file} is not finite ({model_score}). Skipping model.")
            continue

        # Append and maintain top N (ascending: lowest error first)
        top_models.append((model_file, model_score))
        top_models.sort(key=lambda entry: entry[1])
        del top_models[top_n:]

        print(f"Current top {len(top_models)} models:")
        for m, s in top_models:
            print(f"  Model: {m}, Score: {s:.2%}")

    return top_models

# Collect checkpoint paths under the search root: all *.ckpt matches first, then *.pth
model_files = []
for pattern in ("**/*.ckpt", "**/*.pth"):
    model_files.extend(glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True))
print(f"Total model files found: {len(model_files)}")

# Keep only the first 20 paths (testing cap; adjust as needed)
model_files = model_files[:20]

# Rank the checkpoints and keep the 10 best
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Final report of the surviving models
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
from rich.console import Console
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic  # Ensure icecream is installed (`pip install icecream`)
import traceback
import warnings

# Module-level Rich console. The code visible in this cell reports through
# print() and does not write to this object.
console = Console()

# Define your CryptoTransformer class
class CryptoTransformer(torch.nn.Module):
    """Encoder-decoder Transformer over batch-first feature sequences.

    Source and target share one input projection (``input_fc``); the decoder
    output is mapped to ``num_outputs`` values per timestep by ``output_fc``.

    Args:
        input_size: Number of features per timestep in ``src`` and ``tgt``.
        d_model: Width of the Transformer embeddings.
        nhead: Number of attention heads (must divide ``d_model``).
        num_encoder_layers: Encoder depth.
        num_decoder_layers: Decoder depth.
        dim_feedforward: Hidden width of the per-layer feed-forward network.
        dropout: Dropout probability inside the Transformer.
        activation: Activation name passed to ``torch.nn.Transformer``.
        n_future: Forecast horizon; stored on the instance, not used by ``forward``.
        num_outputs: Number of values produced per target timestep.
        max_seq_length: Accepted for signature compatibility; unused.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        # Sub-modules are registered in the original order so state_dict keys
        # (transformer.*, input_fc.*, output_fc.*) are unchanged.
        self.transformer = torch.nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            # Inputs are expected as (batch, seq, features). With the default
            # batch_first=False, src and tgt of different sequence lengths are
            # read as mismatched batch sizes and forward() raises.
            batch_first=True,
        )
        self.input_fc = torch.nn.Linear(input_size, d_model)
        self.output_fc = torch.nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Run the model.

        Args:
            src: Tensor of shape (batch, src_len, input_size).
            tgt: Tensor of shape (batch, tgt_len, input_size).

        Returns:
            Tensor of shape (batch, tgt_len, num_outputs).
        """
        src = self.input_fc(src)
        tgt = self.input_fc(tgt)
        output = self.transformer(src, tgt)
        return self.output_fc(output)

# Define your CryptoLSTM class with corrected LayerNorm naming
class CryptoLSTM(torch.nn.Module):
    """Stacked batch-first LSTM followed by a per-timestep linear head.

    When ``has_layer_norm`` is true a ``LayerNorm`` over the hidden size is
    applied to the LSTM output before the head; otherwise no ``layer_norm``
    attribute exists on the instance.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_layer_norm=False):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = torch.nn.Linear(in_features=hidden_size, out_features=output_size)
        if has_layer_norm:
            self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the head output for every timestep of ``x``."""
        hidden_seq, _state = self.lstm(x)
        # The normalisation layer only exists when it was requested at construction
        norm = getattr(self, 'layer_norm', None)
        if norm is not None:
            hidden_seq = norm(hidden_seq)
        return self.fc(hidden_seq)

# Function to find a suitable number of attention heads
def find_nhead(d_model, max_nhead=16, min_head_dim=8):
    """Pick the largest head count that divides ``d_model`` evenly.

    Candidates are tried from ``max_nhead`` down to 1; the first one whose
    per-head width ``d_model // nhead`` is at least ``min_head_dim`` wins.
    Returns 1 when no candidate qualifies.
    """
    candidates = (
        heads
        for heads in reversed(range(1, max_nhead + 1))
        if d_model % heads == 0 and d_model // heads >= min_head_dim
    )
    return next(candidates, 1)  # single head is the fallback

# Function to inverse transform data
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers):
    """Undo the scaling and log transform for input, target and prediction.

    Column ``i`` of each array is passed through the matching scaler's
    ``inverse_transform`` and then ``np.exp``. Columns without a (truthy)
    entry in ``scalers`` are left out of all three frames.

    Args:
        df_columns: Column names, in the order of the arrays' last axis.
        input_np: Array indexed as [batch, timestep, feature]; only the last
            timestep is converted.
        target_np: Array indexed as [batch, timestep, feature]; flattened per column.
        prediction_np: Array indexed as [batch, timestep, feature]; flattened per column.
        scalers: Mapping of column name to a fitted scaler.

    Returns:
        tuple: ``(inv_input, inv_target, inv_pred)`` DataFrames.
    """
    inv_input, inv_target, inv_pred = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    for idx, name in enumerate(df_columns):
        col_scaler = scalers.get(name)
        if not col_scaler:
            continue

        def _restore(values, _scaler=col_scaler):
            # scaler expects a 2-D column vector; exp reverses the log transform
            return np.exp(_scaler.inverse_transform(values.reshape(-1, 1)).flatten())

        # Predictions and targets: every timestep, flattened across the batch
        inv_pred[name] = _restore(prediction_np[:, :, idx].flatten())
        inv_target[name] = _restore(target_np[:, :, idx].flatten())
        # Input: last timestep only
        inv_input[name] = _restore(input_np[:, -1, idx])

    return inv_input, inv_target, inv_pred

# Function to calculate the score
def calculate_score(inv_target, inv_pred):
    """Return the mean over columns of the mean absolute percentage error.

    For each column of ``inv_target`` the two series are cut to their common
    length, the absolute relative error is computed, infinite/NaN entries
    (e.g. from a zero target) are discarded, and the remainder is averaged.
    The per-column averages are then averaged, printed and returned.
    """

    def _column_mape(name):
        actual = inv_target[name]
        forecast = inv_pred[name]
        common = min(len(actual), len(forecast))
        rel_err = np.abs((actual[:common] - forecast[:common]) / actual[:common])
        # Division by zero yields +/-inf; drop those together with NaN
        rel_err = rel_err.replace([np.inf, -np.inf], np.nan).dropna()
        return rel_err.mean()

    scores = {name: _column_mape(name) for name in inv_target.columns}
    overall_score = np.mean(list(scores.values()))
    print(f"Overall Score: {overall_score:.2%}")
    return overall_score

# Suppress specific warnings. Both filters match the warning text by regex:
#   - any warning whose message contains "does not have many workers"
#   - FutureWarning messages that mention torch.load
warnings.filterwarnings("ignore", message=".*does not have many workers.*")
warnings.filterwarnings("ignore", category=FutureWarning, message=".*torch.load.*")

def load_model(model_file, cfg, device='cpu'):
    """Rebuild a CryptoLSTM or CryptoTransformer from a checkpoint file.

    The architecture is chosen from the state-dict key prefixes (``lstm`` or
    ``transformer``). Hyperparameters are inferred from tensor shapes where
    the checkpoint provides them and taken from ``cfg`` otherwise.

    Args:
        model_file: Path to a file written by ``torch.save``; either a plain
            state dict or a checkpoint dict holding one under ``'state_dict'``.
        cfg: Configuration object providing ``DROPOUT``, ``N_PAST``,
            ``N_FUTURE``, ``NUM_FEATURES``, ``HIDDEN_SIZE`` and, optionally,
            ``DIM_FEEDFORWARD``.
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        ``(model, params)`` on success, where ``params`` is a dict with the
        keys ``input_size``, ``output_size``, ``n_past``, ``n_future`` and
        ``model_type``. ``None`` when the file is missing, the format is not
        recognised or anything fails while loading (the error is printed
        together with a traceback instead of being raised).
    """
    # Imported locally so the error handler below keeps working even when the
    # surrounding module never imported ``traceback``.
    import traceback

    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # NOTE(security): torch.load unpickles the file; only use it on
        # checkpoints from a trusted source.
        state_dict = torch.load(model_file, map_location=device)

        # Lightning checkpoints nest the weights under 'state_dict'.
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Strip a leading 'model.' from every key that has one.
        prefix = 'model.'
        if any(key.startswith(prefix) for key in state_dict):
            state_dict = {
                (k[len(prefix):] if k.startswith(prefix) else k): v
                for k, v in state_dict.items()
            }

        # Determine the model type based on key prefixes
        if any(key.startswith('lstm') for key in state_dict):
            print(f"Loading {model_file} as CryptoLSTM")

            # One input-to-hidden weight matrix exists per LSTM layer.
            lstm_weight_ih_keys = [k for k in state_dict if k.startswith('lstm.weight_ih_l')]
            num_layers = len(lstm_weight_ih_keys)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            sample_weight_ih = state_dict[lstm_weight_ih_keys[0]]
            # The four LSTM gates are stacked along the first axis.
            hidden_size = sample_weight_ih.shape[0] // 4
            input_size = sample_weight_ih.shape[1]

            if 'fc.weight' in state_dict:
                output_size = state_dict['fc.weight'].shape[0]
            elif 'fc.bias' in state_dict:
                output_size = state_dict['fc.bias'].shape[0]
            else:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES

            # LayerNorm is assumed present when any key contains 'layer_norm' or 'ln'.
            has_layer_norm = any('layer_norm' in key or 'ln' in key for key in state_dict)
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"input_size={input_size}, output_size={output_size}, LayerNorm={has_layer_norm}")

            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=cfg.DROPOUT,
                output_size=output_size,
                has_layer_norm=has_layer_norm
            ).to(device)

            # strict=False tolerates missing/extra keys; report them instead.
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {'input_size': input_size, 'output_size': output_size,
                           'n_past': cfg.N_PAST, 'n_future': cfg.N_FUTURE, 'model_type': 'lstm'}

        elif any(key.startswith('transformer') for key in state_dict):
            print(f"Loading {model_file} as CryptoTransformer")

            def first_shape(fragment, axis, default):
                # Size along ``axis`` of the first tensor whose key contains
                # ``fragment``; ``default`` when no such key exists.
                for key, tensor in state_dict.items():
                    if fragment in key:
                        return tensor.shape[axis]
                return default

            def count_layers(marker):
                # The layer index is the path component right after the marker
                # ('...layers.<idx>.<submodule>...'), wherever the marker sits.
                return len({
                    key.split(marker, 1)[1].split('.', 1)[0]
                    for key in state_dict
                    if marker in key
                })

            # in_proj_weight has shape (3 * d_model, d_model).
            d_model = first_shape(
                'transformer_encoder.layers.0.self_attn.in_proj_weight', 1, cfg.HIDDEN_SIZE
            )

            # Find a suitable nhead
            nhead = find_nhead(d_model)
            if nhead == 1 and d_model < 8:
                print(f"Could not find a suitable nhead for d_model={d_model}. Skipping model.")
                return None

            num_encoder_layers = count_layers('transformer_encoder.layers.')
            num_decoder_layers = count_layers('transformer_decoder.layers.')

            # linear1.weight has shape (dim_feedforward, d_model); fall back to
            # the configured value (or 2048) when the checkpoint lacks it.
            dim_feedforward = first_shape(
                'transformer_encoder.layers.0.linear1.weight', 0,
                getattr(cfg, 'DIM_FEEDFORWARD', 2048)
            )
            dropout = cfg.DROPOUT
            activation = 'gelu'  # Defaulting to 'gelu'

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            if 'output_fc.weight' in state_dict:
                output_size = state_dict['output_fc.weight'].shape[0]
            else:
                output_size = cfg.NUM_FEATURES

            model = CryptoTransformer(
                input_size=cfg.NUM_FEATURES,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=cfg.N_FUTURE,
                num_outputs=output_size,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {
                'input_size': cfg.NUM_FEATURES,
                'output_size': output_size,
                'n_past': cfg.N_PAST,
                'n_future': cfg.N_FUTURE,
                'model_type': 'transformer'
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None

    except Exception as e:
        # Best effort: one bad checkpoint must not abort a batch evaluation.
        print(f"Failed to load model {model_file}: {str(e)}")
        traceback.print_exc()
        return None

# Function to prepare input data
def prepare_input(input_data, device):
    """Return ``input_data`` as a float32 tensor with a leading batch axis on ``device``."""
    batched = torch.tensor(input_data, dtype=torch.float32)[None, ...]
    return batched.to(device)

# Function to get a random sample from the DataFrame
def get_random_sample(df, n_past, n_future, input_cols, target_cols):
    """Draw one random contiguous (input, target) window pair from ``df``.

    Args:
        df: DataFrame to sample rows from.
        n_past: Number of consecutive rows in the input window.
        n_future: Number of rows in the target window, which starts right
            after the input window.
        input_cols: Columns selected for the input window.
        target_cols: Columns selected for the target window.

    Returns:
        Tuple ``(input_data, target_data)`` of NumPy arrays with ``n_past`` and
        ``n_future`` rows respectively.

    Raises:
        ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
    """
    # Largest start index whose input + target windows still fit in the frame.
    last_start = len(df) - n_past - n_future
    if last_start < 0:
        raise ValueError("DataFrame is too short for the given n_past and n_future")
    # randint's upper bound is exclusive, so +1 keeps the last window reachable
    # (and makes a frame of exactly n_past + n_future rows usable).
    start_idx = np.random.randint(0, last_start + 1)
    split = start_idx + n_past
    input_data = df[input_cols].iloc[start_idx:split].values
    target_data = df[target_cols].iloc[split:split + n_future].values
    return input_data, target_data

# Function to convert tensors to numpy arrays
def convert_to_numpy(input_tensor, target, prediction):
    """Move the three tensors to the CPU and return them as NumPy arrays, in argument order."""
    input_np, target_np, prediction_np = (
        tensor.cpu().numpy() for tensor in (input_tensor, target, prediction)
    )
    return input_np, target_np, prediction_np

@dataclass
class Config:
    """Settings used by the checkpoint-evaluation code in this cell.

    Only the annotated names are dataclass fields (and therefore ``__init__``
    parameters). ``num_cpus`` and ``NUM_WORKERS`` carry no annotation, so they
    are plain class attributes computed once when the class body executes.
    """
    VERSION_N: int = 1
    # Number of trailing rows of the CSV kept by load_and_preprocess_data.
    RECORDS_TO_LOAD: int = 1205040
    # 108 input timesteps per sample.
    # NOTE(review): the previous comment said "1 week of 10-minute intervals",
    # but 108 ten-minute steps is 18 hours - confirm the sampling interval.
    N_PAST: int = 3 * 12 * 3
    # 24 predicted timesteps per sample.
    # NOTE(review): the previous comment said "1 day of 10-minute intervals",
    # but 24 ten-minute steps is 4 hours - confirm the sampling interval.
    N_FUTURE: int = 1 * 12 * 2
    BATCH_SIZE: int = 5000
    # Used as d_model for transformer checkpoints when it cannot be inferred.
    HIDDEN_SIZE: int = 512  # Adjusted to match inferred d_model=512
    NUM_LAYERS: int = 2
    DROPOUT: float = 0.2
    NUM_EPOCHS: int = 150
    HOT_RESTART: bool = True
    TRAIN_FIRST: bool = True
    EPOCH_TO_RESTART: int = 50
    BATCH_FACTOR: int = 81
    DEBUG_FREQ: int = 180
    # Not fields: evaluated once at class-definition time.
    num_cpus = multiprocessing.cpu_count()
    # A quarter of the cores minus four (at least 4) on machines with more
    # than 16 cores, otherwise 4.
    NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
    DEBUG_ON: bool = False
    # Where the dataset is downloaded from when DATA_FILE does not exist.
    DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'
    DATA_FILE: str = './data/fill_nan_df.csv'
    # Overwritten with each checkpoint path while models are being evaluated.
    MODEL_PATH: str = ""
    # The f-string has no placeholders; the value is simply './yay'.
    MODEL_SAVE_PATH: str = f'./yay'
    # Chosen once, when the class is defined: CUDA if available, else CPU.
    DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    EPSILON: float = 1e-4
    # Root directory searched recursively for *.ckpt / *.pth checkpoints.
    PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"
    # Additional parameters
    # None until the data is loaded; then set to the DataFrame's column count.
    NUM_FEATURES: int = None  # To be set after loading data

# Module-level configuration instance with default values; read as a global by
# load_and_preprocess_data and passed explicitly to the evaluation helpers.
cfg = Config()

# Load and preprocess data
def load_and_preprocess_data(file_path: str, download_url: str = None):
    """Load the CSV dataset, log-transform it and min-max scale every column.

    When ``file_path`` does not exist it is first downloaded from
    ``download_url``. Only the last ``cfg.RECORDS_TO_LOAD`` rows are kept
    (``cfg`` is the module-level configuration object).

    Args:
        file_path: Location of the CSV file; it must contain a ``timestamp``
            column, which becomes the index.
        download_url: Optional URL to fetch the file from when it is missing.

    Returns:
        Tuple ``(df, scalers)``: the transformed DataFrame and a dict mapping
        each column name to the ``MinMaxScaler`` fitted on its log values.

    Raises:
        FileNotFoundError: If the file is missing and no URL was given.
        requests.exceptions.RequestException: If the download fails.
        ValueError: If a column contains non-positive values.
    """
    ic("Starting data loading and preprocessing...")

    # Check if the file exists
    if not os.path.exists(file_path):
        ic(f"File {file_path} does not exist.")
        # dirname is '' for a bare filename, and os.makedirs('') would raise.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if download_url:
            ic(f"Downloading file from {download_url}...")
            # Stream into a temporary sibling and rename on success, so an
            # interrupted download never leaves a truncated file that a later
            # run would mistake for the complete dataset.
            partial_path = f"{file_path}.part"
            try:
                # The context manager releases the streamed connection.
                with requests.get(download_url, stream=True) as response:
                    response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(partial_path, file_path)
                ic(f"File downloaded and saved to {file_path}")
            except requests.exceptions.RequestException as e:
                ic(f"Failed to download the file: {e}")
                raise
            finally:
                # Only present here if the download did not complete.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        else:
            ic("Download URL not provided. Cannot download the file.")
            raise FileNotFoundError(f"The file {file_path} does not exist and no download URL was provided.")

    # Load the DataFrame
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df.set_index('timestamp', inplace=True)
    # Copy so the column assignments below write to an independent frame
    # rather than to a slice of the full one.
    df = df.tail(cfg.RECORDS_TO_LOAD).copy()
    scalers = {}
    start_time_preprocess = time.time()

    for col in df.columns:
        # Ensure no non-positive values before log transform
        if (df[col] <= 0).any():
            raise ValueError(f"Column {col} contains non-positive values, cannot apply log transform.")

        # Apply natural logarithm transformation
        df[col] = np.log(df[col])

        # Fit one scaler per column and keep it so values can be mapped back later.
        scaler = MinMaxScaler()
        df[col] = scaler.fit_transform(df[[col]])
        scalers[col] = scaler

    ic(f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds")
    ic(f"DataFrame shape: {df.shape}")

    return df, scalers

# Load the dataset (downloading it when the local file is missing) and keep
# the per-column scalers for the later inverse transform.
df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
# One feature per DataFrame column.
cfg.NUM_FEATURES = df.shape[1]

def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
    """Score every checkpoint on one random window and keep the ``top_n`` best.

    Each file is rebuilt with ``load_model``, fed a random ``n_past``-step
    window from ``df`` and compared with the following ``n_future`` steps via
    ``calculate_score`` (lower is better). A failure at any stage is printed
    and that checkpoint is skipped.

    Args:
        model_files: Iterable of checkpoint paths.
        cfg: Configuration object; ``MODEL_PATH`` is overwritten with each
            path, ``DEVICE`` and ``NUM_FEATURES`` are read.
        df: Preprocessed DataFrame the windows are sampled from.
        scalers: Mapping of column name to the scaler used for that column.
        top_n: Maximum number of entries to keep.

    Returns:
        List of ``(model_file, score)`` tuples sorted by ascending score, at
        most ``top_n`` long.
    """
    top_models = []
    for model_file in model_files:
        cfg.MODEL_PATH = model_file
        print(f"\nLoading model from {cfg.MODEL_PATH}")

        result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
        if result is None:
            print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
            continue

        model, params = result
        input_size = params['input_size']
        n_past = params['n_past']
        n_future = params['n_future']
        num_features = cfg.NUM_FEATURES
        model_type = params.get('model_type', 'unknown')

        # The model consumes the first `input_size` columns; the same columns
        # serve as targets.
        input_cols = df.columns[:input_size]

        try:
            input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, input_cols)
        except Exception as e:
            print(f"Error getting random sample: {str(e)}")
            continue

        # Add a batch axis of size 1 and move both windows to the device.
        try:
            input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
            target_tensor = torch.tensor(target_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
        except Exception as e:
            print(f"Error preparing tensors: {str(e)}")
            continue

        try:
            model.eval()
            with torch.no_grad():
                if model_type == 'transformer':
                    # The decoder input is all zeros.
                    tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                    prediction = model(input_tensor, tgt_input)
                elif model_type == 'lstm':
                    prediction = model(input_tensor)
                else:
                    print(f"Unknown model type for file: {model_file}")
                    continue
        except Exception as e:
            print(f"Failed to make predictions: {e}")
            continue

        # Convert tensors to numpy
        input_np = input_tensor.cpu().numpy()
        target_np = target_tensor.cpu().numpy()
        prediction_np = prediction.cpu().numpy()

        try:
            if prediction_np.shape[1] != n_future:
                print(f"Prediction output has unexpected number of timesteps: {prediction_np.shape[1]}")
                continue
            # Reshape fails (and the model is skipped) when the feature count
            # does not match the dataset.
            input_np = input_np.reshape(-1, n_past, num_features)
            target_np = target_np.reshape(-1, n_future, num_features)
            prediction_np = prediction_np.reshape(-1, n_future, num_features)
        except ValueError as ve:
            print(f"ValueError during reshaping: {ve}. Skipping model.")
            continue

        # Inverse transform
        try:
            inv_input, inv_target, inv_pred = get_original_values(
                df.columns, input_np, target_np, prediction_np, scalers
            )
        except Exception as e:
            print(f"Error during inverse transformation: {str(e)}")
            continue

        print("Calculating scores...")
        try:
            model_score = calculate_score(inv_target, inv_pred)
        except Exception as e:
            print(f"Error calculating score: {e}")
            continue

        # NaN compares False against everything, so a NaN score would leave
        # the sorted ranking in an arbitrary order; inf is equally useless.
        if not np.isfinite(model_score):
            print(f"Skipping model {model_file}: score is not finite ({model_score})")
            continue

        # Insert, re-rank by ascending score and trim to the best top_n.
        top_models.append((model_file, model_score))
        top_models = sorted(top_models, key=lambda entry: entry[1])[:top_n]

        print(f"Current top {len(top_models)} models:")
        for m, s in top_models:
            print(f"  Model: {m}, Score: {s:.2%}")

    return top_models

# Gather checkpoints recursively below the search root: every *.ckpt match
# first, followed by every *.pth match.
model_files = [
    path
    for pattern in ("**/*.ckpt", "**/*.pth")
    for path in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
]
print(f"Total model files found: {len(model_files)}")

# Keep only the first 20 files for a quick test run (adjust as needed).
model_files = model_files[:20]

# Rank the checkpoints and keep the best `top_n`.
top_n = 10
top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

# Final report of the ranking.
print(f"\nTop {len(top_models)} models:")
for model_file, score in top_models:
    print(f"Model: {model_file}, Score: {score:.2%}")


In [None]:
import torch
import pandas as pd
from dataclasses import dataclass
import multiprocessing
import glob
import numpy as np
import os
import time
import requests
from sklearn.preprocessing import MinMaxScaler
from icecream import ic
# Silence two known-noisy warnings.
# NOTE(review): `warnings` is not imported in this cell; it relies on an
# earlier cell having imported it - confirm.
# NOTE(review): in the first pattern the trailing `*` applies to the final
# "r" only; `.*` was probably intended.
warnings.filterwarnings("ignore", message=".*enable_nested_tensor*")
warnings.filterwarnings("ignore", category=FutureWarning, message=".*torch.load.*")

# Initialize the Rich console and bind it to the module-level name `console`.
from rich.console import Console
console = Console()

# Define your CryptoTransformer class
class CryptoTransformer(torch.nn.Module):
    """Encoder-decoder transformer with learned positional embeddings.

    The attribute names (``transformer_encoder``, ``transformer_decoder``,
    ``pos_encoder``, ``pos_decoder``, ``input_fc``, ``output_fc``) determine
    the state-dict keys and are chosen to match the checkpoints being loaded.
    """

    def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, n_future, num_outputs, max_seq_length):
        super().__init__()
        nn = torch.nn
        # Encoder and decoder layers share the same hyperparameters.
        layer_kwargs = dict(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
        )

        encoder_layer = nn.TransformerEncoderLayer(**layer_kwargs)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

        decoder_layer = nn.TransformerDecoderLayer(**layer_kwargs)
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)

        # One learned embedding per position, separately for source and target.
        self.pos_encoder = nn.Embedding(max_seq_length, d_model)
        self.pos_decoder = nn.Embedding(max_seq_length, d_model)
        # The same projection maps both source and target features to d_model.
        self.input_fc = nn.Linear(input_size, d_model)
        self.output_fc = nn.Linear(d_model, num_outputs)
        self.n_future = n_future

    def forward(self, src, tgt):
        """Run the model on batch-first ``src`` and ``tgt``; returns a batch-first tensor of ``num_outputs`` features per target step."""
        src_pos = torch.arange(src.size(1), device=src.device)[None, :]
        tgt_pos = torch.arange(tgt.size(1), device=tgt.device)[None, :]

        encoder_in = self.input_fc(src) + self.pos_encoder(src_pos)
        decoder_in = self.input_fc(tgt) + self.pos_decoder(tgt_pos)

        # The transformer stacks are sequence-first, so swap the batch and
        # sequence axes on the way in and again on the way out.
        memory = self.transformer_encoder(encoder_in.transpose(0, 1))
        decoded = self.transformer_decoder(decoder_in.transpose(0, 1), memory)
        return self.output_fc(decoded.transpose(0, 1))

# Define your CryptoLSTM class with corrected LayerNorm naming
class CryptoLSTM(torch.nn.Module):
    """Batch-first LSTM followed by an optional LayerNorm and a linear head.

    The head is applied to the LSTM output at every timestep. ``layer_norm``
    only exists as an attribute when ``has_layer_norm`` is true.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size, has_layer_norm=False):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(in_features=hidden_size, out_features=output_size)
        if has_layer_norm:
            self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Return the head's output for every timestep of ``x``."""
        features, _state = self.lstm(x)
        norm = getattr(self, 'layer_norm', None)
        if norm is not None:
            features = norm(features)
        return self.fc(features)
def adjust_state_dict_keys(state_dict):
    """Return a new dict with a leading ``'model.'`` removed from every key.

    Only the prefix is stripped; later occurrences of ``'model.'`` inside a
    key (e.g. ``'model.submodel.weight'``) are left untouched. Keys without
    the prefix are copied unchanged, and the input mapping is not modified.

    Args:
        state_dict: Mapping of parameter names to values.

    Returns:
        A new dict with the adjusted keys and the same values.
    """
    prefix = 'model.'
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }
def load_model(model_file, cfg, device='cpu'):
    """Rebuild a CryptoLSTM or CryptoTransformer from a checkpoint file.

    The architecture is chosen from the state-dict key prefixes (``lstm`` or
    ``transformer``). Sizes are inferred from tensor shapes where the
    checkpoint provides them and taken from ``cfg`` otherwise.

    Args:
        model_file: Path to a file written by ``torch.save``; either a plain
            state dict or a checkpoint dict holding one under ``'state_dict'``.
        cfg: Configuration object providing ``DROPOUT``, ``N_PAST``,
            ``N_FUTURE``, ``NUM_FEATURES``, ``HIDDEN_SIZE`` and, optionally,
            ``DIM_FEEDFORWARD``.
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        ``(model, params)`` on success, where ``params`` is a dict with the
        keys ``input_size``, ``output_size``, ``n_past``, ``n_future`` and
        ``model_type``. ``None`` when the file is missing or truncated, the
        format is not recognised, an LSTM head size is not a multiple of the
        input size, or anything else fails (the error is printed together
        with a traceback instead of being raised).
    """
    # Imported locally so the error handler below keeps working even when the
    # surrounding cell never imported ``traceback``.
    import traceback

    try:
        if not os.path.exists(model_file):
            print(f"File not found: {model_file}")
            return None

        # NOTE(security): torch.load unpickles the file; only use it on
        # checkpoints from a trusted source.
        try:
            state_dict = torch.load(model_file, map_location=device)
        except EOFError:
            # Empty or truncated file.
            print(f"Failed to load model {model_file}: Ran out of input")
            return None

        # Lightning checkpoints nest the weights under 'state_dict'.
        if isinstance(state_dict, dict) and 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']

        # Strip only a *leading* 'model.'; str.replace would also delete the
        # text from the middle of keys such as 'submodel.weight'.
        prefix = 'model.'
        state_dict = {
            (k[len(prefix):] if k.startswith(prefix) else k): v
            for k, v in state_dict.items()
        }

        # Determine the model type based on key prefixes
        if any(key.startswith('lstm') for key in state_dict):
            print(f"Loading {model_file} as CryptoLSTM")

            # One input-to-hidden weight matrix exists per LSTM layer.
            lstm_weight_ih_keys = [k for k in state_dict if k.startswith('lstm.weight_ih_l')]
            num_layers = len(lstm_weight_ih_keys)
            if num_layers == 0:
                print(f"No LSTM layers found in {model_file}. Keys: {list(state_dict.keys())}")
                return None

            sample_weight_ih = state_dict[lstm_weight_ih_keys[0]]
            # The four LSTM gates are stacked along the first axis.
            hidden_size = sample_weight_ih.shape[0] // 4
            # Second axis of the first layer's matrix is the feature count.
            n_features = sample_weight_ih.shape[1]

            # Head size from the weight, or from the bias when only that exists.
            if 'fc.weight' in state_dict:
                output_size = state_dict['fc.weight'].shape[0]
            elif 'fc.bias' in state_dict:
                output_size = state_dict['fc.bias'].shape[0]
            else:
                output_size = None

            if output_size is None:
                print(f"Could not infer output_size for LSTM in {model_file}. Using default: {cfg.NUM_FEATURES}")
                output_size = cfg.NUM_FEATURES
                n_future = cfg.N_FUTURE  # Fall back to default timesteps
            elif output_size % n_features != 0:
                # Applied to both the weight and the bias path, so n_future is
                # never derived from a head that is not a whole number of steps.
                print(f"Output size {output_size} is not divisible by input size {n_features}. Skipping model.")
                return None
            else:
                n_future = output_size // n_features

            # LayerNorm is assumed present when any key contains 'layer_norm' or 'ln'.
            has_layer_norm = any('layer_norm' in key or 'ln' in key for key in state_dict)
            print(f"Inferred LSTM parameters: hidden_size={hidden_size}, num_layers={num_layers}, "
                  f"n_features={n_features}, output_size={output_size}, LayerNorm={has_layer_norm}, n_future={n_future}")

            model = CryptoLSTM(
                input_size=n_features,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=cfg.DROPOUT,
                output_size=output_size,
                has_layer_norm=has_layer_norm
            ).to(device)

            # strict=False tolerates missing/extra keys; report them instead.
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {'input_size': n_features, 'output_size': output_size,
                           'n_past': cfg.N_PAST, 'n_future': n_future, 'model_type': 'lstm'}

        elif any(key.startswith('transformer') for key in state_dict):
            print(f"Loading {model_file} as CryptoTransformer")

            def shape_or(name, axis, default):
                # Size of tensor ``name`` along ``axis``; ``default`` if absent.
                tensor = state_dict.get(name)
                return default if tensor is None else tensor.shape[axis]

            # Prefer the sizes stored in the checkpoint: load_state_dict
            # rejects shape mismatches even with strict=False.
            # in_proj_weight is (3 * d_model, d_model);
            # linear1.weight is (dim_feedforward, d_model).
            d_model = shape_or(
                'transformer_encoder.layers.0.self_attn.in_proj_weight', 1, cfg.HIDDEN_SIZE
            )
            nhead = find_nhead(d_model)
            # Keys look like '<stack>.layers.<idx>.<submodule>...'.
            num_encoder_layers = len({
                k.split('.')[2] for k in state_dict if k.startswith('transformer_encoder.layers.')
            })
            num_decoder_layers = len({
                k.split('.')[2] for k in state_dict if k.startswith('transformer_decoder.layers.')
            })
            dim_feedforward = shape_or(
                'transformer_encoder.layers.0.linear1.weight', 0,
                getattr(cfg, 'DIM_FEEDFORWARD', 2048)
            )
            dropout = cfg.DROPOUT
            activation = 'gelu'

            print(f"Inferred Transformer parameters: d_model={d_model}, nhead={nhead}, "
                  f"num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, "
                  f"dim_feedforward={dim_feedforward}, dropout={dropout}, activation={activation}")

            input_size = cfg.NUM_FEATURES
            output_size = shape_or('output_fc.weight', 0, cfg.NUM_FEATURES)

            model = CryptoTransformer(
                input_size=input_size,
                d_model=d_model,
                nhead=nhead,
                num_encoder_layers=num_encoder_layers,
                num_decoder_layers=num_decoder_layers,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                n_future=cfg.N_FUTURE,
                num_outputs=output_size,
                max_seq_length=cfg.N_PAST + cfg.N_FUTURE
            ).to(device)

            # Report key mismatches the same way the LSTM branch does.
            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
            if missing_keys:
                print(f"Missing keys: {missing_keys}")
            if unexpected_keys:
                print(f"Unexpected keys: {unexpected_keys}")

            return model, {
                'input_size': input_size,
                'output_size': output_size,
                'n_past': cfg.N_PAST,
                'n_future': cfg.N_FUTURE,
                'model_type': 'transformer'
            }

        else:
            print(f"Unrecognized model format for {model_file}. Keys: {list(state_dict.keys())}")
            return None

    except Exception as e:
        # Best effort: one bad checkpoint must not abort a batch evaluation.
        print(f"Failed to load model {model_file}: {str(e)}")
        traceback.print_exc()
        return None

# The rest of the code remains largely the same, with adjustments to input preparation and model invocation.

# Function to prepare input data
def prepare_input(input_data, device):
    """Build a float32 tensor from ``input_data``, add a batch axis in front and move it to ``device``."""
    as_float = torch.tensor(input_data, dtype=torch.float32)
    with_batch_axis = torch.unsqueeze(as_float, 0)
    return with_batch_axis.to(device)

# Function to get a random sample from the DataFrame
def get_random_sample(df, n_past, n_future, input_cols, target_cols):
    """Draw one random contiguous (input, target) window pair from ``df``.

    Args:
        df: DataFrame to sample rows from.
        n_past: Number of consecutive rows in the input window.
        n_future: Number of rows in the target window, which starts right
            after the input window.
        input_cols: Columns selected for the input window.
        target_cols: Columns selected for the target window.

    Returns:
        Tuple ``(input_data, target_data)`` of NumPy arrays with ``n_past`` and
        ``n_future`` rows respectively.

    Raises:
        ValueError: If ``df`` has fewer than ``n_past + n_future`` rows.
    """
    # Largest start index whose input + target windows still fit in the frame.
    last_start = len(df) - n_past - n_future
    if last_start < 0:
        raise ValueError("DataFrame is too short for the given n_past and n_future")
    # randint's upper bound is exclusive, so +1 keeps the last window reachable
    # (and makes a frame of exactly n_past + n_future rows usable).
    start_idx = np.random.randint(0, last_start + 1)
    split = start_idx + n_past
    input_data = df[input_cols].iloc[start_idx:split].values
    target_data = df[target_cols].iloc[split:split + n_future].values
    return input_data, target_data

# Function to inverse transform data
def get_original_values(df_columns, input_np, target_np, prediction_np, scalers):
    """Convert scaled arrays back to original units, one column at a time.

    Every feature is passed through its scaler's ``inverse_transform`` and then
    through ``np.exp``. All three arrays are flattened over samples and
    timesteps, so each resulting column holds every timestep of that feature.

    Args:
        df_columns: Column names; position ``i`` selects feature ``i`` on the
            last axis of each array.
        input_np: Array indexed ``[sample, timestep, feature]``.
        target_np: Array indexed ``[sample, timestep, feature]``.
        prediction_np: Array indexed ``[sample, timestep, feature]``.
        scalers: Mapping of column name to an object with an
            ``inverse_transform`` method. Columns without an entry are skipped.

    Returns:
        Tuple ``(inv_input, inv_target, inv_pred)`` of DataFrames holding one
        column per feature that has a scaler.
    """
    def _restore(values, scaler):
        # Scalers operate on 2-D (n_samples, 1) input; flatten back afterwards.
        return np.exp(scaler.inverse_transform(values.reshape(-1, 1)).flatten())

    # Collect columns in dicts and build each frame once; inserting columns one
    # at a time into an existing DataFrame fragments it for wide datasets.
    input_cols, target_cols, pred_cols = {}, {}, {}
    for i, col in enumerate(df_columns):
        scaler = scalers.get(col)
        # Explicit None check: a scaler object must not be skipped just
        # because it happens to evaluate as falsy.
        if scaler is None:
            continue
        pred_cols[col] = _restore(prediction_np[:, :, i].flatten(), scaler)
        target_cols[col] = _restore(target_np[:, :, i].flatten(), scaler)
        # The input keeps all of its timesteps, not just the last one.
        input_cols[col] = _restore(input_np[:, :, i].flatten(), scaler)

    return pd.DataFrame(input_cols), pd.DataFrame(target_cols), pd.DataFrame(pred_cols)

# Rest of the code remains the same
# ...

# Adjust the evaluation function to match the input expectations of your models
def evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=10):
    """Score each checkpoint on one random window and keep the ``top_n`` lowest scores.

    For every path in ``model_files`` the model is loaded with ``load_model``,
    run on a window drawn by ``get_random_sample``, mapped back through the
    scalers with ``get_original_values`` and scored with ``calculate_score``.
    A failure at any stage is printed and that model is skipped.

    NOTE(review): every model is scored on its own randomly drawn window, so
    scores of different models are not measured on identical data.

    Args:
        model_files: Iterable of checkpoint paths.
        cfg: Config object. ``cfg.DEVICE`` is read and ``cfg.MODEL_PATH`` is
            overwritten with each path in turn.
        df: Preprocessed DataFrame; its first ``input_size`` columns are used
            as both model input and target.
        scalers: Mapping from column name to scaler, forwarded to
            ``get_original_values``.
        top_n: Maximum number of entries kept in the ranking.

    Returns:
        List of ``(model_file, score)`` tuples sorted ascending by score,
        at most ``top_n`` long.
    """
    top_models = []
    for model_file in model_files:
        cfg.MODEL_PATH = model_file
        print(f"\nLoading model from {cfg.MODEL_PATH}")

        result = load_model(cfg.MODEL_PATH, cfg, cfg.DEVICE)
        if result is None:
            print(f"Skipping model {cfg.MODEL_PATH} due to loading error")
            continue

        model, params = result
        input_size = params['input_size']
        n_past = params['n_past']
        n_future = params['n_future']
        model_type = params.get('model_type', 'unknown')

        # The model consumes the first `input_size` columns of the frame
        input_cols = df.columns[:input_size]

        try:
            input_data, target_data = get_random_sample(df, n_past, n_future, input_cols, input_cols)
        except Exception as e:
            print(f"Error getting random sample: {str(e)}")
            continue

        # Prepare the tensors
        try:
            input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
            target_tensor = torch.tensor(target_data, dtype=torch.float32).unsqueeze(0).to(cfg.DEVICE)
        except Exception as e:
            print(f"Error preparing tensors: {str(e)}")
            continue

        try:
            model.eval()
            with torch.no_grad():
                if model_type == 'transformer':
                    # The decoder input is an all-zero placeholder of the forecast length
                    tgt_input = torch.zeros((1, n_future, input_size), device=cfg.DEVICE)
                    prediction = model(input_tensor, tgt_input)
                elif model_type == 'lstm':
                    prediction = model(input_tensor)
                else:
                    print(f"Unknown model type for file: {model_file}")
                    continue
        except Exception as e:
            print(f"Failed to make predictions: {e}")
            continue

        # Convert tensors to numpy
        input_np = input_tensor.cpu().numpy()
        target_np = target_tensor.cpu().numpy()
        prediction_np = prediction.cpu().numpy()

        # Normalise everything to (batch, timesteps, features). The feature
        # count comes from the sampled data itself rather than from
        # cfg.NUM_FEATURES, so models trained on fewer columns than the full
        # frame are no longer rejected by the reshape.
        n_features = input_np.shape[-1]
        try:
            input_np = input_np.reshape(-1, n_past, n_features)
            target_np = target_np.reshape(-1, n_future, n_features)
            prediction_np = prediction_np.reshape(-1, n_future, n_features)
        except ValueError as ve:
            print(f"ValueError during reshaping: {ve}. Skipping model.")
            continue

        # Inverse transform, using the columns that were actually sampled so
        # that feature index i lines up with the right scaler
        try:
            inv_input, inv_target, inv_pred = get_original_values(
                input_cols, input_np, target_np, prediction_np, scalers
            )
        except Exception as e:
            print(f"Error during inverse transformation: {str(e)}")
            continue

        print("Calculating scores...")
        try:
            model_score = calculate_score(inv_target, inv_pred)
        except Exception as e:
            print(f"Error calculating score: {e}")
            continue

        # Append and keep only the top_n lowest scores
        top_models.append((model_file, model_score))
        top_models = sorted(top_models, key=lambda x: x[1])
        top_models = top_models[:top_n]

        print(f"Current top {len(top_models)} models:")
        for m, s in top_models:
            print(f"  Model: {m}, Score: {s:.2%}")

    return top_models

# Main execution
# Main execution: rank every checkpoint found under PATH_TO_SEARCH.
if __name__ == "__main__":
    @dataclass
    class Config:
        """Settings for the checkpoint-ranking run (shadows the earlier ``Config``)."""
        VERSION_N: int = 1
        RECORDS_TO_LOAD: int = 1205040
        # 108 timesteps. NOTE(review): an earlier comment called this "1 week of
        # 10-minute intervals", but that would be 1008 steps; confirm the intended window.
        N_PAST: int = 3 * 12 * 3
        # 24 timesteps. NOTE(review): previously described as "1 day of 10-minute
        # intervals", which would be 144 steps; confirm.
        N_FUTURE: int = 1 * 12 * 2
        BATCH_SIZE: int = 5000
        HIDDEN_SIZE: int = 512  # Adjusted to match inferred d_model=512
        NUM_LAYERS: int = 2
        DROPOUT: float = 0.2
        NUM_EPOCHS: int = 150
        HOT_RESTART: bool = True
        TRAIN_FIRST: bool = True
        EPOCH_TO_RESTART: int = 50
        BATCH_FACTOR: int = 81
        DEBUG_FREQ: int = 180
        # Unannotated, so these two are plain class attributes, not dataclass fields
        num_cpus = multiprocessing.cpu_count()
        NUM_WORKERS = max((num_cpus // 4 - 4), 4) if num_cpus > 16 else 4
        DEBUG_ON: bool = False
        DATA_URL: str = 'https://sambo.us-iad-1.linodeobjects.com/fillnan_combined_df.csv'
        DATA_FILE: str = './data/fill_nan_df.csv'
        MODEL_PATH: str = ""  # overwritten per checkpoint during evaluation
        MODEL_SAVE_PATH: str = './yay'
        DEVICE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        EPSILON: float = 1e-4
        PATH_TO_SEARCH: str = "/content/drive/MyDrive/Kraken"
        # Additional parameters
        NUM_FEATURES: int = None  # None until the data is loaded, then set to df.shape[1]

    cfg = Config()
    # Load and preprocess data
    df, scalers = load_and_preprocess_data(cfg.DATA_FILE, cfg.DATA_URL)
    cfg.NUM_FEATURES = df.shape[1]

    # Collect checkpoints recursively: all .ckpt files first, then all .pth files
    model_files = [
        path
        for pattern in ("**/*.ckpt", "**/*.pth")
        for path in glob.glob(os.path.join(cfg.PATH_TO_SEARCH, pattern), recursive=True)
    ]
    print(f"Total model files found: {len(model_files)}")

    # Evaluate models and get top N
    top_n = 10
    top_models = evaluate_and_get_top_models(model_files, cfg, df, scalers, top_n=top_n)

    # Display Top N models
    print(f"\nTop {len(top_models)} models:")
    for model_file, score in top_models:
        print(f"Model: {model_file}, Score: {score:.2%}")


ic| 'Starting data loading and preprocessing...'
ic| f"Data preprocessing completed in {time.time() - start_time_preprocess:.2f} seconds": 'Data preprocessing completed in 0.49 seconds'
ic| f"DataFrame shape: {df.shape}": 'DataFrame shape: (1102782, 13)'


Total model files found: 308

Loading model from /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt
Loading /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt as CryptoTransformer
Inferred Transformer parameters: d_model=512, nhead=16, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.2, activation=gelu
Calculating scores...
Overall Score: 49.50%
Current top 1 models:
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%

Loading model from /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt
Loading /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt as CryptoTransformer
Inferred Transformer parameters: d_model=512, nhead=16, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.2, activation=gelu
Calculating scores...
Overall Score: 63.68%
Current top 2 models:
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Sco

Traceback (most recent call last):
  File "<ipython-input-64-38609d2c3af6>", line 201, in load_model
    model.load_state_dict(state_dict, strict=False)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 2215, in load_state_dict
    raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for CryptoTransformer:
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_weight: copying a param with shape torch.Size([768, 256]) from checkpoint, the shape in current model is torch.Size([1536, 512]).
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_bias: copying a param with shape torch.Size([768]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for transformer_encoder.layers.0.self_attn.out_proj.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for

Loading /content/drive/MyDrive/Kraken/models/model-71-epoch=30-val_loss=0.56.ckpt as CryptoTransformer
Inferred Transformer parameters: d_model=512, nhead=16, num_encoder_layers=3, num_decoder_layers=3, dim_feedforward=2048, dropout=0.2, activation=gelu
Failed to load model /content/drive/MyDrive/Kraken/models/model-71-epoch=30-val_loss=0.56.ckpt: Error(s) in loading state_dict for CryptoTransformer:
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_weight: copying a param with shape torch.Size([768, 256]) from checkpoint, the shape in current model is torch.Size([1536, 512]).
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_bias: copying a param with shape torch.Size([768]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for transformer_encoder.layers.0.self_attn.out_proj.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for t

Traceback (most recent call last):
  File "<ipython-input-64-38609d2c3af6>", line 201, in load_model
    model.load_state_dict(state_dict, strict=False)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 2215, in load_state_dict
    raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for CryptoTransformer:
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_weight: copying a param with shape torch.Size([768, 256]) from checkpoint, the shape in current model is torch.Size([1536, 512]).
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_bias: copying a param with shape torch.Size([768]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for transformer_encoder.layers.0.self_attn.out_proj.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for

Loading /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth as CryptoLSTM
Inferred LSTM parameters: hidden_size=700, num_layers=2, n_features=13, output_size=13, LayerNorm=False, n_future=1
Calculating scores...
Overall Score: 70.78%
Current top 9 models:
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%
  Model: /content/drive/MyDrive/Kraken/models/model-epoch=35-val_final_loss=0.00.ckpt, Score: 104.66%
  Model: /content/drive/MyDrive/Kraken/models/model-epoch=14-val_loss=0.02.ckpt, Score: 216.60%
  Model: /content/drive/MyDrive/Kraken/models/model-epoch=32-val_loss=0.00.ckpt, Score: 423.46%
  Model: /content/drive/MyDrive/Kraken/models/model-epoch=65-val_loss=0.13.ckpt, Sc



ValueError during reshaping: cannot reshape array of size 756 into shape (108,13). Skipping model.

Loading model from /content/drive/MyDrive/Kraken/8crypto_lstm_model_epoch_42.pth
Loading /content/drive/MyDrive/Kraken/8crypto_lstm_model_epoch_42.pth as CryptoLSTM
Inferred LSTM parameters: hidden_size=2096, num_layers=1, n_features=7, output_size=105, LayerNorm=False, n_future=15
Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
ValueError during reshaping: cannot reshape array of size 756 into shape (108,13). Skipping model.

Loading model from /content/drive/MyDrive/Kraken/8crypto_lstm_model_epoch_49.pth
Loading /content/drive/MyDrive/Kraken/8crypto_lstm_model_epoch_49.pth as CryptoLSTM
Inferred LSTM parameters: hidden_size=2096, num_layers=1, n_features=7, output_size=105, LayerNorm=False, n_future=15
Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
ValueError during reshap



Calculating scores...
Overall Score: 84.21%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/3_fixed_crypt



Calculating scores...
Overall Score: 77.06%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/3_fixed_crypt



Calculating scores...
Overall Score: 78.02%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/3_fixed_crypt



Calculating scores...
Overall Score: 79.16%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/3_fixed_crypt



Calculating scores...
Overall Score: 88.44%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/3_fixed_crypt



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 74.62%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/cryp



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 73.35%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/cryp



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 80.22%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/cryp



Calculating scores...
Overall Score: 84.27%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/3_fixed_crypt



Calculating scores...
Overall Score: 84.61%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/4eva_fixed_cr



Calculating scores...
Overall Score: 83.66%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/4eva_fixed_cr



Calculating scores...
Overall Score: 85.60%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/4eva_fixed_cr



Calculating scores...
Overall Score: 74.99%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/4eva_fixed_cr



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 75.07%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/cryp



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 71.73%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/cryp



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 74.17%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/cryp



Calculating scores...
Overall Score: 77.55%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/4eva_fixed_cr



Calculating scores...
Overall Score: 77.88%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_10.pth, Score: 70.78%

Loading model from /content/drive/MyDrive/Kraken/4eva_fixed_cr



Calculating scores...
Overall Score: 69.67%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_88.pth, Score: 69.67%

Loading model from /content/drive/MyDrive/Kraken/4e



Calculating scores...
Overall Score: 87.09%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_88.pth, Score: 69.67%

Loading model from /content/drive/MyDrive/Kraken/4e



Calculating scores...
Overall Score: 62.97%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%

Loading model from /content/drive/MyDrive/Kraken/4



Calculating scores...
Overall Score: 73.96%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%

Loading model from /content/drive/MyDrive/Kraken/4



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 72.40%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 86.04%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive



Calculating scores...
Overall Score: 76.77%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%

Loading model from /content/drive/MyDrive/Kraken/4



Calculating scores...
Overall Score: 78.21%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_30.pth, Score: 69.62%

Loading model from /content/drive/MyDrive/Kraken/4



Calculating scores...
Overall Score: 64.02%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 75.81%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 74.26%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 77.63%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 75.62%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 65.42%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive



Calculating scores...
Overall Score: 83.26%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 70.47%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 74.76%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 73.09%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 79.63%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_176.pth, Score: 64.02%

Loading model from /content/drive/MyDr



Calculating scores...
Overall Score: 58.56%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_297.pth, Score: 58.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=26-val_loss=1.70.ckpt, Score: 63.68%

Loading model from /content/drive/MyDr



Unexpected keys: ['bn.weight', 'bn.bias', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
Calculating scores...
Overall Score: 73.25%
Current top 10 models:
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_72.pth, Score: 40.76%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt, Score: 49.50%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_0.pth, Score: 50.25%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_20.pth, Score: 52.68%
  Model: /content/drive/MyDrive/Kraken/crypto_lstm_model_epoch_40.pth, Score: 55.91%
  Model: /content/drive/MyDrive/Kraken/9crypto_lstm_model_epoch_81.pth, Score: 56.52%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_297.pth, Score: 58.56%
  Model: /content/drive/MyDrive/Kraken/model-68-epoch=42-val_loss=1.49.ckpt, Score: 61.56%
  Model: /content/drive/MyDrive/Kraken/4eva_fixed_crypto_lstm_model_epoch_110.pth, Score: 62.97%
  Model: /content/drive/M

In [None]:
# Load one saved checkpoint file onto the CPU so its contents can be inspected
# in the following cells.
# NOTE(review): the ".ckpt" suffix suggests a PyTorch Lightning checkpoint; if so,
# the loaded object is probably a full checkpoint dict (weights nested under a
# "state_dict" key) rather than a bare state dict, despite the variable name.
# Confirm before passing it to load_state_dict.
# NOTE(review): torch.load can unpickle arbitrary objects depending on the
# installed torch version's `weights_only` default; only load trusted files.
model_file = "/content/drive/MyDrive/Kraken/model-68-epoch=11-val_loss=1.90.ckpt"
state_dict = torch.load(model_file, map_location="cpu")



In [None]:
# Show the (key, value) pairs of the object loaded above as this cell's output.
# Assumes `state_dict` is dict-like; for large checkpoints, listing only the
# keys may be easier to read. TODO confirm what the loaded object contains.
state_dict.items()

In [None]:
# Build `model` from the same checkpoint path by calling load_model with the
# config object and its DEVICE setting.
# NOTE(review): load_model and cfg are not defined in this cell; presumably they
# come from earlier notebook cells — run those first.
model = load_model(model_file, cfg, cfg.DEVICE)