In [1]:
# Shell escape: quietly (-q) extract /content/results.zip into /content/data (-d).
!unzip -q /content/results.zip -d /content/data

In [2]:
import os
import pandas as pd
from datetime import datetime

# Pick the 10th date stem out of an example folder's .txt files
def extract_tenth_date(example_path):
    """Return the 10th filename stem (ascending order) among a folder's ``.txt`` files.

    Args:
        example_path: Directory that is listed with ``os.listdir``.

    Returns:
        The stem at position 9 of the sorted stems, or ``None`` when the folder
        contains fewer than ten ``.txt`` files.
    """
    # The stem is the text before the first '.'; the file naming is expected to be
    # YYYY-MM-DD, for which plain string ordering is also chronological ordering.
    stems = sorted(
        entry.split('.')[0]
        for entry in os.listdir(example_path)
        if entry.endswith('.txt')
    )

    # Position 9 is the 10th element (0-indexed).
    return stems[9] if len(stems) >= 10 else None

# Collect the stock row of the 10th date for every example folder in a directory
def process_directory(directory_path, stock_data):
    """Build a DataFrame with one stock row per example folder.

    For each sub-directory of ``directory_path`` the 10th date is obtained from
    ``extract_tenth_date``; when that date is a label of ``stock_data.index`` the
    matching row is collected. Folders without a 10th date, or whose date is not
    in the index, contribute nothing.

    Args:
        directory_path: Directory whose sub-directories are the examples.
        stock_data: DataFrame looked up by date label via ``.loc``.

    Returns:
        ``pd.DataFrame`` assembled from the collected rows.
    """
    matched_rows = []

    # NOTE(review): rows come out in os.listdir() order, which is not guaranteed
    # to be stable; anything that pairs these rows with other per-example data by
    # position depends on that order - confirm with the embedding export.
    for entry in os.listdir(directory_path):
        candidate = os.path.join(directory_path, entry)
        if not os.path.isdir(candidate):
            continue

        target_date = extract_tenth_date(candidate)
        if not target_date or target_date not in stock_data.index:
            continue

        matched_rows.append(stock_data.loc[target_date])

    return pd.DataFrame(matched_rows)

# Entry point that builds the per-split target CSVs
def main(all_stock_path, train_dir, test_dir, val_dir):
    """Write ``train.csv``, ``test.csv`` and ``val.csv`` from the example folders.

    Args:
        all_stock_path: CSV read with its first column as the index.
        train_dir: Directory of training example folders.
        test_dir: Directory of test example folders.
        val_dir: Directory of validation example folders.

    The three CSVs are written relative to the current working directory and a
    one-line summary per file is printed.
    """
    stock_data = pd.read_csv(all_stock_path, index_col=0)

    # Same order as before: process all splits, then save all, then report all.
    split_dirs = (
        ('train.csv', train_dir),
        ('test.csv', test_dir),
        ('val.csv', val_dir),
    )
    split_frames = [
        (csv_name, process_directory(folder, stock_data))
        for csv_name, folder in split_dirs
    ]

    for csv_name, frame in split_frames:
        frame.to_csv(csv_name)

    for csv_name, frame in split_frames:
        print(f"Generated {csv_name} with {len(frame)} entries")

# Inputs for this run - replace these paths with your actual paths.
# all_stock_path is the CSV that main() reads with its first column as the index;
# the three directories are the ones process_directory() scans for example folders.
all_stock_path = '/content/stock_prices_complete.csv'
train_dir = '/content/data/processed_dataset_v2/train'
test_dir = '/content/data/processed_dataset_v2/test'
val_dir = '/content/data/processed_dataset_v2/val'

# Writes train.csv, test.csv and val.csv into the current working directory.
main(all_stock_path, train_dir, test_dir, val_dir)

Generated train.csv with 340 entries
Generated test.csv with 42 entries
Generated val.csv with 44 entries


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoModel, AutoConfig
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import math
import re

# Set device: use CUDA when torch reports it as available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

class LoRALinear(nn.Module):
    """
    Linear layer with an additive Low-Rank Adaptation (LoRA) branch.

    The output is ``original(x) + scaling * (x @ lora_A @ lora_B)`` with
    ``scaling = alpha / r``. ``lora_B`` starts at zero, so a freshly built layer
    produces exactly the output of ``original``.

    Args:
        in_features: Size of the last input dimension.
        out_features: Size of the last output dimension.
        r: Rank of the low-rank branch (inner dimension of ``lora_A @ lora_B``).
        alpha: Numerator of the branch scaling factor.
    """
    def __init__(self, in_features, out_features, r=8, alpha=16):
        super().__init__()
        # Dense path; built first so its random init draws come before LoRA's.
        self.original = nn.Linear(in_features, out_features)
        # Low-rank factors: (in_features x r) followed by (r x out_features).
        self.lora_A = nn.Parameter(torch.zeros(in_features, r))
        self.lora_B = nn.Parameter(torch.zeros(r, out_features))
        self.scaling = alpha / r
        self.r = r
        self._reset_lora_parameters()

    def _reset_lora_parameters(self):
        """Random ``lora_A``, zero ``lora_B``: the branch contributes nothing initially."""
        nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
        nn.init.zeros_(self.lora_B)

    def forward(self, x):
        """Return the dense output plus the scaled low-rank correction."""
        dense_out = self.original(x)
        low_rank_out = x.matmul(self.lora_A).matmul(self.lora_B)
        return dense_out + (low_rank_out * self.scaling)


class LoRATransformerWrapper(nn.Module):
    """
    Wrap a model and replace its query/value ``nn.Linear`` layers with ``LoRALinear``.

    A layer is adapted when it is an ``nn.Linear`` whose dotted module path
    contains ``'query'`` or ``'value'``. The existing layer object is kept as the
    ``original`` sub-module of the adapter, so its weights, optional bias, dtype,
    device and ``requires_grad`` flags are carried over unchanged.

    Args:
        model: Module to adapt in place and to delegate ``forward`` to.
        r: LoRA rank passed to every ``LoRALinear``.
        alpha: LoRA alpha passed to every ``LoRALinear``.
        freeze_base: When True, every parameter that ``model`` has before the
            adapters are inserted gets ``requires_grad=False``, leaving only the
            newly created LoRA factors trainable. Defaults to False, which keeps
            all parameters trainable as before.
    """
    def __init__(self, model, r=8, alpha=16, freeze_base=False):
        super().__init__()
        self.model = model
        self.r = r
        self.alpha = alpha
        self.freeze_base = freeze_base
        if freeze_base:
            # Done once, before injection, so the new LoRA factors stay trainable
            # even if apply_lora() is called again later.
            for param in self.model.parameters():
                param.requires_grad_(False)
        self.apply_lora()

    def apply_lora(self):
        """
        Apply LoRA to the query and value projection layers in attention blocks.

        Safe to call more than once: layers that already sit inside a
        ``LoRALinear`` are left alone.
        """
        # Snapshot the matches first so that children are not replaced while the
        # named_modules() generator is still walking the tree.
        candidates = [
            (name, module)
            for name, module in self.model.named_modules()
            if isinstance(module, nn.Linear) and any(key in name for key in ['query', 'value'])
        ]

        for name, module in candidates:
            parent_name, _, layer_name = name.rpartition('.')

            # Walk down to the module that owns this layer.
            parent = self.model
            for part in parent_name.split('.'):
                if part:
                    parent = getattr(parent, part)

            # e.g. "...query.original" from an earlier pass: already adapted.
            if isinstance(parent, LoRALinear):
                continue

            lora_layer = LoRALinear(
                module.in_features, module.out_features, r=self.r, alpha=self.alpha
            )
            # Reuse the existing layer instead of copying into a fresh nn.Linear:
            # a fresh layer always has a bias, so a bias-free projection would
            # otherwise pick up a randomly initialised one.
            lora_layer.original = module
            # Put the new LoRA factors next to the weights they adapt.
            lora_layer.to(device=module.weight.device, dtype=module.weight.dtype)

            setattr(parent, layer_name, lora_layer)

    def forward(self, *args, **kwargs):
        """Delegate to the wrapped model."""
        return self.model(*args, **kwargs)


class TinyBERTStockPredictor(nn.Module):
    """
    Stock prediction model: LoRA-wrapped TinyBERT encoder plus an MLP head.

    Takes 9-day temporal embeddings of shape [batch_size, 9, 312] and feeds them
    to the encoder as ``inputs_embeds``.

    Args:
        input_dim: Accepted for interface compatibility; not read by this class.
        hidden_dim: Accepted for interface compatibility; not read by this class.
        output_dim: Width of the final linear layer.
        lora_r: LoRA rank forwarded to ``LoRATransformerWrapper``.
        lora_alpha: LoRA alpha forwarded to ``LoRATransformerWrapper``.
    """
    def __init__(self, input_dim=312, hidden_dim=312, output_dim=30, lora_r=8, lora_alpha=16):
        super().__init__()

        # Pretrained TinyBERT: config and weights come from the same checkpoint.
        self.bert_config = AutoConfig.from_pretrained("huawei-noah/TinyBERT_General_4L_312D")
        backbone = AutoModel.from_pretrained("huawei-noah/TinyBERT_General_4L_312D")

        # Encoder with LoRA adapters injected.
        self.bert = LoRATransformerWrapper(backbone, r=lora_r, alpha=lora_alpha)

        # Regression head: hidden_size -> 256 -> 128 -> output_dim.
        encoder_width = self.bert_config.hidden_size  # 312
        head_layers = [
            nn.Linear(encoder_width, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, output_dim),
        ]
        self.output_block = nn.Sequential(*head_layers)

    def forward(self, x):
        """
        Run the encoder on pre-computed embeddings and project its pooled output.

        Args:
            x (torch.Tensor): 3-D input tensor, [batch_size, 9, 312]
                             (batch, days, embedding_dim)

        Returns:
            torch.Tensor: Predicted values for all metrics/stocks, shape [batch_size, output_dim]
        """
        # Unpacking three names keeps the requirement that x is exactly 3-D.
        n_batch, n_steps, _ = x.shape

        # Every position is attended to, so the mask is all ones.
        full_mask = torch.ones(n_batch, n_steps, device=x.device)

        encoded = self.bert(inputs_embeds=x, attention_mask=full_mask)

        # pooler_output is [batch, 312]; the head maps it to [batch, output_dim].
        return self.output_block(encoded.pooler_output)


class StockDataset:
    """
    Dataset for loading pre-processed embeddings and matching them with target values.

    Row ``i`` of the CSV is paired with embedding ``i`` purely by position.

    Each target row is laid out stock by stock, and within a stock in the order
    ``['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']`` - so "Close" sits
    at offset 1 of every stock's block.
    """
    def __init__(self, embeddings_path, stock_csv_path, metrics_count=6, stocks_count=5):
        """
        Initialize with paths to embeddings and stock data, then load both.

        Args:
            embeddings_path (str): Path to embeddings tensor file (.pt)
            stock_csv_path (str): Path to CSV with target stock values for 10th day
            metrics_count (int): Number of metrics per stock (OHLCV + Adj Close = 6).
                Stored only; target extraction always uses the six metrics above.
            stocks_count (int): Number of stocks
        """
        self.embeddings_path = embeddings_path
        self.stock_csv_path = stock_csv_path
        self.metrics_count = metrics_count
        self.stocks_count = stocks_count

        # Stock order mapping (used for log messages)
        self.stock_order = ['AAPL', 'AMZN', 'GOOGL', 'META', 'NFLX']

        # Load the data
        self.load_data()

    def load_data(self):
        """
        Load embeddings and stock data, then create target tensors
        """
        print(f"Loading embeddings from {self.embeddings_path}")
        # Keep the data on the CPU: create_dataloader() pins memory and uses
        # worker processes, and a file saved from a GPU session would otherwise
        # fail to load on a CPU-only machine.
        self.embeddings = torch.load(self.embeddings_path, map_location='cpu')

        print(f"Loading stock data from {self.stock_csv_path}")
        self.stock_df = self.load_stock_csv()

        # Create target tensors from stock CSV
        self.create_targets()

    def load_stock_csv(self):
        """Load the stock CSV file with target values and print a short preview."""
        print(f"Loading stock price data from {self.stock_csv_path}...")

        # Read the CSV file
        df = pd.read_csv(self.stock_csv_path)

        print("CSV structure sample:")
        print(df.head(1))
        print(f"CSV columns: {df.columns.tolist()}")

        return df

    def _numeric_target_column(self, col_name, stock_label):
        """Return one CSV column as float64 values, using 0.0 where no number is available.

        A missing column becomes all zeros; cells that hold a value which cannot
        be parsed as a number become 0.0. Empty cells stay NaN. Each problem is
        reported once per column rather than once per row.
        """
        n_rows = len(self.stock_df)

        if col_name not in self.stock_df.columns:
            print(f"Error extracting {col_name} (for {stock_label}): "
                  f"column not found, using 0.0 for all {n_rows} rows")
            return np.zeros(n_rows, dtype=np.float64)

        raw = self.stock_df[col_name]
        numeric = pd.to_numeric(raw, errors='coerce')

        # Cells that had content but did not parse (as opposed to empty cells).
        unparsable = numeric.isna() & raw.notna()
        if unparsable.any():
            print(f"Error extracting {col_name} (for {stock_label}): "
                  f"{int(unparsable.sum())} non-numeric value(s), using 0.0")
            numeric = numeric.mask(unparsable, 0.0)

        return numeric.to_numpy(dtype=np.float64, na_value=np.nan)

    def create_targets(self):
        """
        Create target tensors based on the specific CSV format with numbered columns
        Format: [Metric], [Metric].1, [Metric].2, etc. for each company

        Sets ``self.targets`` to a float32 tensor of shape
        ``[rows, stocks_count * 6]`` (or ``None`` when the CSV has no rows). If
        the row count differs from the number of embeddings, both are truncated
        to the shorter length.
        """
        print("Creating target tensors...")

        # Base metrics in your CSV
        base_metrics = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

        n_rows = len(self.stock_df)
        if n_rows == 0:
            self.targets = None
            print("Warning: No targets created")
            return

        values = np.zeros((n_rows, self.stocks_count * len(base_metrics)), dtype=np.float64)

        out_col = 0
        for stock_idx in range(self.stocks_count):
            # Only used in messages; fall back to a generic label past the known tickers.
            if stock_idx < len(self.stock_order):
                stock_label = self.stock_order[stock_idx]
            else:
                stock_label = f"stock_{stock_idx}"

            for metric in base_metrics:
                # For the first stock, column name is just the metric
                # For subsequent stocks, column name is metric.1, metric.2, etc.
                col_name = metric if stock_idx == 0 else f"{metric}.{stock_idx}"
                values[:, out_col] = self._numeric_target_column(col_name, stock_label)
                out_col += 1

        self.targets = torch.tensor(values, dtype=torch.float32)
        print(f"Created {n_rows} targets with shape {self.targets.shape}")

        # Make sure we have the same number of targets as embeddings
        n_embeddings = self.embeddings.shape[0]
        if n_rows != n_embeddings:
            print(f"Warning: Number of targets ({n_rows}) doesn't match number of embeddings ({n_embeddings})")

            # Take the minimum number to ensure equal lengths.
            # NOTE(review): this only equalises lengths; if rows are missing from
            # the middle of either source the remaining pairs are shifted - verify
            # against how the CSV and the embeddings were generated.
            min_length = min(n_rows, n_embeddings)
            self.targets = self.targets[:min_length]
            self.embeddings = self.embeddings[:min_length]
            print(f"Truncated to {min_length} matching examples")

    def create_dataloader(self, batch_size=16, shuffle=True):
        """
        Create a DataLoader for the dataset

        Args:
            batch_size (int): Batch size
            shuffle (bool): Whether to shuffle samples

        Returns:
            torch.utils.data.DataLoader: DataLoader yielding (embeddings, targets)
            batches, or None when targets or embeddings are unavailable
        """
        if self.targets is None or self.embeddings is None:
            return None

        dataset = TensorDataset(self.embeddings, self.targets)

        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=2,
            pin_memory=True
        )

class StockPredictionTrainer:
    """
    Trainer for the stock prediction model.

    The optimised loss is the MSE over one "close" column per stock; the MSE over
    every output column is tracked alongside it for reporting only.
    """
    def __init__(self, model, optimizer, metrics_count=6, stocks_count=5, device='cuda',
                 close_metric_index=3):
        """
        Initialize the trainer

        Args:
            model (nn.Module): The model to train (moved to ``device``)
            optimizer: PyTorch optimizer
            metrics_count (int): Number of metrics per stock
            stocks_count (int): Number of stocks
            device (str): Device to use ('cuda' or 'cpu')
            close_metric_index (int): Offset of the closing price inside each
                stock's block of ``metrics_count`` columns. The default 3 matches
                an [Open, High, Low, Close, Adj Close, Volume] layout; targets
                built by ``StockDataset`` in this file use
                [Adj Close, Close, High, Low, Open, Volume], where it is 1.

        Raises:
            ValueError: If ``close_metric_index`` is outside ``[0, metrics_count)``.
        """
        if not 0 <= close_metric_index < metrics_count:
            raise ValueError(
                f"close_metric_index must be in [0, {metrics_count}), got {close_metric_index}"
            )

        self.model = model.to(device)
        self.optimizer = optimizer
        self.metrics_count = metrics_count
        self.stocks_count = stocks_count
        self.device = device
        self.scheduler = None

        # Use MSE loss, focusing specifically on close price
        self.criterion = nn.MSELoss()

        # Column of the closing price for every stock in the flat output vector
        self.close_metric_index = close_metric_index
        self.close_indices = [
            stock_idx * metrics_count + close_metric_index
            for stock_idx in range(stocks_count)
        ]

    def set_scheduler(self, scheduler):
        """Set learning rate scheduler"""
        self.scheduler = scheduler

    def extract_close_prices(self, predictions, targets):
        """Extract only the closing price columns of predictions and targets"""
        close_pred = predictions[:, self.close_indices]
        close_targets = targets[:, self.close_indices]
        return close_pred, close_targets

    def train_step(self, inputs, targets):
        """
        Single training step.

        Returns:
            tuple: (close-price loss, all-column loss) as Python floats. Only the
            close-price loss is back-propagated.
        """
        # Move data to device
        inputs = inputs.to(self.device)
        targets = targets.to(self.device)

        # Zero gradients
        self.optimizer.zero_grad()

        # Forward pass
        outputs = self.model(inputs)

        # Extract closing prices for loss calculation
        close_pred, close_targets = self.extract_close_prices(outputs, targets)

        # Calculate loss on closing prices
        loss = self.criterion(close_pred, close_targets)

        # The all-column MSE is only reported, so compute it without building
        # an autograd graph.
        with torch.no_grad():
            full_loss = self.criterion(outputs.detach(), targets)

        # Backward pass
        loss.backward()

        # Update parameters
        self.optimizer.step()

        return loss.item(), full_loss.item()

    def validate(self, val_dataloader):
        """
        Evaluate the model on a dataloader.

        The model is switched to eval mode for the evaluation and put back into
        whichever mode it was in when this method was called.

        Returns:
            dict: ``close_val_loss`` / ``full_val_loss`` (mean of per-batch MSE),
            ``close_mse`` / ``close_r2`` (over all samples) and ``stock_metrics``
            (per-stock ``mse`` and ``r2`` keyed ``stock_<i>``).
        """
        was_training = self.model.training
        self.model.eval()

        close_val_loss = 0
        full_val_loss = 0
        all_close_preds = []
        all_close_targets = []

        try:
            with torch.no_grad():
                for batch in val_dataloader:
                    inputs = batch[0].to(self.device)
                    targets = batch[1].to(self.device)

                    outputs = self.model(inputs)

                    # Extract closing prices
                    close_pred, close_targets = self.extract_close_prices(outputs, targets)

                    # Calculate losses
                    close_loss = self.criterion(close_pred, close_targets)
                    full_loss = self.criterion(outputs, targets)

                    close_val_loss += close_loss.item()
                    full_val_loss += full_loss.item()

                    all_close_preds.append(close_pred.cpu())
                    all_close_targets.append(close_targets.cpu())
        finally:
            # Restore the caller's mode even if evaluation fails part-way.
            self.model.train(was_training)

        # Stack predictions and targets
        all_close_preds = torch.cat(all_close_preds, dim=0)
        all_close_targets = torch.cat(all_close_targets, dim=0)

        # Calculate metrics
        close_preds_np = all_close_preds.numpy()
        close_targets_np = all_close_targets.numpy()

        # Overall metrics for closing prices
        close_mse = mean_squared_error(close_targets_np, close_preds_np)
        close_r2 = r2_score(close_targets_np, close_preds_np)

        # Per-stock metrics for closing prices
        stock_metrics = {}
        for s in range(self.stocks_count):
            stock_pred = close_preds_np[:, s]
            stock_target = close_targets_np[:, s]

            stock_metrics[f'stock_{s}'] = {
                'mse': mean_squared_error(stock_target, stock_pred),
                'r2': r2_score(stock_target, stock_pred)
            }

        return {
            'close_val_loss': close_val_loss / len(val_dataloader),
            'full_val_loss': full_val_loss / len(val_dataloader),
            'close_mse': close_mse,
            'close_r2': close_r2,
            'stock_metrics': stock_metrics
        }

    def predict(self, dataloader):
        """
        Generate predictions for every batch of a dataloader.

        Runs in eval mode and then restores the model's previous mode.

        Returns:
            torch.Tensor: CPU tensor with all batch outputs concatenated on dim 0.
        """
        was_training = self.model.training
        self.model.eval()
        all_preds = []

        try:
            with torch.no_grad():
                for batch in dataloader:
                    inputs = batch[0].to(self.device)
                    outputs = self.model(inputs)
                    all_preds.append(outputs.cpu())
        finally:
            self.model.train(was_training)

        return torch.cat(all_preds, dim=0)

    def save_model(self, path):
        """Save model weights (state_dict) to ``path``"""
        torch.save(self.model.state_dict(), path)
        print(f"Model saved to {path}")

    def train(self, train_dataloader, val_dataloader, epochs, save_path=None, early_stopping_patience=10):
        """
        Train the model

        Args:
            train_dataloader: Training data loader
            val_dataloader: Validation data loader
            epochs (int): Number of epochs
            save_path (str, optional): Path to save the best model
            early_stopping_patience (int): Number of epochs to wait for improvement

        Returns:
            dict: Training history (one list entry per completed epoch)

        Note:
            The weights left in memory are those of the last epoch run; the best
            epoch's weights are only available from ``save_path``.
        """
        history = {
            'train_close_loss': [],
            'train_full_loss': [],
            'val_close_loss': [],
            'val_full_loss': [],
            'close_mse': [],
            'close_r2': []
        }

        best_val_loss = float('inf')
        no_improvement_count = 0

        for epoch in range(epochs):
            # Training
            self.model.train()
            train_close_loss = 0
            train_full_loss = 0

            for batch in train_dataloader:
                inputs, targets = batch
                close_loss, full_loss = self.train_step(inputs, targets)
                train_close_loss += close_loss
                train_full_loss += full_loss

            avg_train_close_loss = train_close_loss / len(train_dataloader)
            avg_train_full_loss = train_full_loss / len(train_dataloader)

            # Validation
            val_metrics = self.validate(val_dataloader)

            # Update history
            history['train_close_loss'].append(avg_train_close_loss)
            history['train_full_loss'].append(avg_train_full_loss)
            history['val_close_loss'].append(val_metrics['close_val_loss'])
            history['val_full_loss'].append(val_metrics['full_val_loss'])
            history['close_mse'].append(val_metrics['close_mse'])
            history['close_r2'].append(val_metrics['close_r2'])

            # Step scheduler if needed (plateau schedulers need the monitored value)
            if self.scheduler is not None:
                if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                    self.scheduler.step(val_metrics['close_val_loss'])
                else:
                    self.scheduler.step()

            # Print progress
            print(f"Epoch {epoch+1}/{epochs} - "
                  f"Train Close Loss: {avg_train_close_loss:.6f}, "
                  f"Val Close Loss: {val_metrics['close_val_loss']:.6f}, "
                  f"Close MSE: {val_metrics['close_mse']:.6f}, "
                  f"Close R²: {val_metrics['close_r2']:.6f}")

            # Check for early stopping
            if val_metrics['close_val_loss'] < best_val_loss:
                best_val_loss = val_metrics['close_val_loss']
                no_improvement_count = 0

                # Save the best model
                if save_path:
                    self.save_model(save_path)
            else:
                no_improvement_count += 1
                if no_improvement_count >= early_stopping_patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break

        return history


def plot_metrics(history, save_dir=None):
    """
    Draw the four training-history figures and optionally save them as PNGs.

    Each figure is closed right after it is drawn, so with ``save_dir`` unset
    nothing is written to disk.

    Args:
        history (dict): Training history; must provide 'train_close_loss',
            'val_close_loss', 'train_full_loss', 'val_full_loss', 'close_mse'
            and 'close_r2'
        save_dir (str, optional): Directory to save plots (created if missing)
    """
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # One entry per figure: (curves as (history key, legend label), y-axis label,
    # title, output file name).
    figure_specs = (
        (
            (('train_close_loss', 'Train Close Loss'), ('val_close_loss', 'Val Close Loss')),
            'Loss',
            'Training and Validation Loss (Closing Price)',
            'close_loss_plot.png',
        ),
        (
            (('train_full_loss', 'Train Full Loss'), ('val_full_loss', 'Val Full Loss')),
            'Loss',
            'Training and Validation Loss (All Metrics)',
            'full_loss_plot.png',
        ),
        (
            (('close_mse', 'Validation Close MSE'),),
            'MSE',
            'Validation Mean Squared Error (Closing Price)',
            'close_mse_plot.png',
        ),
        (
            (('close_r2', 'Validation Close R²'),),
            'R²',
            'Validation R² Score (Closing Price)',
            'close_r2_plot.png',
        ),
    )

    for curves, y_label, heading, file_name in figure_specs:
        plt.figure(figsize=(10, 6))
        for history_key, legend_label in curves:
            plt.plot(history[history_key], label=legend_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(heading)
        plt.legend()
        plt.grid(True)
        if save_dir:
            plt.savefig(os.path.join(save_dir, file_name))
        plt.close()


def _resolve_split_target_csv(stock_csv_path, split):
    """Return the target CSV that StockDataset should read for one split."""
    if isinstance(stock_csv_path, dict):
        return stock_csv_path[split]
    if os.path.isdir(stock_csv_path):
        return os.path.join(stock_csv_path, f"{split}.csv")
    return stock_csv_path


def train_and_evaluate_model(
    embeddings_dir,
    stock_csv_path,
    output_dir="output_tinybert_lora_stock_prediction",
    metrics_count=6,  # Updated to 6 features
    stocks_count=5,
    epochs=50,
    batch_size=16,
    lr=3e-5,
    lora_r=8,
    lora_alpha=16
):
    """
    Train a TinyBERT with LoRA model for stock prediction and evaluate it on the test split.

    Args:
        embeddings_dir (str): Directory containing ``train_embeddings_9x312.pt``,
            ``val_embeddings_9x312.pt`` and ``test_embeddings_9x312.pt``
        stock_csv_path (str | dict): Where the per-split target CSVs are. Either
            a dict with 'train', 'val' and 'test' paths, a directory holding
            ``train.csv`` / ``val.csv`` / ``test.csv``, or a single CSV file that
            is then used for every split. ``StockDataset`` pairs CSV rows with
            embeddings by position, so each CSV must be row-aligned with its
            split's embeddings.
        output_dir (str): Directory to save outputs
        metrics_count (int): Number of metrics per stock (now 6)
        stocks_count (int): Number of stocks
        epochs (int): Number of training epochs
        batch_size (int): Batch size
        lr (float): Learning rate
        lora_r (int): LoRA rank
        lora_alpha (int): LoRA alpha

    Returns:
        dict | None: Test metrics as returned by ``StockPredictionTrainer.validate``,
        or None when a dataloader could not be created.
    """
    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    if not isinstance(stock_csv_path, dict) and not os.path.isdir(stock_csv_path):
        print("Warning: the same target CSV is used for train, val and test; "
              "pass a directory or a {'train', 'val', 'test'} mapping for per-split targets")

    # Create datasets. StockDataset takes (embeddings_path, stock_csv_path,
    # metrics_count, stocks_count); it has no separate dates-file argument.
    datasets = {}
    for split in ('train', 'val', 'test'):
        datasets[split] = StockDataset(
            os.path.join(embeddings_dir, f"{split}_embeddings_9x312.pt"),
            _resolve_split_target_csv(stock_csv_path, split),
            metrics_count,
            stocks_count
        )

    # Create dataloaders
    train_dataloader = datasets['train'].create_dataloader(batch_size=batch_size, shuffle=True)
    val_dataloader = datasets['val'].create_dataloader(batch_size=batch_size, shuffle=False)
    test_dataloader = datasets['test'].create_dataloader(batch_size=batch_size, shuffle=False)

    if not all([train_dataloader, val_dataloader, test_dataloader]):
        print("Error: Failed to create one or more dataloaders. Check data matching.")
        return None

    # Output dimension is metrics_count * stocks_count
    output_dim = metrics_count * stocks_count

    # Create model
    model = TinyBERTStockPredictor(
        input_dim=312,  # From the embeddings
        hidden_dim=312,  # TinyBERT hidden size
        output_dim=output_dim,
        lora_r=lora_r,
        lora_alpha=lora_alpha
    )

    print(f"Model created with {sum(p.numel() for p in model.parameters())} parameters")
    print(f"Input dimension: 312, Output dimension: {output_dim}")

    # Create optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)

    # Create scheduler (the deprecated `verbose` flag is intentionally not passed)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5
    )

    # Create trainer
    trainer = StockPredictionTrainer(
        model,
        optimizer,
        metrics_count=metrics_count,
        stocks_count=stocks_count,
        device=device
    )
    trainer.set_scheduler(scheduler)

    # StockDataset lays out each stock's block as
    # ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], so the closing
    # price is at offset 1 (offset 3 would be 'Low').
    close_offset = 1
    close_indices = [i * metrics_count + close_offset for i in range(stocks_count)]
    trainer.close_indices = close_indices

    # Train model
    best_model_path = os.path.join(output_dir, 'best_model.pt')
    print(f"\nTraining for {epochs} epochs...")
    history = trainer.train(
        train_dataloader,
        val_dataloader,
        epochs=epochs,
        save_path=best_model_path,
        early_stopping_patience=10
    )

    # Plot training metrics
    plot_metrics(history, save_dir=output_dir)

    # Evaluate the checkpoint with the best validation loss rather than whatever
    # the last epoch left in memory. A checkpoint is written during this run as
    # soon as one validation loss compares below infinity; otherwise a file at
    # that path would be left over from an earlier run and is not loaded.
    saved_this_run = any(loss < float('inf') for loss in history['val_close_loss'])
    if saved_this_run and os.path.exists(best_model_path):
        model.load_state_dict(torch.load(best_model_path, map_location=device))

    # Generate test predictions
    print("\nGenerating test predictions...")
    test_predictions = trainer.predict(test_dataloader)

    # Save test predictions
    torch.save(test_predictions, os.path.join(output_dir, 'test_predictions.pt'))
    print(f"Test predictions saved to {os.path.join(output_dir, 'test_predictions.pt')}")

    # Extract closing prices from test predictions
    close_predictions = test_predictions[:, close_indices]

    # Save close predictions separately
    torch.save(close_predictions, os.path.join(output_dir, 'test_close_predictions.pt'))
    print(f"Close price predictions saved to {os.path.join(output_dir, 'test_close_predictions.pt')}")

    # Final test evaluation
    test_metrics = trainer.validate(test_dataloader)
    print("\nFinal test metrics:")
    print(f"Close Loss: {test_metrics['close_val_loss']:.6f}")
    print(f"Close MSE: {test_metrics['close_mse']:.6f}")
    print(f"Close R²: {test_metrics['close_r2']:.6f}")

    # One label per stock; generic labels past the five known tickers so the
    # metrics table always has stocks_count rows.
    known_symbols = ['AAPL', 'AMZN', 'GOOGL', 'META', 'NFLX']
    stock_symbols = [
        known_symbols[i] if i < len(known_symbols) else f"stock_{i}"
        for i in range(stocks_count)
    ]
    per_stock = [test_metrics['stock_metrics'][f'stock_{i}'] for i in range(stocks_count)]

    # Per-stock metrics
    print("\nPer-stock closing price metrics:")
    for stock_symbol, metrics in zip(stock_symbols, per_stock):
        print(f"{stock_symbol}: MSE={metrics['mse']:.6f}, R²={metrics['r2']:.6f}")

    # Save metrics as CSV
    stock_metrics_df = pd.DataFrame({
        'Stock': stock_symbols,
        'MSE': [metrics['mse'] for metrics in per_stock],
        'R2': [metrics['r2'] for metrics in per_stock]
    })
    stock_metrics_df.to_csv(os.path.join(output_dir, 'stock_metrics.csv'), index=False)

    # Save overall metrics
    overall_metrics = {
        'close_mse': test_metrics['close_mse'],
        'close_r2': test_metrics['close_r2'],
        'close_loss': test_metrics['close_val_loss'],
        'full_loss': test_metrics['full_val_loss']
    }

    pd.DataFrame([overall_metrics]).to_csv(os.path.join(output_dir, 'overall_metrics.csv'), index=False)

    return test_metrics

def remap_targets_to_model_format(targets, output_dim=30):
    """
    Remaps the targets from the CSV format to the model's expected format.

    Input columns are expected grouped by metric then company:
    [AAPL_AdjClose, AMZN_AdjClose, GOOGL_AdjClose, META_AdjClose, NFLX_AdjClose,
     AAPL_Close, AMZN_Close, GOOGL_Close, META_Close, NFLX_Close, ...etc]

    Output columns are grouped by company then metric:
    [AAPL_Open, AAPL_High, AAPL_Low, AAPL_Close, AAPL_AdjClose, AAPL_Volume,
     AMZN_Open, AMZN_High, ...etc]

    NOTE(review): ``StockDataset.create_targets`` in this file already emits
    company-major rows (in CSV metric order), which is not the metric-major input
    described here - confirm the layout of whatever is passed in.

    Args:
        targets (torch.Tensor): Tensor of shape [batch_size, 30] in CSV order
        output_dim (int): Width of the returned tensor (default 30); columns
            beyond the 30 remapped ones are left at zero

    Returns:
        torch.Tensor: Reordered tensor with the dtype and device of ``targets``
    """
    # CSV order of metrics
    csv_metrics = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

    # Model expected order of metrics
    model_metrics = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    # Number of stocks and metrics
    stocks_count = 5
    metrics_count = 6

    # Pair every destination column with the source column holding the same
    # (stock, metric) value.
    src_indices = []
    tgt_indices = []
    for stock_idx in range(stocks_count):
        for model_metric_idx, model_metric in enumerate(model_metrics):
            csv_metric_idx = csv_metrics.index(model_metric)
            src_indices.append(csv_metric_idx * stocks_count + stock_idx)
            tgt_indices.append(stock_idx * metrics_count + model_metric_idx)

    # Allocate next to the input so GPU targets are not silently returned on the CPU.
    reordered = torch.zeros(
        (targets.shape[0], output_dim), dtype=targets.dtype, device=targets.device
    )
    src = torch.tensor(src_indices, dtype=torch.long, device=targets.device)
    tgt = torch.tensor(tgt_indices, dtype=torch.long, device=targets.device)

    # Copy all 30 columns in one indexed assignment.
    reordered[:, tgt] = targets[:, src]

    return reordered


def main():
    """
    Run the whole pipeline: locate the data files, build datasets and
    dataloaders, train the model, then save predictions and report test metrics.

    Returns:
        The metrics dict produced by ``trainer.validate`` on the test loader,
        or None when a required file is missing or a dataloader is falsy.
    """
    embeddings_dir = "/content"  # Base directory

    def under_base(filename):
        # Every input file lives directly under the base directory
        return os.path.join(embeddings_dir, filename)

    # Pre-computed embedding tensors
    train_embeddings_path = under_base("train_embeddings_9x312(3).pt")
    val_embeddings_path = under_base("val_embeddings_9x312(2).pt")
    test_embeddings_path = under_base("test_embeddings_9x312(1).pt")

    # Target CSVs holding the stock data for the 10th day
    train_stocks_path = under_base("train.csv")
    val_stocks_path = under_base("val.csv")
    test_stocks_path = under_base("test.csv")

    # Artifacts (checkpoint, plots, predictions) are written here
    output_dir = "output_tinybert_lora_stock_prediction"
    os.makedirs(output_dir, exist_ok=True)

    # Ensure the base directory exists
    os.makedirs(embeddings_dir, exist_ok=True)

    # Bail out early with a readable list if any input is absent
    required_files = [
        train_embeddings_path,
        val_embeddings_path,
        test_embeddings_path,
        train_stocks_path,
        val_stocks_path,
        test_stocks_path
    ]
    missing_files = [path for path in required_files if not os.path.exists(path)]
    if missing_files:
        print("Error: The following required files are missing:")
        for path in missing_files:
            print(f" - {path}")
        print("\nPlease ensure all required data files are available before running.")
        return

    def build_dataset(embeddings_path, stocks_path):
        # All three splits share the same 6-metric x 5-stock layout
        return StockDataset(
            embeddings_path,
            stocks_path,
            metrics_count=6,
            stocks_count=5
        )

    train_dataset = build_dataset(train_embeddings_path, train_stocks_path)
    val_dataset = build_dataset(val_embeddings_path, val_stocks_path)
    test_dataset = build_dataset(test_embeddings_path, test_stocks_path)

    def collate_fn(batch):
        # Stack the samples, then reorder targets into the model's column order
        stacked_inputs = torch.stack([sample[0] for sample in batch])
        stacked_targets = torch.stack([sample[1] for sample in batch])
        return stacked_inputs, remap_targets_to_model_format(stacked_targets)

    def build_loader(dataset, shuffle):
        # Only the shuffle flag differs between the splits
        return DataLoader(
            TensorDataset(dataset.embeddings, dataset.targets),
            batch_size=16,
            shuffle=shuffle,
            collate_fn=collate_fn,
            num_workers=2,
            pin_memory=True
        )

    train_dataloader = build_loader(train_dataset, True)
    val_dataloader = build_loader(val_dataset, False)
    test_dataloader = build_loader(test_dataset, False)

    if not (train_dataloader and val_dataloader and test_dataloader):
        print("Error: Failed to create one or more dataloaders. Check data matching.")
        return None

    # 6 metrics for each of 5 stocks
    output_dim = 6 * 5

    model = TinyBERTStockPredictor(
        input_dim=312,  # From the embeddings
        hidden_dim=312,  # TinyBERT hidden size
        output_dim=output_dim,
        lora_r=8,
        lora_alpha=16
    )

    parameter_total = sum(p.numel() for p in model.parameters())
    print(f"Model created with {parameter_total} parameters")
    print(f"Input dimension: 312, Output dimension: {output_dim}")

    optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5, weight_decay=0.01)

    # Halve the learning rate after 5 epochs without improvement
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5, verbose=True
    )

    run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    trainer = StockPredictionTrainer(
        model,
        optimizer,
        metrics_count=6,
        stocks_count=5,
        device=run_device
    )
    trainer.set_scheduler(scheduler)

    best_model_path = os.path.join(output_dir, 'best_model.pt')
    print("\nTraining for 50 epochs...")
    history = trainer.train(
        train_dataloader,
        val_dataloader,
        epochs=50,
        save_path=best_model_path,
        early_stopping_patience=10
    )

    plot_metrics(history, save_dir=output_dir)

    # Full 30-column predictions for the test split
    print("\nGenerating test predictions...")
    test_predictions = trainer.predict(test_dataloader)

    predictions_path = os.path.join(output_dir, 'test_predictions.pt')
    torch.save(test_predictions, predictions_path)
    print(f"Test predictions saved to {predictions_path}")

    # Close is offset 3 within each stock's 6-metric group: columns 3, 9, 15, 21, 27
    close_indices = list(range(3, 5 * 6, 6))
    close_predictions = test_predictions[:, close_indices]

    close_predictions_path = os.path.join(output_dir, 'test_close_predictions.pt')
    torch.save(close_predictions, close_predictions_path)
    print(f"Close price predictions saved to {close_predictions_path}")

    # Final test evaluation
    test_metrics = trainer.validate(test_dataloader)
    print("\nFinal test metrics:")
    print(f"Close Loss: {test_metrics['close_val_loss']:.6f}")
    print(f"Close MSE: {test_metrics['close_mse']:.6f}")
    print(f"Close R²: {test_metrics['close_r2']:.6f}")

    # Label the per-stock entries by position; the dict keys are not used
    print("\nPer-stock closing price metrics:")
    stock_symbols = ['AAPL', 'AMZN', 'GOOGL', 'META', 'NFLX']
    for stock_idx, metrics in enumerate(test_metrics['stock_metrics'].values()):
        stock_symbol = stock_symbols[stock_idx % 5]
        print(f"{stock_symbol}: MSE={metrics['mse']:.6f}, R²={metrics['r2']:.6f}")

    return test_metrics


# Run the pipeline only when this code is executed directly (script or
# notebook cell), not when imported; main()'s return value is discarded here.
if __name__ == "__main__":
    main()

Using device: cuda
Loading embeddings from /content/train_embeddings_9x312(3).pt
Loading stock data from /content/train.csv
Loading stock price data from /content/train.csv...
CSV structure sample:
   Unnamed: 0  Adj Close  Adj Close.1  Adj Close.2  Adj Close.3  Adj Close.4  \
0  2018-01-09  40.966282    62.634998    55.374752   186.988708   209.309998   

     Close    Close.1  Close.2     Close.3  ...     Open.1     Open.2  \
0  43.5825  62.634998  55.6395  187.869995  ...  62.845001  55.922001   

       Open.3      Open.4      Volume    Volume.1    Volume.2    Volume.3  \
0  188.699997  212.110001  86336000.0  73226000.0  26808000.0  12393100.0   

    Volume.4  is_business_day  
0  6125900.0             True  

[1 rows x 32 columns]
CSV columns: ['Unnamed: 0', 'Adj Close', 'Adj Close.1', 'Adj Close.2', 'Adj Close.3', 'Adj Close.4', 'Close', 'Close.1', 'Close.2', 'Close.3', 'Close.4', 'High', 'High.1', 'High.2', 'High.3', 'High.4', 'Low', 'Low.1', 'Low.2', 'Low.3', 'Low.4', 'Open',

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoModel, AutoConfig
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import math
import re
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np

# Pick the compute device once at module level: CUDA when torch reports it
# as available, otherwise CPU. The choice is printed for the run log.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

class NormalizationLayer(nn.Module):
    """
    Custom normalization layer for financial data.

    Wraps a scikit-learn scaler. The scaler is fitted on the first batch that
    passes through ``forward`` and only applied (not refitted) afterwards.

    The scaler is a plain Python attribute, not a parameter or buffer, and the
    values go through numpy, so no gradient flows back through this layer.
    NOTE(review): the fitted statistics therefore do not appear to be part of
    ``state_dict()`` -- confirm how checkpoints are expected to restore them.

    Args:
        method (str): 'standard' for StandardScaler, 'minmax' for
            MinMaxScaler with feature_range=(-1, 1).

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    def __init__(self, method='standard'):
        super().__init__()
        self.method = method
        if method == 'standard':
            self.normalizer = StandardScaler()
        elif method == 'minmax':
            self.normalizer = MinMaxScaler(feature_range=(-1, 1))
        else:
            # Fail at construction time instead of leaving `normalizer` unset
            # and crashing with AttributeError on the first forward pass.
            raise ValueError(
                f"Unsupported normalization method {method!r}; expected 'standard' or 'minmax'"
            )
        self.is_fitted = False

    def forward(self, x):
        """
        Normalize ``x`` feature-wise.

        Args:
            x (torch.Tensor): Tensor of shape [batch_size, seq_len, features].

        Returns:
            torch.Tensor: Normalized tensor with the same shape, dtype and
            device as ``x``.
        """
        # x shape: [batch_size, seq_len, features]
        batch_size, seq_len, features = x.shape

        # sklearn works on 2-D numpy arrays: one row per (batch, position)
        flat = x.reshape(-1, features).detach().cpu().numpy()

        # Fit on the first batch only; later batches reuse those statistics
        if self.is_fitted:
            scaled = self.normalizer.transform(flat)
        else:
            scaled = self.normalizer.fit_transform(flat)

        x_normalized = torch.tensor(scaled, dtype=x.dtype, device=x.device)
        self.is_fitted = True

        # Reshape back
        return x_normalized.reshape(batch_size, seq_len, features)


class LoRALinear(nn.Module):
    """
    Linear layer augmented with a Low-Rank Adaptation (LoRA) branch.

    Output is ``original(x) + (alpha / r) * (x @ lora_A @ lora_B)``. ``lora_B``
    starts at zero, so a freshly built layer matches its ``original`` linear
    exactly.

    Args:
        in_features (int): Input feature size.
        out_features (int): Output feature size.
        r (int): Rank of the low-rank update.
        alpha (int | float): Numerator of the LoRA scaling factor.
    """
    def __init__(self, in_features, out_features, r=8, alpha=16):
        super().__init__()
        self.r = r
        self.scaling = alpha / r

        # Dense path; built first so it draws from the RNG before lora_A does
        self.original = nn.Linear(in_features, out_features)

        # Down-projection: fully overwritten by the Kaiming init below
        self.lora_A = nn.Parameter(torch.empty(in_features, r))
        # Up-projection: zeros, so the LoRA branch contributes nothing at start
        self.lora_B = nn.Parameter(torch.zeros(r, out_features))

        nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))

    def forward(self, x):
        """Return the dense output plus the scaled low-rank correction."""
        low_rank = (x @ self.lora_A) @ self.lora_B
        return self.original(x) + (low_rank * self.scaling)


class LoRATransformerWrapper(nn.Module):
    """
    Wrap a transformer model and swap selected linear layers for LoRA layers.

    On construction, every ``nn.Linear`` whose dotted module name contains
    'query' or 'value' is replaced in place by a ``LoRALinear`` carrying a
    copy of the original weights.

    Args:
        model (nn.Module): Model to adapt; it is modified in place.
        r (int): LoRA rank passed to each ``LoRALinear``.
        alpha (int | float): LoRA alpha passed to each ``LoRALinear``.
    """
    def __init__(self, model, r=8, alpha=16):
        super().__init__()
        self.model = model
        self.r = r
        self.alpha = alpha
        self.apply_lora()

    def apply_lora(self):
        """
        Apply LoRA to the query and value projection layers in attention blocks
        """
        # Snapshot the matches first: replacing modules while the
        # named_modules() generator is still running would mutate the tree
        # it is walking.
        matches = [
            (name, module)
            for name, module in self.model.named_modules()
            if isinstance(module, nn.Linear)
            and any(key in name for key in ('query', 'value'))
        ]

        for name, module in matches:
            parent_name, _, layer_name = name.rpartition('.')

            # Create LoRA layer on the same device/dtype as the layer it replaces
            lora_layer = LoRALinear(
                module.in_features, module.out_features, r=self.r, alpha=self.alpha
            ).to(device=module.weight.device, dtype=module.weight.dtype)

            # Copy weights from original layer
            lora_layer.original.weight.data = module.weight.data.clone()
            if module.bias is not None:
                lora_layer.original.bias.data = module.bias.data.clone()
            else:
                # The source layer has no bias; drop the randomly initialised
                # one so the replacement computes the same dense output.
                lora_layer.original.bias = None

            # Walk down to the parent module, then swap the child
            parent = self.model
            for part in parent_name.split('.'):
                if part:
                    parent = getattr(parent, part)
            setattr(parent, layer_name, lora_layer)

    def forward(self, *args, **kwargs):
        """Delegate the call unchanged to the wrapped model."""
        return self.model(*args, **kwargs)


class ImprovedTinyBERTStockPredictor(nn.Module):
    """
    Stock prediction model: input normalization, a LoRA-adapted TinyBERT
    encoder, and a two-block MLP head with batch normalization and dropout.

    The head is purely sequential (312 -> 256 -> 128 -> output_dim); there are
    no skip connections in this implementation. ``input_dim`` and
    ``hidden_dim`` are accepted but not read by the constructor.
    """
    def __init__(self, input_dim=312, hidden_dim=312, output_dim=30, lora_r=8, lora_alpha=16):
        super().__init__()

        checkpoint = "huawei-noah/TinyBERT_General_4L_312D"

        # Min-max input scaling (fitted lazily by the layer itself)
        self.norm_layer = NormalizationLayer(method='minmax')

        # Pretrained TinyBERT, with LoRA applied to its attention projections
        self.bert_config = AutoConfig.from_pretrained(checkpoint)
        self.bert = LoRATransformerWrapper(
            AutoModel.from_pretrained(checkpoint), r=lora_r, alpha=lora_alpha
        )

        encoder_width = self.bert_config.hidden_size  # 312

        # Head: two Linear + BatchNorm blocks followed by the output projection
        self.fc1 = nn.Linear(encoder_width, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc_out = nn.Linear(128, output_dim)

        # One dropout module shared by both head blocks
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """
        Map a batch of embedding sequences to ``output_dim`` predictions.

        Args:
            x (torch.Tensor): 3-D tensor [batch, seq_len, embed_dim].

        Returns:
            torch.Tensor: Output of the final linear layer, one row per sample.
        """
        batch_size, seq_len, _ = x.shape  # batch, 9, 312

        normalized = self.norm_layer(x)

        # Attend to every position: the mask is all ones
        attention_mask = torch.ones(batch_size, seq_len, device=normalized.device)

        encoded = self.bert(inputs_embeds=normalized, attention_mask=attention_mask)
        hidden = encoded.pooler_output  # [batch, 312]

        # Each head block: Linear -> BatchNorm -> ReLU -> Dropout
        for linear, batch_norm in ((self.fc1, self.bn1), (self.fc2, self.bn2)):
            hidden = self.dropout(F.relu(batch_norm(linear(hidden))))

        return self.fc_out(hidden)


class StockDataset:
    """
    Dataset for loading pre-processed embeddings and matching them with target values.

    After construction the instance exposes:
      - ``embeddings``: tensor loaded from ``embeddings_path`` (on CPU)
      - ``stock_df``: the raw target CSV as a DataFrame
      - ``targets``: float32 tensor [rows, stocks_count * 6], or None when the
        CSV has no rows

    Target column layout is stock-major, and within each stock the metrics
    follow the CSV order: Adj Close, Close, High, Low, Open, Volume.
    NOTE(review): in this layout Close sits at offset 1 within each stock,
    whereas ImprovedStockPredictionTrainer reads offset 3 -- confirm targets
    are reordered before they reach the trainer.
    """
    def __init__(self, embeddings_path, stock_csv_path, metrics_count=6, stocks_count=5):
        """
        Initialize with paths to embeddings and stock data

        Args:
            embeddings_path (str): Path to embeddings tensor file (.pt)
            stock_csv_path (str): Path to CSV with target stock values for 10th day
            metrics_count (int): Number of metrics per stock (OHLCV + Adj Close = 6)
            stocks_count (int): Number of stocks
        """
        self.embeddings_path = embeddings_path
        self.stock_csv_path = stock_csv_path
        self.metrics_count = metrics_count
        self.stocks_count = stocks_count

        # Stock order mapping
        self.stock_order = ['AAPL', 'AMZN', 'GOOGL', 'META', 'NFLX']

        # Load the data
        self.load_data()

    def load_data(self):
        """
        Load embeddings and stock data, then create target tensors
        """
        print(f"Loading embeddings from {self.embeddings_path}")
        # Always materialise on CPU: tensors saved from a GPU session would
        # otherwise fail to load on a CPU-only host, and the DataLoader built
        # by create_dataloader pins memory, which needs CPU tensors.
        self.embeddings = torch.load(self.embeddings_path, map_location='cpu')

        print(f"Loading stock data from {self.stock_csv_path}")
        self.stock_df = self.load_stock_csv()

        # Create target tensors from stock CSV
        self.create_targets()

    def load_stock_csv(self):
        """Load the target CSV and print a one-row preview plus its column names."""
        print(f"Loading stock price data from {self.stock_csv_path}...")

        # Read the CSV file
        df = pd.read_csv(self.stock_csv_path)

        print("CSV structure sample:")
        print(df.head(1))
        print(f"CSV columns: {df.columns.tolist()}")

        return df

    def _target_columns(self):
        """Return (column_name, stock_idx) pairs in target order (stock-major)."""
        # Base metrics in the CSV
        base_metrics = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

        # The first stock uses the bare metric name; later stocks use the
        # numbered form metric.1, metric.2, ...
        return [
            (metric if stock_idx == 0 else f"{metric}.{stock_idx}", stock_idx)
            for stock_idx in range(self.stocks_count)
            for metric in base_metrics
        ]

    def create_targets(self):
        """
        Create target tensors based on the specific CSV format with numbered columns
        Format: [Metric], [Metric].1, [Metric].2, etc. for each company

        Missing columns and values that cannot be parsed as numbers fall back
        to 0.0; each affected column is reported once. Genuinely empty cells
        stay NaN. If the row count differs from the number of embeddings, both
        are truncated to the shorter length.
        """
        print("Creating target tensors...")

        df = self.stock_df
        row_count = len(df)

        if row_count == 0:
            self.targets = None
            print("Warning: No targets created")
            return

        column_values = []
        for col_name, stock_idx in self._target_columns():
            if col_name not in df.columns:
                print(f"Error extracting {col_name} (for {self.stock_order[stock_idx]}): "
                      f"column not found in CSV; using 0.0 for all {row_count} rows")
                numeric = pd.Series(0.0, index=df.index)
            else:
                raw = df[col_name]
                numeric = pd.to_numeric(raw, errors='coerce')
                # Present in the CSV but not parseable as a number
                unparsable = numeric.isna() & raw.notna()
                if unparsable.any():
                    print(f"Error extracting {col_name} (for {self.stock_order[stock_idx]}): "
                          f"{int(unparsable.sum())} non-numeric value(s); using 0.0")
                    numeric = numeric.mask(unparsable, 0.0)
            column_values.append(numeric.to_numpy(dtype='float64'))

        self.targets = torch.tensor(np.stack(column_values, axis=1), dtype=torch.float32)
        print(f"Created {row_count} targets with shape {self.targets.shape}")

        # Make sure we have the same number of targets as embeddings
        embedding_count = self.embeddings.shape[0]
        if row_count != embedding_count:
            print(f"Warning: Number of targets ({row_count}) doesn't match number of embeddings ({embedding_count})")

            # Take the minimum number to ensure alignment.
            # NOTE(review): this assumes both files list examples in the same
            # order -- nothing here verifies that.
            min_length = min(row_count, embedding_count)
            self.targets = self.targets[:min_length]
            self.embeddings = self.embeddings[:min_length]
            print(f"Truncated to {min_length} matching examples")

    def create_dataloader(self, batch_size=16, shuffle=True):
        """
        Create a DataLoader for the dataset

        Args:
            batch_size (int): Batch size
            shuffle (bool): Whether to shuffle samples

        Returns:
            torch.utils.data.DataLoader: DataLoader for the dataset, or None
            when targets or embeddings are unavailable
        """
        if self.targets is None or self.embeddings is None:
            return None

        return DataLoader(
            TensorDataset(self.embeddings, self.targets),
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=2,
            pin_memory=True
        )

class ImprovedStockPredictionTrainer:
    """
    Trainer for the stock prediction model.

    Optimises a Huber loss on the closing-price columns only, while also
    tracking the loss over all output columns, closing-price MSE / R^2 and a
    batch-to-batch directional accuracy.
    """
    def __init__(self, model, optimizer, metrics_count=6, stocks_count=5, device='cuda'):
        """
        Initialize the trainer.

        Args:
            model (nn.Module): Model to train; it is moved to ``device``.
            optimizer (torch.optim.Optimizer): Optimizer over the model's parameters.
            metrics_count (int): Number of metrics per stock in the output.
            stocks_count (int): Number of stocks in the output.
            device (str | torch.device): Device used for model and batches.
        """
        self.model = model.to(device)
        self.optimizer = optimizer
        self.metrics_count = metrics_count
        self.stocks_count = stocks_count
        self.device = device
        self.scheduler = None

        # Use Huber loss for robustness against outliers
        self.criterion = nn.HuberLoss(delta=1.0)

        # Close price is index 3 in each stock's metrics group, so the close
        # columns are 3, 3 + metrics_count, 3 + 2 * metrics_count, ...
        self.close_indices = [i * metrics_count + 3 for i in range(stocks_count)]

        # Previous training batch, kept for directional accuracy
        self.prev_close_targets = None
        self.prev_close_preds = None

    def set_scheduler(self, scheduler):
        """Set learning rate scheduler"""
        self.scheduler = scheduler

    def extract_close_prices(self, predictions, targets):
        """Extract only closing price predictions and targets"""
        close_pred = predictions[:, self.close_indices]
        close_targets = targets[:, self.close_indices]
        return close_pred, close_targets

    def calculate_directional_accuracy(self, current_preds, current_targets, prev_preds=None, prev_targets=None):
        """
        Calculate directional accuracy (up/down prediction accuracy)

        The current batch is compared element-wise with a previous batch: a
        prediction counts as correct when it moved in the same direction
        (up vs. not up) as the target did.

        When neither ``prev_preds`` nor ``prev_targets`` is supplied, the
        trainer's stored previous batch is used and then replaced by the
        current one. When the caller supplies previous values explicitly, the
        stored state is left untouched, so evaluating one data stream cannot
        overwrite the history of another.

        Returns:
            float | None: Fraction of matching directions, or None when no
            previous batch is available yet.
        """
        caller_supplied_history = prev_preds is not None or prev_targets is not None

        # If no previous data provided, use stored values
        if prev_preds is None:
            prev_preds = self.prev_close_preds
        if prev_targets is None:
            prev_targets = self.prev_close_targets

        def remember_current():
            # Only the implicit (stored-state) stream updates the trainer
            if not caller_supplied_history:
                self.prev_close_preds = current_preds.detach().clone()
                self.prev_close_targets = current_targets.detach().clone()

        # If we still don't have previous values, we can't calculate direction
        if prev_preds is None or prev_targets is None:
            remember_current()
            return None

        # Batches may differ in size (e.g. the last one); compare the overlap
        min_batch_size = min(current_preds.shape[0], prev_preds.shape[0])

        curr_preds_np = current_preds[:min_batch_size].detach().cpu().numpy()
        curr_targets_np = current_targets[:min_batch_size].detach().cpu().numpy()
        prev_preds_np = prev_preds[:min_batch_size].detach().cpu().numpy()
        prev_targets_np = prev_targets[:min_batch_size].detach().cpu().numpy()

        # Calculate directions (1 for up, 0 for down or same)
        pred_direction = (curr_preds_np > prev_preds_np).astype(int)
        target_direction = (curr_targets_np > prev_targets_np).astype(int)

        accuracy = np.mean(pred_direction == target_direction)

        # Store current values for next time (keep full batch)
        remember_current()

        return accuracy

    def train_step(self, inputs, targets):
        """
        Single training step with gradient clipping.

        Returns:
            tuple: (close-price loss, full loss over all columns, directional
            accuracy or None), the losses as Python floats.
        """
        # Move data to device
        inputs = inputs.to(self.device)
        targets = targets.to(self.device)

        # Zero gradients
        self.optimizer.zero_grad()

        # Forward pass
        outputs = self.model(inputs)

        # Only the closing-price columns drive the optimisation
        close_pred, close_targets = self.extract_close_prices(outputs, targets)
        loss = self.criterion(close_pred, close_targets)

        # The full loss is tracked for reporting only, so it needs no graph
        with torch.no_grad():
            full_loss = self.criterion(outputs, targets)

        # Backward pass
        loss.backward()

        # Gradient clipping to prevent explosion
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)

        # Update parameters
        self.optimizer.step()

        # Calculate directional accuracy against the previous training batch
        dir_accuracy = self.calculate_directional_accuracy(close_pred, close_targets)

        return loss.item(), full_loss.item(), dir_accuracy

    def validate(self, val_dataloader):
        """
        Evaluate the model on a dataloader.

        The model is switched to eval mode for the pass and restored to
        whatever mode it was in before the call.

        Returns:
            dict: close_val_loss, full_val_loss (batch averages), close_mse,
            close_r2, directional_accuracy, and stock_metrics keyed
            'stock_0', 'stock_1', ... each with mse / r2 / dir_accuracy.

        Raises:
            ValueError: If the dataloader yields no batches.
        """
        num_batches = len(val_dataloader)
        if num_batches == 0:
            raise ValueError("validate() needs a dataloader with at least one batch")

        was_training = self.model.training
        self.model.eval()

        close_val_loss = 0
        full_val_loss = 0
        all_close_preds = []
        all_close_targets = []
        directional_accuracies = []

        # Local history: the first batch of a validation pass has no predecessor
        prev_close_preds = None
        prev_close_targets = None

        try:
            with torch.no_grad():
                for batch in val_dataloader:
                    inputs = batch[0].to(self.device)
                    targets = batch[1].to(self.device)

                    outputs = self.model(inputs)

                    # Extract closing prices
                    close_pred, close_targets = self.extract_close_prices(outputs, targets)

                    # Accumulate per-batch losses
                    close_val_loss += self.criterion(close_pred, close_targets).item()
                    full_val_loss += self.criterion(outputs, targets).item()

                    # Calculate directional accuracy if we have previous values
                    if prev_close_preds is not None and prev_close_targets is not None:
                        dir_acc = self.calculate_directional_accuracy(
                            close_pred, close_targets, prev_close_preds, prev_close_targets
                        )
                        if dir_acc is not None:
                            directional_accuracies.append(dir_acc)

                    # Store for next iteration
                    prev_close_preds = close_pred.detach().clone()
                    prev_close_targets = close_targets.detach().clone()

                    all_close_preds.append(close_pred.cpu())
                    all_close_targets.append(close_targets.cpu())
        finally:
            # Restore the caller's mode instead of forcing train mode
            self.model.train(was_training)

        # Stack predictions and targets
        all_close_preds = torch.cat(all_close_preds, dim=0)
        all_close_targets = torch.cat(all_close_targets, dim=0)

        close_preds_np = all_close_preds.numpy()
        close_targets_np = all_close_targets.numpy()

        # Overall metrics for closing prices (scale down to avoid overflow):
        # when any |target| exceeds 1000, divide both sides by the largest one
        scale_factor = 1.0
        largest_target = np.max(np.abs(close_targets_np))
        if largest_target > 1000:
            scale_factor = largest_target

        scaled_preds = close_preds_np / scale_factor
        scaled_targets = close_targets_np / scale_factor

        close_mse = mean_squared_error(scaled_targets, scaled_preds)

        # R2 is best-effort: report NaN rather than abort the evaluation
        try:
            close_r2 = r2_score(scaled_targets, scaled_preds)
        except Exception:
            close_r2 = float('nan')

        # Average directional accuracy
        dir_accuracy = np.mean(directional_accuracies) if directional_accuracies else float('nan')

        # Per-stock metrics for closing prices
        stock_metrics = {}
        for s in range(self.stocks_count):
            stock_pred = scaled_preds[:, s]
            stock_target = scaled_targets[:, s]

            stock_mse = mean_squared_error(stock_target, stock_pred)

            try:
                stock_r2 = r2_score(stock_target, stock_pred)
            except Exception:
                stock_r2 = float('nan')

            # Directional accuracy uses the unscaled series
            stock_dir_acc = self.calculate_stock_directional_accuracy(
                all_close_preds[:, s].numpy(),
                all_close_targets[:, s].numpy()
            )

            stock_metrics[f'stock_{s}'] = {
                'mse': stock_mse,
                'r2': stock_r2,
                'dir_accuracy': stock_dir_acc
            }

        return {
            'close_val_loss': close_val_loss / num_batches,
            'full_val_loss': full_val_loss / num_batches,
            'close_mse': close_mse,
            'close_r2': close_r2,
            'directional_accuracy': dir_accuracy,
            'stock_metrics': stock_metrics
        }

    def calculate_stock_directional_accuracy(self, predictions, targets):
        """
        Calculate directional accuracy for a single stock's series.

        Consecutive differences of predictions and targets are compared:
        a step counts as correct when both moved up, or both did not.

        Returns:
            float: Fraction of matching steps, or NaN when either series has
            fewer than two points.
        """
        if len(predictions) <= 1 or len(targets) <= 1:
            return float('nan')

        # Convert step-to-step changes to directional signals (1 for up, 0 for down/same)
        pred_direction = (np.diff(predictions) > 0).astype(int)
        target_direction = (np.diff(targets) > 0).astype(int)

        return np.mean(pred_direction == target_direction)

    def predict(self, dataloader):
        """Generate predictions for every batch and return them concatenated on CPU."""
        self.model.eval()
        all_preds = []

        with torch.no_grad():
            for batch in dataloader:
                inputs = batch[0].to(self.device)
                outputs = self.model(inputs)
                all_preds.append(outputs.cpu())

        return torch.cat(all_preds, dim=0)

    def save_model(self, path):
        """Save model weights"""
        torch.save(self.model.state_dict(), path)
        print(f"Model saved to {path}")

    def train(self, train_dataloader, val_dataloader, epochs, save_path=None, early_stopping_patience=10):
        """
        Train the model with per-epoch validation and early stopping.

        Args:
            train_dataloader: Yields (inputs, targets) batches for training.
            val_dataloader: Passed to ``validate`` after every epoch.
            epochs (int): Maximum number of epochs.
            save_path (str | None): Where to save weights whenever the
                validation close loss improves; nothing is saved if falsy.
            early_stopping_patience (int): Stop after this many consecutive
                epochs without improvement.

        Returns:
            dict: Per-epoch lists for train/val close loss, train/val full
            loss, close_mse, close_r2 and directional_accuracy.

        Raises:
            ValueError: If the training dataloader yields no batches.
        """
        num_train_batches = len(train_dataloader)
        if num_train_batches == 0:
            raise ValueError("train() needs a training dataloader with at least one batch")

        history = {
            'train_close_loss': [],
            'train_full_loss': [],
            'val_close_loss': [],
            'val_full_loss': [],
            'close_mse': [],
            'close_r2': [],
            'directional_accuracy': []
        }

        best_val_loss = float('inf')
        no_improvement_count = 0

        for epoch in range(epochs):
            # Training
            self.model.train()
            train_close_loss = 0
            train_full_loss = 0

            for inputs, targets in train_dataloader:
                close_loss, full_loss, _ = self.train_step(inputs, targets)
                train_close_loss += close_loss
                train_full_loss += full_loss

            avg_train_close_loss = train_close_loss / num_train_batches
            avg_train_full_loss = train_full_loss / num_train_batches

            # Validation
            val_metrics = self.validate(val_dataloader)
            val_close_loss = val_metrics['close_val_loss']
            val_dir_acc = val_metrics.get('directional_accuracy', float('nan'))

            # Update history
            history['train_close_loss'].append(avg_train_close_loss)
            history['train_full_loss'].append(avg_train_full_loss)
            history['val_close_loss'].append(val_close_loss)
            history['val_full_loss'].append(val_metrics['full_val_loss'])
            history['close_mse'].append(val_metrics['close_mse'])
            history['close_r2'].append(val_metrics['close_r2'])
            history['directional_accuracy'].append(val_dir_acc)

            # ReduceLROnPlateau needs the monitored value; other schedulers do not
            if self.scheduler is not None:
                if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                    self.scheduler.step(val_close_loss)
                else:
                    self.scheduler.step()

            # Print progress
            print(f"Epoch {epoch+1}/{epochs} - "
                  f"Train Close Loss: {avg_train_close_loss:.6f}, "
                  f"Val Close Loss: {val_close_loss:.6f}, "
                  f"Close MSE: {val_metrics['close_mse']:.6f}, "
                  f"Close R²: {val_metrics['close_r2']:.6f}, "
                  f"Dir Acc: {val_dir_acc:.4f}")

            # Check for early stopping
            if val_close_loss < best_val_loss:
                best_val_loss = val_close_loss
                no_improvement_count = 0

                # Save the best model
                if save_path:
                    self.save_model(save_path)
            else:
                no_improvement_count += 1
                if no_improvement_count >= early_stopping_patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break

        return history

def plot_improved_metrics(history, save_dir=None):
    """
    Draw the per-epoch training curves, one figure per metric group.

    Five figures are produced in a fixed order: closing-price loss,
    directional accuracy, full loss, validation close MSE and validation
    close R².  Every figure is closed right after it is drawn and is written
    to disk only when ``save_dir`` is given.

    Args:
        history (dict): Maps metric names to per-epoch sequences. The keys
            read are 'train_close_loss', 'val_close_loss',
            'directional_accuracy', 'train_full_loss', 'val_full_loss',
            'close_mse' and 'close_r2'.
        save_dir (str, optional): Directory that receives the PNG files; it
            is created when missing. When falsy, nothing is saved.
    """
    # One entry per figure:
    #   (curves as (history key, legend label), y-axis label, title, file name)
    figure_specs = (
        (
            (('train_close_loss', 'Train Close Loss'),
             ('val_close_loss', 'Val Close Loss')),
            'Loss',
            'Training and Validation Loss (Closing Price)',
            'close_loss_plot.png',
        ),
        (
            (('directional_accuracy', 'Directional Accuracy'),),
            'Accuracy',
            'Up/Down Movement Prediction Accuracy',
            'directional_accuracy_plot.png',
        ),
        (
            (('train_full_loss', 'Train Full Loss'),
             ('val_full_loss', 'Val Full Loss')),
            'Loss',
            'Training and Validation Loss (All Metrics)',
            'full_loss_plot.png',
        ),
        (
            (('close_mse', 'Validation Close MSE'),),
            'MSE',
            'Validation Mean Squared Error (Closing Price)',
            'close_mse_plot.png',
        ),
        (
            (('close_r2', 'Validation Close R²'),),
            'R²',
            'Validation R² Score (Closing Price)',
            'close_r2_plot.png',
        ),
    )

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    for curves, y_label, heading, file_name in figure_specs:
        plt.figure(figsize=(10, 6))
        for history_key, legend_label in curves:
            plt.plot(history[history_key], label=legend_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(heading)
        plt.legend()
        plt.grid(True)
        if save_dir:
            plt.savefig(os.path.join(save_dir, file_name))
        # Close every figure so repeated calls do not accumulate open figures.
        plt.close()


def train_and_evaluate_model(
    embeddings_dir,
    stock_csv_path,
    output_dir="output_tinybert_lora_stock_prediction",
    metrics_count=6,  # Updated to 6 features
    stocks_count=5,
    epochs=50,
    batch_size=16,
    lr=3e-5,
    lora_r=8,
    lora_alpha=16
):
    """
    Train a TinyBERT with LoRA model for stock prediction and evaluate it.

    The function builds train/val/test datasets and dataloaders, trains the
    model with early stopping, plots the training history, and writes test
    predictions and metrics to ``output_dir``.

    Args:
        embeddings_dir (str): Directory containing embeddings and dates files
        stock_csv_path (str): Path to stock prices CSV file
        output_dir (str): Directory to save outputs
        metrics_count (int): Number of metrics per stock (now 6)
        stocks_count (int): Number of stocks
        epochs (int): Number of training epochs
        batch_size (int): Batch size
        lr (float): Learning rate
        lora_r (int): LoRA rank
        lora_alpha (int): LoRA alpha

    Returns:
        The metrics dict returned by ``trainer.validate(test_dataloader)``,
        or None when one of the dataloaders could not be created.
    """
    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Create one dataset per split from "<split>_embeddings_9x312.pt" and
    # "<split>_dates.csv".
    # NOTE(review): this passes five positional arguments to StockDataset,
    # while improved_main() constructs it without a dates file — confirm which
    # StockDataset definition is in scope when this function runs.
    datasets = {}
    for split in ("train", "val", "test"):
        datasets[split] = StockDataset(
            os.path.join(embeddings_dir, f"{split}_embeddings_9x312.pt"),
            os.path.join(embeddings_dir, f"{split}_dates.csv"),
            stock_csv_path,
            metrics_count,
            stocks_count
        )

    # Create dataloaders (only the training split is shuffled)
    train_dataloader = datasets["train"].create_dataloader(batch_size=batch_size, shuffle=True)
    val_dataloader = datasets["val"].create_dataloader(batch_size=batch_size, shuffle=False)
    test_dataloader = datasets["test"].create_dataloader(batch_size=batch_size, shuffle=False)

    if not all([train_dataloader, val_dataloader, test_dataloader]):
        print("Error: Failed to create one or more dataloaders. Check data matching.")
        return None

    # Output dimension is metrics_count * stocks_count
    output_dim = metrics_count * stocks_count

    # Create model
    model = TinyBERTStockPredictor(
        input_dim=312,  # From the embeddings
        hidden_dim=312,  # TinyBERT hidden size
        output_dim=output_dim,
        lora_r=lora_r,
        lora_alpha=lora_alpha
    )

    print(f"Model created with {sum(p.numel() for p in model.parameters())} parameters")
    print(f"Input dimension: 312, Output dimension: {output_dim}")

    # Create optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)

    # Create scheduler. The `verbose` keyword is deprecated for PyTorch LR
    # schedulers, so it is intentionally not passed here.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5
    )

    # Create trainer
    trainer = StockPredictionTrainer(
        model,
        optimizer,
        metrics_count=metrics_count,
        stocks_count=stocks_count,
        device=device
    )
    trainer.set_scheduler(scheduler)

    # Train model
    print(f"\nTraining for {epochs} epochs...")
    history = trainer.train(
        train_dataloader,
        val_dataloader,
        epochs=epochs,
        save_path=os.path.join(output_dir, 'best_model.pt'),
        early_stopping_patience=10
    )

    # Plot training metrics
    plot_metrics(history, save_dir=output_dir)

    # Generate test predictions
    # NOTE(review): nothing in this function reloads 'best_model.pt', so the
    # predictions presumably come from the weights left after the last epoch —
    # confirm whether the best checkpoint should be restored first.
    print("\nGenerating test predictions...")
    test_predictions = trainer.predict(test_dataloader)

    # Save test predictions
    predictions_path = os.path.join(output_dir, 'test_predictions.pt')
    torch.save(test_predictions, predictions_path)
    print(f"Test predictions saved to {predictions_path}")

    # Extract closing prices from test predictions: the Close column is taken
    # at offset 3 inside each stock's block of `metrics_count` outputs.
    close_indices = [i * metrics_count + 3 for i in range(stocks_count)]
    close_predictions = test_predictions[:, close_indices]

    # Save close predictions separately
    close_predictions_path = os.path.join(output_dir, 'test_close_predictions.pt')
    torch.save(close_predictions, close_predictions_path)
    print(f"Close price predictions saved to {close_predictions_path}")

    # Final test evaluation
    test_metrics = trainer.validate(test_dataloader)
    print("\nFinal test metrics:")
    print(f"Close Loss: {test_metrics['close_val_loss']:.6f}")
    print(f"Close MSE: {test_metrics['close_mse']:.6f}")
    print(f"Close R²: {test_metrics['close_r2']:.6f}")

    # Build exactly `stocks_count` labels. The known tickers cover the default
    # five-stock setup; any extra stock falls back to its 'stock_<i>' key so
    # the label column always matches the metric columns in length.
    known_symbols = ['AAPL', 'AMZN', 'GOOGL', 'META', 'NFLX']
    stock_symbols = [
        known_symbols[i] if i < len(known_symbols) else f'stock_{i}'
        for i in range(stocks_count)
    ]

    # Per-stock metrics
    print("\nPer-stock closing price metrics:")
    for stock_idx, (stock_name, metrics) in enumerate(test_metrics['stock_metrics'].items()):
        stock_symbol = stock_symbols[stock_idx] if stock_idx < len(stock_symbols) else stock_name
        print(f"{stock_symbol}: MSE={metrics['mse']:.6f}, R²={metrics['r2']:.6f}")

    # Save metrics as CSV
    per_stock = [test_metrics['stock_metrics'][f'stock_{i}'] for i in range(stocks_count)]
    stock_metrics_df = pd.DataFrame({
        'Stock': stock_symbols,
        'MSE': [entry['mse'] for entry in per_stock],
        'R2': [entry['r2'] for entry in per_stock]
    })
    stock_metrics_df.to_csv(os.path.join(output_dir, 'stock_metrics.csv'), index=False)

    # Save overall metrics
    overall_metrics = {
        'close_mse': test_metrics['close_mse'],
        'close_r2': test_metrics['close_r2'],
        'close_loss': test_metrics['close_val_loss'],
        'full_loss': test_metrics['full_val_loss']
    }

    pd.DataFrame([overall_metrics]).to_csv(os.path.join(output_dir, 'overall_metrics.csv'), index=False)

    return test_metrics

def remap_targets_to_model_format(targets, output_dim=30):
    """
    Remaps the targets from the CSV format to the model's expected format.

    Original format is grouped by metric then company:
    [AAPL_AdjClose, AMZN_AdjClose, GOOGL_AdjClose, META_AdjClose, NFLX_AdjClose,
     AAPL_Close, AMZN_Close, GOOGL_Close, META_Close, NFLX_Close, ...etc]

    Model expects format grouped by company then metric:
    [AAPL_Open, AAPL_High, AAPL_Low, AAPL_Close, AAPL_AdjClose, AAPL_Volume,
     AMZN_Open, AMZN_High, ...etc]

    The result is allocated with the dtype and device of ``targets`` and the
    input tensor is left unmodified.

    Args:
        targets (torch.Tensor): Tensor of shape [batch_size, 30] in CSV order
        output_dim (int): Expected output dimension (default 30). Columns
            beyond the first 30 are left as zeros.

    Returns:
        torch.Tensor: Reordered tensor of shape [batch_size, output_dim] in
        the model's expected format

    Raises:
        IndexError: If ``targets`` is not 2-D with at least 30 columns, or if
            ``output_dim`` is smaller than 30.
    """
    # CSV order of metrics
    csv_metrics = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

    # Model expected order of metrics
    model_metrics = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    # Number of stocks and metrics
    stocks_count = 5
    metrics_count = 6
    mapped_width = stocks_count * metrics_count

    # Fail early with a readable message. IndexError is kept because that is
    # what out-of-range column indexing raised before.
    if targets.dim() != 2 or targets.shape[1] < mapped_width:
        raise IndexError(
            f"targets must have shape [batch_size, >= {mapped_width}], "
            f"got {tuple(targets.shape)}"
        )
    if output_dim < mapped_width:
        raise IndexError(
            f"output_dim must be at least {mapped_width}, got {output_dim}"
        )

    # source_columns[k] is the CSV column that feeds model column k, where
    # k = stock_idx * metrics_count + model_metric_idx.
    source_columns = [
        csv_metrics.index(metric_name) * stocks_count + stock_idx
        for stock_idx in range(stocks_count)
        for metric_name in model_metrics
    ]
    column_index = torch.tensor(source_columns, dtype=torch.long, device=targets.device)

    # new_zeros inherits dtype AND device from `targets`, so the result stays
    # on the same device as the input.
    reordered = targets.new_zeros((targets.shape[0], output_dim))
    reordered[:, :mapped_width] = targets.index_select(1, column_index)

    return reordered


def _remap_collate_fn(batch):
    """
    Collate (embedding, target) pairs and reorder the targets for the model.

    This lives at module level instead of inside ``improved_main`` because a
    locally defined function cannot be pickled, which DataLoader worker
    processes require when they are started with the ``spawn`` method.

    Args:
        batch (list): Sequence of (input tensor, target tensor) pairs.

    Returns:
        tuple: (stacked inputs, stacked targets reordered by
        ``remap_targets_to_model_format``)
    """
    inputs = torch.stack([item[0] for item in batch])
    targets = torch.stack([item[1] for item in batch])
    # Reorder targets to match model's expected format
    return inputs, remap_targets_to_model_format(targets)


def improved_main():
    """
    Main function with improved model and evaluation.

    Validates that the embedding and target files exist, builds the three
    dataloaders, trains ``ImprovedTinyBERTStockPredictor`` with
    ``ImprovedStockPredictionTrainer``, then writes plots, test predictions
    and directional-accuracy CSV files to the output directory.

    Returns:
        The metrics dict returned by ``trainer.validate(test_dataloader)``,
        or None when required files are missing or a dataloader is empty.
    """
    # Run configuration, kept in one place so the log messages and the calls
    # below cannot drift apart.
    metrics_count = 6
    stocks_count = 5
    epochs = 50
    batch_size = 16
    early_stopping_patience = 10

    # Define paths to required data
    embeddings_dir = "/content"  # Base directory

    # Embeddings files
    train_embeddings_path = os.path.join(embeddings_dir, "train_embeddings_9x312(3).pt")
    val_embeddings_path = os.path.join(embeddings_dir, "val_embeddings_9x312(2).pt")
    test_embeddings_path = os.path.join(embeddings_dir, "test_embeddings_9x312(1).pt")

    # Target CSV files
    train_stocks_path = os.path.join(embeddings_dir, "train.csv")
    val_stocks_path = os.path.join(embeddings_dir, "val.csv")
    test_stocks_path = os.path.join(embeddings_dir, "test.csv")

    # Output directory
    output_dir = "improved_tinybert_lora_stock_prediction"

    # Check that the data files exist before touching the file system. The
    # input directory is deliberately not created here: an empty directory
    # would only hide a wrong path.
    required_files = [
        train_embeddings_path,
        val_embeddings_path,
        test_embeddings_path,
        train_stocks_path,
        val_stocks_path,
        test_stocks_path
    ]

    missing_files = [f for f in required_files if not os.path.exists(f)]
    if missing_files:
        print("Error: The following required files are missing:")
        for file in missing_files:
            print(f" - {file}")
        print("\nPlease ensure all required data files are available before running.")
        return None

    # Inputs are present, so it is now worth creating the output directory.
    os.makedirs(output_dir, exist_ok=True)

    # Create datasets using the specific CSV format handler
    train_dataset = StockDataset(
        train_embeddings_path,
        train_stocks_path,
        metrics_count=metrics_count,
        stocks_count=stocks_count
    )

    val_dataset = StockDataset(
        val_embeddings_path,
        val_stocks_path,
        metrics_count=metrics_count,
        stocks_count=stocks_count
    )

    test_dataset = StockDataset(
        test_embeddings_path,
        test_stocks_path,
        metrics_count=metrics_count,
        stocks_count=stocks_count
    )

    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just produces a warning.
    use_pinned_memory = torch.cuda.is_available()

    def build_loader(dataset, shuffle):
        """Wrap a dataset's tensors in a DataLoader that reorders targets."""
        return DataLoader(
            TensorDataset(dataset.embeddings, dataset.targets),
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=_remap_collate_fn,
            num_workers=2,
            pin_memory=use_pinned_memory
        )

    train_dataloader = build_loader(train_dataset, shuffle=True)
    # Validation and test stay sequential: important for directional accuracy
    val_dataloader = build_loader(val_dataset, shuffle=False)
    test_dataloader = build_loader(test_dataset, shuffle=False)

    # A DataLoader's truthiness comes from its __len__ (number of batches).
    if not all([train_dataloader, val_dataloader, test_dataloader]):
        print("Error: Failed to create one or more dataloaders. Check data matching.")
        return None

    # Output dimension is metrics_count * stocks_count
    output_dim = metrics_count * stocks_count  # 6 metrics for 5 stocks

    # Create improved model
    model = ImprovedTinyBERTStockPredictor(
        input_dim=312,  # From the embeddings
        hidden_dim=312,  # TinyBERT hidden size
        output_dim=output_dim,
        lora_r=8,
        lora_alpha=16
    )

    print(f"Model created with {sum(p.numel() for p in model.parameters())} parameters")
    print(f"Input dimension: 312, Output dimension: {output_dim}")

    # Create optimizer with weight decay
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, weight_decay=0.05)

    # Create scheduler - use CosineAnnealingLR instead of ReduceLROnPlateau
    # NOTE(review): T_max (20) is shorter than the epoch budget (50), so the
    # learning rate presumably starts rising again after epoch 20 — confirm
    # this is intended.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=20, eta_min=1e-6
    )

    # Create improved trainer
    trainer = ImprovedStockPredictionTrainer(
        model,
        optimizer,
        metrics_count=metrics_count,
        stocks_count=stocks_count,
        device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    )
    trainer.set_scheduler(scheduler)

    # Train model
    print(f"\nTraining for {epochs} epochs...")
    history = trainer.train(
        train_dataloader,
        val_dataloader,
        epochs=epochs,
        save_path=os.path.join(output_dir, 'best_model.pt'),
        early_stopping_patience=early_stopping_patience
    )

    # Plot training metrics with directional accuracy
    plot_improved_metrics(history, save_dir=output_dir)

    # Generate test predictions
    # NOTE(review): 'best_model.pt' is not reloaded here, so predictions
    # presumably use the weights left after the final epoch — confirm whether
    # the best checkpoint should be restored first.
    print("\nGenerating test predictions...")
    test_predictions = trainer.predict(test_dataloader)

    # Save test predictions
    predictions_path = os.path.join(output_dir, 'test_predictions.pt')
    torch.save(test_predictions, predictions_path)
    print(f"Test predictions saved to {predictions_path}")

    # Extract closing prices from test predictions
    # Index 3 is Close in the model's order
    close_indices = [i * metrics_count + 3 for i in range(stocks_count)]
    close_predictions = test_predictions[:, close_indices]

    # Save close predictions separately
    close_predictions_path = os.path.join(output_dir, 'test_close_predictions.pt')
    torch.save(close_predictions, close_predictions_path)
    print(f"Close price predictions saved to {close_predictions_path}")

    # Final test evaluation
    test_metrics = trainer.validate(test_dataloader)
    print("\nFinal test metrics:")
    print(f"Close Loss: {test_metrics['close_val_loss']:.6f}")
    print(f"Close MSE: {test_metrics['close_mse']:.6f}")
    print(f"Close R²: {test_metrics['close_r2']:.6f}")
    print(f"Directional Accuracy: {test_metrics.get('directional_accuracy', float('nan')):.4f}")

    # Per-stock metrics
    print("\nPer-stock closing price metrics:")
    stock_symbols = ['AAPL', 'AMZN', 'GOOGL', 'META', 'NFLX']
    for stock_idx, (stock_name, metrics) in enumerate(test_metrics['stock_metrics'].items()):
        stock_symbol = stock_symbols[stock_idx % 5]
        print(f"{stock_symbol}: MSE={metrics['mse']:.6f}, R²={metrics['r2']:.6f}, "
              f"Dir Acc={metrics.get('dir_accuracy', float('nan')):.4f}")

    # Save directional accuracy metrics separately
    dir_acc_metrics = {
        'overall': test_metrics.get('directional_accuracy', float('nan')),
        'stocks': {
            stock: metrics.get('dir_accuracy', float('nan'))
            for stock, (_, metrics) in zip(stock_symbols, test_metrics['stock_metrics'].items())
        }
    }

    pd.DataFrame([dir_acc_metrics['overall']], columns=['DirectionalAccuracy']).to_csv(
        os.path.join(output_dir, 'directional_accuracy.csv'), index=False
    )

    pd.DataFrame({
        'Stock': stock_symbols,
        'DirectionalAccuracy': [dir_acc_metrics['stocks'].get(stock, float('nan')) for stock in stock_symbols]
    }).to_csv(os.path.join(output_dir, 'stock_directional_accuracy.csv'), index=False)

    return test_metrics


# Script entry point: run the improved training and evaluation pipeline.
if __name__ == "__main__":
    improved_main()

Using device: cuda
Loading embeddings from /content/train_embeddings_9x312(3).pt
Loading stock data from /content/train.csv
Loading stock price data from /content/train.csv...
CSV structure sample:
   Unnamed: 0  Adj Close  Adj Close.1  Adj Close.2  Adj Close.3  Adj Close.4  \
0  2018-01-09  40.966282    62.634998    55.374752   186.988708   209.309998   

     Close    Close.1  Close.2     Close.3  ...     Open.1     Open.2  \
0  43.5825  62.634998  55.6395  187.869995  ...  62.845001  55.922001   

       Open.3      Open.4      Volume    Volume.1    Volume.2    Volume.3  \
0  188.699997  212.110001  86336000.0  73226000.0  26808000.0  12393100.0   

    Volume.4  is_business_day  
0  6125900.0             True  

[1 rows x 32 columns]
CSV columns: ['Unnamed: 0', 'Adj Close', 'Adj Close.1', 'Adj Close.2', 'Adj Close.3', 'Adj Close.4', 'Close', 'Close.1', 'Close.2', 'Close.3', 'Close.4', 'High', 'High.1', 'High.2', 'High.3', 'High.4', 'Low', 'Low.1', 'Low.2', 'Low.3', 'Low.4', 'Open',