In [None]:
import os
import pandas as pd
import torch
import time
import optuna
import logging
import traceback
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from torch.optim import Adam, AdamW
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from google.colab import drive

# Console logging for the whole notebook: INFO and above, timestamped,
# tagged with the logger name and severity.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger shared by every class and function below.
logger = logging.getLogger(__name__)

class SequenceClassificationDataset(Dataset):
    """Map-style dataset pairing tokenizer output with class labels.

    ``inputs`` is indexed with the keys ``'input_ids'`` and
    ``'attention_mask'``; ``labels`` is indexed by example position.
    """

    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __len__(self):
        # One example per encoded sequence.
        return len(self.inputs['input_ids'])

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict with the keys the model forward expects."""
        sample = {
            field: self.inputs[field][idx]
            for field in ('input_ids', 'attention_mask')
        }
        # Labels are converted to int64 tensors when the example is fetched.
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

class FinBertFineTuning:
    def __init__(self, dataset_path, train_file, validation_file, feature_col, label_col, batch_size,
                 learning_rate, num_epochs, max_len, optimizer_name='Adam', weight_decay=0.0,
                 warmup_ratio=0.0, dropout_rate=0.1, device='cpu'):
        """Load the data, tokenizer and FinBERT model and build the training machinery.

        Mounts Google Drive, reads the train/validation CSV files, tokenizes
        both splits, loads ``yiyanghkust/finbert-tone`` with the requested
        dropout, and creates the optimizer, the DataLoaders and (optionally)
        a linear warmup/decay learning-rate scheduler.

        Args:
            dataset_path: Directory containing the CSV files.
            train_file: File name of the training CSV inside ``dataset_path``.
            validation_file: File name of the validation CSV inside ``dataset_path``.
            feature_col: Name of the text column.
            label_col: Name of the label column.
            batch_size: Examples per batch for both DataLoaders.
            learning_rate: Optimizer learning rate.
            num_epochs: Maximum number of training epochs.
            max_len: Token length every sequence is padded/truncated to.
            optimizer_name: ``'Adam'`` or ``'AdamW'``.
            weight_decay: Weight decay passed to the optimizer.
            warmup_ratio: Fraction of all optimizer steps used for linear
                warmup; ``0`` disables the scheduler.
            dropout_rate: Used for both hidden and attention dropout.
            device: ``'cpu'`` or ``'cuda'``; falls back to CPU when CUDA is
                requested but unavailable.

        Raises:
            FileNotFoundError: If either CSV file does not exist.
            ValueError: If a required column is missing, a label is invalid,
                or ``optimizer_name`` is unsupported.
            Exception: Any other error is logged and re-raised unchanged.
        """
        self.dataset_path = dataset_path
        self.train_file = train_file
        self.validation_file = validation_file
        self.feature_col = feature_col
        self.label_col = label_col
        # Using FinBERT model
        self.model_name = 'yiyanghkust/finbert-tone'
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.max_len = max_len
        self.optimizer_name = optimizer_name
        self.weight_decay = weight_decay
        self.warmup_ratio = warmup_ratio
        self.dropout_rate = dropout_rate

        # Check for CUDA availability and set device
        if device == 'cuda' and not torch.cuda.is_available():
            logger.warning("CUDA not available, defaulting to CPU")
            device = 'cpu'
        self.device = torch.device(device)
        logger.info(f"Using device: {self.device}")

        try:
            # Mount Google Drive
            drive.mount('/content/gdrive', force_remount=True)
            logger.info("Google Drive mounted successfully")
        except Exception as e:
            logger.error(f"Error mounting Google Drive: {str(e)}")
            raise

        try:
            # Load tokenizer - using AutoTokenizer for FinBERT
            logger.info(f"Loading tokenizer from {self.model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            # Load datasets
            logger.info(f"Loading datasets from {self.dataset_path}")
            train_path = os.path.join(self.dataset_path, self.train_file)
            val_path = os.path.join(self.dataset_path, self.validation_file)

            if not os.path.exists(train_path):
                logger.error(f"Train file not found: {train_path}")
                raise FileNotFoundError(f"Train file not found: {train_path}")

            if not os.path.exists(val_path):
                logger.error(f"Validation file not found: {val_path}")
                raise FileNotFoundError(f"Validation file not found: {val_path}")

            self.train_df = pd.read_csv(train_path)
            self.validation_df = pd.read_csv(val_path)

            logger.info(f"Train set size: {len(self.train_df)}")
            logger.info(f"Validation set size: {len(self.validation_df)}")

            # Verify the required columns exist in BOTH splits so a malformed
            # validation file is reported clearly instead of as a KeyError later.
            for split_name, frame in (('training', self.train_df), ('validation', self.validation_df)):
                for kind, column in (('Feature', self.feature_col), ('Label', self.label_col)):
                    if column not in frame.columns:
                        message = f"{kind} column '{column}' not found in {split_name} data"
                        logger.error(message)
                        raise ValueError(message)

            # Calculate number of unique labels
            self.num_labels = len(self.train_df[self.label_col].unique())
            logger.info(f"Number of unique labels: {self.num_labels}")
            if self.num_labels < 2:
                logger.warning("Less than 2 unique labels found in training data!")

            # Tokenize datasets
            logger.info("Tokenizing datasets")
            self.tokenized_train = self.tokenize_dataset(self.train_df, self.feature_col, self.label_col)
            self.tokenized_validation = self.tokenize_dataset(self.validation_df, self.feature_col, self.label_col)

            # Model configuration - using AutoConfig for FinBERT
            logger.info("Loading model configuration")
            self.model_config = AutoConfig.from_pretrained(self.model_name, num_labels=self.num_labels,
                                                        hidden_dropout_prob=self.dropout_rate,
                                                        attention_probs_dropout_prob=self.dropout_rate)

            logger.info("Loading model for sequence classification")
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.model_name,
                config=self.model_config
            )
            self.model.to(self.device)

            # Optimizer - use only Adam and AdamW
            logger.info(f"Setting up optimizer: {self.optimizer_name}")
            optimizer_classes = {'Adam': Adam, 'AdamW': AdamW}
            if self.optimizer_name not in optimizer_classes:
                logger.error(f"Unsupported optimizer: {self.optimizer_name}")
                raise ValueError(f"Unsupported optimizer: {self.optimizer_name}")
            self.optimizer = optimizer_classes[self.optimizer_name](
                self.model.parameters(),
                lr=self.learning_rate,
                weight_decay=self.weight_decay
            )

            # DataLoaders (built before the scheduler so the real number of
            # batches per epoch is known)
            logger.info("Creating DataLoaders")
            self.train_dataloader = self.create_dataloader(self.tokenized_train)
            self.validation_dataloader = self.create_dataloader(self.tokenized_validation, shuffle=False)

            # Learning rate scheduler
            if self.warmup_ratio > 0:
                from transformers import get_linear_schedule_with_warmup
                # The scheduler is stepped once per batch. len(dataloader)
                # includes the final partial batch, which floor division of
                # the dataset size would drop and so end the decay too early.
                num_training_steps = len(self.train_dataloader) * self.num_epochs
                num_warmup_steps = int(num_training_steps * self.warmup_ratio)
                logger.info(f"Setting up learning rate scheduler with {num_warmup_steps} warmup steps")
                self.scheduler = get_linear_schedule_with_warmup(
                    self.optimizer,
                    num_warmup_steps=num_warmup_steps,
                    num_training_steps=num_training_steps
                )
            else:
                self.scheduler = None

        except Exception as e:
            logger.error(f"Error during initialization: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def tokenize_dataset(self, df, feature_col, label_col):
        logger.info(f"Tokenizing {len(df)} examples with max_length={self.max_len}")
        # Some extra checks
        text_samples = list(df[feature_col])
        if len(text_samples) == 0:
            logger.error("No text samples found")
            raise ValueError("No text samples found")

        # Check for empty strings
        empty_samples = [i for i, text in enumerate(text_samples) if not isinstance(text, str) or len(text.strip()) == 0]
        if empty_samples:
            logger.warning(f"Found {len(empty_samples)} empty text samples. Replacing with placeholder text.")
            for i in empty_samples:
                text_samples[i] = "empty text"

        # Tokenize with error handling
        try:
            tokenized = self.tokenizer(
                text_samples,
                padding='max_length',  # Changed to use max_length padding strategy
                truncation=True,
                max_length=self.max_len,
                return_tensors='pt'
            )

            # Check that labels are numeric and within range
            labels = list(df[label_col])
            if not all(isinstance(label, (int, float)) or (isinstance(label, str) and label.isdigit()) for label in labels):
                logger.error("Labels must be numeric values")
                raise ValueError("Labels must be numeric values")

            # Convert string labels to int if necessary
            labels = [int(label) if isinstance(label, str) else int(label) for label in labels]

            # Ensure labels are in correct range (0 to num_labels-1)
            unique_labels = set(labels)
            expected_range = set(range(self.num_labels))
            if not unique_labels.issubset(expected_range):
                logger.error(f"Labels outside expected range. Found {unique_labels}, expected range {expected_range}")
                raise ValueError(f"Labels outside expected range. Found {unique_labels}, expected range {expected_range}")

            return tokenized, labels

        except Exception as e:
            logger.error(f"Error in tokenization: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def create_dataloader(self, tokenized_dataset, shuffle=True):
        """Build a batched ``DataLoader`` from an ``(encodings, labels)`` pair.

        Args:
            tokenized_dataset: Indexable whose item 0 holds the encodings and
                item 1 the labels (the pair ``tokenize_dataset`` returns).
            shuffle: Whether batches are drawn in random order.

        Returns:
            A ``DataLoader`` using ``self.batch_size``.

        Raises:
            Exception: Any construction error is logged and re-raised.
        """
        try:
            encodings = tokenized_dataset[0]
            targets = tokenized_dataset[1]
            examples = SequenceClassificationDataset(encodings, targets)
            loader = DataLoader(examples, shuffle=shuffle, batch_size=self.batch_size)
            logger.info(f"Created DataLoader with {len(examples)} examples, batch size {self.batch_size}")
        except Exception as e:
            logger.error(f"Error creating DataLoader: {str(e)}")
            logger.error(traceback.format_exc())
            raise
        return loader

    def evaluate_model(self, dataloader):
        logger.info("Evaluating model")
        self.model.eval()
        all_labels = []
        all_predictions = []
        all_losses = []

        try:
            with torch.no_grad():
                for batch in dataloader:
                    try:
                        inputs = {key: value.to(self.device) for key, value in batch.items()}
                        labels = inputs["labels"]
                        outputs = self.model(**inputs)
                        loss = outputs.loss
                        logits = outputs.logits
                        all_losses.append(loss.item())

                        _, predicted = torch.max(logits, 1)
                        all_labels.extend(labels.cpu().numpy())
                        all_predictions.extend(predicted.cpu().numpy())
                    except Exception as e:
                        logger.error(f"Error processing batch during evaluation: {str(e)}")
                        logger.error(traceback.format_exc())
                        continue  # Skip this batch and try the next one

            accuracy = accuracy_score(all_labels, all_predictions)
            avg_loss = sum(all_losses) / len(all_losses) if all_losses else 0
            logger.info(f"Evaluation results: accuracy={accuracy:.4f}, loss={avg_loss:.4f}")
            return accuracy, avg_loss

        except Exception as e:
            logger.error(f"Error during model evaluation: {str(e)}")
            logger.error(traceback.format_exc())
            # Return very poor performance to signal an error
            return 0.0, float('inf')

    def train(self, trial=None):
        logger.info(f"Starting training for {self.num_epochs} epochs")
        best_val_accuracy = 0.0
        early_stopping_counter = 0
        early_stopping_patience = 2  # early stopping - stop if no improvement for 2 epochs

        try:
            for epoch in range(self.num_epochs):
                logger.info(f"Starting epoch {epoch+1}/{self.num_epochs}")
                self.model.train()
                train_losses = []

                # Training loop
                for batch_idx, batch in enumerate(tqdm(self.train_dataloader, desc=f'Epoch {epoch + 1}/{self.num_epochs}')):
                    try:
                        inputs = {key: value.to(self.device) for key, value in batch.items()}

                        # Clear previous gradients
                        self.optimizer.zero_grad()

                        # Forward pass
                        outputs = self.model(**inputs)
                        loss = outputs.loss
                        train_losses.append(loss.item())

                        # Backward pass and optimize
                        loss.backward()
                        self.optimizer.step()

                        if self.scheduler:
                            self.scheduler.step()

                        # Log periodically
                        if (batch_idx + 1) % 20 == 0:
                            logger.info(f"Epoch {epoch+1}, Batch {batch_idx+1}, Loss: {loss.item():.4f}")

                    except Exception as e:
                        logger.error(f"Error in training batch {batch_idx}: {str(e)}")
                        logger.error(traceback.format_exc())
                        continue  # Skip problematic batches

                # Calculate average training loss for this epoch
                avg_train_loss = sum(train_losses) / len(train_losses) if train_losses else float('inf')
                logger.info(f"Epoch {epoch+1} average training loss: {avg_train_loss:.4f}")

                # Validation
                validation_accuracy, validation_loss = self.evaluate_model(self.validation_dataloader)

                logger.info(f'Epoch {epoch + 1}/{self.num_epochs} - '
                      f'Training Loss: {avg_train_loss:.4f} - '
                      f'Validation Loss: {validation_loss:.4f} - '
                      f'Validation Accuracy: {validation_accuracy:.4f}')

                # Check if we've improved
                if validation_accuracy > best_val_accuracy:
                    best_val_accuracy = validation_accuracy
                    early_stopping_counter = 0
                    logger.info(f"New best validation accuracy: {best_val_accuracy:.4f}")
                else:
                    early_stopping_counter += 1
                    logger.info(f"No improvement in validation accuracy. Early stopping counter: {early_stopping_counter}/{early_stopping_patience}")

                # Report intermediate objective value for Optuna pruning
                if trial is not None:
                    trial.report(validation_accuracy, epoch)
                    # Handle pruning based on the intermediate value
                    if trial.should_prune():
                        logger.info(f"Trial {trial.number} pruned at epoch {epoch+1}")
                        raise optuna.exceptions.TrialPruned()

                # Use early stopping
                if early_stopping_counter >= early_stopping_patience:
                    logger.info(f"Early stopping triggered after epoch {epoch + 1}")
                    break

            logger.info(f"Training completed. Best validation accuracy: {best_val_accuracy:.4f}")
            return best_val_accuracy

        except optuna.exceptions.TrialPruned:
            logger.info("Trial pruned by Optuna")
            raise
        except Exception as e:
            logger.error(f"Error during training: {str(e)}")
            logger.error(traceback.format_exc())
            return 0.0  # Return poor performance to signal an error

    def save_model(self, directory):
        try:
            os.makedirs(directory, exist_ok=True)
            logger.info(f"Saving model to {directory}")
            self.model.save_pretrained(directory)
            self.tokenizer.save_pretrained(directory)

            # Save hyperparameters
            hyperparams = {
                'batch_size': self.batch_size,
                'learning_rate': self.learning_rate,
                'num_epochs': self.num_epochs,
                'max_len': self.max_len,
                'optimizer': self.optimizer_name,
                'weight_decay': self.weight_decay,
                'warmup_ratio': self.warmup_ratio,
                'dropout_rate': self.dropout_rate
            }

            with open(os.path.join(directory, 'hyperparameters.txt'), 'w') as f:
                for key, value in hyperparams.items():
                    f.write(f"{key}: {value}\n")

            logger.info(f"Model and hyperparameters saved successfully to {directory}")

        except Exception as e:
            logger.error(f"Error saving model: {str(e)}")
            logger.error(traceback.format_exc())
            raise



def visualize_study(study):
    """Show the standard Optuna diagnostic plots for ``study``.

    Does nothing (beyond a warning) when the study has no completed trial.
    Each plot is generated independently: a plot that cannot be produced is
    logged and skipped so that it does not prevent the remaining plots from
    being shown.

    Args:
        study: The Optuna study to visualize.
    """
    # Only proceed if we have completed trials
    has_completed = any(t.state == optuna.trial.TrialState.COMPLETE for t in study.trials)
    if not has_completed:
        logger.warning("No completed trials to visualize")
        return

    logger.info("Generating visualization plots")

    # (label used in log messages, name of the optuna.visualization function)
    plots = (
        ("optimization history", "plot_optimization_history"),
        ("parameter importance", "plot_param_importances"),
        ("parallel coordinate", "plot_parallel_coordinate"),
        ("slice", "plot_slice"),
        ("contour", "plot_contour"),
    )

    for label, function_name in plots:
        try:
            # The lookup is inside the try as well: resolving the plotting
            # function can itself fail (e.g. a missing plotting backend).
            figure = getattr(optuna.visualization, function_name)(study)
            figure.show()
            logger.info(f"Generated {label} plot")
        except Exception as e:
            logger.error(f"Visualization failed for {label} plot: {str(e)}")
            logger.error(traceback.format_exc())

def _append_trial_record(results_path, title, outcome_line, hyperparameters):
    """Append one trial's outcome and hyperparameters to the shared results file."""
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    with open(results_path, 'a') as f:
        f.write(f"\n----- {title} -----\n")
        f.write(f"{outcome_line}\n")
        for label, value in hyperparameters:
            f.write(f"{label}: {value}\n")
        f.write("--------------------------\n")


def _top_quartile_threshold(accuracies):
    """Return the lowest accuracy that still ranks in the top 25% (at least one slot)."""
    ranked = sorted(accuracies)
    # max(1, ...) matters: with fewer than 4 values the quartile size is 0,
    # and ranked[-0] would be ranked[0], i.e. the WORST accuracy.
    return ranked[-max(1, len(ranked) // 4)]


def objective(trial):
    """Optuna objective: fine-tune FinBERT with sampled hyperparameters.

    Samples the hyperparameters, trains a ``FinBertFineTuning`` instance,
    appends the outcome to ``OptimizationResults/all_trials.txt`` and saves
    the model when its accuracy ranks in the top quarter of the trials
    completed so far.

    Args:
        trial: The Optuna trial supplying the hyperparameter suggestions.

    Returns:
        The best validation accuracy of the trial, or ``float('-inf')`` when
        the trial failed with an unexpected error.

    Raises:
        optuna.exceptions.TrialPruned: Re-raised so Optuna records the prune.
    """
    logger.info(f"Starting trial {trial.number}")

    # Define the hyperparameter search space with focused options
    batch_size = trial.suggest_categorical('batch_size', [4, 8, 16, 32, 64])
    learning_rate = trial.suggest_float('learning_rate', 5e-6, 1e-4, log=True)
    num_epochs = trial.suggest_int('num_epochs', 2, 10)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'AdamW'])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
    warmup_ratio = trial.suggest_float('warmup_ratio', 0.0, 0.2)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)

    # Log selected hyperparameters
    logger.info(f"Trial {trial.number} hyperparameters: batch_size={batch_size}, "
          f"learning_rate={learning_rate}, num_epochs={num_epochs}, "
          f"optimizer={optimizer_name}, weight_decay={weight_decay}, "
          f"warmup_ratio={warmup_ratio}, dropout_rate={dropout_rate}")

    # Label/value pairs in the order they are written to the results file
    hyperparameters = (
        ("Batch Size", batch_size),
        ("Learning Rate", learning_rate),
        ("Number of Epochs", num_epochs),
        ("Optimizer", optimizer_name),
        ("Weight Decay", weight_decay),
        ("Warmup Ratio", warmup_ratio),
        ("Dropout Rate", dropout_rate),
    )

    # Fixed parameters
    max_len = 512
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Paths and filenames
    absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
    dataset_path = absolute_path + "Datasets/"
    train_file = 'train_set.csv'
    validation_file = 'validation_set.csv'
    feature_col = 'Sentence'
    label_col = 'SentimentNumerical'
    trial_results_path = os.path.join(absolute_path, 'OptimizationResults', 'all_trials.txt')

    # Initialize and train the model
    try:
        classifier = FinBertFineTuning(
            dataset_path, train_file, validation_file, feature_col, label_col,
            batch_size, learning_rate, num_epochs, max_len,
            optimizer_name, weight_decay, warmup_ratio, dropout_rate, device
        )

        # Train and evaluate
        validation_accuracy = classifier.train(trial)
        logger.info(f"Trial {trial.number} completed with accuracy: {validation_accuracy:.4f}")

        # Save the trial results to the all_trials.txt file
        _append_trial_record(
            trial_results_path,
            f"Trial {trial.number}",
            f"Validation Accuracy: {validation_accuracy:.4f}",
            hyperparameters,
        )

        # Skip saving when the pruner would prune at the last reported step.
        if trial.should_prune():
            return validation_accuracy

        # Only save the model if it's particularly good (top 25% of trials)
        if len(trial.study.trials) > 3:
            accuracies = [
                t.value for t in trial.study.trials
                if t.state == optuna.trial.TrialState.COMPLETE and t.value is not None
            ]
            if accuracies and validation_accuracy >= _top_quartile_threshold(accuracies):
                trained_model = f"finbert_trial_{trial.number}_acc_{validation_accuracy:.4f}"
                model_path = os.path.join(absolute_path, 'TrainedModels', trained_model)
                classifier.save_model(model_path)
                logger.info(f"Saved model for trial {trial.number} with accuracy {validation_accuracy:.4f}")

        return validation_accuracy

    except optuna.exceptions.TrialPruned:
        logger.info(f"Trial {trial.number} pruned")
        raise  # Re-raise the pruned exception for Optuna to handle
    except Exception as e:
        logger.error(f"Trial {trial.number} failed with error: {str(e)}")
        logger.error(traceback.format_exc())

        # Log failed trials too
        _append_trial_record(
            trial_results_path,
            f"Trial {trial.number} [FAILED]",
            f"Error: {str(e)}",
            hyperparameters,
        )

        # Return a very low score to indicate failure
        return float('-inf')

def _format_improvement(best_value, baseline):
    """Format the absolute and relative gain of ``best_value`` over ``baseline``.

    The percentage is reported as ``n/a`` when the baseline is zero or either
    value is not finite, because the ratio is undefined in those cases.
    """
    import math

    delta = best_value - baseline
    if baseline == 0 or not (math.isfinite(baseline) and math.isfinite(best_value)):
        return f"{delta:.4f} (n/a)"
    return f"{delta:.4f} ({(best_value / baseline - 1) * 100:.2f}%)"


def run_optimization(n_trials=100, timeout=None):
    """Run the Optuna hyperparameter search and write the result reports.

    Creates ``OptimizationResults/all_trials.txt`` (header, then a summary
    once the search ends) and ``OptimizationResults/best_model_results.txt``
    (details of the best trial, or a note that no trial completed).

    Args:
        n_trials: Maximum number of trials to run.
        timeout: Optional time limit in seconds passed to ``study.optimize``.

    Returns:
        The Optuna study, for further analysis.
    """
    import math

    start_time = time.time()
    logger.info(f"Starting optimization with {n_trials} trials, timeout={timeout}")

    # Set up file paths
    absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
    os.makedirs(os.path.join(absolute_path, 'OptimizationResults'), exist_ok=True)
    all_trials_path = os.path.join(absolute_path, 'OptimizationResults', 'all_trials.txt')
    best_model_path = os.path.join(absolute_path, 'OptimizationResults', 'best_model_results.txt')

    # Initialize the all trials file with a header
    with open(all_trials_path, 'w') as f:
        f.write("========== FINBERT HYPERPARAMETER OPTIMIZATION RESULTS ==========\n")
        f.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Number of trials: {n_trials}\n")
        f.write("=============================================================\n")

    # optuna and transformers are imported at the top of the file, so they
    # are already available here and no install fallback is needed.

    # Create an Optuna study that maximizes accuracy with better pruning
    study = optuna.create_study(
        direction='maximize',
        pruner=optuna.pruners.SuccessiveHalvingPruner(
            min_resource=1, reduction_factor=4, min_early_stopping_rate=0
        )
    )

    # Run the optimization with exception handling
    try:
        study.optimize(objective, n_trials=n_trials, timeout=timeout)
    except KeyboardInterrupt:
        logger.info("Optimization stopped by user.")
    except Exception as e:
        logger.error(f"Error during optimization: {str(e)}")
        logger.error(traceback.format_exc())

    completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

    # Log summary to the all trials file
    with open(all_trials_path, 'a') as f:
        f.write("\n========== OPTIMIZATION SUMMARY ==========\n")
        f.write(f"Completed: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total optimization time: {(time.time() - start_time) / 60:.2f} minutes\n")
        f.write(f"Number of trials: {len(study.trials)}\n")
        f.write(f"Number of completed trials: {len(completed_trials)}\n")

    logger.info("Optimization finished!")
    logger.info(f"Number of trials: {len(study.trials)}")
    logger.info(f"Number of completed trials: {len(completed_trials)}")

    # Get best trial if any completed successfully
    if completed_trials:
        best_trial = study.best_trial
        logger.info(f"Best trial:")
        logger.info(f"  Value (validation accuracy): {best_trial.value:.4f}")
        logger.info(f"  Params:")
        for key, value in best_trial.params.items():
            logger.info(f"    {key}: {value}")

        # Save the best model results to a detailed file
        with open(best_model_path, 'w') as f:
            f.write("========== FINBERT BEST MODEL RESULTS ==========\n")
            f.write(f"Date: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write(f"Best Trial Number: {best_trial.number}\n")
            f.write(f"Best Validation Accuracy: {best_trial.value:.4f}\n\n")
            f.write("Hyperparameters:\n")
            f.write("--------------\n")
            for key, value in best_trial.params.items():
                f.write(f"{key}: {value}\n")
            f.write("\n")

            # Add some statistics and analysis so they can be checked later
            f.write("Performance Analysis:\n")
            f.write("--------------\n")
            f.write(f"Total trials completed: {len(completed_trials)}\n")
            f.write(f"Optimization time: {(time.time() - start_time) / 60:.2f} minutes\n")

            # Statistics use finite scores only: a failed trial scored as
            # -inf would otherwise turn the average and worst into -inf.
            scored_trials = [
                t for t in completed_trials
                if t.value is not None and math.isfinite(t.value)
            ]
            if scored_trials:
                # Compare to worst trial
                worst_trial = min(scored_trials, key=lambda t: t.value)
                f.write(f"Worst trial accuracy: {worst_trial.value:.4f} (Trial {worst_trial.number})\n")
                f.write(f"Improvement over worst: {_format_improvement(best_trial.value, worst_trial.value)}\n")

                # Calculate average performance
                avg_accuracy = sum(t.value for t in scored_trials) / len(scored_trials)
                f.write(f"Average trial accuracy: {avg_accuracy:.4f}\n")
                f.write(f"Improvement over average: {_format_improvement(best_trial.value, avg_accuracy)}\n\n")
            else:
                f.write("No completed trial reported a finite accuracy.\n\n")

            # Add parameter importance if available
            try:
                importance = optuna.importance.get_param_importances(study)
                f.write("Parameter Importance:\n")
                f.write("--------------\n")
                for param, score in importance.items():
                    f.write(f"{param}: {score:.4f}\n")
            except Exception as e:
                f.write(f"Could not calculate parameter importance: {str(e)}\n")

            f.write("\n========== END OF REPORT ==========\n")

        logger.info(f"Best model results saved to {best_model_path}")
    else:
        logger.warning("No trials completed successfully.")

        # Still create a best_model_results.txt indicating no successful trials
        with open(best_model_path, 'w') as f:
            f.write("========== FINBERT BEST MODEL RESULTS ==========\n")
            f.write(f"Date: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write("No trials completed successfully.\n")
            f.write("Please check the logs for more information on the errors encountered.\n")
            f.write("\n========== END OF REPORT ==========\n")

    logger.info(f"Total optimization time: {(time.time() - start_time) / 60:.2f} minutes")

    # Return the study for further analysis if needed
    return study

if __name__ == "__main__":
    # Script entry point: search hyperparameters, plot the study, then
    # retrain and save one final model with the best parameters found.
    logger.info("========== STARTING FINBERT HYPERPARAMETER OPTIMIZATION ==========")

    # 100 trials, no wall-clock limit; plots follow straight after.
    study = run_optimization(n_trials=100, timeout=None)
    visualize_study(study)

    # The final model can only be trained if at least one trial completed.
    completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
    if completed_trials:
        logger.info("\nTraining final model with best hyperparameters...")
        best_params = study.best_params

        # Paths and filenames
        absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
        dataset_path = absolute_path + "Datasets/"
        train_file = 'train_set.csv'
        validation_file = 'validation_set.csv'
        feature_col = 'Sentence'
        label_col = 'SentimentNumerical'
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # Report that receives the outcome of the final training run.
        best_model_path = os.path.join(absolute_path, 'OptimizationResults', 'best_model_results.txt')

        try:
            final_classifier = FinBertFineTuning(
                dataset_path=dataset_path,
                train_file=train_file,
                validation_file=validation_file,
                feature_col=feature_col,
                label_col=label_col,
                batch_size=best_params['batch_size'],
                learning_rate=best_params['learning_rate'],
                num_epochs=best_params['num_epochs'],
                max_len=512,
                optimizer_name=best_params['optimizer'],
                weight_decay=best_params['weight_decay'],
                warmup_ratio=best_params['warmup_ratio'],
                dropout_rate=best_params['dropout_rate'],
                device=device,
            )

            final_accuracy = final_classifier.train()
            logger.info(f"Final model validation accuracy: {final_accuracy:.4f}")

            final_model_path = absolute_path + 'TrainedModels/final_finbert_optimized_model'
            final_classifier.save_model(final_model_path)
            logger.info(f"Final model saved to {final_model_path}")

            # Add final model results to the best model report
            with open(best_model_path, 'a') as f:
                f.writelines([
                    "\n========== FINAL MODEL TRAINING RESULTS ==========\n",
                    "Final model trained with best hyperparameters\n",
                    f"Final validation accuracy: {final_accuracy:.4f}\n",
                    f"Model saved to: {final_model_path}\n",
                    "========== END OF FINAL MODEL REPORT ==========\n",
                ])

        except Exception as e:
            logger.error(f"Error training final model: {str(e)}")
            logger.error(traceback.format_exc())

            # Document the failure in the best model report
            with open(best_model_path, 'a') as f:
                f.writelines([
                    "\n========== FINAL MODEL TRAINING RESULTS ==========\n",
                    f"Error training final model: {str(e)}\n",
                    "========== END OF FINAL MODEL REPORT ==========\n",
                ])
    else:
        logger.error("No trials completed successfully. Cannot train final model.")

    logger.info("========== FINBERT HYPERPARAMETER OPTIMIZATION COMPLETED ==========")

[I 2025-04-11 15:57:14,804] A new study created in memory with name: no-name-15008619-8e2f-4076-9db8-ac0bc0d57140


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 15:58:56,393] Trial 0 finished with value: 0.8042635658914729 and parameters: {'batch_size': 16, 'learning_rate': 9.883731167307301e-05, 'num_epochs': 3, 'optimizer': 'Adam', 'weight_decay': 1.812249897555332e-05, 'warmup_ratio': 0.19460520071344836, 'dropout_rate': 0.10872215577338783}. Best is trial 0 with value: 0.8042635658914729.


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 387/387 [00:34<00:00, 11.31it/s]
Epoch 2/8: 100%|██████████| 387/387 [00:34<00:00, 11.36it/s]
[I 2025-04-11 16:00:15,546] Trial 1 pruned. 


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
Epoch 2/7: 100%|██████████| 194/194 [00:31<00:00,  6.20it/s]
[I 2025-04-11 16:01:28,494] Trial 2 pruned. 


Mounted at /content/gdrive


Epoch 1/10: 100%|██████████| 387/387 [00:34<00:00, 11.35it/s]
Epoch 2/10: 100%|██████████| 387/387 [00:34<00:00, 11.37it/s]
[I 2025-04-11 16:02:47,686] Trial 3 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
Epoch 2/2: 100%|██████████| 194/194 [00:31<00:00,  6.20it/s]
[I 2025-04-11 16:04:00,461] Trial 4 pruned. 


Mounted at /content/gdrive


Epoch 1/10: 100%|██████████| 194/194 [00:31<00:00,  6.20it/s]
Epoch 2/10: 100%|██████████| 194/194 [00:31<00:00,  6.21it/s]
Epoch 3/10: 100%|██████████| 194/194 [00:31<00:00,  6.21it/s]
Epoch 4/10: 100%|██████████| 194/194 [00:31<00:00,  6.21it/s]
[I 2025-04-11 16:06:22,891] Trial 5 finished with value: 0.7887596899224806 and parameters: {'batch_size': 8, 'learning_rate': 8.126058564017888e-05, 'num_epochs': 10, 'optimizer': 'AdamW', 'weight_decay': 0.0014035299644954455, 'warmup_ratio': 0.12107007072035006, 'dropout_rate': 0.21492093313805946}. Best is trial 0 with value: 0.8042635658914729.


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 25/25 [00:27<00:00,  1.09s/it]
Epoch 2/4: 100%|██████████| 25/25 [00:27<00:00,  1.08s/it]
[I 2025-04-11 16:07:27,767] Trial 6 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 387/387 [00:34<00:00, 11.31it/s]
Epoch 2/3: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
Epoch 3/3: 100%|██████████| 387/387 [00:34<00:00, 11.32it/s]
[I 2025-04-11 16:09:27,113] Trial 7 finished with value: 0.7965116279069767 and parameters: {'batch_size': 4, 'learning_rate': 7.274448955032222e-05, 'num_epochs': 3, 'optimizer': 'Adam', 'weight_decay': 0.0024663180875589396, 'warmup_ratio': 0.01875441439976975, 'dropout_rate': 0.26509711245326706}. Best is trial 0 with value: 0.8042635658914729.


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/4: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
Epoch 3/4: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
Epoch 4/4: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 16:11:35,879] Trial 8 finished with value: 0.7945736434108527 and parameters: {'batch_size': 32, 'learning_rate': 5.543515631677898e-05, 'num_epochs': 4, 'optimizer': 'AdamW', 'weight_decay': 8.801919762125617e-06, 'warmup_ratio': 0.026875790897854523, 'dropout_rate': 0.2647276687236906}. Best is trial 0 with value: 0.8042635658914729.


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
Epoch 2/2: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
[I 2025-04-11 16:12:48,843] Trial 9 pruned. 


Mounted at /content/gdrive


Epoch 1/5: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:13:57,753] Trial 10 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:15:06,079] Trial 11 pruned. 


Mounted at /content/gdrive


Epoch 1/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:16:14,525] Trial 12 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
Epoch 2/3: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
[I 2025-04-11 16:17:33,899] Trial 13 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/6: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 16:18:39,815] Trial 14 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 25/25 [00:27<00:00,  1.09s/it]
Epoch 2/3: 100%|██████████| 25/25 [00:27<00:00,  1.08s/it]
[I 2025-04-11 16:19:43,632] Trial 15 pruned. 


Mounted at /content/gdrive


Epoch 1/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:20:52,025] Trial 16 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 387/387 [00:34<00:00, 11.26it/s]
Epoch 2/2: 100%|██████████| 387/387 [00:34<00:00, 11.27it/s]
[I 2025-04-11 16:22:11,760] Trial 17 pruned. 


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 4/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 5/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 6/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:25:31,899] Trial 18 finished with value: 0.8062015503875969 and parameters: {'batch_size': 16, 'learning_rate': 5.329770645523595e-05, 'num_epochs': 8, 'optimizer': 'Adam', 'weight_decay': 0.0024130584668293907, 'warmup_ratio': 0.09695020447413139, 'dropout_rate': 0.3074817112864218}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:26:40,272] Trial 19 pruned. 


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:27:49,010] Trial 20 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 4/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:30:02,770] Trial 21 finished with value: 0.7984496124031008 and parameters: {'batch_size': 16, 'learning_rate': 6.662640084149188e-05, 'num_epochs': 6, 'optimizer': 'Adam', 'weight_decay': 0.00435395148901141, 'warmup_ratio': 0.046841301937265475, 'dropout_rate': 0.25250511326420955}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/9: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/9: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/9: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 4/9: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:32:16,276] Trial 22 finished with value: 0.7945736434108527 and parameters: {'batch_size': 16, 'learning_rate': 5.9623215160293785e-05, 'num_epochs': 9, 'optimizer': 'Adam', 'weight_decay': 0.0029916783747446084, 'warmup_ratio': 0.051911210907973536, 'dropout_rate': 0.1756350370048073}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:33:25,080] Trial 23 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:34:34,337] Trial 24 pruned. 


Mounted at /content/gdrive


Epoch 1/9: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/9: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:35:42,696] Trial 25 pruned. 


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/7: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 3/7: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 4/7: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
[I 2025-04-11 16:37:49,607] Trial 26 finished with value: 0.8023255813953488 and parameters: {'batch_size': 32, 'learning_rate': 9.950566439153908e-05, 'num_epochs': 7, 'optimizer': 'AdamW', 'weight_decay': 0.0017321552692389443, 'warmup_ratio': 0.08158016878212655, 'dropout_rate': 0.29968982647354325}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/7: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 16:38:55,911] Trial 27 pruned. 


Mounted at /content/gdrive


Epoch 1/9: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/9: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
[I 2025-04-11 16:40:01,221] Trial 28 pruned. 


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/7: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 16:41:06,533] Trial 29 pruned. 


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 25/25 [00:27<00:00,  1.09s/it]
Epoch 2/8: 100%|██████████| 25/25 [00:27<00:00,  1.08s/it]
[I 2025-04-11 16:42:10,613] Trial 30 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/6: 100%|██████████| 97/97 [00:29<00:00,  3.34it/s]
Epoch 3/6: 100%|██████████| 97/97 [00:29<00:00,  3.34it/s]
Epoch 4/6: 100%|██████████| 97/97 [00:29<00:00,  3.34it/s]
Epoch 5/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 6/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:45:27,979] Trial 31 finished with value: 0.8003875968992248 and parameters: {'batch_size': 16, 'learning_rate': 6.663722050833919e-05, 'num_epochs': 6, 'optimizer': 'AdamW', 'weight_decay': 0.004754868742868332, 'warmup_ratio': 0.08369686561270434, 'dropout_rate': 0.23612852313400562}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/8: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
Epoch 3/8: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
Epoch 4/8: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 16:47:34,820] Trial 32 finished with value: 0.7945736434108527 and parameters: {'batch_size': 32, 'learning_rate': 7.10554460121508e-05, 'num_epochs': 8, 'optimizer': 'AdamW', 'weight_decay': 0.005692740471152267, 'warmup_ratio': 0.08989906282345769, 'dropout_rate': 0.2257098643737076}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:48:43,827] Trial 33 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:49:52,771] Trial 34 pruned. 


Mounted at /content/gdrive


Epoch 1/9: 100%|██████████| 194/194 [00:31<00:00,  6.18it/s]
Epoch 2/9: 100%|██████████| 194/194 [00:31<00:00,  6.18it/s]
[I 2025-04-11 16:51:05,811] Trial 35 pruned. 


Mounted at /content/gdrive


Epoch 1/10: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/10: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
[I 2025-04-11 16:52:11,607] Trial 36 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 25/25 [00:27<00:00,  1.09s/it]
Epoch 2/6: 100%|██████████| 25/25 [00:27<00:00,  1.08s/it]
[I 2025-04-11 16:53:15,357] Trial 37 pruned. 


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:54:23,922] Trial 38 pruned. 


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
Epoch 2/8: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
Epoch 3/8: 100%|██████████| 194/194 [00:31<00:00,  6.20it/s]
Epoch 4/8: 100%|██████████| 194/194 [00:31<00:00,  6.20it/s]
[I 2025-04-11 16:56:46,654] Trial 39 finished with value: 0.7926356589147286 and parameters: {'batch_size': 8, 'learning_rate': 3.736126461553204e-05, 'num_epochs': 8, 'optimizer': 'AdamW', 'weight_decay': 0.0007402164422432436, 'warmup_ratio': 0.10530430656330395, 'dropout_rate': 0.19726214175697188}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/5: 100%|██████████| 387/387 [00:34<00:00, 11.33it/s]
Epoch 2/5: 100%|██████████| 387/387 [00:34<00:00, 11.31it/s]
[I 2025-04-11 16:58:05,809] Trial 40 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 16:59:14,764] Trial 41 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 4/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 5/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:02:00,575] Trial 42 pruned. 


Mounted at /content/gdrive


Epoch 1/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:03:09,047] Trial 43 pruned. 


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:04:17,450] Trial 44 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:05:27,356] Trial 45 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 194/194 [00:31<00:00,  6.18it/s]
Epoch 2/6: 100%|██████████| 194/194 [00:31<00:00,  6.18it/s]
[I 2025-04-11 17:06:41,313] Trial 46 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/4: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 17:07:47,674] Trial 47 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/2: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:08:57,999] Trial 48 finished with value: 0.8042635658914729 and parameters: {'batch_size': 16, 'learning_rate': 7.49483027460452e-05, 'num_epochs': 2, 'optimizer': 'Adam', 'weight_decay': 0.0011907559700569021, 'warmup_ratio': 0.1893934462289233, 'dropout_rate': 0.21638002487135835}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
Epoch 2/2: 100%|██████████| 387/387 [00:34<00:00, 11.31it/s]
[I 2025-04-11 17:10:17,832] Trial 49 finished with value: 0.7926356589147286 and parameters: {'batch_size': 4, 'learning_rate': 8.768522961933652e-05, 'num_epochs': 2, 'optimizer': 'AdamW', 'weight_decay': 0.0011563368952458733, 'warmup_ratio': 0.18677760816415806, 'dropout_rate': 0.20563527021274547}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 25/25 [00:27<00:00,  1.09s/it]
Epoch 2/2: 100%|██████████| 25/25 [00:27<00:00,  1.08s/it]
[I 2025-04-11 17:11:21,811] Trial 50 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/2: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:12:30,739] Trial 51 finished with value: 0.8003875968992248 and parameters: {'batch_size': 16, 'learning_rate': 9.958507628896159e-05, 'num_epochs': 2, 'optimizer': 'Adam', 'weight_decay': 0.00559291280127984, 'warmup_ratio': 0.1806479721893647, 'dropout_rate': 0.22966538517654386}. Best is trial 18 with value: 0.8062015503875969.


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:13:39,193] Trial 52 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/2: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:14:48,936] Trial 53 finished with value: 0.8081395348837209 and parameters: {'batch_size': 16, 'learning_rate': 7.954736306677901e-05, 'num_epochs': 2, 'optimizer': 'Adam', 'weight_decay': 0.0026812548306981475, 'warmup_ratio': 0.16052122551850673, 'dropout_rate': 0.21651770313238117}. Best is trial 53 with value: 0.8081395348837209.


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:16:30,398] Trial 54 finished with value: 0.8023255813953488 and parameters: {'batch_size': 16, 'learning_rate': 7.435969435155802e-05, 'num_epochs': 3, 'optimizer': 'Adam', 'weight_decay': 0.0008413342677825397, 'warmup_ratio': 0.19876608049729275, 'dropout_rate': 0.1337746169301942}. Best is trial 53 with value: 0.8081395348837209.


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:17:39,456] Trial 55 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/2: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:18:48,082] Trial 56 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:19:56,798] Trial 57 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/2: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 17:21:02,117] Trial 58 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:22:43,794] Trial 59 finished with value: 0.8275193798449613 and parameters: {'batch_size': 16, 'learning_rate': 4.98575881380721e-05, 'num_epochs': 3, 'optimizer': 'Adam', 'weight_decay': 0.0013144023030408456, 'warmup_ratio': 0.15596017251959468, 'dropout_rate': 0.11966268445099029}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 387/387 [00:34<00:00, 11.26it/s]
Epoch 2/3: 100%|██████████| 387/387 [00:34<00:00, 11.29it/s]
Epoch 3/3: 100%|██████████| 387/387 [00:34<00:00, 11.29it/s]
[I 2025-04-11 17:24:43,044] Trial 60 finished with value: 0.8062015503875969 and parameters: {'batch_size': 4, 'learning_rate': 5.2377724124203745e-05, 'num_epochs': 3, 'optimizer': 'Adam', 'weight_decay': 0.00033125121674195855, 'warmup_ratio': 0.15710504504115083, 'dropout_rate': 0.10025569042361429}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 387/387 [00:34<00:00, 11.29it/s]
Epoch 2/3: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
Epoch 3/3: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
[I 2025-04-11 17:26:42,187] Trial 61 finished with value: 0.8003875968992248 and parameters: {'batch_size': 4, 'learning_rate': 3.957467513057236e-05, 'num_epochs': 3, 'optimizer': 'Adam', 'weight_decay': 0.00026900956693602104, 'warmup_ratio': 0.15119147005314515, 'dropout_rate': 0.11821538895593597}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 387/387 [00:34<00:00, 11.28it/s]
Epoch 2/2: 100%|██████████| 387/387 [00:34<00:00, 11.31it/s]
[I 2025-04-11 17:28:01,519] Trial 62 finished with value: 0.7984496124031008 and parameters: {'batch_size': 4, 'learning_rate': 5.118275458405567e-05, 'num_epochs': 2, 'optimizer': 'Adam', 'weight_decay': 0.001370140755079933, 'warmup_ratio': 0.1617824129615567, 'dropout_rate': 0.10262890946789548}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 387/387 [00:34<00:00, 11.28it/s]
Epoch 2/3: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
[I 2025-04-11 17:29:20,929] Trial 63 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 387/387 [00:34<00:00, 11.28it/s]
Epoch 2/2: 100%|██████████| 387/387 [00:34<00:00, 11.28it/s]
[I 2025-04-11 17:30:40,397] Trial 64 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.31it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:31:49,400] Trial 65 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/4: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
[I 2025-04-11 17:32:55,371] Trial 66 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 4/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:35:08,453] Trial 67 finished with value: 0.8023255813953488 and parameters: {'batch_size': 16, 'learning_rate': 5.9343222758941404e-05, 'num_epochs': 4, 'optimizer': 'Adam', 'weight_decay': 0.00034468824853548603, 'warmup_ratio': 0.1563351838985647, 'dropout_rate': 0.1429827943418926}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 194/194 [00:31<00:00,  6.18it/s]
Epoch 2/2: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
[I 2025-04-11 17:36:21,337] Trial 68 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 25/25 [00:27<00:00,  1.09s/it]
Epoch 2/2: 100%|██████████| 25/25 [00:27<00:00,  1.08s/it]
[I 2025-04-11 17:37:25,334] Trial 69 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:38:33,961] Trial 70 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:39:42,413] Trial 71 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:40:50,693] Trial 72 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:41:59,489] Trial 73 pruned. 


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:43:07,861] Trial 74 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 387/387 [00:34<00:00, 11.29it/s]
Epoch 2/2: 100%|██████████| 387/387 [00:34<00:00, 11.29it/s]
[I 2025-04-11 17:44:27,616] Trial 75 pruned. 


Mounted at /content/gdrive


Epoch 1/9: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/9: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:45:35,990] Trial 76 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/4: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 17:46:41,925] Trial 77 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:47:50,383] Trial 78 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/2: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:49:00,426] Trial 79 finished with value: 0.7887596899224806 and parameters: {'batch_size': 16, 'learning_rate': 8.323292742995405e-05, 'num_epochs': 2, 'optimizer': 'Adam', 'weight_decay': 0.0017171802141792822, 'warmup_ratio': 0.18114028795281698, 'dropout_rate': 0.33195158569720196}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:50:09,289] Trial 80 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 4/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:52:22,741] Trial 81 finished with value: 0.8003875968992248 and parameters: {'batch_size': 16, 'learning_rate': 5.847348571092987e-05, 'num_epochs': 4, 'optimizer': 'Adam', 'weight_decay': 0.00028177017024881584, 'warmup_ratio': 0.1583231602779359, 'dropout_rate': 0.1433840892998444}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:53:31,891] Trial 82 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:54:40,273] Trial 83 pruned. 


Mounted at /content/gdrive


Epoch 1/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:55:49,061] Trial 84 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/2: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
[I 2025-04-11 17:56:54,331] Trial 85 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 17:58:03,687] Trial 86 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 25/25 [00:27<00:00,  1.09s/it]
Epoch 2/3: 100%|██████████| 25/25 [00:27<00:00,  1.08s/it]
[I 2025-04-11 17:59:07,707] Trial 87 pruned. 


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 194/194 [00:31<00:00,  6.18it/s]
Epoch 2/7: 100%|██████████| 194/194 [00:31<00:00,  6.19it/s]
[I 2025-04-11 18:00:20,921] Trial 88 pruned. 


Mounted at /content/gdrive


Epoch 1/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/4: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 18:01:29,306] Trial 89 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 387/387 [00:34<00:00, 11.27it/s]
Epoch 2/2: 100%|██████████| 387/387 [00:34<00:00, 11.31it/s]
[I 2025-04-11 18:02:49,325] Trial 90 finished with value: 0.7906976744186046 and parameters: {'batch_size': 4, 'learning_rate': 2.8201388952446246e-05, 'num_epochs': 2, 'optimizer': 'Adam', 'weight_decay': 0.00020515671558004976, 'warmup_ratio': 0.13030429826757708, 'dropout_rate': 0.12863724687445588}. Best is trial 59 with value: 0.8275193798449613.


Mounted at /content/gdrive


Epoch 1/5: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/5: 100%|██████████| 97/97 [00:29<00:00,  3.34it/s]
[I 2025-04-11 18:03:58,072] Trial 91 pruned. 


Mounted at /content/gdrive


Epoch 1/7: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/7: 100%|██████████| 97/97 [00:29<00:00,  3.34it/s]
[I 2025-04-11 18:05:06,383] Trial 92 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 18:06:18,172] Trial 93 pruned. 


Mounted at /content/gdrive


Epoch 1/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/8: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 18:07:27,746] Trial 94 pruned. 


Mounted at /content/gdrive


Epoch 1/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/6: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 18:08:37,159] Trial 95 pruned. 


Mounted at /content/gdrive


Epoch 1/10: 100%|██████████| 49/49 [00:27<00:00,  1.76it/s]
Epoch 2/10: 100%|██████████| 49/49 [00:27<00:00,  1.77it/s]
[I 2025-04-11 18:09:43,622] Trial 96 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 18:10:51,916] Trial 97 pruned. 


Mounted at /content/gdrive


Epoch 1/2: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 2/2: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
[I 2025-04-11 18:12:00,329] Trial 98 pruned. 


Mounted at /content/gdrive


Epoch 1/9: 100%|██████████| 387/387 [00:34<00:00, 11.29it/s]
Epoch 2/9: 100%|██████████| 387/387 [00:34<00:00, 11.27it/s]
[I 2025-04-11 18:13:19,841] Trial 99 pruned. 


Mounted at /content/gdrive


Epoch 1/3: 100%|██████████| 97/97 [00:29<00:00,  3.32it/s]
Epoch 2/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]
Epoch 3/3: 100%|██████████| 97/97 [00:29<00:00,  3.33it/s]


In [1]:
import pandas as pd
import os
import torch
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm
from google.colab import drive

class FinBertPredictions:
    """Run inference with a fine-tuned sequence-classification model.

    On construction the instance mounts Google Drive at ``/content/gdrive``
    and loads the model and tokenizer stored under ``model_path`` onto
    ``device``. ``predict`` classifies a single text; ``predict_and_save``
    annotates a whole CSV file with predictions and per-row timings.
    """

    def __init__(self, model_path, device, max_len):
        """Mount Google Drive and load the model.

        Args:
            model_path: Directory (or hub id) passed to ``from_pretrained``
                for both the model and the tokenizer.
            device: Anything accepted by ``torch.device`` (e.g. ``'cuda'``).
            max_len: Token length used for padding/truncation in ``predict``.
        """
        drive.mount('/content/gdrive')  # Mount Google Drive
        self.model_path = model_path
        self.max_len = max_len
        self.device = torch.device(device)
        self.model, self.tokenizer = self.load_fine_tuned_model()

    def load_fine_tuned_model(self):
        """Load model and tokenizer from ``self.model_path``.

        Returns:
            A ``(model, tokenizer)`` tuple; the model is already moved to
            ``self.device`` and switched to evaluation mode.
        """
        model = AutoModelForSequenceClassification.from_pretrained(self.model_path)
        tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        model.to(self.device)
        # Inference only: select evaluation mode once here instead of
        # re-applying it on every predict() call.
        model.eval()
        return model, tokenizer

    def predict(self, text):
        """Return the predicted class index for ``text``.

        The text is tokenized, padded/truncated to ``self.max_len`` tokens,
        run through the model without gradient tracking, and the index of the
        largest logit is returned as a plain Python int.
        """
        encoded_input = self.tokenizer(
            text,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Move inputs to the same device as the model
        encoded_input = {k: v.to(self.device) for k, v in encoded_input.items()}

        # No gradients are needed for inference
        with torch.no_grad():
            logits = self.model(**encoded_input).logits

        return torch.argmax(logits, dim=1).item()

    @staticmethod
    def _backup_path(dataset_path, test_file):
        """Return the backup location for ``test_file`` inside ``dataset_path``.

        ``_original`` is inserted before the extension, so ``test_set.csv``
        maps to ``test_set_original.csv``. Deriving the name from the input
        keeps backups of different test files from shadowing one another.
        """
        stem, ext = os.path.splitext(test_file)
        return os.path.join(dataset_path, f"{stem}_original{ext}")

    def predict_and_save(self, dataset_path, test_file, feature_col, prediction_col,
                         time_col='time-finbert-adam', save_every=10):
        """Predict every row of a CSV file and write the results back in place.

        The file ``dataset_path/test_file`` is read, backed up once (the
        backup is only written if it does not exist yet), and then
        overwritten with two extra columns: the prediction and the time the
        prediction took in seconds.

        Args:
            dataset_path: Directory containing the test file.
            test_file: CSV file name inside ``dataset_path``.
            feature_col: Column holding the text to classify.
            prediction_col: Column that receives the predicted class index;
                rows whose text is NaN or empty receive ``-1``.
            time_col: Column that receives the elapsed prediction time
                (``0`` for skipped rows).
            save_every: Write an intermediate copy of the results whenever
                the row index is a multiple of this value; ``0`` or ``None``
                disables intermediate saves.
        """
        csv_path = os.path.join(dataset_path, test_file)

        # Load the test dataset
        test_df = pd.read_csv(csv_path)

        # Keep an untouched copy, because csv_path is overwritten below
        backup_file = self._backup_path(dataset_path, test_file)
        if not os.path.exists(backup_file):
            test_df.to_csv(backup_file, index=False)

        # Iterate through each row in the DataFrame
        for index, row in tqdm(test_df.iterrows(), total=len(test_df)):
            content = row[feature_col]

            # Skip empty or NaN content, marking the row with placeholders
            if pd.isna(content) or content == '':
                test_df.at[index, prediction_col] = -1
                test_df.at[index, time_col] = 0
                continue

            # Time only the model call itself
            start_time = time.perf_counter()
            predicted_rating = self.predict(content)
            elapsed_time = time.perf_counter() - start_time

            # Update the DataFrame
            test_df.at[index, prediction_col] = predicted_rating
            test_df.at[index, time_col] = elapsed_time

            # Checkpoint so an interrupted session keeps partial results
            if save_every and index % save_every == 0:
                test_df.to_csv(csv_path, index=False)

        # Save final results to CSV
        test_df.to_csv(csv_path, index=False)

# ---------------------------------------------------------------------------
# Inference settings
# ---------------------------------------------------------------------------
# Token length every input is padded/truncated to.
max_len = 512

# Run on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# ---------------------------------------------------------------------------
# Google Drive locations
# ---------------------------------------------------------------------------
absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
dataset_path = f"{absolute_path}Datasets/"
trained_model = f"{absolute_path}FinBert-TrainedModels/final_finbert_optimized_model"
test_file = "test_set.csv"

# Input text column and the column that will receive the predictions.
feature_col = 'Sentence'
prediction_col = 'FINBERT_bayes_opt_prediction'

# Echo the run configuration before the model is loaded.
print(
    f"Using device: {device}",
    f"Loading model from: {trained_model}",
    f"Processing test file: {os.path.join(dataset_path, test_file)}",
    sep="\n",
)

# Build the predictor (this mounts Drive and loads the model) ...
prediction = FinBertPredictions(
    model_path=trained_model,
    device=device,
    max_len=max_len,
)

# ... then annotate the test CSV with predictions and write it back.
prediction.predict_and_save(
    dataset_path=dataset_path,
    test_file=test_file,
    feature_col=feature_col,
    prediction_col=prediction_col,
)

Using device: cuda
Loading model from: /content/gdrive/My Drive/Projects/Financial-Sentiment/FinBert-TrainedModels/final_finbert_optimized_model
Processing test file: /content/gdrive/My Drive/Projects/Financial-Sentiment/Datasets/test_set.csv
Mounted at /content/gdrive


100%|██████████| 516/516 [00:06<00:00, 75.23it/s]
