In [None]:
!pip install optuna
import os
import pandas as pd
import torch
import time
import optuna
import logging
import traceback
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from torch.optim import Adam, AdamW
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from google.colab import drive
drive.mount('/content/gdrive')

# Configure logging once for the whole notebook: INFO and above are sent to a
# stream handler, each record stamped with time, logger name and severity.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by every class and function below.
logger = logging.getLogger(__name__)

class SequenceClassificationDataset(Dataset):
    """Map-style dataset that pairs tokenizer output with one label per example.

    ``inputs`` must support ``inputs['input_ids'][i]`` and
    ``inputs['attention_mask'][i]``; ``labels`` must support ``labels[i]``.
    """

    def __init__(self, inputs, labels):
        self.labels = labels
        self.inputs = inputs

    def __len__(self):
        # The example count is read from the token-id container.
        return len(self.inputs['input_ids'])

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict with ``input_ids``,
        ``attention_mask`` and a ``labels`` tensor of dtype ``torch.long``."""
        example = {
            field: self.inputs[field][idx]
            for field in ('input_ids', 'attention_mask')
        }
        example['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return example

class BertFineTuning:
    def __init__(self, dataset_path, train_file, validation_file, feature_col, label_col, batch_size,
                 learning_rate, num_epochs, max_len, optimizer_name='Adam', weight_decay=0.0,
                 warmup_ratio=0.0, dropout_rate=0.1, device='cpu'):
        """Load data, tokenizer and model, then build loaders, optimizer and scheduler.

        Args:
            dataset_path: Directory containing both CSV files.
            train_file: File name of the training CSV inside ``dataset_path``.
            validation_file: File name of the validation CSV inside ``dataset_path``.
            feature_col: Name of the text column.
            label_col: Name of the label column.
            batch_size: Batch size used for both DataLoaders.
            learning_rate: Learning rate passed to the optimizer.
            num_epochs: Maximum number of training epochs.
            max_len: ``max_length`` handed to the tokenizer.
            optimizer_name: ``'Adam'`` or ``'AdamW'``.
            weight_decay: Weight decay passed to the optimizer.
            warmup_ratio: Fraction of all scheduler steps spent in linear
                warmup; ``0`` (or less) disables the scheduler.
            dropout_rate: Used for both hidden and attention dropout.
            device: ``'cpu'`` or ``'cuda'``; falls back to CPU when CUDA is
                requested but not available.

        Raises:
            FileNotFoundError: If either CSV file does not exist.
            ValueError: If a required column is missing from either CSV or
                ``optimizer_name`` is not supported.

        Any exception raised during set-up is logged with its traceback and
        re-raised.
        """
        self.dataset_path = dataset_path
        self.train_file = train_file
        self.validation_file = validation_file
        self.feature_col = feature_col
        self.label_col = label_col
        self.model_name = 'bert-base-uncased'
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.max_len = max_len
        self.optimizer_name = optimizer_name
        self.weight_decay = weight_decay
        self.warmup_ratio = warmup_ratio
        self.dropout_rate = dropout_rate

        # Check for CUDA availability and set device
        if device == 'cuda' and not torch.cuda.is_available():
            logger.warning("CUDA not available, defaulting to CPU")
            device = 'cpu'
        self.device = torch.device(device)
        logger.info(f"Using device: {self.device}")

        try:
            # Load tokenizer
            logger.info(f"Loading tokenizer from {self.model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            # Load datasets; both files must exist before either is read.
            logger.info(f"Loading datasets from {self.dataset_path}")
            split_paths = {
                'Train': os.path.join(self.dataset_path, self.train_file),
                'Validation': os.path.join(self.dataset_path, self.validation_file),
            }
            for split_label, path in split_paths.items():
                if not os.path.exists(path):
                    message = f"{split_label} file not found: {path}"
                    logger.error(message)
                    raise FileNotFoundError(message)

            self.train_df = pd.read_csv(split_paths['Train'])
            self.validation_df = pd.read_csv(split_paths['Validation'])

            logger.info(f"Train set size: {len(self.train_df)}")
            logger.info(f"Validation set size: {len(self.validation_df)}")

            # Verify the required columns exist in BOTH splits, so a malformed
            # validation file is reported clearly instead of failing later
            # with a bare KeyError during tokenization.
            for split_label, frame in (('training', self.train_df),
                                       ('validation', self.validation_df)):
                for role, column in (('Feature', self.feature_col),
                                     ('Label', self.label_col)):
                    if column not in frame.columns:
                        message = f"{role} column '{column}' not found in {split_label} data"
                        logger.error(message)
                        raise ValueError(message)

            # Calculate number of unique labels
            self.num_labels = len(self.train_df[self.label_col].unique())
            logger.info(f"Number of unique labels: {self.num_labels}")
            if self.num_labels < 2:
                logger.warning("Less than 2 unique labels found in training data!")

            # Tokenize datasets
            logger.info("Tokenizing datasets")
            self.tokenized_train = self.tokenize_dataset(self.train_df, self.feature_col, self.label_col)
            self.tokenized_validation = self.tokenize_dataset(self.validation_df, self.feature_col, self.label_col)

            # DataLoaders are built before the scheduler because the scheduler
            # length is derived from the number of training batches.
            logger.info("Creating DataLoaders")
            self.train_dataloader = self.create_dataloader(self.tokenized_train)
            self.validation_dataloader = self.create_dataloader(self.tokenized_validation, shuffle=False)

            # Model configuration
            logger.info("Loading model configuration")
            self.model_config = AutoConfig.from_pretrained(
                self.model_name,
                num_labels=self.num_labels,
                hidden_dropout_prob=self.dropout_rate,
                attention_probs_dropout_prob=self.dropout_rate,
            )

            logger.info("Loading model for sequence classification")
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.model_name,
                config=self.model_config
            )
            self.model.to(self.device)

            # Optimizer
            logger.info(f"Setting up optimizer: {self.optimizer_name}")
            optimizer_classes = {'Adam': torch.optim.Adam, 'AdamW': torch.optim.AdamW}
            if self.optimizer_name not in optimizer_classes:
                logger.error(f"Unsupported optimizer: {self.optimizer_name}")
                raise ValueError(f"Unsupported optimizer: {self.optimizer_name}")
            self.optimizer = optimizer_classes[self.optimizer_name](
                self.model.parameters(),
                lr=self.learning_rate,
                weight_decay=self.weight_decay,
            )

            # Learning rate scheduler
            if self.warmup_ratio > 0:
                from transformers import get_linear_schedule_with_warmup
                # train() steps the scheduler once per batch, so the schedule
                # must span exactly the number of batches the loader yields
                # (which includes the final partial batch), times the epochs.
                num_training_steps = len(self.train_dataloader) * self.num_epochs
                num_warmup_steps = int(num_training_steps * self.warmup_ratio)
                logger.info(f"Setting up learning rate scheduler with {num_warmup_steps} warmup steps")
                self.scheduler = get_linear_schedule_with_warmup(
                    self.optimizer,
                    num_warmup_steps=num_warmup_steps,
                    num_training_steps=num_training_steps
                )
            else:
                self.scheduler = None

        except Exception as e:
            logger.error(f"Error during initialization: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def tokenize_dataset(self, df, feature_col, label_col):
        logger.info(f"Tokenizing {len(df)} examples with max_length={self.max_len}")
        # we have text data but check it again
        text_samples = list(df[feature_col])
        if len(text_samples) == 0:
            logger.error("No text samples found")
            raise ValueError("No text samples found")

        # Check for empty strings
        empty_samples = [i for i, text in enumerate(text_samples) if not isinstance(text, str) or len(text.strip()) == 0]
        if empty_samples:
            logger.warning(f"Found {len(empty_samples)} empty text samples. Replacing with placeholder text.")
            for i in empty_samples:
                text_samples[i] = "empty text"

        # Tokenize with error handling
        try:
            tokenized = self.tokenizer(
                text_samples,
                padding='max_length',
                truncation=True,
                max_length=self.max_len,
                return_tensors='pt'
            )

            # Check that labels are numeric and within range
            labels = list(df[label_col])
            if not all(isinstance(label, (int, float)) or (isinstance(label, str) and label.isdigit()) for label in labels):
                logger.error("Labels must be numeric values")
                raise ValueError("Labels must be numeric values")

            # Convert string labels to int if necessary
            labels = [int(label) if isinstance(label, str) else int(label) for label in labels]

            # Ensure labels are in correct range (0 to num_labels-1)
            unique_labels = set(labels)
            expected_range = set(range(self.num_labels))
            if not unique_labels.issubset(expected_range):
                logger.error(f"Labels outside expected range. Found {unique_labels}, expected range {expected_range}")
                raise ValueError(f"Labels outside expected range. Found {unique_labels}, expected range {expected_range}")

            return tokenized, labels

        except Exception as e:
            logger.error(f"Error in tokenization: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def create_dataloader(self, tokenized_dataset, shuffle=True):
        """Wrap a ``(tokenized, labels)`` pair in a batched DataLoader.

        ``tokenized_dataset[0]`` is the tokenizer output and
        ``tokenized_dataset[1]`` the matching labels. Batches have
        ``self.batch_size`` examples and are shuffled when ``shuffle`` is true.
        Any failure is logged with its traceback and re-raised.
        """
        try:
            encodings = tokenized_dataset[0]
            targets = tokenized_dataset[1]
            dataset = SequenceClassificationDataset(encodings, targets)
            loader = DataLoader(dataset, shuffle=shuffle, batch_size=self.batch_size)
            logger.info(f"Created DataLoader with {len(dataset)} examples, batch size {self.batch_size}")
            return loader
        except Exception as e:
            logger.error(f"Error creating DataLoader: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def evaluate_model(self, dataloader):
        """Evaluate ``self.model`` on ``dataloader`` without gradient tracking.

        Batches that raise are logged and skipped. The model is switched to
        eval mode and is left in that mode on return.

        Args:
            dataloader: Iterable of batches; each batch is a dict of tensors
                that includes a ``"labels"`` entry.

        Returns:
            ``(accuracy, average_loss)`` over the batches that were evaluated.
            If no batch could be evaluated, or evaluation itself fails, the
            failure pair ``(0.0, inf)`` is returned.
        """
        logger.info("Evaluating model")
        self.model.eval()
        all_labels = []
        all_predictions = []
        all_losses = []

        try:
            with torch.no_grad():
                for batch in dataloader:
                    try:
                        inputs = {key: value.to(self.device) for key, value in batch.items()}
                        labels = inputs["labels"]
                        outputs = self.model(**inputs)
                        batch_loss = outputs.loss.item()
                        _, predicted = torch.max(outputs.logits, 1)
                        batch_labels = labels.cpu().tolist()
                        batch_predictions = predicted.cpu().tolist()
                    except Exception as e:
                        logger.error(f"Error processing batch during evaluation: {str(e)}")
                        logger.error(traceback.format_exc())
                        continue  # Skip this batch and try the next one

                    # Record a batch only once every piece of it was computed,
                    # so losses and predictions always describe the same batches.
                    all_losses.append(batch_loss)
                    all_labels.extend(batch_labels)
                    all_predictions.extend(batch_predictions)

            if not all_losses:
                # Nothing was evaluated: an average loss of 0 would look like a
                # perfect score, so report the same failure pair as the error path.
                logger.error("No batches were evaluated successfully")
                return 0.0, float('inf')

            accuracy = accuracy_score(all_labels, all_predictions)
            avg_loss = sum(all_losses) / len(all_losses)
            logger.info(f"Evaluation results: accuracy={accuracy:.4f}, loss={avg_loss:.4f}")
            return accuracy, avg_loss

        except Exception as e:
            logger.error(f"Error during model evaluation: {str(e)}")
            logger.error(traceback.format_exc())
            # Return very poor performance to signal an error
            return 0.0, float('inf')

    def train(self, trial=None):
        """Fine-tune the model for up to ``self.num_epochs`` epochs.

        After each epoch the model is scored on the validation loader. Training
        stops early once validation accuracy has failed to improve for two
        consecutive epochs. When ``trial`` is given, each epoch's validation
        accuracy is reported to it and the trial may be pruned.

        Returns:
            The best validation accuracy seen, or ``0.0`` if training failed
            with an unexpected error.

        Raises:
            optuna.exceptions.TrialPruned: If ``trial`` asks for pruning.
        """
        logger.info(f"Starting training for {self.num_epochs} epochs")
        early_stopping_patience = 2  # Stop if no improvement for 2 epochs
        early_stopping_counter = 0
        best_val_accuracy = 0.0

        try:
            for epoch in range(self.num_epochs):
                avg_train_loss = self._train_single_epoch(epoch)

                # Validation
                validation_accuracy, validation_loss = self.evaluate_model(self.validation_dataloader)

                logger.info(f'Epoch {epoch + 1}/{self.num_epochs} - '
                      f'Training Loss: {avg_train_loss:.4f} - '
                      f'Validation Loss: {validation_loss:.4f} - '
                      f'Validation Accuracy: {validation_accuracy:.4f}')

                # "not >" (rather than "<=") keeps a NaN accuracy on the
                # no-improvement path.
                if not validation_accuracy > best_val_accuracy:
                    early_stopping_counter += 1
                    logger.info(f"No improvement in validation accuracy. Early stopping counter: {early_stopping_counter}/{early_stopping_patience}")
                else:
                    best_val_accuracy = validation_accuracy
                    early_stopping_counter = 0
                    logger.info(f"New best validation accuracy: {best_val_accuracy:.4f}")

                # Report the intermediate value so Optuna can prune the trial.
                if trial is not None:
                    trial.report(validation_accuracy, epoch)
                    if trial.should_prune():
                        logger.info(f"Trial {trial.number} pruned at epoch {epoch+1}")
                        raise optuna.exceptions.TrialPruned()

                if early_stopping_counter >= early_stopping_patience:
                    logger.info(f"Early stopping triggered after epoch {epoch + 1}")
                    break

            logger.info(f"Training completed. Best validation accuracy: {best_val_accuracy:.4f}")
            return best_val_accuracy

        except optuna.exceptions.TrialPruned:
            # Pruning is an expected outcome; let Optuna see the exception.
            logger.info("Trial pruned by Optuna")
            raise
        except Exception as e:
            logger.error(f"Error during training: {str(e)}")
            logger.error(traceback.format_exc())
            return 0.0  # Return poor performance to signal an error

    def _train_single_epoch(self, epoch):
        """Run one pass over the training loader and return the mean batch loss.

        Batches that raise are logged and skipped. If no batch succeeds the
        returned loss is ``inf``.
        """
        logger.info(f"Starting epoch {epoch+1}/{self.num_epochs}")
        self.model.train()
        train_losses = []

        progress = tqdm(self.train_dataloader, desc=f'Epoch {epoch + 1}/{self.num_epochs}')
        for batch_idx, batch in enumerate(progress):
            try:
                inputs = {key: value.to(self.device) for key, value in batch.items()}

                # Reset gradients, run the forward pass, then record the loss.
                self.optimizer.zero_grad()
                outputs = self.model(**inputs)
                loss = outputs.loss
                train_losses.append(loss.item())

                # Back-propagate and update weights (and the LR schedule, if any).
                loss.backward()
                self.optimizer.step()
                if self.scheduler:
                    self.scheduler.step()

                # Progress line every 20th batch.
                if not (batch_idx + 1) % 20:
                    logger.info(f"Epoch {epoch+1}, Batch {batch_idx+1}, Loss: {loss.item():.4f}")

            except Exception as e:
                logger.error(f"Error in training batch {batch_idx}: {str(e)}")
                logger.error(traceback.format_exc())
                continue  # Skip problematic batches

        if train_losses:
            avg_train_loss = sum(train_losses) / len(train_losses)
        else:
            avg_train_loss = float('inf')
        logger.info(f"Epoch {epoch+1} average training loss: {avg_train_loss:.4f}")
        return avg_train_loss

    def save_model(self, directory):
        """Write the model, tokenizer and hyperparameters into ``directory``.

        The directory is created if needed. Hyperparameters are written to
        ``hyperparameters.txt`` as one ``key: value`` line each. Any failure is
        logged with its traceback and re-raised.
        """
        try:
            os.makedirs(directory, exist_ok=True)
            logger.info(f"Saving model to {directory}")
            self.model.save_pretrained(directory)
            self.tokenizer.save_pretrained(directory)

            # Plain-text record of the settings this model was trained with.
            hyperparams = dict(
                batch_size=self.batch_size,
                learning_rate=self.learning_rate,
                num_epochs=self.num_epochs,
                max_len=self.max_len,
                optimizer=self.optimizer_name,
                weight_decay=self.weight_decay,
                warmup_ratio=self.warmup_ratio,
                dropout_rate=self.dropout_rate,
            )
            summary_path = os.path.join(directory, 'hyperparameters.txt')
            with open(summary_path, 'w') as f:
                f.writelines(f"{key}: {value}\n" for key, value in hyperparams.items())

            logger.info(f"Model and hyperparameters saved successfully to {directory}")

        except Exception as e:
            logger.error(f"Error saving model: {str(e)}")
            logger.error(traceback.format_exc())
            raise



def visualize_study(study):
    """Render the standard Optuna plots for ``study``.

    Does nothing (beyond a warning) when the study has no completed trial.
    Each plot is produced independently: a plot that fails is logged with its
    traceback and the remaining plots are still attempted. This function never
    raises because of a plotting error.
    """
    # Only proceed if we have completed trials
    if not any(t.state == optuna.trial.TrialState.COMPLETE for t in study.trials):
        logger.warning("No completed trials to visualize")
        return

    # (function name in optuna.visualization, label used in log messages)
    plots = (
        ('plot_optimization_history', 'optimization history'),
        ('plot_param_importances', 'parameter importance'),
        ('plot_parallel_coordinate', 'parallel coordinate'),
        ('plot_slice', 'slice'),
        ('plot_contour', 'contour'),
    )

    logger.info("Generating visualization plots")
    for function_name, label in plots:
        try:
            figure = getattr(optuna.visualization, function_name)(study)
            figure.show()
            logger.info(f"Generated {label} plot")
        except Exception as e:
            # Isolate failures so one unavailable plot does not hide the rest.
            logger.error(f"Visualization failed for {label} plot: {str(e)}")
            logger.error(traceback.format_exc())

def _append_trial_record(results_path, title, status_line, hyperparams):
    """Append one trial's record (title, status line, hyperparameters) to the log file."""
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    with open(results_path, 'a') as f:
        f.write(f"\n----- {title} -----\n")
        f.write(f"{status_line}\n")
        for label, value in hyperparams:
            f.write(f"{label}: {value}\n")
        f.write("--------------------------\n")


def objective(trial):
    """Optuna objective: fine-tune BERT with sampled hyperparameters.

    Samples the hyperparameters from ``trial``, trains a ``BertFineTuning``
    instance, appends the outcome to ``OptimizationResults/all_trials.txt``
    and saves the model when it ranks in the top quarter of the finished
    trials.

    Returns:
        The best validation accuracy of the trial, or ``-inf`` when the trial
        failed with an unexpected error.

    Raises:
        optuna.exceptions.TrialPruned: Re-raised so Optuna can record pruning.
    """
    import math  # local import; only needed to filter non-finite trial values

    logger.info(f"Starting trial {trial.number}")

    # Define the hyperparameter search space with focused options
    batch_size = trial.suggest_categorical('batch_size', [4, 8, 16, 32, 64])
    learning_rate = trial.suggest_float('learning_rate', 5e-6, 1e-4, log=True)
    num_epochs = trial.suggest_int('num_epochs', 2, 10)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'AdamW'])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
    warmup_ratio = trial.suggest_float('warmup_ratio', 0.0, 0.2)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)

    # Log selected hyperparameters
    logger.info(f"Trial {trial.number} hyperparameters: batch_size={batch_size}, "
          f"learning_rate={learning_rate}, num_epochs={num_epochs}, "
          f"optimizer={optimizer_name}, weight_decay={weight_decay}, "
          f"warmup_ratio={warmup_ratio}, dropout_rate={dropout_rate}")

    # Same values in the order and wording used by the trial log file.
    hyperparams = [
        ("Batch Size", batch_size),
        ("Learning Rate", learning_rate),
        ("Number of Epochs", num_epochs),
        ("Optimizer", optimizer_name),
        ("Weight Decay", weight_decay),
        ("Warmup Ratio", warmup_ratio),
        ("Dropout Rate", dropout_rate),
    ]

    # Fixed parameters
    max_len = 512
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Paths and filenames
    absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
    dataset_path = absolute_path + "Datasets/"
    train_file = 'train_set.csv'
    validation_file = 'validation_set.csv'
    feature_col = 'Sentence'
    label_col = 'SentimentNumerical'
    trial_results_path = os.path.join(absolute_path, 'OptimizationResults', 'all_trials.txt')

    # Initialize and train the model
    try:
        classifier = BertFineTuning(
            dataset_path, train_file, validation_file, feature_col, label_col,
            batch_size, learning_rate, num_epochs, max_len,
            optimizer_name, weight_decay, warmup_ratio, dropout_rate, device
        )

        # Train and evaluate
        validation_accuracy = classifier.train(trial)
        logger.info(f"Trial {trial.number} completed with accuracy: {validation_accuracy:.4f}")

        _append_trial_record(
            trial_results_path,
            f"Trial {trial.number}",
            f"Validation Accuracy: {validation_accuracy:.4f}",
            hyperparams,
        )

        # Skip saving when the pruner would still prune this trial.
        if trial.should_prune():
            return validation_accuracy

        # Only save the model if it's particularly good (top 25% of trials).
        if len(trial.study.trials) > 3:
            # Ignore unfinished trials and the -inf sentinel of failed ones.
            accuracies = sorted(
                t.value for t in trial.study.trials
                if t.state == optuna.trial.TrialState.COMPLETE
                and t.value is not None and math.isfinite(t.value)
            )
            if accuracies:
                # At least one trial defines the top quarter; with fewer than
                # four finished trials the bar is therefore the best so far
                # (an index of -0 would wrongly select the worst).
                top_count = max(1, len(accuracies) // 4)
                if validation_accuracy >= accuracies[-top_count]:
                    trained_model = f"bert_base_trial_{trial.number}_acc_{validation_accuracy:.4f}"
                    model_path = os.path.join(absolute_path, 'TrainedModels', trained_model)
                    classifier.save_model(model_path)
                    logger.info(f"Saved model for trial {trial.number} with accuracy {validation_accuracy:.4f}")

        return validation_accuracy

    except optuna.exceptions.TrialPruned:
        logger.info(f"Trial {trial.number} pruned")
        raise  # Re-raise the pruned exception for Optuna to handle
    except Exception as e:
        logger.error(f"Trial {trial.number} failed with error: {str(e)}")
        logger.error(traceback.format_exc())

        # Log failed trials too
        _append_trial_record(
            trial_results_path,
            f"Trial {trial.number} [FAILED]",
            f"Error: {str(e)}",
            hyperparams,
        )

        # Return a very low score to indicate failure
        return float('-inf')

def _describe_gain(best_value, baseline):
    """Format how far ``best_value`` exceeds ``baseline`` as 'absolute (percent%)'."""
    margin = best_value - baseline
    if baseline == 0:
        # A percentage relative to zero is undefined; give the absolute margin only.
        return f"{margin:.4f} (n/a%)"
    return f"{margin:.4f} ({(best_value / baseline - 1) * 100:.2f}%)"


def _write_best_trial_report(report_path, study, completed_trials, start_time):
    """Write the best-trial report, including comparison statistics, to ``report_path``."""
    import math  # local import; only needed for the finite-value filter below

    best_trial = study.best_trial
    # objective() returns -inf for failed trials, which Optuna still records as
    # completed; exclude such values so they cannot distort worst/average.
    scored_trials = [
        t for t in completed_trials
        if t.value is not None and math.isfinite(t.value)
    ]

    with open(report_path, 'w') as f:
        f.write("========== BERT BASE UNCASED BEST MODEL RESULTS ==========\n")
        f.write(f"Date: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        f.write(f"Best Trial Number: {best_trial.number}\n")
        f.write(f"Best Validation Accuracy: {best_trial.value:.4f}\n\n")
        f.write("Hyperparameters:\n")
        f.write("--------------\n")
        for key, value in best_trial.params.items():
            f.write(f"{key}: {value}\n")
        f.write("\n")

        # Add some statistics and analysis
        f.write("Performance Analysis:\n")
        f.write("--------------\n")
        f.write(f"Total trials completed: {len(completed_trials)}\n")
        f.write(f"Optimization time: {(time.time() - start_time) / 60:.2f} minutes\n")

        if scored_trials:
            # Compare to worst trial
            worst_trial = min(scored_trials, key=lambda t: t.value)
            f.write(f"Worst trial accuracy: {worst_trial.value:.4f} (Trial {worst_trial.number})\n")
            f.write(f"Improvement over worst: {_describe_gain(best_trial.value, worst_trial.value)}\n")

            # Calculate average performance
            avg_accuracy = sum(t.value for t in scored_trials) / len(scored_trials)
            f.write(f"Average trial accuracy: {avg_accuracy:.4f}\n")
            f.write(f"Improvement over average: {_describe_gain(best_trial.value, avg_accuracy)}\n\n")
        else:
            f.write("No trial produced a finite accuracy; worst/average comparison skipped.\n\n")

        # Add parameter importance if available
        try:
            importance = optuna.importance.get_param_importances(study)
            f.write("Parameter Importance:\n")
            f.write("--------------\n")
            for param, score in importance.items():
                f.write(f"{param}: {score:.4f}\n")
        except Exception as e:
            f.write(f"Could not calculate parameter importance: {str(e)}\n")

        f.write("\n========== END OF REPORT ==========\n")


def run_optimization(n_trials=100, timeout=None):
    """Run the Optuna hyperparameter search and write its reports.

    Creates a maximizing study with a successive-halving pruner, runs
    ``objective`` for up to ``n_trials`` trials (or until ``timeout``),
    appends a summary to ``all_trials.txt``, writes ``best_model_results.txt``
    and finally attempts to render the study plots.

    Args:
        n_trials: Maximum number of trials passed to ``study.optimize``.
        timeout: Timeout passed to ``study.optimize`` (``None`` for no limit).

    Returns:
        The ``optuna`` study, for further analysis by the caller.
    """
    start_time = time.time()
    logger.info(f"Starting optimization with {n_trials} trials, timeout={timeout}")

    # Set up file paths
    absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
    results_dir = os.path.join(absolute_path, 'OptimizationResults')
    os.makedirs(results_dir, exist_ok=True)
    all_trials_path = os.path.join(results_dir, 'all_trials.txt')
    best_model_path = os.path.join(results_dir, 'best_model_results.txt')

    # Initialize the all trials file with a header
    with open(all_trials_path, 'w') as f:
        f.write("========== BERT BASE UNCASED HYPERPARAMETER OPTIMIZATION RESULTS ==========\n")
        f.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Number of trials: {n_trials}\n")
        f.write("=============================================================\n")

    # Create an Optuna study that maximizes accuracy with better pruning
    study = optuna.create_study(
        direction='maximize',
        pruner=optuna.pruners.SuccessiveHalvingPruner(
            min_resource=1, reduction_factor=4, min_early_stopping_rate=0
        )
    )

    # Run the optimization with exception handling
    try:
        study.optimize(objective, n_trials=n_trials, timeout=timeout)
    except KeyboardInterrupt:
        logger.info("Optimization stopped by user.")
    except Exception as e:
        logger.error(f"Error during optimization: {str(e)}")
        logger.error(traceback.format_exc())

    completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

    # Log summary to the all trials file
    with open(all_trials_path, 'a') as f:
        f.write("\n========== OPTIMIZATION SUMMARY ==========\n")
        f.write(f"Completed: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total optimization time: {(time.time() - start_time) / 60:.2f} minutes\n")
        f.write(f"Number of trials: {len(study.trials)}\n")
        f.write(f"Number of completed trials: {len(completed_trials)}\n")

    logger.info("Optimization finished!")
    logger.info(f"Number of trials: {len(study.trials)}")
    logger.info(f"Number of completed trials: {len(completed_trials)}")

    # Get best trial if any completed successfully
    if completed_trials:
        best_trial = study.best_trial
        logger.info(f"Best trial:")
        logger.info(f"  Value (validation accuracy): {best_trial.value:.4f}")
        logger.info(f"  Params:")
        for key, value in best_trial.params.items():
            logger.info(f"    {key}: {value}")

        # Save the best model results to a detailed file
        _write_best_trial_report(best_model_path, study, completed_trials, start_time)
        logger.info(f"Best model results saved to {best_model_path}")
    else:
        logger.warning("No trials completed successfully.")

        # Still create a best_model_results.txt indicating no successful trials
        with open(best_model_path, 'w') as f:
            f.write("========== BERT BASE UNCASED BEST MODEL RESULTS ==========\n")
            f.write(f"Date: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write("No trials completed successfully during optimization.\n")
            f.write("Please check the logs for errors and try again with different settings.\n")
            f.write("\n========== END OF REPORT ==========\n")

    # Try to visualize the study results
    try:
        if completed_trials:
            visualize_study(study)
        else:
            logger.warning("No completed trials to visualize")
    except Exception as e:
        logger.error(f"Error generating visualizations: {str(e)}")

    # Return the study for further analysis
    return study

if __name__ == "__main__":
    # Script entry point: run the hyperparameter search, then retrain and save
    # one final model with the best hyperparameters found.
    logger.info("========== STARTING BERT HYPERPARAMETER OPTIMIZATION ==========")

    # Run optimization with 100 trials. run_optimization() renders the study
    # plots itself before returning, so they are not drawn again here.
    study = run_optimization(n_trials=100, timeout=None)

    # Only proceed if we have completed trials
    completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
    if not completed_trials:
        logger.error("No trials completed successfully. Cannot train final model.")
    else:
        # Train the final model with the best hyperparameters
        logger.info("\nTraining final model with best hyperparameters...")
        best_params = study.best_params

        # Paths and filenames
        absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
        dataset_path = absolute_path + "Datasets/"
        train_file = 'train_set.csv'
        validation_file = 'validation_set.csv'
        feature_col = 'Sentence'
        label_col = 'SentimentNumerical'
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # The outcome (success or failure) is appended to the best-model report.
        best_model_path = os.path.join(absolute_path, 'OptimizationResults', 'best_model_results.txt')

        try:
            final_classifier = BertFineTuning(
                dataset_path, train_file, validation_file, feature_col, label_col,
                best_params['batch_size'], best_params['learning_rate'],
                best_params['num_epochs'], 512, best_params['optimizer'],
                best_params['weight_decay'], best_params['warmup_ratio'],
                best_params['dropout_rate'], device
            )

            final_accuracy = final_classifier.train()
            logger.info(f"Final model validation accuracy: {final_accuracy:.4f}")

            final_model_path = absolute_path + 'TrainedModels/final_bert_optimized_model'
            final_classifier.save_model(final_model_path)
            logger.info(f"Final model saved to {final_model_path}")

            # Add final model results to the best model report
            with open(best_model_path, 'a') as f:
                f.write("\n========== FINAL MODEL TRAINING RESULTS ==========\n")
                f.write(f"Final model trained with best hyperparameters\n")
                f.write(f"Final validation accuracy: {final_accuracy:.4f}\n")
                f.write(f"Model saved to: {final_model_path}\n")
                f.write("========== END OF FINAL MODEL REPORT ==========\n")

        except Exception as e:
            logger.error(f"Error training final model: {str(e)}")
            logger.error(traceback.format_exc())

            # Document the failure in the best model report
            with open(best_model_path, 'a') as f:
                f.write("\n========== FINAL MODEL TRAINING RESULTS ==========\n")
                f.write(f"Error training final model: {str(e)}\n")
                f.write("========== END OF FINAL MODEL REPORT ==========\n")

    logger.info("========== BERT HYPERPARAMETER OPTIMIZATION COMPLETED ==========")

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.2.1
Mounted at /content/gdrive


[I 2025-04-12 03:58:34,298] A new study created in memory with name: no-name-436e798d-5226-4c9f-9ffd-20c84defafa4
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/4: 100%|██████████| 387/387 [00:35<00:00, 11.01it/s]
Epoch 2/4: 100%|██████████| 387/387 [00:34<00:00, 11.36it/s]
Epoch 3/4: 100%|██████████| 387/387 [00:34<00:00, 11.36it/s]
Epoch 4/4: 100%|██████████| 387/387 [00:34<00:00, 11.31it/s]
[I 2025-04-12 04:01:15,374] Trial 0 finished with value: 0.8236434108527132 and parameters: {'batch_size': 4, 'learning_rate': 1.8252486172187426e-05, 'num_epochs': 4, 'optimizer': 'Adam', 'weight_decay': 0.0005857608677563052, 'warmup_ratio': 0.16775728047751792, 'dropout_rate': 0.13579281935674767}. Best is trial 0 with value: 0.8236434108527132.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncase

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 387/387 [00:34<00:00, 11.30it/s]
Epoch 2/3: 100%|██████████| 387/387 [00:34<00:00, 11.32it/s]
Epoch 3/3: 100%|██████████| 387/387 [00:34<00:00, 11.34it/s]


In [None]:
import pandas as pd
import os
import torch
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm
from google.colab import drive

class BertPredictions:
    """Run inference with a fine-tuned sequence-classification checkpoint.

    The constructor mounts Google Drive, loads the model and tokenizer found
    at ``model_path`` and moves the model to ``device``. ``predict`` classifies
    a single text; ``predict_and_save`` classifies one column of a CSV file
    and writes predictions and per-row timings back to that same file.
    """

    def __init__(self, model_path, device, max_len):
        """Mount Drive and load the model and tokenizer.

        Args:
            model_path: Directory (or hub id) handed to ``from_pretrained``.
            device: Device string accepted by ``torch.device`` (e.g. ``'cuda'``).
            max_len: Token length every input is padded/truncated to.
        """
        # Mounting happens here so that paths under /content/gdrive resolve
        # before the checkpoint is read.
        drive.mount('/content/gdrive')  # Mount Google Drive
        self.model_path = model_path
        self.max_len = max_len
        self.device = torch.device(device)
        self.model, self.tokenizer = self.load_fine_tuned_model()

    def load_fine_tuned_model(self):
        """Load model and tokenizer from ``self.model_path``.

        Returns:
            A ``(model, tokenizer)`` tuple; the model is already on
            ``self.device`` and switched to evaluation mode.
        """
        model = AutoModelForSequenceClassification.from_pretrained(self.model_path)
        tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        model.to(self.device)
        # Switch to eval mode once here instead of on every predict() call, so
        # the per-row timings taken in predict_and_save only cover inference.
        model.eval()
        return model, tokenizer

    def predict(self, text):
        """Return the index of the highest-scoring class for ``text``.

        Args:
            text: The input passed to the tokenizer.

        Returns:
            The arg-max over the model's logits as a Python ``int``.
        """
        # Inputs are padded to max_len and truncated beyond it.
        encoded_input = self.tokenizer(
            text,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Move inputs to the same device as the model
        encoded_input = {k: v.to(self.device) for k, v in encoded_input.items()}

        # Gradients are not needed for inference
        with torch.no_grad():
            logits = self.model(**encoded_input).logits

        return torch.argmax(logits, dim=1).item()

    @staticmethod
    def _backup_path(dataset_path, test_file):
        """Return the backup path for ``test_file``: ``<stem>_original<ext>``."""
        stem, ext = os.path.splitext(test_file)
        return os.path.join(dataset_path, f"{stem}_original{ext}")

    def predict_and_save(self, dataset_path, test_file, feature_col, prediction_col,
                         time_col='time-bert-adam', save_every=10):
        """Predict every row of a CSV file and write the results back to it.

        A one-time backup of the unmodified file is written next to it before
        anything is overwritten. Rows whose feature value is NaN or the empty
        string receive the placeholder prediction ``-1`` and a time of ``0``.

        Args:
            dataset_path: Directory containing ``test_file``.
            test_file: CSV file name; it is overwritten with the results.
            feature_col: Column holding the text to classify.
            prediction_col: Column that receives the predicted class.
            time_col: Column that receives the elapsed seconds per prediction.
            save_every: Write an intermediate CSV every this many rows
                (by row position); ``0`` or ``None`` disables intermediate saves.

        Raises:
            KeyError: If ``feature_col`` is not a column of the CSV file.
        """
        test_path = os.path.join(dataset_path, test_file)
        test_df = pd.read_csv(test_path)

        # Fail before any file is written if the text column is missing.
        if feature_col not in test_df.columns:
            raise KeyError(f"Column '{feature_col}' not found in {test_path}")

        # Backup the original file (only once; an existing backup is kept)
        backup_file = self._backup_path(dataset_path, test_file)
        if not os.path.exists(backup_file):
            test_df.to_csv(backup_file, index=False)

        # Only the text column is needed, so iterate it directly
        sentences = test_df[feature_col]
        progress = tqdm(sentences.items(), total=len(sentences))
        for position, (index, content) in enumerate(progress):
            if pd.isna(content) or content == '':
                # Nothing to classify: store placeholders
                predicted_rating, elapsed_time = -1, 0
            else:
                start_time = time.perf_counter()
                predicted_rating = self.predict(content)
                elapsed_time = time.perf_counter() - start_time

            test_df.at[index, prediction_col] = predicted_rating
            test_df.at[index, time_col] = elapsed_time

            # Checkpoint by row position so an interrupted run keeps its
            # progress regardless of what the index labels look like.
            if save_every and position % save_every == 0:
                test_df.to_csv(test_path, index=False)

        # Save final results to CSV
        test_df.to_csv(test_path, index=False)

# ---------------------------------------------------------------------------
# Inference configuration
# ---------------------------------------------------------------------------
# Every sentence is padded/truncated to this many tokens.
max_len = 512

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Locations on the mounted Google Drive
absolute_path = "/content/gdrive/My Drive/Projects/Financial-Sentiment/"
dataset_path = f"{absolute_path}Datasets/"
test_file = "test_set.csv"
# NOTE(review): the training cell saves under 'TrainedModels/', while this
# loads from 'Bert-TrainedModels/' -- confirm the checkpoint was moved there.
trained_model = f"{absolute_path}Bert-TrainedModels/final_bert_optimized_model"

# Input text column and the column that will receive the predictions
feature_col = 'Sentence'
prediction_col = 'BERT_bayes_opt_prediction'

# Report what is about to run
print("\n".join([
    f"Using device: {device}",
    f"Loading model from: {trained_model}",
    f"Processing test file: {os.path.join(dataset_path, test_file)}",
]))

# Build the predictor (this mounts Drive and loads the checkpoint)
prediction = BertPredictions(model_path=trained_model, device=device, max_len=max_len)

# Classify the test set and write the results back to the CSV
prediction.predict_and_save(
    dataset_path=dataset_path,
    test_file=test_file,
    feature_col=feature_col,
    prediction_col=prediction_col,
)

Using device: cuda
Loading model from: /content/gdrive/My Drive/Projects/Financial-Sentiment/Bert-TrainedModels/final_bert_optimized_model
Processing test file: /content/gdrive/My Drive/Projects/Financial-Sentiment/Datasets/test_set.csv
Mounted at /content/gdrive


100%|██████████| 516/516 [00:07<00:00, 65.86it/s]
