## Multi-class classification using BART

In [None]:
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

In [None]:
!pip install accelerate -U

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from transformers import BartTokenizer, BartForSequenceClassification, TrainingArguments, Trainer
import torch
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from transformers import TrainerCallback
import os
import shutil
import re
import time
from pathlib import Path
#[3.328079168300429e-05, 0.11210091359531205, 5, 9, 21]
def main_model(file_name, ext, type):

    path_type = "Balanced" if type == 1 else "Unbalanced"

    current_file_path = Path(__file__).parent
    path_to_project = current_file_path.parents[1]

    df = pd.read_excel(f"{path_to_project}/Data/Datasets/{path_type}/{file_name}.{ext}")

    results_dir = f"{path_to_project}/Models/BART/Output/{path_type}/{file_name}"
    dump_dir = results_dir+"/Dump"

    if os.path.isdir(results_dir):
        shutil.rmtree(results_dir)

    os.mkdir(results_dir)
    os.mkdir(dump_dir)

    df = df[df['review'].notna() & (df['review'] != '')]
    # Select the text and label columns
    df['review'] = df['review'].str.replace('[^\x20-\x7E]', '', regex=True)
    X = df['review'].values
    y = df['label'].values

    X_train_CV, X_test_full, y_train_CV, y_test_full = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


    # Encode the labels to a numeric format
    label_encoder = LabelEncoder()
    y_train_CV_encoded = label_encoder.fit_transform(y_train_CV)
    y_test_full_encoded = label_encoder.transform(y_test_full)


    tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

    # Tokenization function
    def tokenize_function(texts):
        return tokenizer(texts, padding="max_length", truncation=True, max_length=128)

    loss_logging_callback = LossLoggingCallback()

    # Stratified K-Fold Cross-Validation
    n_splits = 5
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Variables to accumulate scores
    best_accuracy = 0
    best_model = None
    accuracy_scores = []
    metrics_df = pd.DataFrame()


    for fold, (train_index, val_index) in enumerate(kf.split(X_train_CV, y_train_CV_encoded)):
        print(f"Fold {fold+1}/{n_splits}")
        start_time = time.time()
        # Split the data
        X_train, X_val = X_train_CV[train_index], X_train_CV[val_index]
        y_train, y_val = y_train_CV_encoded[train_index], y_train_CV_encoded[val_index]


        # Tokenize the data
        train_encodings = tokenize_function(X_train.tolist())
        val_encodings = tokenize_function(X_val.tolist())

        # Create dataset objects
        train_dataset = ReviewDataset(train_encodings, y_train)
        val_dataset = ReviewDataset(val_encodings, y_val)

        # Initialize the model for each fold
        model = BartForSequenceClassification.from_pretrained('facebook/bart-base', num_labels=len(label_encoder.classes_))

        # Define training arguments for each fold, adjust hyperparameters as needed
        training_args = TrainingArguments(
            output_dir=f"{dump_dir}/res",
            num_train_epochs=5,
            per_device_train_batch_size=9,
            per_device_eval_batch_size=21,
            warmup_steps=500,
            weight_decay=0.11210091359531205,
            logging_dir=f"{dump_dir}/logs",
            logging_strategy="epoch",
            evaluation_strategy="epoch",
            learning_rate=3.328079168300429e-05,
            max_grad_norm=1.0,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            save_strategy="epoch",
            save_total_limit=2,
            lr_scheduler_type='linear'
        )

        # Initialize Trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
            callbacks=[loss_logging_callback]
        )

        # Train
        trainer.train()

        loss_logging_callback.save_logs_to_excel(f"{results_dir}/fold_loss.xlsx")

        # Evaluate
        results = trainer.evaluate()
        accuracy_scores.append(results['eval_accuracy'])

        if results['eval_accuracy'] > best_accuracy:
            best_accuracy = results['eval_accuracy']
            best_model = model  # Assign the best model

        # Get predictions and true labels
        predictions = trainer.predict(val_dataset)
        pred_labels = get_pred_labels(predictions)
        true_labels = y_val

        # Calculate accuracy
        accuracy = accuracy_score(true_labels, pred_labels)
        label_names = label_encoder.inverse_transform(range(len(label_encoder.classes_)))

        # Calculate precision, recall, and F1-score
        report_dict = classification_report(true_labels, pred_labels, output_dict=True, zero_division=0, target_names=label_names)
        # avg_metrics = report_dict['weighted avg']  # Use 'macro avg' or 'weighted avg' based on your preference
        end_time = time.time()
        # Append the metrics for this fold to the DataFrame
        metrics_df = metrics_df.append({
            ('Fold', ''): fold + 1,
            ('Accuracy', ''): accuracy,
            ('Train Time', ''): str(end_time - start_time)+" s",
            ('Bug Report', 'P'): report_dict['bug report']['precision'],
            ('Bug Report', 'R'): report_dict['bug report']['recall'],
            ('Bug Report', 'F1'): report_dict['bug report']['f1-score'],
            ('Feature Request', 'P'): report_dict['feature request']['precision'],
            ('Feature Request', 'R'): report_dict['feature request']['recall'],
            ('Feature Request', 'F1'): report_dict['feature request']['f1-score'],
            ('Rating', 'P'): report_dict['rating']['precision'],
            ('Rating', 'R'): report_dict['rating']['recall'],
            ('Rating', 'F1'): report_dict['rating']['f1-score'],
            ('User Experience', 'P'): report_dict['user experience']['precision'],
            ('User Experience', 'R'): report_dict['user experience']['recall'],
            ('User Experience', 'F1'): report_dict['user experience']['f1-score']
        }, ignore_index=True)

    # Save the DataFrame to a CSV file after completing all folds
    metrics_df.columns = pd.MultiIndex.from_tuples([(c,) if isinstance(c, str) else c for c in metrics_df.columns])
    metrics_df.to_excel(f"{results_dir}/fold_metrics.xlsx", index=True)

    # Evaluate the best model on the test set
    test_encodings = tokenize_function(X_test_full.tolist())
    test_dataset = ReviewDataset(test_encodings, y_test_full_encoded)
    test_trainer = Trainer(model=best_model)
    test_results = test_trainer.predict(test_dataset)
    test_predictions = get_pred_labels(test_results)
    test_accuracy = accuracy_score(y_test_full_encoded, test_predictions)

    label_names_full = label_encoder.inverse_transform(range(len(label_encoder.classes_)))

    # Calculate precision, recall, and F1-score
    report_dict_full = classification_report(y_test_full_encoded, test_predictions, output_dict=True, zero_division=0, target_names=label_names_full)

    full_metrics_df = pd.DataFrame()

    full_metrics_df = full_metrics_df.append({
            ('Accuracy', ''): test_accuracy,
            ('Bug Report', 'P'): report_dict_full['bug report']['precision'],
            ('Bug Report', 'R'): report_dict_full['bug report']['recall'],
            ('Bug Report', 'F1'): report_dict_full['bug report']['f1-score'],
            ('Feature Request', 'P'): report_dict_full['feature request']['precision'],
            ('Feature Request', 'R'): report_dict_full['feature request']['recall'],
            ('Feature Request', 'F1'): report_dict_full['feature request']['f1-score'],
            ('Rating', 'P'): report_dict_full['rating']['precision'],
            ('Rating', 'R'): report_dict_full['rating']['recall'],
            ('Rating', 'F1'): report_dict_full['rating']['f1-score'],
            ('User Experience', 'P'): report_dict_full['user experience']['precision'],
            ('User Experience', 'R'): report_dict_full['user experience']['recall'],
            ('User Experience', 'F1'): report_dict_full['user experience']['f1-score']
        }, ignore_index=True)

    full_metrics_df.columns = pd.MultiIndex.from_tuples([(c,) if isinstance(c, str) else c for c in full_metrics_df.columns])
    full_metrics_df.to_excel(f"{results_dir}/metrics_results_full_test.xlsx", index=True)

    print(f"Test Accuracy: {test_accuracy}")

    # Generate and print the classification report
    print(classification_report(y_test_full_encoded, test_predictions, target_names=label_encoder.classes_, zero_division=0))

    shutil.rmtree(dump_dir)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # If logits are wrapped in a tuple, unwrap them. Otherwise, leave as is.
    if isinstance(logits, tuple):  # Adjust this line if the structure is different
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(predictions, labels)}

def get_pred_labels(trainer_prediction):
    logits = trainer_prediction.predictions
    labels = trainer_prediction.label_ids
    # Check if the logits are wrapped in a tuple (this is usually not the case with predict(), but included for completeness)
    if isinstance(logits, tuple):
        logits = logits[0]
    # Compute the predicted class indices
    predictions = np.argmax(logits, axis=-1)

    return predictions

# Custom dataset class
class ReviewDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

class LossLoggingCallback(TrainerCallback):
    """A custom callback to log training and validation loss."""
    def __init__(self):
        super().__init__()
        self.log_history = []
        self.log_train_loss_history = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        # This method captures both training and evaluation logs, so it's more general than on_epoch_end
        if logs is not None:
            # Capture both training and evaluation steps
            if 'loss' in logs:  # Indicates a training step
                self.log_train_loss_history.append({
                    'epoch': state.epoch,
                    'training_loss': logs.get('loss'),
                })
            elif 'eval_loss' in logs:  # Indicates an evaluation step
                # Make sure to capture the last training loss as well
                last_training_loss = self.log_train_loss_history[-1]['training_loss'] if self.log_train_loss_history else None
                self.log_history.append({
                    'epoch': state.epoch,
                    'training_loss': last_training_loss,  # Include last known training loss for reference
                    'validation_loss': logs.get('eval_loss'),
                    'eval_runtime':logs.get('eval_runtime')
                })

    def save_logs_to_excel(self, file_name):
        """Save the recorded logs to a Excel file."""
        pd.DataFrame(self.log_history).to_excel(file_name, index=False)

__file__ = "/content/drive/MyDrive/FinalProject/Models/BART/BART.ipynb"
current_file_path = Path(__file__).parent
path_to_project = current_file_path.parents[1]

directory_path_multi = path_to_project / 'Data' / 'Datasets' / 'Balanced'

files_multi = [(file.name, file.stat().st_size)
               for file in directory_path_multi.iterdir()
               if file.is_file() and not file.name.startswith('.')]

files_multi.sort(key=lambda x: x[1])





Running each dataset on the model separately due to storage constraints

In [None]:
print(f"Now doing: {files_multi[1][0].split('.')[0]}")
main_model(files_multi[1][0].split('.')[0], files_multi[1][0].split('.')[1], 1)
print(f"Now doing: {files_multi[2][0].split('.')[0]}")
main_model(files_multi[2][0].split('.')[0], files_multi[2][0].split('.')[1], 1)

Now doing: dataset_balanced_4000


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Fold 1/5


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1337,0.716097,0.715625
2,0.7515,0.716639,0.7375
3,0.5432,0.66182,0.776563
4,0.3187,0.721166,0.8
5,0.1936,0.719498,0.828125


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1349,0.713338,0.732812
2,0.7347,0.64492,0.754687
3,0.4957,0.700848,0.776563
4,0.3022,1.059623,0.764062
5,0.1843,0.97682,0.767188


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1327,0.832237,0.7
2,0.7436,0.725027,0.7
3,0.5318,0.769006,0.726562
4,0.2957,0.952885,0.778125
5,0.172,1.041649,0.764062


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1384,0.839027,0.68125
2,0.7443,0.679819,0.74375
3,0.5014,0.707213,0.76875
4,0.3051,0.884496,0.771875
5,0.1497,1.030085,0.779687


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.123,0.659818,0.757812
2,0.727,0.734013,0.715625
3,0.5191,0.728083,0.78125
4,0.3152,0.750375,0.779687
5,0.1703,0.830405,0.8


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.78125
                 precision    recall  f1-score   support

     bug report       0.83      0.88      0.85       200
feature request       0.76      0.82      0.79       200
         rating       0.82      0.69      0.75       200
user experience       0.72      0.74      0.73       200

       accuracy                           0.78       800
      macro avg       0.78      0.78      0.78       800
   weighted avg       0.78      0.78      0.78       800

Now doing: dataset_balanced_8000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9809,0.866072,0.704688
2,0.5805,0.580987,0.792969
3,0.353,0.586236,0.832031
4,0.2127,0.480798,0.885156


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9809,0.866072,0.704688
2,0.5805,0.580987,0.792969
3,0.353,0.586236,0.832031
4,0.2127,0.480798,0.885156
5,0.1171,0.522662,0.891406


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9826,0.71293,0.733594
2,0.5791,0.624048,0.760156
3,0.3349,0.616757,0.835938
4,0.1833,0.659899,0.860938
5,0.0896,0.709505,0.872656


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9847,0.651592,0.747656
2,0.5717,0.516288,0.810156
3,0.3261,0.518493,0.867188
4,0.1956,0.582946,0.886719
5,0.0946,0.637755,0.89375


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9804,0.619845,0.763281
2,0.6057,0.54906,0.795312
3,0.3703,0.497614,0.859375
4,0.2084,0.533606,0.889844
5,0.1105,0.577234,0.894531


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9739,0.649811,0.755469
2,0.5812,0.575523,0.8
3,0.345,0.499072,0.871094
4,0.1982,0.547131,0.882031
5,0.1042,0.602524,0.8875


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.885625
                 precision    recall  f1-score   support

     bug report       0.90      0.95      0.92       400
feature request       0.89      0.89      0.89       400
         rating       0.93      0.83      0.88       400
user experience       0.82      0.88      0.85       400

       accuracy                           0.89      1600
      macro avg       0.89      0.89      0.89      1600
   weighted avg       0.89      0.89      0.89      1600



In [None]:
print(f"Now doing: {files_multi[3][0].split('.')[0]}")
main_model(files_multi[3][0].split('.')[0], files_multi[3][0].split('.')[1], 1)
print(f"Now doing: {files_multi[4][0].split('.')[0]}")
main_model(files_multi[4][0].split('.')[0], files_multi[4][0].split('.')[1], 1)

Now doing: dataset_gpt_balanced_4000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3032,0.970847,0.58125
2,0.5094,0.336554,0.91875
3,0.2479,0.246451,0.95
4,0.1793,0.234713,0.948438
5,0.1113,0.257092,0.953125


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2832,0.737175,0.709375
2,0.451,0.386376,0.925
3,0.2505,0.262058,0.942187
4,0.1857,0.243446,0.948438
5,0.1326,0.305518,0.945312


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2705,0.747478,0.715625
2,0.5009,0.316767,0.9375
3,0.2695,0.261628,0.951562
4,0.1751,0.292895,0.939063
5,0.1168,0.296408,0.946875


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2809,0.791082,0.689063
2,0.4622,0.64659,0.80625
3,0.2325,0.205817,0.960938
4,0.1788,0.234453,0.946875
5,0.1214,0.239507,0.953125


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2735,0.797301,0.680751
2,0.469,0.263166,0.943662
3,0.2602,0.273912,0.946792
4,0.1896,0.223367,0.951487
5,0.1176,0.307387,0.931142


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.9675
                 precision    recall  f1-score   support

     bug report       0.99      0.96      0.98       200
feature request       0.99      0.98      0.98       200
         rating       0.96      0.94      0.95       200
user experience       0.92      0.98      0.95       200

       accuracy                           0.97       800
      macro avg       0.97      0.97      0.97       800
   weighted avg       0.97      0.97      0.97       800

Now doing: dataset_gpt_balanced_8000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8358,0.226062,0.958594
2,0.1967,0.129002,0.977344
3,0.1381,0.165315,0.971094
4,0.0864,0.173331,0.971094
5,0.0512,0.161498,0.976562


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8797,0.246212,0.95
2,0.1758,0.14625,0.975781
3,0.1103,0.116032,0.975781
4,0.0709,0.113473,0.978125
5,0.0426,0.127842,0.977344


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8999,0.276715,0.941406
2,0.194,0.178792,0.971875
3,0.1197,0.172439,0.972656
4,0.0867,0.197932,0.970313
5,0.0524,0.198142,0.969531


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8943,0.237667,0.945312
2,0.1662,0.183934,0.964063
3,0.1234,0.19904,0.967187
4,0.083,0.219575,0.964063
5,0.0488,0.232005,0.964063


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8588,0.222347,0.953088
2,0.1798,0.181873,0.965598
3,0.1329,0.147743,0.973417
4,0.0921,0.147136,0.978108
5,0.0613,0.15935,0.97498


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.98375
                 precision    recall  f1-score   support

     bug report       0.99      0.97      0.98       400
feature request       1.00      0.98      0.99       400
         rating       0.98      0.99      0.98       400
user experience       0.97      1.00      0.98       400

       accuracy                           0.98      1600
      macro avg       0.98      0.98      0.98      1600
   weighted avg       0.98      0.98      0.98      1600



In [None]:
print(f"Now doing: {files_multi[5][0].split('.')[0]}")
main_model(files_multi[5][0].split('.')[0], files_multi[5][0].split('.')[1], 1)

Now doing: dataset_gpt_balanced_20000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3897,0.099815,0.979688
2,0.0901,0.079041,0.985
3,0.0563,0.05882,0.989062
4,0.0355,0.08014,0.989375


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3897,0.099815,0.979688
2,0.0901,0.079041,0.985
3,0.0563,0.05882,0.989062
4,0.0355,0.08014,0.989375
5,0.0213,0.086336,0.989062


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4059,0.086388,0.987187
2,0.0812,0.073526,0.987812
3,0.0602,0.074098,0.988125
4,0.0373,0.060269,0.990625
5,0.0199,0.061281,0.990625


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_20000/Dump/res/checkpoint-7115 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_tok

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3893,0.099227,0.98375
2,0.0752,0.105518,0.984688
3,0.0561,0.139558,0.974688
4,0.036,0.078742,0.987812
5,0.0193,0.092609,0.987187


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_20000/Dump/res/checkpoint-7115 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_tok

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4054,0.08659,0.985
2,0.0723,0.058463,0.990313
3,0.0603,0.06777,0.99
4,0.0312,0.070834,0.989688
5,0.0137,0.077259,0.99


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_20000/Dump/res/checkpoint-7115 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_tok

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3987,0.119435,0.979994
2,0.0768,0.084506,0.987809
3,0.0515,0.079264,0.987496
4,0.0298,0.080686,0.987496
5,0.0171,0.077842,0.987183


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_20000/Dump/res/checkpoint-7115 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_tok

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.993
                 precision    recall  f1-score   support

     bug report       0.99      0.99      0.99      1000
feature request       1.00      0.99      1.00      1000
         rating       0.99      0.99      0.99      1000
user experience       0.99      0.99      0.99      1000

       accuracy                           0.99      4000
      macro avg       0.99      0.99      0.99      4000
   weighted avg       0.99      0.99      0.99      4000



In [None]:
print(f"Now doing: {files_multi[6][0].split('.')[0]}")
main_model(files_multi[6][0].split('.')[0], files_multi[6][0].split('.')[1], 1)

Now doing: dataset_gpt_balanced_32000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2549,0.069666,0.989648
2,0.0502,0.05032,0.991797
3,0.0375,0.049635,0.992383
4,0.026,0.04419,0.993164
5,0.0127,0.053193,0.993359


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.264,0.06084,0.989453
2,0.0512,0.031485,0.994727
3,0.0376,0.027175,0.994727
4,0.0236,0.029871,0.994531
5,0.0129,0.039747,0.994922


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_32000/Dump/res/checkpoint-11380 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_to

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2624,0.094733,0.983984
2,0.0513,0.047519,0.992773
3,0.0356,0.039649,0.994141
4,0.0245,0.044042,0.993555
5,0.0119,0.051682,0.993359


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_32000/Dump/res/checkpoint-11380 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_to

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2632,0.068768,0.987891
2,0.0511,0.047052,0.99043
3,0.0347,0.051512,0.991602
4,0.0246,0.046213,0.994141
5,0.0121,0.05659,0.993359


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_32000/Dump/res/checkpoint-11380 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_to

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2745,0.070148,0.989842
2,0.0536,0.053374,0.992381
3,0.0342,0.048243,0.993553
4,0.0227,0.042476,0.992772
5,0.0099,0.046125,0.993163


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Balanced/dataset_gpt_balanced_32000/Dump/res/checkpoint-11380 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_to

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.994375
                 precision    recall  f1-score   support

     bug report       1.00      1.00      1.00      1600
feature request       1.00      1.00      1.00      1600
         rating       0.99      0.99      0.99      1600
user experience       0.99      0.99      0.99      1600

       accuracy                           0.99      6400
      macro avg       0.99      0.99      0.99      6400
   weighted avg       0.99      0.99      0.99      6400



In [None]:
directory_path_unbalanced = path_to_project / 'Data' / 'Datasets' / 'Unbalanced'

files_unbalanced = [(file.name, file.stat().st_size)
               for file in directory_path_unbalanced.iterdir()
               if file.is_file() and not file.name.startswith('.')]

files_unbalanced.sort(key=lambda x: x[1])

In [None]:
print(f"Now doing: {files_unbalanced[0][0].split('.')[0]}")
main_model(files_unbalanced[0][0].split('.')[0], files_unbalanced[0][0].split('.')[1], 2)
print(f"Now doing: {files_unbalanced[1][0].split('.')[0]}")
main_model(files_unbalanced[1][0].split('.')[0], files_unbalanced[1][0].split('.')[1], 2)

Now doing: dataset_unbalanced_4000


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Fold 1/5


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0952,0.71902,0.734375
2,0.6793,0.599432,0.801562
3,0.4514,0.51579,0.835938
4,0.2664,0.594305,0.85
5,0.1419,0.637849,0.860938


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0922,0.667296,0.7375
2,0.6417,0.670883,0.767188
3,0.4172,0.643683,0.7875
4,0.2503,0.808369,0.821875
5,0.1149,0.885504,0.829688


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0791,0.728757,0.715625
2,0.6826,0.540833,0.785937
3,0.4238,0.581161,0.835938
4,0.2597,0.62866,0.842187
5,0.1258,0.705461,0.857812


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0927,0.690871,0.754687
2,0.6458,0.523728,0.807813
3,0.4274,0.574516,0.832812
4,0.2406,0.664932,0.867188
5,0.1055,0.719454,0.853125


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0781,0.677464,0.740625
2,0.6638,0.633365,0.76875
3,0.4352,0.620418,0.785937
4,0.2295,0.831357,0.8
5,0.1295,0.887396,0.8125


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_toke

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.82375
                 precision    recall  f1-score   support

     bug report       0.83      0.89      0.86       251
feature request       0.86      0.84      0.85       282
         rating       0.90      0.68      0.78       148
user experience       0.68      0.81      0.74       119

       accuracy                           0.82       800
      macro avg       0.82      0.81      0.81       800
   weighted avg       0.83      0.82      0.82       800

Now doing: dataset_gpt_unbalanced_4000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.185,0.606562,0.80625
2,0.355,0.110625,0.98125
3,0.1079,0.064429,0.984375
4,0.0524,0.048608,0.989062
5,0.0263,0.044213,0.9875


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1539,1.02456,0.634375
2,0.3182,0.159639,0.971875
3,0.1144,0.093093,0.985938
4,0.0583,0.12563,0.978125
5,0.0332,0.10254,0.982812


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1637,0.911059,0.65625
2,0.3389,0.261155,0.945312
3,0.104,0.097756,0.985938
4,0.0534,0.121374,0.982812
5,0.0287,0.12168,0.982812


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1501,1.119831,0.545312
2,0.3975,0.147767,0.964063
3,0.1167,0.121739,0.98125
4,0.058,0.154185,0.978125
5,0.0308,0.152002,0.979688


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1762,0.730435,0.739062
2,0.3381,0.222268,0.964063
3,0.1032,0.1234,0.98125
4,0.0463,0.143124,0.979688
5,0.0167,0.148467,0.979688


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.985
                 precision    recall  f1-score   support

     bug report       0.99      0.98      0.98       100
feature request       0.99      0.97      0.98       150
         rating       1.00      0.98      0.99       250
user experience       0.97      0.99      0.98       300

       accuracy                           0.98       800
      macro avg       0.99      0.98      0.99       800
   weighted avg       0.99      0.98      0.99       800



In [None]:
print(f"Now doing: {files_unbalanced[2][0].split('.')[0]}")
main_model(files_unbalanced[2][0].split('.')[0], files_unbalanced[2][0].split('.')[1], 2)

Now doing: dataset_gpt_unbalanced_8000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7408,0.206736,0.958594
2,0.1172,0.071571,0.9875
3,0.0558,0.081072,0.986719
4,0.0341,0.076401,0.989844
5,0.0225,0.1051,0.983594


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8333,0.246019,0.95
2,0.1139,0.091965,0.985938
3,0.0439,0.08267,0.989062
4,0.0302,0.087097,0.986719
5,0.0187,0.084065,0.988281


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8019,0.148907,0.967187
2,0.1076,0.091225,0.9875
3,0.0528,0.088977,0.9875
4,0.033,0.074318,0.989062
5,0.0233,0.08545,0.9875


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7521,0.207078,0.967187
2,0.1096,0.076639,0.989844
3,0.0554,0.071589,0.989062
4,0.0217,0.081597,0.989062
5,0.0134,0.075698,0.989844


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8145,0.134374,0.968726
2,0.1017,0.065714,0.989836


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8145,0.134374,0.968726
2,0.1017,0.065714,0.989836
3,0.0535,0.057245,0.992181
4,0.0245,0.054962,0.992181
5,0.0159,0.057987,0.992181


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.994375
                 precision    recall  f1-score   support

     bug report       1.00      0.99      0.99       200
feature request       1.00      1.00      1.00       300
         rating       1.00      0.99      0.99       500
user experience       0.99      1.00      0.99       600

       accuracy                           0.99      1600
      macro avg       1.00      0.99      0.99      1600
   weighted avg       0.99      0.99      0.99      1600



In [None]:
print(f"Now doing: {files_unbalanced[3][0].split('.')[0]}")
main_model(files_unbalanced[3][0].split('.')[0], files_unbalanced[3][0].split('.')[1], 2)

Now doing: dataset_gpt_unbalanced_16000


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Fold 1/5


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.419,0.073987,0.987891
2,0.0626,0.072323,0.988281
3,0.0334,0.094485,0.987891
4,0.0211,0.066701,0.991016
5,0.0104,0.076022,0.991016


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4209,0.080783,0.987109
2,0.0592,0.079417,0.986328
3,0.0425,0.053605,0.992969
4,0.0251,0.053311,0.992969
5,0.0131,0.051711,0.992188


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4291,0.071305,0.989844
2,0.0711,0.037259,0.993359
3,0.0418,0.061048,0.991016
4,0.024,0.05797,0.992578
5,0.0143,0.065118,0.992578


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4086,0.134663,0.978906
2,0.0586,0.041802,0.992578
3,0.041,0.049536,0.993359
4,0.028,0.050651,0.992969
5,0.0113,0.060449,0.991797


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4219,0.057937,0.988277
2,0.0628,0.045237,0.992575
3,0.0386,0.04043,0.994138
4,0.0293,0.047631,0.993748
5,0.0171,0.047808,0.993748


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Unbalanced/dataset_gpt_unbalanced_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos

  metrics_df = metrics_df.append({


  full_metrics_df = full_metrics_df.append({


Test Accuracy: 0.995625
                 precision    recall  f1-score   support

     bug report       0.99      0.99      0.99       400
feature request       1.00      1.00      1.00       600
         rating       1.00      0.99      1.00      1000
user experience       0.99      1.00      1.00      1200

       accuracy                           1.00      3200
      macro avg       1.00      0.99      1.00      3200
   weighted avg       1.00      1.00      1.00      3200



## Multi-label classification using BART

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from transformers import BartTokenizer, BartForSequenceClassification, TrainingArguments, Trainer
import torch
from torch.utils.data import Dataset
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report
from transformers import TrainerCallback
import os
import shutil
import re
import time
from pathlib import Path
import numpy as np

def multi_main_model(file_name, ext):

    current_file_path = Path(__file__).parent

    path_to_project = current_file_path.parents[1]

    df = pd.read_csv(f"{path_to_project}/Data/Datasets/Multi-label/{file_name}.{ext}")

    results_dir = f"{path_to_project}/Models/BART/Output/Multi-label/{file_name}"
    dump_dir = results_dir+"/Dump"

    if os.path.isdir(results_dir):
        shutil.rmtree(results_dir)

    os.mkdir(results_dir)
    os.mkdir(dump_dir)

    df = df[df['review'].notna() & (df['review'] != '')]
    df['review'] = df['review'].str.replace('[^\x20-\x7E]', '', regex=True)

    X = df['review'].values
    y = df[['bug report', 'user experience', 'rating', 'feature request']].values

    X_train_CV, X_test_full, y_train_CV, y_test_full = train_test_split(X, y, test_size=0.2, random_state=42)

    tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

    def tokenize_function(examples):
        return tokenizer(examples, padding="max_length", truncation=True, max_length=128)

    loss_logging_callback = LossLoggingCallback()

    # K-Fold Cross-Validation
    n_splits = 5
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Variables to accumulate scores
    best_f1 = 0
    best_model = None
    metrics_df = pd.DataFrame()

    for fold, (train_index, val_index) in enumerate(kf.split(X_train_CV, y_train_CV)):
        print(f"Fold {fold+1}/{n_splits}")
        start_time = time.time()

        X_train, X_val = X_train_CV[train_index], X_train_CV[val_index]
        y_train, y_val = y_train_CV[train_index], y_train_CV[val_index]

        train_encodings = tokenize_function(X_train.tolist())
        val_encodings = tokenize_function(X_val.tolist())

        train_dataset = MultiLabelDataset(train_encodings, y_train)
        val_dataset = MultiLabelDataset(val_encodings, y_val)

        model = BartForSequenceClassification.from_pretrained('facebook/bart-base', num_labels=4, problem_type="multi_label_classification")

        training_args = TrainingArguments(
            output_dir=f"{dump_dir}/res",
            num_train_epochs=5,
            per_device_train_batch_size=9,
            per_device_eval_batch_size=21,
            warmup_steps=500,
            weight_decay=0.11210091359531205,
            logging_dir=f"{dump_dir}/logs",
            logging_strategy="epoch",
            evaluation_strategy="epoch",
            learning_rate=3.328079168300429e-05,
            max_grad_norm=1.0,
            load_best_model_at_end=True,
            metric_for_best_model="f1",
            save_strategy="epoch",
            save_total_limit=2,
            lr_scheduler_type='linear'
        )

        def compute_metrics(trainer_prediction):
            logits = trainer_prediction.predictions
            labels = trainer_prediction.label_ids
            # Check if the logits are wrapped in a tuple (this is usually not the case with predict(), but included for completeness)
            if isinstance(logits, tuple):
                logits = logits[0]
            # Compute the predicted class indices
            predictions = torch.sigmoid(torch.tensor(logits)).numpy()
            threshold = 0.5
            predictions = (predictions > threshold).astype(int)
            precision = precision_score(labels, predictions, average='micro')
            recall = recall_score(labels, predictions, average='micro')
            f1 = f1_score(labels, predictions, average='micro')
            return {'precision': precision, 'recall': recall, 'f1': f1}

        def get_pred(trainer_prediction):
            logits = trainer_prediction.predictions
            labels = trainer_prediction.label_ids
            # Check if the logits are wrapped in a tuple (this is usually not the case with predict(), but included for completeness)
            if isinstance(logits, tuple):
                logits = logits[0]
            # Compute the predicted class indices
            predictions = torch.sigmoid(torch.tensor(logits)).numpy()
            return predictions

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
            callbacks=[loss_logging_callback]
        )

        trainer.train()

        loss_logging_callback.save_logs_to_excel(f"{results_dir}/fold_loss.xlsx")

        results = trainer.evaluate()

        if results['eval_f1'] > best_f1:
            best_f1 = results['eval_f1']
            best_model = model


        predictions = trainer.predict(val_dataset)
        pred_probs = get_pred(predictions)
        threshold = 0.5
        binary_predictions = (pred_probs > threshold).astype(int)

        # True labels
        true_labels = predictions.label_ids
        f1 = f1_score(true_labels, binary_predictions, average='micro')

        report_dict = classification_report(true_labels, binary_predictions, output_dict=True, zero_division=0, target_names=['bug report', 'user experience', 'rating', 'feature request'])
        # avg_metrics = report_dict['weighted avg']  # Use 'macro avg' or 'weighted avg' based on your preference
        end_time = time.time()
        # Append the metrics for this fold to the DataFrame
        metrics_df = metrics_df.append({
            ('Fold', ''): fold + 1,
            ('F1-Score', ''): f1,
            ('Train Time', ''): str(end_time - start_time)+" s",
            ('Bug Report', 'P'): report_dict['bug report']['precision'],
            ('Bug Report', 'R'): report_dict['bug report']['recall'],
            ('Bug Report', 'F1'): report_dict['bug report']['f1-score'],
            ('Feature Request', 'P'): report_dict['feature request']['precision'],
            ('Feature Request', 'R'): report_dict['feature request']['recall'],
            ('Feature Request', 'F1'): report_dict['feature request']['f1-score'],
            ('Rating', 'P'): report_dict['rating']['precision'],
            ('Rating', 'R'): report_dict['rating']['recall'],
            ('Rating', 'F1'): report_dict['rating']['f1-score'],
            ('User Experience', 'P'): report_dict['user experience']['precision'],
            ('User Experience', 'R'): report_dict['user experience']['recall'],
            ('User Experience', 'F1'): report_dict['user experience']['f1-score']
        }, ignore_index=True)

    metrics_df.columns = pd.MultiIndex.from_tuples([(c,) if isinstance(c, str) else c for c in metrics_df.columns])
    metrics_df.to_excel(f"{results_dir}/fold_metrics.xlsx", index=True)

    test_encodings = tokenize_function(X_test_full.tolist())
    test_dataset = MultiLabelDataset(test_encodings, y_test_full)
    test_trainer = Trainer(model=best_model)
    test_predictions = test_trainer.predict(test_dataset)
    test_pred_probs = get_pred(test_predictions)
    threshold = 0.5
    test_binary_predictions = (test_pred_probs > threshold).astype(int)

    test_true_labels = test_predictions.label_ids
    test_f1 = f1_score(test_true_labels, test_binary_predictions, average='micro')

    test_report_dict = classification_report(test_true_labels, test_binary_predictions, output_dict=True, zero_division=0, target_names=['bug report', 'user experience', 'rating', 'feature request'])
    # avg_metrics = report_dict['weighted avg']  # Use 'macro avg' or 'weighted avg' based on your preference
    # Append the metrics for this fold to the DataFrame
    test_metrics_df = pd.DataFrame()

    test_metrics_df = test_metrics_df.append({
            ('F1', ''): test_f1,
            ('Bug Report', 'P'): test_report_dict['bug report']['precision'],
            ('Bug Report', 'R'): test_report_dict['bug report']['recall'],
            ('Bug Report', 'F1'): test_report_dict['bug report']['f1-score'],
            ('Feature Request', 'P'): test_report_dict['feature request']['precision'],
            ('Feature Request', 'R'): test_report_dict['feature request']['recall'],
            ('Feature Request', 'F1'): test_report_dict['feature request']['f1-score'],
            ('Rating', 'P'): test_report_dict['rating']['precision'],
            ('Rating', 'R'): test_report_dict['rating']['recall'],
            ('Rating', 'F1'): test_report_dict['rating']['f1-score'],
            ('User Experience', 'P'): test_report_dict['user experience']['precision'],
            ('User Experience', 'R'): test_report_dict['user experience']['recall'],
            ('User Experience', 'F1'): test_report_dict['user experience']['f1-score']
        }, ignore_index=True)

    test_metrics_df.columns = pd.MultiIndex.from_tuples([(c,) if isinstance(c, str) else c for c in test_metrics_df.columns])
    test_metrics_df.to_excel(f"{results_dir}/metrics_results_full_test.xlsx", index=True)

    print(f"Test F1: {test_f1}")

    # Generate and print the classification report
    print(test_report_dict)

    shutil.rmtree(dump_dir)

class MultiLabelDataset(Dataset):

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float32)  # Ensure float32 for BCEWithLogitsLoss
        return item

    def __len__(self):
        return len(self.labels)
class LossLoggingCallback(TrainerCallback):
    """A custom callback to log training and validation loss."""
    def __init__(self):
        super().__init__()
        self.log_history = []
        self.log_train_loss_history = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        # This method captures both training and evaluation logs, so it's more general than on_epoch_end
        if logs is not None:
            # Capture both training and evaluation steps
            if 'loss' in logs:  # Indicates a training step
                self.log_train_loss_history.append({
                    'epoch': state.epoch,
                    'training_loss': logs.get('loss'),
                })
            elif 'eval_loss' in logs:  # Indicates an evaluation step
                # Make sure to capture the last training loss as well
                last_training_loss = self.log_train_loss_history[-1]['training_loss'] if self.log_train_loss_history else None
                self.log_history.append({
                    'epoch': state.epoch,
                    'training_loss': last_training_loss,  # Include last known training loss for reference
                    'validation_loss': logs.get('eval_loss'),
                    'eval_runtime':logs.get('eval_runtime')
                })

    def save_logs_to_excel(self, file_name):
        """Save the recorded logs to a Excel file."""
        pd.DataFrame(self.log_history).to_excel(file_name, index=False)

__file__ = "/content/drive/MyDrive/FinalProject/Models/BART/BART.ipynb"
current_file_path = Path(__file__).parent
path_to_project = current_file_path.parents[1]

directory_path_multi_label = path_to_project / 'Data' / 'Datasets' / 'Multi-label'

files_multi_label = [(file.name, file.stat().st_size)
               for file in directory_path_multi_label.iterdir()
               if file.is_file() and not file.name.startswith('.')]

files_multi_label.sort(key=lambda x: x[1])

In [5]:
print(f"Now doing: {files_multi_label[1][0].split('.')[0]}")
multi_main_model(files_multi_label[1][0].split('.')[0], files_multi_label[1][0].split('.')[1])

Now doing: dataset_gpt_multi_label_4000


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Fold 1/5


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.6255,0.548425,0.67033,0.804749,0.731415
2,0.3639,0.244866,0.984227,0.823219,0.896552
3,0.2172,0.183266,0.983398,0.885664,0.931976
4,0.1439,0.172309,0.977948,0.897098,0.93578
5,0.1081,0.179944,0.961003,0.91029,0.934959


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.6167,0.479571,0.81087,0.643658,0.717653
2,0.3731,0.235545,0.961765,0.846419,0.900413
3,0.2156,0.198577,0.9683,0.869715,0.916364
4,0.1742,0.183885,0.964419,0.888697,0.925011
5,0.1126,0.15656,0.96496,0.926661,0.945423


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.617,0.495122,0.71643,0.785962,0.749587
2,0.3618,0.225017,0.966732,0.856153,0.908088
3,0.2178,0.190547,0.985149,0.862218,0.919593
4,0.1607,0.161203,0.968807,0.915078,0.941176
5,0.1195,0.158397,0.973247,0.914211,0.942806


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.6124,0.529975,0.767857,0.651515,0.704918
2,0.3568,0.24483,0.97446,0.835017,0.899365
3,0.2074,0.222767,0.92103,0.903199,0.912027
4,0.143,0.163294,0.970771,0.922559,0.946051
5,0.1075,0.163093,0.970822,0.924242,0.94696


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.6176,0.513711,0.747631,0.713229,0.730025
2,0.3723,0.264214,0.965251,0.821693,0.887705
3,0.217,0.234219,0.951818,0.860312,0.903755
4,0.1702,0.176516,0.972493,0.900575,0.935154
5,0.1134,0.183264,0.96743,0.90304,0.934127


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_4000/Dump/res/checkpoint-1425 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


  test_metrics_df = test_metrics_df.append({


Test F1: 0.9292717086834733
{'bug report': {'precision': 0.9892086330935251, 'recall': 0.9548611111111112, 'f1-score': 0.9717314487632509, 'support': 288}, 'user experience': {'precision': 0.9503424657534246, 'recall': 0.9422750424448217, 'f1-score': 0.9462915601023018, 'support': 589}, 'rating': {'precision': 0.883495145631068, 'recall': 0.708171206225681, 'f1-score': 0.7861771058315334, 'support': 257}, 'feature request': {'precision': 0.9905660377358491, 'recall': 0.9375, 'f1-score': 0.963302752293578, 'support': 336}, 'micro avg': {'precision': 0.9574314574314574, 'recall': 0.9027210884353741, 'f1-score': 0.9292717086834733, 'support': 1470}, 'macro avg': {'precision': 0.9534030705534667, 'recall': 0.8857018399454035, 'f1-score': 0.916875716747666, 'support': 1470}, 'weighted avg': {'precision': 0.9554641086844435, 'recall': 0.9027210884353741, 'f1-score': 0.9271711749070873, 'support': 1470}, 'samples avg': {'precision': 0.8715625, 'recall': 0.83375, 'f1-score': 0.8418869047619048

In [6]:
print(f"Now doing: {files_multi_label[2][0].split('.')[0]}")
multi_main_model(files_multi_label[2][0].split('.')[0], files_multi_label[2][0].split('.')[1])
print(f"Now doing: {files_multi_label[3][0].split('.')[0]}")
multi_main_model(files_multi_label[3][0].split('.')[0], files_multi_label[3][0].split('.')[1])

Now doing: dataset_gpt_multi_label_8000
Fold 1/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.4595,0.21534,0.969062,0.872025,0.917986
2,0.1612,0.116893,0.991388,0.9304,0.959926
3,0.101,0.096948,0.996216,0.945667,0.970283
4,0.078,0.096862,0.983759,0.951953,0.967595
5,0.0599,0.089913,0.984252,0.954198,0.968992


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.4684,0.197522,0.987888,0.862529,0.920962
2,0.1597,0.134471,0.978754,0.931954,0.954781
3,0.1056,0.123352,0.978032,0.941609,0.959475
4,0.0817,0.11474,0.971254,0.947586,0.959274
5,0.064,0.109935,0.982356,0.947126,0.964419


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.4669,0.195888,0.998908,0.854673,0.921179
2,0.16,0.127452,0.983226,0.931308,0.956563
3,0.0958,0.106649,0.975036,0.949065,0.961875
4,0.0696,0.098168,0.985929,0.949533,0.967389
5,0.0492,0.104324,0.98025,0.950935,0.96537


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.4699,0.20561,0.981413,0.865574,0.919861
2,0.1632,0.137907,0.973145,0.933489,0.952905
3,0.107,0.110024,0.974026,0.948478,0.961082
4,0.0729,0.092567,0.980788,0.95644,0.968461
5,0.0512,0.095714,0.979367,0.955972,0.967528


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.4664,0.204564,0.99559,0.857143,0.921194
2,0.1596,0.124656,0.986869,0.927385,0.956203
3,0.1112,0.113902,0.986007,0.936402,0.960565
4,0.0797,0.111013,0.980411,0.950166,0.965052
5,0.0587,0.105456,0.973837,0.953963,0.963798


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_8000/Dump/res/checkpoint-2845 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eo

  metrics_df = metrics_df.append({


  test_metrics_df = test_metrics_df.append({


Test F1: 0.9672544080604534
{'bug report': {'precision': 0.9919028340080972, 'recall': 0.9702970297029703, 'f1-score': 0.980980980980981, 'support': 505}, 'user experience': {'precision': 0.9952516619183286, 'recall': 0.9579524680073126, 'f1-score': 0.976245924545878, 'support': 1094}, 'rating': {'precision': 0.9974293059125964, 'recall': 0.8308351177730193, 'f1-score': 0.9065420560747663, 'support': 467}, 'feature request': {'precision': 0.9965034965034965, 'recall': 0.9710391822827938, 'f1-score': 0.9836065573770492, 'support': 587}, 'micro avg': {'precision': 0.9952153110047847, 'recall': 0.94082171127026, 'f1-score': 0.9672544080604534, 'support': 2653}, 'macro avg': {'precision': 0.9952718245856296, 'recall': 0.932530949441524, 'f1-score': 0.9618438797446687, 'support': 2653}, 'weighted avg': {'precision': 0.9952745147461273, 'recall': 0.94082171127026, 'f1-score': 0.9665060784831622, 'support': 2653}, 'samples avg': {'precision': 0.8693229166666666, 'recall': 0.8405208333333333, 

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.2979,0.091121,0.989359,0.948718,0.968612
2,0.0888,0.087214,0.982086,0.961015,0.971436
3,0.061,0.050754,0.991478,0.974097,0.982711
4,0.0369,0.046803,0.995461,0.975406,0.985331
5,0.0268,0.052539,0.987308,0.976975,0.982115


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight'].


  metrics_df = metrics_df.append({


Fold 2/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.2783,0.106358,0.987634,0.938626,0.962507
2,0.0789,0.081122,0.993735,0.952729,0.9728
3,0.0508,0.058534,0.993031,0.967616,0.980159
4,0.0343,0.055902,0.989888,0.971533,0.980625
5,0.0233,0.057866,0.989113,0.972839,0.980908


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_e

  metrics_df = metrics_df.append({


Fold 3/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.2832,0.093298,0.985592,0.954275,0.969681
2,0.0782,0.055596,0.993145,0.973134,0.983038
3,0.0449,0.054748,0.990816,0.975459,0.983077
4,0.0321,0.049632,0.99317,0.97675,0.984892
5,0.0225,0.052011,0.992653,0.977267,0.9849


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_e

  metrics_df = metrics_df.append({


Fold 4/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.2883,0.104328,0.978842,0.95559,0.967076
2,0.0769,0.06047,0.995222,0.967983,0.981414
3,0.0468,0.057687,0.985912,0.975729,0.980794
4,0.0324,0.061688,0.982333,0.976246,0.97928
5,0.0221,0.057681,0.986198,0.977795,0.981978


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_e

  metrics_df = metrics_df.append({


Fold 5/5


Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.2881,0.110935,0.979575,0.944838,0.961893
2,0.0808,0.069113,0.996459,0.960862,0.978336
3,0.0509,0.059379,0.991398,0.968742,0.979939
4,0.0364,0.060018,0.990892,0.971631,0.981167
5,0.0255,0.061204,0.989322,0.97347,0.981332


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
Checkpoint destination directory /content/drive/MyDrive/FinalProject/Models/BART/Output/Multi-label/dataset_gpt_multi_label_16000/Dump/res/checkpoint-5690 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_e

  metrics_df = metrics_df.append({


  test_metrics_df = test_metrics_df.append({


Test F1: 0.9824748877965378
{'bug report': {'precision': 0.9910714285714286, 'recall': 0.9833887043189369, 'f1-score': 0.9872151195108393, 'support': 903}, 'user experience': {'precision': 0.9975272007912958, 'recall': 0.9786511402231927, 'f1-score': 0.9879990203281901, 'support': 2061}, 'rating': {'precision': 0.9871060171919771, 'recall': 0.9223560910307899, 'f1-score': 0.9536332179930795, 'support': 747}, 'feature request': {'precision': 0.9960278053624627, 'recall': 0.9794921875, 'f1-score': 0.9876907927129492, 'support': 1024}, 'micro avg': {'precision': 0.9943759463551807, 'recall': 0.9708553326293559, 'f1-score': 0.9824748877965378, 'support': 4735}, 'macro avg': {'precision': 0.992933112979291, 'recall': 0.9659720307682298, 'f1-score': 0.9791345376362646, 'support': 4735}, 'weighted avg': {'precision': 0.9943277145437022, 'recall': 0.9708553326293559, 'f1-score': 0.9823612712552435, 'support': 4735}, 'samples avg': {'precision': 0.8453385416666666, 'recall': 0.83515625, 'f1-sco