In [1]:
!pip install transformers torch datasets sacrebleu rouge_score accelerate
from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
from transformers import DataCollatorForSeq2Seq, EarlyStoppingCallback
from datasets import load_dataset, load_metric
import torch
import numpy as np


Collecting datasets
  Downloading datasets-2.19.0-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sacrebleu
  Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting accelerate
  Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-m

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def load_and_prepare_data(model_name):

    dataset = load_dataset("neulab/tldr")
    train_test_split = dataset["train"].train_test_split(test_size=0.2)

    tokenizer = BartTokenizer.from_pretrained(model_name)

    def preprocess_function(examples):
        input_texts = ["translate English to Bash: " + inp for inp in examples['nl']]
        model_inputs = tokenizer(input_texts, max_length=30, truncation=True, padding="max_length")
        labels = tokenizer(examples['cmd'], max_length=30, truncation=True, padding="max_length")
        model_inputs['labels'] = labels['input_ids']
        return model_inputs

    tokenized_train = train_test_split['train'].map(preprocess_function, batched=True)
    tokenized_validation = train_test_split['test'].map(preprocess_function, batched=True)

    return {'train': tokenized_train, 'validation': tokenized_validation}


In [4]:
from transformers import BartTokenizer, BartForConditionalGeneration

def load_model(model_name):
    """
    Loads a BART model and its tokenizer from the specified pre-trained name or path.

    Parameters:
        model_name (str): The name or path of the pre-trained model on Hugging Face's model hub
                          or your local saved model.

    Returns:
        tokenizer (BartTokenizer): The tokenizer for the specified BART model.
        model (BartForConditionalGeneration): The BART model loaded with pre-trained weights.
    """
    # Load the tokenizer for the specified model
    tokenizer = BartTokenizer.from_pretrained(model_name)

    # Load the BART model
    model = BartForConditionalGeneration.from_pretrained(model_name)

    # Return both the model and tokenizer
    return tokenizer, model


In [5]:
bleu_metric = load_metric('sacrebleu')
rouge_metric = load_metric('rouge')

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    if isinstance(logits, tuple):  # Extract logits from model outputs
        logits = logits[0]

    decoded_preds = tokenizer.batch_decode(torch.argmax(logits, dim=-1), skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]

    bleu = bleu_metric.compute(predictions=decoded_preds, references=decoded_labels)
    rouge = rouge_metric.compute(predictions=decoded_preds, references=decoded_labels)

    return {
        'bleu': bleu['score'],
        'rouge-l': rouge['rougeL'].mid.fmeasure,
        'perplexity': torch.exp(torch.tensor(logits)).mean().item()
    }


  bleu_metric = load_metric('sacrebleu')
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.85k [00:00<?, ?B/s]

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

In [6]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'


In [None]:
model_name = "facebook/bart-base"
tokenizer, model = load_model(model_name)
tokenized_datasets = load_and_prepare_data(model_name)


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    eval_steps=500,
    learning_rate=2e-5,
    per_device_train_batch_size=1,  # Already minimal; consider gradient accumulation if further reduction is needed
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=32,  # Increase if necessary
    num_train_epochs=5,
    weight_decay=0.01,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model='bleu',
    report_to="none",
    fp16=False
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
    compute_metrics=lambda eval_pred: compute_metrics(eval_pred, tokenizer),
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)

trainer.train()


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Map:   0%|          | 0/5131 [00:00<?, ? examples/s]

Map:   0%|          | 0/1283 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss


Step,Training Loss,Validation Loss


In [13]:
from google.colab import drive
drive.mount('/content/drive')
model.save_pretrained("/content/drive/My Drive/bart_command_translator_V2")
tokenizer.save_pretrained("/content/drive/My Drive/bart_command_translator_V2")

Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


('/content/drive/My Drive/bart_command_translator_V2/tokenizer_config.json',
 '/content/drive/My Drive/bart_command_translator_V2/special_tokens_map.json',
 '/content/drive/My Drive/bart_command_translator_V2/vocab.json',
 '/content/drive/My Drive/bart_command_translator_V2/merges.txt',
 '/content/drive/My Drive/bart_command_translator_V2/added_tokens.json')

In [10]:
from transformers import BartTokenizer, BartForConditionalGeneration

def load_model_from_path(model_path):
    """
    Loads a BART model and its tokenizer from a specified path.

    Parameters:
        model_path (str): The file path or directory where the model and tokenizer are saved.

    Returns:
        tokenizer (BartTokenizer): The tokenizer for the BART model loaded from the specified path.
        model (BartForConditionalGeneration): The BART model loaded from the specified path.
    """
    # Load the tokenizer and model using the provided model path
    tokenizer = BartTokenizer.from_pretrained(model_path)
    model = BartForConditionalGeneration.from_pretrained(model_path)

    return tokenizer, model


In [11]:
def translate_to_command(input_text, tokenizer, model, device='cpu'):
    """
    Translate input text into a command using the provided BART model.

    Parameters:
        input_text (str): Natural language text to be translated.
        tokenizer (BartTokenizer): Tokenizer for the BART model.
        model (BartForConditionalGeneration): Pre-trained BART model.
        device (str): Device to perform computation on ('cpu' or 'cuda').

    Returns:
        str: Translated command.
    """
    # Move the model to the specified device and set it to evaluation mode
    model.to(device)
    model.eval()

    # Prepare the input text for the model
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)

    # Generate the command using the model
    outputs = model.generate(input_ids, max_length=50, num_beams=5, early_stopping=True)

    # Decode and return the generated text
    command = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return command


In [14]:
model_path = "/content/drive/My Drive/bart_command_translator_V2"
tokenizer, model = load_model_from_path(model_path)

for i in range(100):
    example = tokenized_datasets['validation'][i]
    input_text = tokenizer.decode(example['input_ids'], skip_special_tokens=True)
    actual_command = tokenizer.decode(example['labels'], skip_special_tokens=True)
    predicted_command = translate_to_command(input_text, tokenizer, model)

    print(f"Input Text: {input_text}")
    print(f"Actual Command: {actual_command}")
    print(f"Predicted Command: {predicted_command}")
    print("-" * 80)


Input Text: translate English to Bash: replace environment variables in an input file from a spaceseparated list
Actual Command: envsubst '{{$USER $SHELL $HOME}}' < {{path/to/input_file}}
Predicted Command: screenspace {{path/to/input.sc}}
--------------------------------------------------------------------------------
Input Text: translate English to Bash: automatically start recording a video on launch
Actual Command: obs --startrecording
Predicted Command: youtube -i {{video}}
--------------------------------------------------------------------------------
Input Text: translate English to Bash: download a torrent to a specific directory
Actual Command: transmission-cli --download-dir {{path/to/download_directory}} {{url|magnet|path/to/file
Predicted Command: torrent {{path/to/file.torrent}}
--------------------------------------------------------------------------------
Input Text: translate English to Bash: compare diff files, ignoring whitespace
Actual Command: interdiff -w {{old_

In [16]:
import torch
import numpy as np
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_metric

# Assuming sacrebleu and rouge are the metrics you want
bleu_metric = load_metric('sacrebleu')
rouge_metric = load_metric('rouge')

def compute_metrics(eval_pred, tokenizer):
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]

    # Ensure logits are PyTorch tensors
    if isinstance(logits, np.ndarray):
        logits = torch.tensor(logits)

    # Ensure labels are tensors (convert if they're not)
    if isinstance(labels, np.ndarray):
        labels = torch.tensor(labels)

    decoded_preds = tokenizer.batch_decode(torch.argmax(logits, dim=-1), skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]
    bleu = bleu_metric.compute(predictions=decoded_preds, references=decoded_labels)
    rouge = rouge_metric.compute(predictions=decoded_preds, references=decoded_labels)
    return {
        'bleu': bleu['score'],
        'rouge-l': rouge['rougeL'].mid.fmeasure
    }

# Load the model and tokenizer from the saved directory
model_path = "/content/drive/My Drive/bart_command_translator_V2"
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the evaluation dataset
# This needs to be adjusted according to how you have stored or can load your dataset
eval_dataset = load_and_prepare_data(model_path)['validation']  # Placeholder function

# Setup the Trainer instance
training_args = TrainingArguments(
    output_dir="./results",  # Change this if you want to save evaluation results
    per_device_eval_batch_size=1  # Adjust based on your available resources
)

trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=eval_dataset,
    compute_metrics=lambda eval_pred: compute_metrics(eval_pred, tokenizer)
)

# Evaluate the model
results = trainer.evaluate()

# Print the results
print(results)


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


{'eval_loss': 1.077932596206665, 'eval_bleu': 31.6993525869949, 'eval_rouge-l': 0.33450529363648085, 'eval_runtime': 1052.3463, 'eval_samples_per_second': 1.219, 'eval_steps_per_second': 1.219}


In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments
import torch
from datasets import load_metric

# Assuming sacrebleu and rouge are the metrics you want
bleu_metric = load_metric('sacrebleu')
rouge_metric = load_metric('rouge')

def compute_metrics(eval_pred, tokenizer):
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]
    decoded_preds = tokenizer.batch_decode(torch.argmax(logits, dim=-1), skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]
    bleu = bleu_metric.compute(predictions=decoded_preds, references=decoded_labels)
    rouge = rouge_metric.compute(predictions=decoded_preds, references=decoded_labels)
    return {
        'bleu': bleu['score'],
        'rouge-l': rouge['rougeL'].mid.fmeasure
    }

# Load the model and tokenizer from the saved directory
model_path = "/content/drive/My Drive/bart_command_translator_V2"
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the evaluation dataset
# This needs to be adjusted according to how you have stored or can load your dataset
eval_dataset = load_and_prepare_data(model_path)['train']  # Placeholder function

# Setup the Trainer instance
training_args = TrainingArguments(
    output_dir="./results",  # Change this if you want to save evaluation results
    per_device_eval_batch_size=1  # Adjust based on your available resources
)

trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=eval_dataset,
    compute_metrics=lambda eval_pred: compute_metrics(eval_pred, tokenizer)
)

# Evaluate the model
results = trainer.evaluate()

# Print the results
print(results)
