In [3]:
# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import transformers
from transformers import BartTokenizer, BartForConditionalGeneration
import torch
from torch.utils.data import DataLoader
import torch.optim as optim

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base").to(device)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import json
from torch.utils.data import Dataset, DataLoader

class JsonDataset(Dataset):
    """Dataset of (input_text, output_text) pairs read from a JSON file.

    The whole file is parsed once in the constructor and kept in memory.
    Each record is expected to provide the keys "Text_inputs" and "Outputs"
    (presumably the file's top level is a list of such dicts -- confirm
    against the data file).
    """

    def __init__(self, json_file):
        """Load and parse ``json_file``.

        Args:
            json_file: Path of the JSON file to read.
        """
        # Pin the encoding: without it open() uses the platform's locale
        # encoding, which can mis-decode or reject non-ASCII text.
        with open(json_file, "r", encoding="utf-8") as f:
            self.data = json.load(f)

    def __len__(self):
        """Return the number of records loaded from the file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(Text_inputs, Outputs)`` pair of record ``idx``."""
        record = self.data[idx]
        return record["Text_inputs"], record["Outputs"]

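# One-off preprocessing, kept disabled for reference: it wraps every
# "Text_inputs" and "Outputs" string in literal "<sos> ... <eos>" markers and
# writes the result to Examples_with_sos_eos.json.
# with open("Examples_with_min_60_tokens.json", "r") as f:
#     data = json.load(f)

# for item in data:
#     item["Text_inputs"] = f"<sos> {item['Text_inputs']} <eos>"
#     item["Outputs"] = f"<sos> {item['Outputs']} <eos>"

# with open("Examples_with_sos_eos.json", "w") as f:
#     json.dump(data, f, indent=2)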

# Register "<sos>" / "<eos>" as the tokenizer's BOS / EOS special tokens, then
# resize the model's token embeddings to len(tokenizer).
tokenizer.add_special_tokens(dict(bos_token="<sos>", eos_token="<eos>"))
model.resize_token_embeddings(len(tokenizer))

# Smoke test: encode one hand-written prompt, decode it greedily, and print
# the generated text with special tokens stripped.
inputs = tokenizer(
    "<sos> Paint 3 bolts in rack <eos>",
    return_tensors="pt",
    truncation=True,
    padding=True,
    max_length=100,
).to(model.device)

generated_ids = model.generate(
    **inputs,
    min_length=60,
    max_length=120,
    # Greedy decoding. `early_stopping` is deliberately not passed: it only
    # affects beam-based generation and has no effect with num_beams=1.
    num_beams=1,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(output)


BATCH_SIZE = 64

# Shuffled mini-batches over the JSON examples.
dataset = JsonDataset("Examples_with_min_60_tokens.json")
train_data = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# AdamW plus a StepLR schedule that multiplies the learning rate by 0.95 on
# every scheduler step.
# NOTE(review): in the cells visible here, the train(...) call does not use
# this optimizer or scheduler -- train() creates its own AdamW -- so they
# currently have no effect on training. Confirm whether that is intended.
optimizer = optim.AdamW(model.parameters(), lr=5e-5)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)

In [None]:
def train(model, tokenizer, train_data, device, epochs=100, lr=5e-5,
          optimizer=None, lr_scheduler=None):
    """Fine-tune ``model`` on batches of (input_texts, output_texts).

    For every batch the input and target strings are tokenized, padding
    positions in the targets are replaced by -100 so they are ignored by the
    loss, and one optimizer step is taken on the loss returned by the model.
    After each epoch the mean batch loss is printed.

    Args:
        model: Model called as ``model(**inputs, labels=labels)``; the result
            must expose ``.loss``.
        tokenizer: Callable tokenizer that provides ``pad_token_id``.
        train_data: Iterable yielding ``(input_texts, output_texts)`` batches.
        device: Device the model and the tensors are moved to.
        epochs: Number of passes over ``train_data``.
        lr: Learning rate of the AdamW optimizer created when ``optimizer``
            is not supplied; ignored otherwise.
        optimizer: Optional pre-built optimizer to use instead of creating a
            new AdamW.
        lr_scheduler: Optional scheduler; when given, ``step()`` is called
            once at the end of every epoch.

    Raises:
        ValueError: If ``train_data`` yields no batches.
    """
    model.to(device)
    model.train()

    if optimizer is None:
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

    # Gradients live on the parameters, not on the optimizer. If an earlier
    # run was interrupted between backward() and zero_grad(), they would
    # otherwise be added to the first step of this run.
    optimizer.zero_grad()

    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0

        for input_texts, output_texts in train_data:
            # Tokenize inputs
            inputs = tokenizer(
                list(input_texts),
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=100
            )
            inputs = {k: v.to(device) for k, v in inputs.items()}

            # Tokenize labels
            labels = tokenizer(
                list(output_texts),
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=70
            ).input_ids.to(device)
            labels[labels == tokenizer.pad_token_id] = -100  # Ignore padding in loss

            # Forward pass with internal loss
            outputs = model(**inputs, labels=labels)
            loss = outputs.loss

            # Backpropagation
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.item()
            num_batches += 1

        # Counting batches (instead of len(train_data)) avoids a
        # ZeroDivisionError on empty data and works for iterables without
        # __len__.
        if num_batches == 0:
            raise ValueError("train_data yielded no batches; nothing to train on")

        if lr_scheduler is not None:
            lr_scheduler.step()

        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch + 1}, Avg Loss: {avg_loss:.4f}")

def evaluate(model, tokenizer, input_text, device, max_length=120):
    """Generate text for ``input_text`` and return it decoded.

    Args:
        model: Model providing ``eval()`` and ``generate()``.
        tokenizer: Tokenizer used to encode the prompt and decode the result.
        input_text: Prompt string.
        device: Device the encoded prompt is moved to.
        max_length: Upper bound on the generated sequence length, passed to
            ``generate()``. Without an explicit limit ``generate()`` uses the
            library/model default, which can cut the output short.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    model.eval()
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=max_length)
    # `truncation` / `max_length` are encoding options, not decode() options,
    # so they are not passed here; output length is controlled by generate().
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [13]:
# Fine-tune using train()'s default number of epochs and learning rate.
train(model=model, tokenizer=tokenizer, train_data=train_data, device=device)

Epoch 1, Avg Loss: 0.0347
Epoch 2, Avg Loss: 0.0197
Epoch 3, Avg Loss: 0.0205
Epoch 4, Avg Loss: 0.0243
Epoch 5, Avg Loss: 0.0198
Epoch 6, Avg Loss: 0.0189
Epoch 7, Avg Loss: 0.0183
Epoch 8, Avg Loss: 0.0197
Epoch 9, Avg Loss: 0.0286
Epoch 10, Avg Loss: 0.0189
Epoch 11, Avg Loss: 0.0183
Epoch 12, Avg Loss: 0.0180
Epoch 13, Avg Loss: 0.0176
Epoch 14, Avg Loss: 0.0176
Epoch 15, Avg Loss: 0.0220
Epoch 16, Avg Loss: 0.0184
Epoch 17, Avg Loss: 0.0180
Epoch 18, Avg Loss: 0.0175
Epoch 19, Avg Loss: 0.0175
Epoch 20, Avg Loss: 0.0176
Epoch 21, Avg Loss: 0.0175
Epoch 22, Avg Loss: 0.0174
Epoch 23, Avg Loss: 0.0175
Epoch 24, Avg Loss: 0.0173
Epoch 25, Avg Loss: 0.0175
Epoch 26, Avg Loss: 0.0171
Epoch 27, Avg Loss: 0.0172
Epoch 28, Avg Loss: 0.0175
Epoch 29, Avg Loss: 0.0176
Epoch 30, Avg Loss: 0.0173
Epoch 31, Avg Loss: 0.0173
Epoch 32, Avg Loss: 0.0170
Epoch 33, Avg Loss: 0.0173
Epoch 34, Avg Loss: 0.0179
Epoch 35, Avg Loss: 0.0172
Epoch 36, Avg Loss: 0.0172
Epoch 37, Avg Loss: 0.0174
Epoch 38, 

In [None]:
# torch.save(model.state_dict(), "bart_finetuned.pth")

In [15]:
def evaluate(model, tokenizer, input_text, device, max_length=120):
    """Generate text for ``input_text``, print the raw decode, return the clean one.

    NOTE: this redefines the ``evaluate`` declared in an earlier cell; this
    version additionally prints the generated sequence with special tokens
    kept, which is useful for inspecting BOS/EOS handling.

    Args:
        model: Model providing ``eval()`` and ``generate()``.
        tokenizer: Tokenizer used to encode the prompt and decode the result.
        input_text: Prompt string.
        device: Device the encoded prompt is moved to.
        max_length: Upper bound on the generated sequence length, passed to
            ``generate()``. Without an explicit limit ``generate()`` uses the
            library/model default, which can cut the output short.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    model.eval()
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=max_length)

    # Debug view: the same sequence with special tokens left in.
    output = tokenizer.decode(outputs[0], skip_special_tokens=False)
    print(output)

    # `truncation` is an encoding option, not a decode() option, so it is not
    # passed here; output length is controlled by generate().
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [21]:
# model.load_state_dict(torch.load("bart_finetuned.pth"))

# Run one prompt through evaluate() and show the prompt next to the result.
input_text = "Pick and Place robot: I want to place 4 boxes next to each other on a table"
# input_text = "Palletizing robot: Perform task on 1 box(s) at the rack"
output_text = evaluate(model, tokenizer, input_text, device)
print(f"Input: {input_text}", f"Output: {output_text}", sep="\n")

</s><s><sos> # Start task # Use Pick and Place robot # Use camera # get position of object</s>
Input: Pick and Place robot: I want to place 4 boxes next to each other on a table
Output:  # Start task # Use Pick and Place robot # Use camera # get position of object
