In [2]:
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import Dataset
import json
import torch



  from .autonotebook import tqdm as notebook_tqdm


In [None]:
with open("../eval_selection/hella_dataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [4]:
# Перетворення під BART-формат
rows = []
for item in data:
    question = item["instruction"]
    answer = item["response"]
    rows.append({
        "input": f"question: {question}",
        "output": answer
    })

# Створення Dataset
dataset = Dataset.from_list(rows)

print("example:", dataset[0])

example: {'input': 'question: You turned on PowerCool in the refrigerator. What happens next? A) The refrigerator will always stay at maximum cooling. B) After a few hours, it returns to the previous temperature. C) The refrigerator stops working completely.', 'output': 'B'}


In [5]:
model_path = "../models/bart_finetuned_ChatGPT"

FT = True
if FT:
    tokenizer = BartTokenizer.from_pretrained(model_path)
    model = BartForConditionalGeneration.from_pretrained(model_path)
else:
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
    model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
model.to(device)

Using device: cuda


BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_n

In [6]:
def generate_response(instruction):
    input_text = f"question: {instruction}"  # якщо саме так навчав, або просто instruction
    inputs = tokenizer(
    input_text,
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=512
    )

    # Перекидаємо input_ids та attention_mask на GPU
    inputs = {k: v.to(device) for k, v in inputs.items()}

    output_ids = model.generate(**inputs, max_length=64)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


In [7]:
# Вивід
for example in dataset.select(range(5)):
    instr = example["input"]
    real = example["output"]
    gen = generate_response(instr)

    print("Instruction:", instr)
    print("Expected:", real)
    print("Generated:", gen)
    print("-" * 50)

Instruction: question: You turned on PowerCool in the refrigerator. What happens next? A) The refrigerator will always stay at maximum cooling. B) After a few hours, it returns to the previous temperature. C) The refrigerator stops working completely.
Expected: B
Generated: PowerCool is a function that speeds up the cooling in the fridge section by operating the cooling fan at full speed. This mode runs for two and a half hours before the fridge goes back to its normal temperature. It is useful when you need to chill a large amount of food rapidly.
--------------------------------------------------
Instruction: question: A person left the fridge door open for a long time. What will likely happen? A) The temperature inside will rise and food may spoil. B) The refrigerator will automatically close the door. C) The temperature will drop further.
Expected: A
Generated: When the door is left open for a long duration, it can result in a substantial temperature increase inside, which may jeop

In [8]:
with open("../eval_selection/piqa_dataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)
# Перетворення під BART-формат
rows = []
for item in data:
    question = item["instruction"]
    answer = item["response"]
    rows.append({
        "input": f"question: {question}",
        "output": answer
    })

# Створення Dataset
dataset = Dataset.from_list(rows)

print("example:", dataset[0])

# Вивід
for example in dataset.select(range(5)):
    instr = example["input"]
    real = example["output"]
    gen = generate_response(instr)

    print("Instruction:", instr)
    print("Expected:", real)
    print("Generated:", gen)
    print("-" * 50)

example: {'input': 'question: How should eggs be stored in the fridge – in the door or in a carton on a shelf?', 'output': 'in a carton on a shelf'}
Instruction: question: How should eggs be stored in the fridge – in the door or in a carton on a shelf?
Expected: in a carton on a shelf
Generated: Store eggs on a refrigerator shelf inside their carton, not in the door bins. Door shelves experience greater temperature swings, which may reduce the freshness of perishable foods.
--------------------------------------------------
Instruction: question: What is the safe way to clean the fridge – unplug it first or wash inside with a water jet?
Expected: unplug it first
Generated: To clean your refrigerator, first use a water jet to clear away dust, moisture, or other foreign materials from the power plug terminals and contact points. For more thorough cleaning, a soft, damp, lint-free cloth or paper towel is recommended. Avoid using benzene, thinners, vinegar,
--------------------------------

In [9]:
with open("../eval_selection/boolq_dataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)
# Перетворення під BART-формат
rows = []
for item in data:
    question = item["instruction"]
    answer = item["response"]
    rows.append({
        "input": f"question: {question}",
        "output": answer
    })

# Створення Dataset
dataset = Dataset.from_list(rows)

# print("example:", dataset[0])

# Вивід
for example in dataset.select(range(5)):
    instr = example["input"]
    real = example["output"]
    gen = generate_response(instr)

    print("Instruction:", instr)
    print("Expected:", real)
    print("Generated:", gen)
    print("-" * 50)

Instruction: question: Can the refrigerator be transported lying down?
Expected: no
Generated: The refrigerator should be transported by two or more people to ensure safety. It must be installed on a firm and level floor, avoiding damp, dusty, or outdoor areas and direct sunlight. These precautions help maintain its stability and functionality.
--------------------------------------------------
Instruction: question: Should the refrigerator only be connected to a grounded outlet?
Expected: yes
Generated: The refrigerator must be connected to a grounded outlet with an equipment-grounding conductor. This prevents electrical shocks and complies with local codes and ordinances.
--------------------------------------------------
Instruction: question: Can raw meat be stored on a shelf without a container?
Expected: no
Generated: Storing raw meat in a container prevents them from coming into contact with other foods or dripping, which is crucial for avoiding cross-contamination and maintaini

In [10]:
with open("../eval_selection/winogrande_dataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)
# Перетворення під BART-формат
rows = []
for item in data:
    question = item["instruction"]
    answer = item["response"]
    rows.append({
        "input": f"question: {question}",
        "output": answer
    })

# Створення Dataset
dataset = Dataset.from_list(rows)

# print("example:", dataset[0])

# Вивід
for example in dataset.select(range(5)):
    instr = example["input"]
    real = example["output"]
    gen = generate_response(instr)

    print("Instruction:", instr)
    print("Expected:", real)
    print("Generated:", gen)
    print("-" * 50)

Instruction: question: The user placed raw meat in a container and put it in the fridge so _ would not contaminate other food. meat container
Expected: meat
Generated: Using proper storage containers for raw meat stops them from coming into contact with other items or dripping, thereby reducing the risk of cross-contamination and protecting food safety.
--------------------------------------------------
Instruction: question: The child opened the fridge door, although _ was heavy. child door
Expected: door
Generated: The door must be kept closed as much as possible to maintain stable temperatures and prevent temperature fluctuations that can affect food freshness.
--------------------------------------------------
Instruction: question: The technician checked the compressor because _ was noisy. technician compressor
Expected: compressor
Generated: The compressor’s compressor must be replaced by a professional at a Samsung service center, as it is not intended for user installation. Att