In [1]:
#load backtranslated dataset
from datasets import load_from_disk
squad_bt = load_from_disk("Backtranslated_squad_x2")

from datasets import load_dataset
squad = load_dataset("squad", split="train")
squad = squad.train_test_split(test_size=0.2, seed=42)

from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("drill06_temp_distillation")

from transformers import AutoModelForQuestionAnswering, TrainingArguments, Trainer
model = AutoModelForQuestionAnswering.from_pretrained("drill06_temp_distillation")




In [2]:
def preprocess_function(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=384,
        truncation=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        answer = answers[i]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label it (0, 0)
        if offset[context_start][0] > end_char or offset[context_end][1] < start_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs
tokenized_squad = squad_bt.map(preprocess_function, batched=True, remove_columns=squad_bt["train"].column_names)

Map:   0%|          | 0/140158 [00:00<?, ? examples/s]

Map:   0%|          | 0/17520 [00:00<?, ? examples/s]

In [3]:
# ✅ Use GPU if available
import torch
from transformers import Trainer, TrainingArguments
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator()

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"There are {torch.cuda.device_count()} GPU(s) available.")
    print("Device name:", torch.cuda.get_device_name(0))
    model.to(device)
else:
    device = torch.device("cpu")
    print("No GPU found, using CPU.")

# ✅ Training configuration (optimized for disk usage)
training_args = TrainingArguments(
    output_dir="drill12_temp&bt",       # folder to save model
    eval_strategy="epoch",         # correct parameter name
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    save_strategy="epoch",               # save checkpoint only once per epoch
    save_total_limit=1,                  # keep only the last checkpoint
    load_best_model_at_end=True,
    #gradient_accumulation_steps=2,      # optional if limited GPU memory
    #fp16=True,                           # if you have CUDA (mixed precision)
    push_to_hub=False,
    report_to="none",                    # disable wandb or tensorboard logs
    logging_dir=None,                    # avoid creating logging folders
)

# ✅ Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_squad["train"],
    eval_dataset=tokenized_squad["test"],
    tokenizer=tokenizer,                 # fixed from "processing_class"
    data_collator=data_collator,
)

# ✅ Try to resume training if a checkpoint exists
import os
last_checkpoint = None
if os.path.isdir(training_args.output_dir):
    from transformers.trainer_utils import get_last_checkpoint
    last_checkpoint = get_last_checkpoint(training_args.output_dir)

if last_checkpoint:
    print(f"Resuming training from checkpoint: {last_checkpoint}")
    trainer.train(resume_from_checkpoint=last_checkpoint)
else:
    print("Starting training from scratch...")
    trainer.train()

There are 1 GPU(s) available.
Device name: NVIDIA GeForce RTX 4060


  trainer = Trainer(


Starting training from scratch...


Epoch,Training Loss,Validation Loss
1,0.9767,1.10011
2,0.7696,1.133806


In [4]:
# Save the model and tokenizer
trainer.save_model("./drill12_temp&bt")
tokenizer.save_pretrained("./drill12_temp&bt")

('./drill12_temp&bt\\tokenizer_config.json',
 './drill12_temp&bt\\special_tokens_map.json',
 './drill12_temp&bt\\vocab.txt',
 './drill12_temp&bt\\added_tokens.json',
 './drill12_temp&bt\\tokenizer.json')

In [5]:
max_length = 384
stride = 128


def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

validation_dataset = squad["test"].map(
    preprocess_validation_examples,
    batched=True,
    remove_columns=squad["test"].column_names,
)
len(squad["test"]), len(validation_dataset)
from tqdm.auto import tqdm
import collections
import numpy as np
import evaluate
metric = evaluate.load("squad")

n_best = 20
max_answer_length = 30

def compute_metrics(start_logits, end_logits, features, examples):
    example_to_features = collections.defaultdict(list)
    for idx, feature in enumerate(features):
        example_to_features[feature["example_id"]].append(idx)

    predicted_answers = []
    for example in tqdm(examples):
        example_id = example["id"]
        context = example["context"]
        answers = []

        # Loop through all features associated with that example
        for feature_index in example_to_features[example_id]:
            start_logit = start_logits[feature_index]
            end_logit = end_logits[feature_index]
            offsets = features[feature_index]["offset_mapping"]

            start_indexes = np.argsort(start_logit)[-1 : -n_best - 1 : -1].tolist()
            end_indexes = np.argsort(end_logit)[-1 : -n_best - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Skip answers that are not fully in the context
                    if offsets[start_index] is None or offsets[end_index] is None:
                        continue
                    # Skip answers with a length that is either < 0 or > max_answer_length
                    if (
                        end_index < start_index
                        or end_index - start_index + 1 > max_answer_length
                    ):
                        continue

                    answer = {
                        "text": context[offsets[start_index][0] : offsets[end_index][1]],
                        "logit_score": start_logit[start_index] + end_logit[end_index],
                    }
                    answers.append(answer)

        # Select the answer with the best score
        if len(answers) > 0:
            best_answer = max(answers, key=lambda x: x["logit_score"])
            predicted_answers.append(
                {"id": example_id, "prediction_text": best_answer["text"]}
            )
        else:
            predicted_answers.append({"id": example_id, "prediction_text": ""})

    theoretical_answers = [{"id": ex["id"], "answers": ex["answers"]} for ex in examples]
    return metric.compute(predictions=predicted_answers, references=theoretical_answers)

Map:   0%|          | 0/17520 [00:00<?, ? examples/s]

In [6]:
predictions, _, _ = trainer.predict(validation_dataset)
start_logits, end_logits = predictions
compute_metrics(start_logits, end_logits, validation_dataset, squad["test"])

  0%|          | 0/17520 [00:00<?, ?it/s]

{'exact_match': 64.56050228310502, 'f1': 78.02282616087746}

In [None]:
question = "What is the weight of Jo?"
context="Prince weight 800 kg and Jo weight 600 kg."
print(question)
from transformers import pipeline
question_answerer = pipeline("question-answering", 
                             model="./drill12_temp&bt",
                            tokenizer="./drill12_temp&bt",
                            fp16=True)

result = question_answerer(question=question, context=context)
print(result['answer'])

Device set to use cuda:0


What is the weight of Jo?
600 kg


Sir Isaac Newton, a 17th-century English physicist and mathematician, introduced the three laws of motion in his famous work Philosophiæ Naturalis Principia Mathematica, published in 1687. These laws provide a comprehensive description of how objects behave when forces act upon them. Together, they form the foundation of classical mechanics and continue to be used in physics, engineering, and aerospace science.Newton’s First Law, also known as the Law of Inertia, states that an object will remain at rest or continue moving at a constant velocity in a straight line unless acted upon by a net external force. This explains why passengers in a car lung forward when the vehicle suddenly stops—their bodies were moving at the same speed as the car and continue moving forward due to inertia.Newton’s Second Law describes how forces affect motion. It states that the acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. This is expressed mathematically as F = m × a. The larger the force applied to an object, the greater its acceleration; however, heavier objects require more force to achieve the same acceleration. For example, pushing an empty shopping cart is easier than pushing a full one because it has less mass.Newton’s Third Law states that for every action, there is an equal and opposite reaction. This means that forces always occur in pairs. When a person walks, their foot pushes backward on the ground, and the ground pushes forward on the person with equal strength, allowing them to move. This is also why rockets can move in space: they push exhaust gases backward at high speed, and the reaction force pushes the rocket forward.These laws apply to countless real-world situations. Engineers use Newton’s laws to design vehicles, calculate load distributions in buildings, and analyze motion in machinery. Astronomers use them to understand planetary motion and gravitational interactions. Even modern technologies like drones and robotic arms rely on Newton’s principles for stability and control.Although modern physics, especially relativity and quantum mechanics, extends beyond Newton’s framework, the laws of motion remain accurate for most everyday situations involving moderate speeds, large objects, and non-extreme conditions. They are still the first tools taught to students learning physics and mechanics.

In [31]:
question = "Are Newton’s laws still accurate in modern physics?"
context="Sir Isaac Newton, a 17th-century English physicist and mathematician, introduced the three laws of motion in his famous work Philosophiæ Naturalis Principia Mathematica, published in 1687. These laws provide a comprehensive description of how objects behave when forces act upon them. Together, they form the foundation of classical mechanics and continue to be used in physics, engineering, and aerospace science.Newton’s First Law, also known as the Law of Inertia, states that an object will remain at rest or continue moving at a constant velocity in a straight line unless acted upon by a net external force. This explains why passengers in a car lung forward when the vehicle suddenly stops—their bodies were moving at the same speed as the car and continue moving forward due to inertia.Newton’s Second Law describes how forces affect motion. It states that the acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. This is expressed mathematically as F = m × a. The larger the force applied to an object, the greater its acceleration; however, heavier objects require more force to achieve the same acceleration. For example, pushing an empty shopping cart is easier than pushing a full one because it has less mass.Newton’s Third Law states that for every action, there is an equal and opposite reaction. This means that forces always occur in pairs. When a person walks, their foot pushes backward on the ground, and the ground pushes forward on the person with equal strength, allowing them to move. This is also why rockets can move in space: they push exhaust gases backward at high speed, and the reaction force pushes the rocket forward.These laws apply to countless real-world situations. Engineers use Newton’s laws to design vehicles, calculate load distributions in buildings, and analyze motion in machinery. Astronomers use them to understand planetary motion and gravitational interactions. Even modern technologies like drones and robotic arms rely on Newton’s principles for stability and control.Although modern physics, especially relativity and quantum mechanics, extends beyond Newton’s framework, the laws of motion remain accurate for most everyday situations involving moderate speeds, large objects, and non-extreme conditions. They are still the first tools taught to students learning physics and mechanics."
print(question)
from transformers import pipeline
question_answerer = pipeline("question-answering", 
                             model="./drill12_temp&bt",
                            tokenizer="./drill12_temp&bt",
                            fp16=True)

result = question_answerer(question=question, context=context)
print(result['answer'])

Device set to use cuda:0


Are Newton’s laws still accurate in modern physics?
they form the foundation of classical mechanics


The story of the Three Little Pigs begins with three siblings who decide to build their own houses. The first little pig, wanting to finish quickly and spend the rest of the day playing, builds a simple house made of straw. Although it is easy to construct, the house is weak and offers little protection.The second little pig puts in a bit more effort and builds his house from sticks. The stick house is stronger than the straw one, but it is still not very sturdy. He finishes his work faster than expected and joins the first pig to relax.The third little pig, however, works hard and carefully builds a strong house from bricks. His siblings laugh at him for taking so long, but he knows that a solid house will keep him safe.One day, a big bad wolf comes along. Hungry and determined to catch the pigs, he approaches the first pig’s straw house. The wolf knocks on the door and demands to be let in, but the pig refuses. Angry, the wolf takes a huge breath and blows the straw house down with ease. The first pig runs away to the stick house.The wolf follows and arrives at the second pig’s stick house. He again demands to be let in, and when the pigs refuse, he huffs and puffs and blows down the stick house as well. Both pigs flee to their brother’s brick house.Finally, the wolf comes to the brick house. He tries to blow it down, but no matter how hard he huffs and puffs, the brick house stands strong. Growing frustrated, the wolf climbs onto the roof and attempts to enter through the chimney. However, the clever third pig has already prepared a pot of boiling water in the fireplace.When the wolf drops down the chimney, he falls straight into the boiling water and leaps out in pain, fleeing into the forest, never to bother the pigs again. The three little pigs learn that hard work and careful planning pay off, and they live safely in the brick house from that day forward.

In [69]:
question = "Do quantum particles behave like everyday objects?"
context="In quantum physics, very small particles like electrons and photons behave differently from everyday objects. These particles can act as both waves and particles, a concept called wave–particle duality. Their exact position cannot be known with complete certainty; instead, scientists describe their location using probability. Quantum particles can also be entangled, meaning the state of one particle is instantly connected to the state of another, even if they are far apart."
print(question)
from transformers import pipeline
question_answerer = pipeline("question-answering", 
                             model="./drill12_temp&bt",
                            tokenizer="./drill12_temp&bt",
                            fp16=True)

result = question_answerer(question=question, context=context)
print(result['answer'])

Device set to use cuda:0


Do quantum particles behave like everyday objects?
differently
