In [None]:
from datasets import load_dataset, Dataset

# If your data is in a pandas DataFrame
import pandas as pd

data = pd.read_csv('path_to_your_data.csv')  # Replace with your data path
# NOTE(review): preprocessing reads example["answers"]["answer_start"] and
# example["answers"]["text"]; a CSV column holds plain strings, so a nested
# "answers" field presumably has to be parsed before this point - confirm
# against the actual file schema.

# Dataset.from_pandas yields one unsplit Dataset, but the training cell indexes
# "train" and "validation" splits. Hold out 10% of the rows for validation; the
# fixed seed keeps the split identical across Restart & Run All.
dataset = Dataset.from_pandas(data).train_test_split(test_size=0.1, seed=42)
# train_test_split names the held-out part "test"; expose it under the
# "validation" key that the Trainer cell looks up.
dataset["validation"] = dataset.pop("test")

In [None]:
from transformers import AutoTokenizer

# Checkpoint identifier; reused by the model-loading cell so that tokenizer and
# model come from the same checkpoint.
model_name = "bert-base-uncased"  # Replace with the model you want to fine-tune
# Tokenizer used by preprocess_data below.
# NOTE(review): preprocess_data passes return_offsets_mapping=True, which
# presumably requires a fast (Rust-backed) tokenizer - confirm for the chosen
# checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def _find_answer_tokens(offsets, sequence_ids, start_char, end_char):
    """Translate a character span of the context into token indices.

    Args:
        offsets: Per-token ``(char_start, char_end)`` pairs for one encoded
            question/context pair.
        sequence_ids: Per-token sequence id for the same pair; context tokens
            are the ones marked ``1``.
        start_char: Index of the answer's first character in the context.
        end_char: Index one past the answer's last character in the context.

    Returns:
        ``(start_token, end_token)``, or ``(0, 0)`` when there are no context
        tokens or the answer is not fully covered by them (e.g. it was
        truncated away).
    """
    # Offsets restart at 0 for every sequence, so question tokens must be
    # excluded or they can spuriously match the answer's character range.
    context_tokens = [idx for idx, seq_id in enumerate(sequence_ids) if seq_id == 1]
    if not context_tokens:
        return 0, 0
    first, last = context_tokens[0], context_tokens[-1]

    if offsets[first][0] > start_char or offsets[last][1] < end_char:
        return 0, 0

    # Last context token that starts at or before the answer's first character.
    start_token = first
    while start_token < last and offsets[start_token + 1][0] <= start_char:
        start_token += 1

    # First context token that ends at or after the answer's last character.
    end_token = last
    while end_token > first and offsets[end_token - 1][1] >= end_char:
        end_token -= 1

    return start_token, end_token


def preprocess_data(example):
    """Tokenize question/context pairs and attach answer token positions.

    Works both for a single row (``dataset.map(preprocess_data)``) and for a
    batch of rows (``dataset.map(preprocess_data, batched=True)``), where every
    field of ``example`` is a list.

    Args:
        example: Mapping with ``"question"``, ``"context"`` and ``"answers"``.
            Each answers entry provides ``"answer_start"`` and ``"text"``
            sequences; only their first element is used.

    Returns:
        The tokenizer output with ``"start_positions"`` and ``"end_positions"``
        added (ints for a single row, lists of ints for a batch). Rows with no
        answer, or whose answer falls outside the encoded context, are labelled
        ``(0, 0)``.
    """
    inputs = tokenizer(
        example["question"],
        example["context"],
        truncation=True,
        padding="max_length",
        max_length=384,
        return_offsets_mapping=True
    )

    # A single row carries a plain string; a batch carries a list of strings.
    is_batched = not isinstance(example["question"], str)
    if is_batched:
        answers = example["answers"]
        offsets_per_row = inputs["offset_mapping"]
    else:
        answers = [example["answers"]]
        offsets_per_row = [inputs["offset_mapping"]]

    start_positions = []
    end_positions = []
    for row, (offsets, answer) in enumerate(zip(offsets_per_row, answers)):
        if len(answer["answer_start"]) == 0 or len(answer["text"]) == 0:
            # No answer recorded for this row.
            start_token, end_token = 0, 0
        else:
            start_char = answer["answer_start"][0]
            end_char = start_char + len(answer["text"][0])
            start_token, end_token = _find_answer_tokens(
                offsets, inputs.sequence_ids(row), start_char, end_char
            )
        start_positions.append(start_token)
        end_positions.append(end_token)

    inputs["start_positions"] = start_positions if is_batched else start_positions[0]
    inputs["end_positions"] = end_positions if is_batched else end_positions[0]
    return inputs

# Map row by row: preprocess_data reads example["answers"]["answer_start"][0],
# i.e. it expects a single example, whereas batched=True would hand it lists of
# column values instead.
tokenized_dataset = dataset.map(preprocess_data)

In [None]:
from transformers import AutoModelForQuestionAnswering

# Instantiate the question-answering model from the same checkpoint name
# (`model_name`) that the tokenizer was loaded with.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

In [None]:
from transformers import Trainer, TrainingArguments

import inspect

# Define training arguments
# Newer transformers releases name the evaluation-schedule argument
# `eval_strategy`, older ones only accept `evaluation_strategy`. Use whichever
# keyword the installed TrainingArguments exposes so the cell runs on both.
eval_strategy_arg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    **{eval_strategy_arg: "epoch"},  # evaluate once per epoch
)

# Define the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
)

# Train the model
trainer.train()

In [None]:
# Evaluate with the trainer built above; as the last expression in the cell,
# the returned value is shown as the cell output.
trainer.evaluate()

In [None]:
# Write the model first, then the tokenizer, into the same directory so the
# pair can be reloaded from one path.
for artifact in (model, tokenizer):
    artifact.save_pretrained("fine_tuned_model")