In [1]:
!pip install transformers datasets accelerate sentence-transformers peft


Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sentence_transformers import SentenceTransformer
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset


In [3]:
# Load the SQuAD dataset
dataset = load_dataset("squad")
train_data = dataset['train']
validation_data = dataset['validation']


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.62k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

In [4]:
# Load pre-trained model and tokenizer
base_model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)

# Configure PEFT with LoRA
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=8,  # rank
    lora_alpha=32,
    lora_dropout=0.1,
)

# Apply PEFT to the model
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()

# Example fine-tuning (truncated for clarity)
def preprocess_function(examples):
    inputs = [f"question: {q} context: {c}" for q, c in zip(examples["question"], examples["context"])]
    targets = [a["text"][0] for a in examples["answers"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length").input_ids
    model_inputs["labels"] = labels
    return model_inputs

tokenized_train_data = train_data.map(preprocess_function, batched=True)
peft_model.train()  # Add training loop


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

trainable params: 294,912 || all params: 60,801,536 || trainable%: 0.4850


Map:   0%|          | 0/87599 [00:00<?, ? examples/s]

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 512)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 512)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): lora.Linear(
                    (base_layer): Linear(in_features=512, out_features=512, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=512, out_features=8, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=8, out_features=512, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
               

In [6]:
# Use a Sentence Transformer for embedding-based retrieval
retriever = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

# Mock document store (use FAISS or ElasticSearch for large-scale retrieval)
document_store = [
    {"text": "The Eiffel Tower is located in Paris, France."},
    {"text": "The Great Wall of China spans several regions in China."}
]

# Retrieve relevant context
def retrieve_context(question):
    question_embedding = retriever.encode(question, convert_to_tensor=True)
    scores = [torch.cosine_similarity(question_embedding, retriever.encode(doc["text"], convert_to_tensor=True), dim=0).item()
              for doc in document_store] # Change: Added dim=0
    best_doc = document_store[scores.index(max(scores))]
    return best_doc["text"]


# Example
question = "Where is the Eiffel Tower located?"
context = retrieve_context(question)
print(f"Retrieved Context: {context}")


Retrieved Context: The Eiffel Tower is located in Paris, France.


In [7]:
# Reward function mockup
def reward_function(predicted_answer, human_feedback):
    return 1 if predicted_answer == human_feedback else -1

# RLHF fine-tuning
def train_with_rlhf(peft_model, feedback_data):
    optimizer = torch.optim.Adam(peft_model.parameters(), lr=1e-5)
    for question, context, human_feedback in feedback_data:
        inputs = tokenizer(f"question: {question} context: {context}", return_tensors="pt", padding=True, truncation=True)
        outputs = peft_model.generate(**inputs)
        predicted_answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        reward = reward_function(predicted_answer, human_feedback)
        loss = -torch.log(torch.sigmoid(torch.tensor(reward)))  # Example loss function
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [8]:
def generate_answer(question, peft_model):
    context = retrieve_context(question)
    input_text = f"question: {question} context: {context}"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
    outputs = peft_model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example
answer = generate_answer("Where is the Great Wall of China?", peft_model)
print(f"Answer: {answer}")


Answer: several regions in China


