In [9]:
# Cell 1: Synthetic Dataset Generation and Preprocessing
import pandas as pd
from sklearn.model_selection import train_test_split
data = {
    "question": [
        "What is attention in neural networks?",
        "How do transformer models work?",
        "What are the advantages of self-attention?"
    ],
    "answer": [
        "Attention is a mechanism that allows models to focus on relevant parts of the input.",
        "Transformer models use self-attention and feed-forward layers to process sequences in parallel.",
        "Self-attention helps capture long-range dependencies and improves parallelization."
    ]
}

df = pd.DataFrame(data)
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
train_df.to_csv("train_qa.csv", index=False)
val_df.to_csv("val_qa.csv", index=False)

print("Datasets saved: train_qa.csv and val_qa.csv")

Datasets saved: train_qa.csv and val_qa.csv


In [10]:
# Cell 2: Fine-Tuning Qwen 2.5 3B Model
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, transformers.utils
import pandas as pd
from datasets import Dataset

# Load training and validation data from CSV
train_df = pd.read_csv("train_qa.csv")
val_df = pd.read_csv("val_qa.csv")

# Convert the pandas DataFrames to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

# Select model variant: use the instruct version or the base model.
model_name = "Qwen/Qwen2.5-3B-Instruct"  # or "Qwen/Qwen2.5-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

def tokenize_function(example):
    # Concatenate question and answer with a separator.
    text = f"Question: {example['question']}\nAnswer: {example['answer']}\n"
    return tokenizer(text, truncation=True, max_length=1024)

# Apply tokenization to the datasets
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Prepare the data collator for language modeling
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define training arguments (adjust hyperparameters as needed)
training_args = TrainingArguments(
    output_dir="./qwen_finetuned",
    evaluation_strategy="steps",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=3,
    save_steps=500,
    eval_steps=500,
    logging_steps=100,
    learning_rate=5e-5,
    fp16=True,
)

# Initialize the Trainer and start training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)

trainer.train()


SyntaxError: invalid syntax (616282695.py, line 3)

In [ ]:
# Cell 3: Save the Fine-Tuned Model and Tokenizer
model.save_pretrained("./qwen_finetuned")
tokenizer.save_pretrained("./qwen_finetuned")
print("Model and tokenizer saved to './qwen_finetuned'")


In [ ]:
# Cell 4: Quantization Command
!python convert_to_gguf.py --input_dir ./qwen_finetuned --output_file qwen_finetuned.gguf --quantization 4bit


In [ ]:
# Cell 5: Inference with the Quantized Model
import subprocess

def run_inference(prompt: str) -> str:
    # This command assumes a CLI tool "gguf_infer" that accepts the model and prompt.
    result = subprocess.run(
        ["gguf_infer", "--model", "qwen_finetuned.gguf", "--prompt", prompt],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )
    if result.returncode != 0:
        print("Error during inference:", result.stderr)
        return ""
    return result.stdout


sample_prompt = "What is attention in neural networks?"
print("Input:", sample_prompt)
print("Output:", run_inference(sample_prompt))


In [ ]:
# Cell 6: Evaluation Framework
import evaluate

rouge = evaluate.load("rouge")

def evaluate_model(generated_answers, references):
    results = rouge.compute(predictions=generated_answers, references=references)
    return results

generated = ["Attention is a mechanism that allows models to focus on relevant parts of the input."]
references = ["Attention is a mechanism that allows models to focus on relevant parts of the input."]
print("Evaluation Results:", evaluate_model(generated, references))
