In [None]:
import dataclasses

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
import vllm

In [None]:
# Load the pretrained tokenizer and model weights.
MODEL_NAME = "deepseek-ai/deepseek-math-7b-rl"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# The tokenization step pads every row to a fixed length, which requires a pad
# token. Fall back to the EOS token if this tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Half-precision weights; device_map="auto" lets the library place the layers
# on the available devices.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")

In [None]:
# LoRA configuration for the adapter that will be trained on top of the
# frozen base model.
lora_config = LoraConfig(
    r=8,  # LoRA rank
    lora_alpha=16,  # Alpha scaling
    lora_dropout=0.1,  # Dropout probability
    bias="none",  # do not train any bias parameters
    # Declare the task so PEFT builds the causal-LM specific wrapper.
    task_type="CAUSAL_LM",
)

In [None]:
# Wrap the loaded model with the LoRA adapter described by `lora_config`.
model = get_peft_model(model=model, peft_config=lora_config)

# Fetch the text corpus (placeholder: swap in the actual dataset).
dataset = load_dataset(path="wikitext", name="wikitext-2-raw-v1")

def tokenize_function(examples):
    """Tokenize a batch of raw text rows and attach causal-LM labels.

    Args:
        examples: Batch mapping supplied by ``dataset.map(..., batched=True)``;
            its ``"text"`` entry is passed to the tokenizer.

    Returns:
        The tokenizer output (padded/truncated to 512 tokens) with an added
        ``"labels"`` entry: a copy of ``input_ids`` in which every position
        whose attention mask is 0 is replaced by -100.
    """
    encoded = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
    # The Trainer can only compute a loss if the batch carries labels. For
    # causal LM the labels are the input ids themselves; -100 is the index the
    # loss ignores, so padded positions do not contribute.
    encoded["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded

# The raw wikitext split contains blank separator rows; drop them so they do
# not turn into all-padding training examples.
dataset = dataset.filter(lambda example: example["text"].strip() != "")
dataset = dataset.map(tokenize_function, batched=True)

In [None]:
# Training arguments
# transformers renamed `evaluation_strategy` to `eval_strategy` and newer
# releases no longer accept the old name. Use whichever keyword the installed
# version declares so the cell runs on both.
_training_arg_fields = {field.name for field in dataclasses.fields(TrainingArguments)}
_eval_strategy_kwarg = (
    "eval_strategy" if "eval_strategy" in _training_arg_fields else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",  # checkpoint once per epoch
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,  # keep at most two checkpoints on disk
    **{_eval_strategy_kwarg: "epoch"},  # evaluate once per epoch
)

# Trainer over the tokenized train / validation splits.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"]
)

In [None]:
# Fine-tune the model
trainer.train()

# Save the fine-tuned model.
# Calling save_pretrained on the PEFT wrapper would only write the adapter
# files, but the inference cell loads "./fine-tuned-model" as a complete
# model. Fold the LoRA weights into the base model first so a standalone
# checkpoint is written.
merged_model = model.merge_and_unload()
merged_model.save_pretrained("./fine-tuned-model")
tokenizer.save_pretrained("./fine-tuned-model")


In [None]:
# Optimize inference with vLLM
# NOTE(review): objects from the training cells may still hold GPU memory when
# this engine starts -- confirm there is enough headroom or free them first.
llm = vllm.LLM(model="./fine-tuned-model")

def generate_response(prompt, max_tokens=512):
    """Generate a completion for a single prompt with the vLLM engine.

    Args:
        prompt: Prompt text to complete.
        max_tokens: Upper bound on the number of generated tokens. Without an
            explicit value vLLM's default sampling parameters stop after 16
            tokens, which truncates worked solutions.

    Returns:
        The text of the first completion for the prompt.
    """
    sampling_params = vllm.SamplingParams(max_tokens=max_tokens)
    outputs = llm.generate([prompt], sampling_params=sampling_params)
    # One prompt in -> one request output; take its first candidate.
    return outputs[0].outputs[0].text

In [None]:


# Example inference: run one sample prompt through the helper and show the result.
test_prompt = "Solve the equation: 3x + 5 = 20"
response_text = generate_response(test_prompt)
print("Generated Response:", response_text)