# Generate data

In [None]:
# Generate the raw material for problems.json.
# The target format is an array of objects containing "problem" and "solution".
# Each "problem" is a long jumble of lowercase letters with three single-digit
# numbers interspersed; the model has to find those numbers.
# NOTE: this cell only prints each generated problem; it does not write
# problems.json itself.
import random

num_data = 1000
problem_length = 100  # characters per problem, embedded digits included
num_digits = 3  # how many single-digit numbers are hidden in each problem
alphabet = "abcdefghijklmnopqrstuvwxyz"  # loop-invariant, so defined once

for _ in range(num_data):
    # Start from random letters, kept as a list so individual characters can
    # be overwritten without re-slicing the whole string.
    chars = random.choices(alphabet, k=problem_length)
    # The digits to hide in this problem.
    nums = [random.randint(0, 9) for _ in range(num_digits)]
    # Distinct positions (sampling without replacement), so no digit
    # overwrites another one.
    positions = random.sample(range(problem_length), num_digits)
    # Each digit replaces the letter at its position, so the problem length
    # stays fixed at problem_length.
    for position, num in zip(positions, nums):
        chars[position] = str(num)
    problem = "".join(chars)
    print(problem)

# Sample from the distilled model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Identifiers for the pretrained base checkpoint and the LoRA adapter directory.
base_model_name = "Qwen/Qwen2.5-0.5B-Instruct"
adapter_path = "6fffa21a285c4901a6dc30771f59a148/"

# The tokenizer comes from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load the base causal LM and wrap it with the LoRA adapter in a single step.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(base_model_name),
    adapter_path,
)

In [None]:
# Sample input for the model: the system prompt is read from disk and the
# user query is a single hard-coded problem string.
system_prompt_path = "s.txt"
# Explicit encoding so the prompt file is decoded the same way on every
# platform instead of using the locale default.
with open(system_prompt_path, "r", encoding="utf-8") as f:
    system_prompt = f.read()

user_query = "eqsfrod1igjuzisevnrdljkfsgbvbmjwoaumjsokimcgjgduszdyccbqgc1uqga5gumtqlzflmccvkfjvlezcgcseqcsikybzumf"
# Expected answer: the digits embedded in user_query are 1, 1, 5 -> 115
prompt = system_prompt + "\n" + user_query + "\n Response:"
# Put the encoded tensors on the same device as the model so generation also
# works when the model has been moved off the CPU.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate a response from the model (sampling; no gradients are needed).
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
    )

# Decode and print the generated text.
# NOTE(review): the whole first output sequence is decoded; for a causal LM
# this presumably includes the prompt tokens as well — confirm if only the
# newly generated part is wanted.
generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("Generated response:\n", generated_text)