## Fine-tuning with OpenRouter and Local Model

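This notebook generates synthetic training data through OpenRouter and then fine-tunes a local model on that data with LoRA.
1. Data generation: OpenRouter (the model is selected via `OPENROUTER_MODEL`)
2. Training target: the local model stored at `LOCAL_MODEL_PATH`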

In [None]:
# Environment Setup
!python -m venv llm_env

# Windows activation
!call llm_env\Scripts\activate.bat
# For Unix/Linux/MacOS:
# !source llm_env/bin/activate

# Install dependencies
!pip install -q accelerate==0.21.0 \
    peft==0.4.0 \
    bitsandbytes==0.40.2 \
    transformers==4.31.0 \
    trl==0.4.7 \
    openai \
    torch \
    pandas \
    datasets

In [None]:
import os
import openai
import random

# OpenRouter Configuration
openai.api_base = "https://openrouter.ai/api/v1"
# Read the key from the OPENROUTER_API_KEY environment variable so the secret
# is never saved with the notebook; the placeholder is only used when the
# variable is not set.
openai.api_key = os.environ.get("OPENROUTER_API_KEY", "YOUR_OPENROUTER_KEY")

# Model Configuration
OPENROUTER_MODEL = "anthropic/claude-2"  # Choose your OpenRouter model
LOCAL_MODEL_PATH = "./local_model"  # Path to your local model

# Training parameters
# `prompt` describes the model to train; it is sent to the data-generation model.
prompt = "A model that takes in a puzzle-like reasoning-heavy question in English, and responds with a well-reasoned, step-by-step thought out response in Spanish."
temperature = 0.4  # sampling temperature used for the OpenRouter calls
number_of_examples = 100  # number of generation requests to issue

In [None]:
def generate_example(prompt, prev_examples, temperature=0.5):
    """Request one new prompt/response training sample from the OpenRouter model.

    Args:
        prompt: Description of the model being trained; interpolated at the end
            of the system message.
        prev_examples: Samples generated so far. When non-empty, they are
            replayed as assistant messages; if there are more than 10, a random
            selection of 10 is used instead.
        temperature: Sampling temperature forwarded to the completion call.

    Returns:
        The message content of the first choice in the completion response.
    """
    system_content = f"""You are generating data which will be used to train a machine learning model.
        Generate data samples with prompt/response pairs in this format:
        ```
        prompt
        -----------
        $prompt_goes_here
        -----------
        
        response
        -----------
        $response_goes_here
        -----------
        ```
        Make samples unique and diverse. Here is the model we want to train:
        {prompt}"""
    conversation = [{"role": "system", "content": system_content}]

    if prev_examples:
        # Cap the replayed history at 10 randomly chosen samples.
        if len(prev_examples) > 10:
            replayed = random.sample(prev_examples, 10)
        else:
            replayed = prev_examples
        conversation.extend(
            {"role": "assistant", "content": sample} for sample in replayed
        )

    completion = openai.ChatCompletion.create(
        model=OPENROUTER_MODEL,
        messages=conversation,
        temperature=temperature,
        headers={"HTTP-Referer": "https://localhost:1234"},  # Required for OpenRouter
    )
    return completion.choices[0].message['content']

# Generate examples
# Each request is given the samples collected so far (generate_example replays
# at most 10 of them as assistant messages); its output is then added to the
# pool, so later requests see earlier results.
prev_examples = []
for i in range(number_of_examples):
    print(f'Generating example {i}')
    example = generate_example(prompt, prev_examples, temperature)
    prev_examples.append(example)

In [None]:
def generate_system_message(prompt):
    """Ask the OpenRouter model to write a concise inference-time system prompt.

    Args:
        prompt: Description of the model being trained; sent (stripped) as the
            user message.

    Returns:
        The message content of the first choice in the completion response.

    Note:
        Reads the notebook-level `temperature` and `OPENROUTER_MODEL` settings.
    """
    instruction = {
        "role": "system",
        "content": "Generate a concise system prompt for model inference. Format: Given $INPUT_DATA, you will $WHAT_THE_MODEL_SHOULD_DO",
    }
    request = {"role": "user", "content": prompt.strip()}

    completion = openai.ChatCompletion.create(
        model=OPENROUTER_MODEL,
        messages=[instruction, request],
        temperature=temperature,
        headers={"HTTP-Referer": "https://localhost:1234"},
    )
    return completion.choices[0].message['content']

# Generate the system message once; the inference cell embeds it in the
# <<SYS>> section of the prompt it builds.
system_message = generate_system_message(prompt)
print(f'System message: {system_message}')

In [None]:
# Create datasets
import pandas as pd

# Parse each generated sample into a (prompt, response) pair. A sample is kept
# only when BOTH fields can be extracted, so the two lists always have the same
# length and line up row-for-row when the DataFrame is built.
prompts = []
responses = []
skipped_examples = 0

for example in prev_examples:
    try:
        # Expected layout: ... ----- <prompt> ----- ... ----- <response> ----- ...
        split_example = example.split('-----------')
        prompt_text = split_example[1].strip()
        response_text = split_example[3].strip()
    except (AttributeError, IndexError):
        # AttributeError: the sample is not a string (e.g. None).
        # IndexError: the sample does not follow the delimiter format.
        skipped_examples += 1
        continue
    prompts.append(prompt_text)
    responses.append(response_text)

if skipped_examples:
    print(f'Skipped {skipped_examples} malformed example(s)')

df = pd.DataFrame({
    'prompt': prompts,
    'response': responses
})
df = df.drop_duplicates()

# Split datasets: 90% train, the remaining rows become the test set.
# The fixed random_state keeps the split reproducible across runs.
train_df = df.sample(frac=0.9, random_state=42)
test_df = df.drop(train_df.index)

train_df.to_json('train.jsonl', orient='records', lines=True)
test_df.to_json('test.jsonl', orient='records', lines=True)

In [None]:
# Model Training Setup
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig
from trl import SFTTrainer
from datasets import load_dataset

# Load local model (8-bit weights, layers placed automatically across devices)
model = AutoModelForCausalLM.from_pretrained(
    LOCAL_MODEL_PATH,
    load_in_8bit=True,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
# Batched training pads sequences to a common length, which requires a padding
# token; fall back to the EOS token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Configure LoRA
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)

# Load datasets
train_dataset = load_dataset('json', data_files='train.jsonl', split="train")
valid_dataset = load_dataset('json', data_files='test.jsonl', split="train")


def format_for_training(batch):
    """Add a 'text' column holding the full training string for each row.

    The string uses the same [INST]/<<SYS>> layout as the inference prompt
    built in the test cell, followed by the expected response.

    Args:
        batch: A batched mapping with 'prompt' and 'response' lists.

    Returns:
        A dict with a single 'text' list, one entry per input row.
    """
    return {
        'text': [
            f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{question} [/INST] {answer}"
            for question, answer in zip(batch['prompt'], batch['response'])
        ]
    }


# SFTTrainer trains on a single text field, but the JSONL files only contain
# 'prompt' and 'response' columns, so build that field explicitly.
train_dataset = train_dataset.map(format_for_training, batched=True)
valid_dataset = valid_dataset.map(format_for_training, batched=True)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    weight_decay=0.001,
    logging_steps=5
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    peft_config=peft_config,
    dataset_text_field="text",  # column produced by format_for_training
    tokenizer=tokenizer,
    args=training_args,
    max_seq_length=2048
)

trainer.train()

In [None]:
# Test the model
from transformers import pipeline

def generate_response(prompt):
    """Run the notebook's model on a single user prompt and return the output text.

    The prompt is wrapped in an [INST]/<<SYS>> template together with the
    notebook-level `system_message`, then passed through a text-generation
    pipeline built from the notebook-level `model` and `tokenizer`.

    Args:
        prompt: The user question to answer.

    Returns:
        The 'generated_text' field of the first pipeline result.
    """
    llama_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{prompt} [/INST]"

    generator = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=2048,
    )

    return generator(llama_prompt)[0]['generated_text']

# Test inference
# NOTE(review): this prompt asks for code, while the training `prompt` defined
# in the configuration cell describes English puzzle questions answered in
# Spanish — confirm this is the intended smoke test.
test_prompt = "Write a function that reverses a string."
result = generate_response(test_prompt)
print(result)