## LLM Fine-tuning with Ollama
This notebook uses the Llama 3.1 8B Instruct model for local fine-tuning.

In [None]:
# Create and activate virtual environment
!python -m venv llm_env

# Windows activation
!call llm_env\Scripts\activate.bat
# For Unix/Linux/MacOS:
# !source llm_env/bin/activate

# Verify environment
import sys
print(f"Python interpreter: {sys.executable}")
print(f"Python version: {sys.version}")

In [None]:
# Install dependencies
!pip install -q accelerate==0.21.0 \
    peft==0.4.0 \
    bitsandbytes==0.40.2 \
    transformers==4.31.0 \
    trl==0.4.7 \
    ollama-python \
    torch \
    pandas \
    datasets

In [None]:
# Configuration
import os
import torch
import random
import pandas as pd
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments
)
from peft import LoraConfig
from trl import SFTTrainer

# Model identifiers used by the rest of the notebook.
MODEL_CONFIG = {
    # Ollama tag of the base model. Llama 3.1 8B lives under the `llama3.1`
    # family; a `llama2:` prefix would point at a different model family.
    "base_model": "llama3.1:8b",
    # Name under which the fine-tuned model is queried at inference time.
    "fine_tuned_model": "llama-3.1-8b-custom"
}

# Natural-language description of the model we want to train; it is embedded
# in the system message of the data-generation request.
prompt = "A model that takes in a puzzle-like reasoning-heavy question in English, and responds with a well-reasoned, step-by-step thought out response in Spanish."
# Sampling temperature used while generating synthetic examples.
temperature = 0.4
# How many synthetic prompt/response samples to generate.
number_of_examples = 100

In [None]:
# Data Generation
import ollama

def generate_example(prompt, prev_examples, temperature=0.5):
    """Ask the base Ollama model for one synthetic prompt/response sample.

    Args:
        prompt: Description of the model being trained; appended to the
            system message.
        prev_examples: Samples generated so far. When there are more than 10,
            a random subset of 10 is replayed as assistant turns. The caller's
            list is never modified.
        temperature: Sampling temperature forwarded to Ollama.

    Returns:
        The text content of the model's reply.
    """
    messages = [{
        "role": "system",
        "content": f"""You are generating data which will be used to train a machine learning model.
        Generate data samples with prompt/response pairs in this format:
        ```
        prompt
        -----------
        $prompt_goes_here
        -----------
        
        response
        -----------
        $response_goes_here
        -----------
        ```
        Make samples unique and diverse. Here is the model we want to train:
        {prompt}"""
    }]

    if prev_examples:
        # Cap the replayed history at 10 samples; rebinding the local name
        # leaves the caller's list untouched.
        if len(prev_examples) > 10:
            prev_examples = random.sample(prev_examples, 10)
        messages.extend(
            {"role": "assistant", "content": example}
            for example in prev_examples
        )

    # Sampling parameters travel in `options`; `ollama.chat` has no
    # `temperature` keyword argument.
    response = ollama.chat(
        model=MODEL_CONFIG["base_model"],
        messages=messages,
        options={"temperature": temperature}
    )
    return response['message']['content']

# Generate examples
# Samples are produced one at a time; each request is handed the samples
# collected so far, and the new sample is added to that same list.
prev_examples = []
for example_idx in range(number_of_examples):
    print(f'Generating example {example_idx}')
    example = generate_example(prompt, prev_examples, temperature)
    prev_examples.append(example)

In [None]:
# Create Datasets
# Splitting a generated sample on the dashed delimiter is expected to leave the
# prompt text at index 1 and the response text at index 3.
prompts = []
responses = []
skipped = 0

for example in prev_examples:
    # Extract BOTH fields before appending either one, so a sample that has a
    # prompt but no response cannot leave the two lists with different lengths.
    try:
        split_example = example.split('-----------')
        prompt_text = split_example[1].strip()
        response_text = split_example[3].strip()
    except (AttributeError, IndexError):
        # Not a string, or not in the expected format: skip it, but count it.
        skipped += 1
        continue
    prompts.append(prompt_text)
    responses.append(response_text)

df = pd.DataFrame({
    'prompt': prompts,
    'response': responses
}).drop_duplicates()
print(f"Parsed {len(df)} unique examples ({skipped} skipped as malformed)")

# Split datasets: 90% train, remaining rows test (fixed seed for repeatability)
train_df = df.sample(frac=0.9, random_state=42)
test_df = df.drop(train_df.index)

train_df.to_json('train.jsonl', orient='records', lines=True)
test_df.to_json('test.jsonl', orient='records', lines=True)

In [None]:
# Model Training Setup
train_dataset = load_dataset('json', data_files='train.jsonl', split="train")
valid_dataset = load_dataset('json', data_files='test.jsonl', split="train")


def _to_training_text(record):
    """Join one prompt/response record into the single text field that is tokenized."""
    # NOTE(review): plain placeholder template -- align it with the chat format
    # the model will see at inference time.
    return {
        "text": f"### Prompt:\n{record['prompt']}\n\n### Response:\n{record['response']}"
    }


# The JSONL files only carry `prompt` and `response` columns, while the trainer
# below is pointed at one text column, so build that column here.
train_dataset = train_dataset.map(_to_training_text)
valid_dataset = valid_dataset.map(_to_training_text)

# `from_pretrained` expects a Hugging Face Hub repo id or a local directory, so
# the Ollama tag in MODEL_CONFIG["base_model"] cannot be used for training.
# An optional "hf_base_model" entry in MODEL_CONFIG overrides the default.
# NOTE(review): the meta-llama repos are access-gated and need a transformers
# release that supports Llama 3.1 -- confirm credentials and installed version.
HF_BASE_MODEL = MODEL_CONFIG.get("hf_base_model", "meta-llama/Llama-3.1-8B-Instruct")

model = AutoModelForCausalLM.from_pretrained(
    HF_BASE_MODEL,
    load_in_8bit=True,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(HF_BASE_MODEL)
if tokenizer.pad_token is None:
    # Batching needs a padding token; reuse EOS when the tokenizer defines none.
    tokenizer.pad_token = tokenizer.eos_token

# LoRA adapter settings: rank-32 updates on the attention query/value projections.
peft_config = LoraConfig(
    lora_alpha=64,
    lora_dropout=0.05,
    r=32,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"]
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    weight_decay=0.001,
    logging_steps=5
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_args,
    max_seq_length=2048
)

trainer.train()
# NOTE(review): nothing in this cell saves the trained adapter or registers
# MODEL_CONFIG["fine_tuned_model"] with Ollama -- confirm how the inference
# step is expected to find the fine-tuned model.

In [None]:
# Inference
def generate_response(prompt, system_message=None):
    """Query the fine-tuned Ollama model and return its reply text.

    Args:
        prompt: The user message to send.
        system_message: Optional system instruction. When omitted, a
            notebook-level `system_message` variable is used if one exists;
            otherwise the request is sent without a system turn.

    Returns:
        The text content of the model's reply.
    """
    if system_message is None:
        # Fall back to a global defined by another cell instead of raising
        # NameError when no such variable exists.
        system_message = globals().get("system_message")

    messages = []
    if system_message:
        messages.append({
            "role": "system",
            "content": system_message
        })
    messages.append({
        "role": "user",
        "content": prompt
    })

    # Generation parameters travel in `options`; `ollama.chat` has no
    # `temperature` or `max_tokens` keywords. `num_predict` is Ollama's name
    # for the maximum number of tokens to generate.
    response = ollama.chat(
        model=MODEL_CONFIG["fine_tuned_model"],
        messages=messages,
        options={"temperature": 0.7, "num_predict": 2048}
    )
    return response['message']['content']

# Test the model
# Use a reasoning puzzle in English so the smoke test exercises the task the
# model was configured for (step-by-step answer in Spanish), not a coding task.
test_prompt = "If it takes 5 machines 5 minutes to make 5 widgets, how long would it take 100 machines to make 100 widgets?"
result = generate_response(test_prompt)
print(result)