# OpenAI Fine-tuning for AllyIn Compass
## Enterprise-specific query understanding and response generation

In [13]:
import json
import os
import time
from pathlib import Path
from datetime import datetime
import openai


In [14]:
client = openai.OpenAI()
print("✅ OpenAI client initialized")

✅ OpenAI client initialized


In [15]:
def load_positive_feedback(min_examples=8):
    """Load positive feedback from your app"""
    feedback_file = Path("../feedback/feedback.jsonl")
    
    if not feedback_file.exists():
        print("❌ No feedback file found!")
        return []
    
    positive_examples = []
    with open(feedback_file, 'r') as f:
        for line in f:
            try:
                data = json.loads(line)
                if data.get('rating', 0) > 0:  # Positive feedback only
                    positive_examples.append({
                        'query': data['query'],
                        'answer': data['answer']
                    })
            except:
                continue
    
    print(f"✅ Found {len(positive_examples)} positive examples")
    
    if len(positive_examples) < min_examples:
        print(f"⚠️ Need at least {min_examples} examples")
        return []
    
    return positive_examples

In [17]:
def load_positive_feedback(min_examples=5):
    """Load positive feedback examples for fine-tuning"""
    feedback_file = Path("../feedback/feedback.jsonl")
    
    if not feedback_file.exists():
        print("❌ No feedback file found!")
        return []
    
    positive_examples = []
    with open(feedback_file, 'r') as f:
        for line in f:
            try:
                data = json.loads(line)
                if data.get('rating', 0) > 0:
                    positive_examples.append({
                        'query': data['query'],
                        'answer': data['answer']
                    })
            except:
                continue
    
    print(f"✅ Found {len(positive_examples)} positive examples")
    
    if len(positive_examples) < min_examples:
        print(f"⚠️ Need at least {min_examples} examples for fine-tuning")
        print(f"⏳ Collect {min_examples - len(positive_examples)} more positive feedback")
        return []
    
    return positive_examples

examples = load_positive_feedback()

✅ Found 10 positive examples


In [20]:
def format_for_openai(examples):
    """Format for OpenAI fine-tuning"""
    training_data = []
    
    system_prompt = (
        "You are AllyIn Compass, an enterprise AI assistant. "
        "Provide precise, factual answers with specific names and numbers. "
        "Use bullet points for lists. Keep responses concise and data-focused."
    )
    
    for ex in examples:
        training_data.append({
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": ex['query']},
                {"role": "assistant", "content": ex['answer']}
            ]
        })
    
    return training_data

if examples:
    training_data = format_for_openai(examples)
    print(f"✅ Formatted {len(training_data)} training examples")

✅ Formatted 10 training examples


In [27]:
if examples:
    # Save training file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    training_file = f"allyin_training_{timestamp}.jsonl"
    
    with open(training_file, 'w') as f:
        for item in training_data:
            f.write(json.dumps(item) + '\n')
    
    print(f"📁 Created: {training_file}")
    
    # Upload to OpenAI
    with open(training_file, 'rb') as f:
        file_response = client.files.create(file=f, purpose="fine-tune")
    
    file_id = file_response.id
    print(f"📤 Uploaded file: {file_id}")



📁 Created: allyin_training_20250527_041134.jsonl
📤 Uploaded file: file-4eRBGTDjvT3HBAHtECRMR7


In [28]:
if examples:
    job = client.fine_tuning.jobs.create(
        training_file=file_id,
        model="gpt-4o-mini-2024-07-18",
        hyperparameters={
            "n_epochs": 3,
            "batch_size": 16,  # Increased batch size for faster training
            "learning_rate_multiplier": 0.9  # Higher learning rate
        },
        suffix="allyin-compass"
    )
    
    job_id = job.id
    print(f"🚀 Fine-tuning started!")
    print(f"🔑 Job ID: {job_id}")
    print(f"📊 Status: {job.status}")
    print(f"🌐 Monitor: https://platform.openai.com/finetune/{job_id}")

🚀 Fine-tuning started!
🔑 Job ID: ftjob-xZGcx49eHSqMmp7qIKarfx9U
📊 Status: validating_files
🌐 Monitor: https://platform.openai.com/finetune/ftjob-xZGcx49eHSqMmp7qIKarfx9U


In [30]:
def check_status(job_id):
    """Check fine-tuning status"""
    job = client.fine_tuning.jobs.retrieve(job_id)
    
    print(f"📊 Status: {job.status}")
    
    if job.status == "succeeded":
        model_id = job.fine_tuned_model
        print(f"🎉 Success! Model: {model_id}")
        
        # Save for RAG pipeline
        config_dir = Path("../config")
        config_dir.mkdir(exist_ok=True)
        with open(config_dir / "model.txt", 'w') as f:
            f.write(model_id)
        print(f"💾 Saved to config/model.txt")
        return model_id
    
    elif job.status == "failed":
        print(f"❌ Failed!")
        if hasattr(job, 'error'):
            print(f"Error: {job.error}")
    
    else:
        print(f"⏳ Still running...")
    
    return None

# Check your job status (replace with your job_id)
# if 'ftjob-xZGcx49eHSqMmp7qIKarfx9U' in locals():
model_id = check_status('ftjob-xZGcx49eHSqMmp7qIKarfx9U')

📊 Status: succeeded
🎉 Success! Model: ft:gpt-4o-mini-2024-07-18:nyu:allyin-compass:Bbjlxp8T
💾 Saved to config/model.txt


In [9]:
def test_model(model_id, test_queries):
    """Test the fine-tuned model with sample queries"""
    
    print(f"Testing model: {model_id}\n")
    print("=" * 80)
    
    for i, query in enumerate(test_queries, 1):
        try:
            response = client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": "You are AllyIn Compass, an enterprise AI assistant that searches through company databases, documents, and knowledge graphs to provide comprehensive answers."},
                    {"role": "user", "content": query}
                ],
                max_tokens=400,
                temperature=0.1  # Low temperature for consistent responses
            )
            
            print(f"Test {i}: {query}")
            print(f"Response: {response.choices[0].message.content}")
            print(f"Tokens used: {response.usage.total_tokens}")
            print("-" * 80)
            
        except Exception as e:
            print(f"❌ Error testing query '{query}': {str(e)}")
            print("-" * 80)

# Test queries
test_queries = [
    "Show me compliance violations",
    "What are our revenue trends?",
    "Which customers need attention?",
    "Find environmental issues",
    "Biotech safety status"
]

# Test the fine-tuned model
if 'fine_tuned_model' in locals():
    test_model(fine_tuned_model, test_queries)
else:
    print("⚠️ Fine-tuned model not available for testing")
    print("You can test it later once fine-tuning completes by running:")
    print("test_model('your-fine-tuned-model-id', test_queries)")

⚠️ Fine-tuned model not available for testing
You can test it later once fine-tuning completes by running:
test_model('your-fine-tuned-model-id', test_queries)


In [12]:
if __name__ == "__main__":
    print("🧠 AllyIn Compass Fine-Tuning Pipeline")
    print("=" * 50)
    
    # Load examples
    examples = load_positive_feedback()
    
    if examples:
        # Format and save
        formatted = format_for_finetuning(examples)
        
        # Simulate training
        adapter_path = simulate_lora_finetuning()
        
        # Evaluate
        # evaluate_improvement()
        
        print("\n✅ Fine-tuning pipeline complete!")
        print(f"📁 Model adapter saved at: {adapter_path}")
    else:
        print("\n⚠️ Not enough positive feedback for fine-tuning")
        print("💡 Collect more feedback through the UI first!")

🧠 AllyIn Compass Fine-Tuning Pipeline
✅ Found 8 positive examples
🚀 Starting LoRA fine-tuning simulation...
📊 LoRA Configuration:
  - Rank (r): 2
  - Alpha: 4
  - Dropout: 0.1
  - Target modules: attention layers

✅ LoRA adapter saved to ../models/lora_adapter
🎉 Fine-tuning simulation complete!

✅ Fine-tuning pipeline complete!
📁 Model adapter saved at: ../models/lora_adapter
