# Research into chatbots + LangChain

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from openai_harmony import (
    load_harmony_encoding, HarmonyEncodingName,
    Conversation, Message, Role
)

In [None]:
# Hub identifier shared by the tokenizer and the model weights
model_id = "openai/gpt-oss-20b"

# Tokenizer that belongs to the checkpoint
tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Model weights: requested in bfloat16 with device_map="auto"
model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_options)

# Harmony encoding, used to render prompts and parse completions
enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

print("✓ Model and Harmony encoding loaded successfully!")

ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [None]:
# Single-turn conversation function
def ask_gpt_oss(
    user_message: str,
    system_prompt: str = "You are a helpful assistant.\nReasoning: medium",
    max_new_tokens: int = 256,
) -> str:
    """
    Single-turn query to GPT-OSS-20B using Harmony encoding.

    Uses the notebook-level ``model`` and ``enc`` objects, so the loading
    cell must have been run first. No conversation history is kept between
    calls.

    Args:
        user_message: The user's question
        system_prompt: System instructions (can include reasoning level)
        max_new_tokens: Upper bound on the number of generated tokens.
            Raise it if the reply is cut off before a 'final' message
            is produced.

    Returns:
        The text of the last assistant message on the 'final' channel
        (a message without a channel is treated as final). If there is no
        such message, the text of the last parsed message; an empty string
        if nothing could be parsed.
    """
    # Build a Harmony conversation
    convo = Conversation.from_messages([
        Message.from_role_and_content(Role.SYSTEM, system_prompt),
        Message.from_role_and_content(Role.USER, user_message),
    ])

    # Render → token ids for completion
    prompt_ids = enc.render_conversation_for_completion(convo, Role.ASSISTANT)
    input_ids = torch.tensor([prompt_ids], device=model.device)

    # Generate. The Harmony stop tokens are passed explicitly so generation
    # ends when the assistant finishes its turn; nothing else in this
    # function would tell `generate` about them.
    out = model.generate(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        eos_token_id=enc.stop_tokens_for_assistant_actions(),
    )

    # Strip the prompt part
    new_tokens = out[0].tolist()[len(prompt_ids):]

    # Parse Harmony messages from the completion tokens. The parser returns
    # a plain list of Message objects.
    messages = enc.parse_messages_from_completion_tokens(new_tokens, Role.ASSISTANT)
    if not messages:
        return ""

    # Get the 'final' assistant message (the role lives on message.author)
    final_texts = [
        _message_text(m) for m in messages
        if m.author.role == Role.ASSISTANT and (m.channel or "final") == "final"
    ]

    return final_texts[-1] if final_texts else _message_text(messages[-1])


def _message_text(message) -> str:
    """Concatenate the text of every text-bearing content part of a message."""
    # Message.content is a list of content parts; parts without a `text`
    # attribute contribute nothing.
    return "".join(getattr(part, "text", "") for part in message.content)


# Smoke-test the helper with a few RAG-related questions.
# Each call is independent: ask_gpt_oss is invoked once per question.
questions = [
    "Give two bullets on what RAG is.",
    "Using that, name one benefit of chunking transcripts.",
    "Suggest one metric to evaluate retrieval quality.",
]
separator = "\n" + "=" * 50 + "\n"

for idx, q in enumerate(questions, start=1):
    # A divider goes between answers, not after the last one.
    if idx > 1:
        print(separator)
    print(f"Q{idx}: {q}")
    print(ask_gpt_oss(q))