# Socratic RCM - Run on Colab GPU

Run this notebook in VS Code with the Colab extension:
1. Select Kernel → Colab → New Colab Server
2. Run all cells
3. Interact with the workflow in the output

In [None]:
# Install dependencies
!pip install transformers accelerate torch -q

In [None]:
# Report whether PyTorch can see a CUDA device and, if so, which one
import torch

gpu_available = torch.cuda.is_available()
print(f"GPU available: {gpu_available}")
if gpu_available:
    # Device index 0 is the first visible CUDA device.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_properties = torch.cuda.get_device_properties(0)
    # total_memory is divided by 1e9 to display it in GB.
    print(f"Memory: {gpu_properties.total_memory / 1e9:.1f} GB")

In [None]:
# Load model - Qwen 2.5 (excellent instruction following)
# `pipeline` is imported here because a later cell (ColabLLMClient) uses it.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Choose model size based on GPU:
# - T4 (16GB): Use 7B with float16
# - A100: Can use larger models
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"  # Great quality
# Alternative smaller: "Qwen/Qwen2.5-3B-Instruct" (faster, less VRAM)

# Qwen2.5 is implemented natively in transformers, so trust_remote_code is not
# needed. Leaving it off prevents Python code shipped inside a Hub repository
# from being executed if MODEL_NAME is later pointed at a different model.
print(f"Loading {MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # load the weights in half precision
    device_map="auto",  # let the library decide where to place the weights
)
print("Model loaded!")

In [None]:
# Create a simple LLM client that uses the loaded model
class ColabLLMClient:
    """Minimal chat client backed by an already-loaded model and tokenizer.

    The model is wrapped in a ``text-generation`` pipeline configured for
    sampled generation (temperature 0.7, at most 512 new tokens).
    """

    def __init__(self, model, tokenizer):
        """Store the model/tokenizer and build the generation pipeline.

        Args:
            model: The loaded causal language model handed to ``pipeline``.
            tokenizer: The tokenizer matching ``model``.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
        )

    def send_message(self, system_prompt: str, user_message: str) -> str:
        """Send one system + user turn and return the model's reply text.

        Args:
            system_prompt: Content of the ``system`` message.
            user_message: Content of the ``user`` message.

        Returns:
            The ``content`` of the last message in the first generated
            conversation.
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ]
        output = self.pipe(messages)
        # Assumes the pipeline echoes the whole chat with the assistant reply
        # appended last, so the final entry is the newly generated message.
        return output[0]["generated_text"][-1]["content"]

    def send_json(self, system_prompt: str, user_message: str) -> dict:
        """Ask the model for a JSON answer and return it as a dict.

        The system prompt is suffixed with an instruction to reply with JSON
        only. The reply is parsed as-is first; if that fails, the text between
        the first ``{`` and the last ``}`` is tried, which recovers objects
        wrapped in Markdown code fences or surrounded by prose.

        Args:
            system_prompt: Content of the ``system`` message (before the suffix).
            user_message: Content of the ``user`` message.

        Returns:
            The parsed JSON object, or the permissive fallback
            ``{"valid": True, "feedback": ""}`` when no JSON object can be
            extracted from the reply.
        """
        import json
        response = self.send_message(system_prompt + "\nRespond with valid JSON only.", user_message)

        candidates = [response]
        if isinstance(response, str):
            start, end = response.find("{"), response.rfind("}")
            if 0 <= start < end:
                candidates.append(response[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except (TypeError, ValueError):
                # ValueError covers json.JSONDecodeError; TypeError covers a
                # non-text reply. Anything else (e.g. KeyboardInterrupt) must
                # propagate instead of being reported as a valid answer.
                continue
            # Only a JSON object honours the ``-> dict`` contract.
            if isinstance(parsed, dict):
                return parsed

        # Deliberate best-effort: an unparseable reply is treated as "valid".
        return {"valid": True, "feedback": ""}

# Bind the client to the model and tokenizer loaded in the earlier cell.
llm_client = ColabLLMClient(model=model, tokenizer=tokenizer)
print("LLM client ready!")

In [None]:
# Smoke-test the client with a single short exchange
test_response = llm_client.send_message(
    system_prompt="You are a helpful assistant.",
    user_message="Say hello in one sentence.",
)
print(f"Test response: {test_response}")

In [None]:
# Download orchestrator files from GitHub
!pip install requests -q

import os

# Always re-download to get latest versions
print("Downloading orchestrator files...")
!wget -q -O runtime_parser.py https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/runtime_parser.py
!wget -q -O canvas_state.py https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/canvas_state.py
!wget -q -O orchestrator.py https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/orchestrator.py
!wget -q -O llm_client.py https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/llm_client.py
!wget -q -O bios_reduced_prompt.txt https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/bios_reduced_prompt.txt
!mkdir -p runtime-files
!wget -q -O runtime-files/B42_Runtime_Phase1_Conceptualization.txt https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/runtime-files/B42_Runtime_Phase1_Conceptualization.txt
!wget -q -O runtime-files/B42_Runtime_Phase2_Drafting.txt https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/runtime-files/B42_Runtime_Phase2_Drafting.txt
!wget -q -O runtime-files/B42_Runtime_Phase3_Review.txt https://raw.githubusercontent.com/Baglecake/Socratic-RCM/main/local_rcm/runtime-files/B42_Runtime_Phase3_Review.txt
print("Files downloaded!")

In [None]:
# Import orchestrator components (modules downloaded by the previous cell)
from runtime_parser import Runtime
from canvas_state import CanvasState
from orchestrator import WorkflowOrchestrator, StudentInteractionHandler

# Load runtime files: one file per workflow phase, passed in phase order
runtime = Runtime(
    "runtime-files/B42_Runtime_Phase1_Conceptualization.txt",
    "runtime-files/B42_Runtime_Phase2_Drafting.txt",
    "runtime-files/B42_Runtime_Phase3_Review.txt"
)
print(f"Loaded {len(runtime.steps)} workflow steps")

# Load BIOS prompt
# The encoding is explicit so the prompt is decoded the same way regardless of
# the platform's default locale.
with open("bios_reduced_prompt.txt", "r", encoding="utf-8") as f:
    bios_prompt = f.read()
print("BIOS prompt loaded")

In [None]:
# Create student interaction handler using our Colab LLM
class ColabStudentHandler(StudentInteractionHandler):
    """Student interaction handler that phrases questions through the Colab LLM.

    NOTE(review): the base-class ``__init__`` is not invoked here — confirm
    that ``StudentInteractionHandler`` needs no initialisation of its own.
    """

    def __init__(self, llm_client, bios_prompt):
        """Keep the LLM client and the BIOS prompt used as the system prompt."""
        self.llm, self.bios_prompt = llm_client, bios_prompt

    def ask_question(self, question: str, rcm_cue: str = None, context: str = None) -> str:
        """Send the question (plus the RCM cue, when given) to the LLM.

        ``context`` is accepted but not used. Returns the LLM's reply text.
        """
        # A truthy cue is appended to the question in a fixed "RCM: '...'" form.
        outgoing = question + f" RCM: '{rcm_cue}'" if rcm_cue else question
        return self.llm.send_message(self.bios_prompt, outgoing)

    def validate_answer(self, answer: str, constraint: str, context: str = None) -> dict:
        """Accept any answer with at least three non-whitespace-padded characters.

        ``constraint`` and ``context`` are accepted but not consulted.
        Returns a dict with ``valid`` and ``feedback`` keys.
        """
        long_enough = bool(answer) and len(answer.strip()) >= 3
        if long_enough:
            return {"valid": True, "feedback": ""}
        return {"valid": False, "feedback": "Please provide a more detailed answer."}

# Wire the handler to the LLM client and the BIOS prompt loaded earlier.
student_handler = ColabStudentHandler(llm_client=llm_client, bios_prompt=bios_prompt)
print("Student handler ready!")

In [None]:
# Build the orchestrator, print a short banner, then start the workflow
orchestrator = WorkflowOrchestrator(
    starting_step="1.1",
    student_handler=student_handler,
    runtime=runtime,
)

banner_rule = "="*60
print("\n" + banner_rule)
print("SOCRATIC RCM - Running on Colab GPU")
print(banner_rule)
print("\nYou will be asked questions step by step.")
print("Type your answers and press Enter.")
print(banner_rule + "\n")

# Run the workflow; the returned value is kept for inspection in later cells
final_canvas = orchestrator.run_workflow()

In [None]:
# Persist the orchestrator state, then report how many answers were collected
orchestrator.save_state("workflow_final_state.json")
print("\nWorkflow state saved to workflow_final_state.json")
student_answers = orchestrator.get_student_answers()
print(f"Total steps completed: {len(student_answers)}")