In [7]:
# --- 0. Install Dependencies ---
!pip install -q -U transformers accelerate bitsandbytes peft trl datasets

In [9]:
# --- 1. Imports ---
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training # <-- ADD THIS IMPORT
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
# We no longer need SFTTrainer or SFTConfig

# --- 2. Configuration ---
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
DATASET_PATH = "finetune_data.jsonl"
ADAPTER_NAME = "phi3-mini-course-suggester"

In [10]:
# --- 3. Load Model with 4-bit Quantization ---
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

print("‚è≥ Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

#################################################################
# THIS IS THE FIX:
# 1. Remove the old gradient checkpointing line
# model.gradient_checkpointing_enable() # <-- REMOVE THIS

# 2. Add the PEFT helper function to prepare the model
model = prepare_model_for_kbit_training(model)
print("‚úÖ Model prepared for k-bit training.")
#################################################################


tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("‚úÖ Model and tokenizer loaded.\n")

‚è≥ Loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚úÖ Model prepared for k-bit training.
‚úÖ Model and tokenizer loaded.



In [11]:
# --- 4. Load and Tokenize Dataset ---

def format_and_tokenize(example):
    """
    Applies the chat template and then tokenizes the full conversation.
    This prepares the data for the standard Trainer.
    """
    # 1. Create the message format
    messages = [
        {"role": "user", "content": example["prompt"]},
        {"role": "assistant", "content": example["completion"]},
    ]

    # 2. Apply the chat template
    chat_string = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )

    # 3. Tokenize the string
    #################################################################
    # THIS IS THE FIXED LINE:
    # We set padding="max_length" to force all examples to be the same size.
    #################################################################
    tokenized_output = tokenizer(
        chat_string,
        truncation=True,
        padding="max_length", # <-- This is the fix
        max_length=1024,
    )

    # 4. Create labels for training
    tokenized_output["labels"] = tokenized_output["input_ids"][:]

    return tokenized_output

# Load the raw JSONL data
try:
    dataset = load_dataset("json", data_files=DATASET_PATH, split="train")
except FileNotFoundError:
    print(f"Error: '{DATASET_PATH}' not found.")
    print("Please make sure you have uploaded your finetune_data.jsonl file to Colab.")
    raise

print(f"‚úÖ Raw dataset loaded with {len(dataset)} examples.")

# Apply the tokenization to the entire dataset
tokenized_dataset = dataset.map(
    format_and_tokenize,
    remove_columns=list(dataset.features)
)

print(f"‚úÖ Dataset processed and tokenized.")
print("Example of tokenized data:\n", tokenized_dataset[0])

Generating train split: 0 examples [00:00, ? examples/s]

‚úÖ Raw dataset loaded with 100 examples.


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

‚úÖ Dataset processed and tokenized.
Example of tokenized data:
 {'input_ids': [32010, 1932, 526, 278, 4413, 363, 1105, 309, 27439, 1199, 313, 4741, 29906, 29945, 29906, 6877, 32007, 32001, 1105, 309, 27439, 1199, 313, 4741, 29906, 29945, 29906, 29897, 756, 13081, 1973, 373, 15050, 4515, 3250, 29892, 498, 1295, 3250, 29892, 322, 28728, 515, 29871, 29896, 29896, 29901, 29900, 29900, 13862, 304, 29871, 29896, 29906, 29901, 29900, 29900, 11278, 297, 365, 29900, 29955, 29889, 1670, 29915, 29879, 884, 263, 15031, 4867, 21467, 363, 27822, 29892, 323, 1041, 3250, 29892, 322, 498, 1295, 3250, 515, 29871, 29906, 29901, 29900, 29900, 11278, 304, 29871, 29945, 29901, 29900, 29900, 11278, 29889, 32007, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,

In [12]:
# --- 5. LoRA Configuration ---
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
)

# Apply PEFT to the model
model = get_peft_model(model, lora_config)
print("‚úÖ LoRA adapters applied to model.")

# --- 6. Training Configuration ---
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    logging_steps=25,
    save_strategy="epoch",
    optim="paged_adamw_8bit",
    bf16=True,
    report_to="tensorboard",
)

#################################################################
# THIS IS THE FIX:
# We create a custom trainer to remove the bad argument
#################################################################
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        # This is the argument that causes the error, so we pop it
        if "num_items_in_batch" in kwargs:
            kwargs.pop("num_items_in_batch")

        # Now, call the original compute_loss function
        return super().compute_loss(model, inputs, return_outputs=return_outputs, **kwargs)

print("‚úÖ Custom Trainer defined.")

‚úÖ LoRA adapters applied to model.
‚úÖ Custom Trainer defined.


In [13]:
# --- 7. Initialize Trainer ---
# We need a data collator to handle padding our tokenized batches
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # This is a Causal LM, not Masked LM
)

#################################################################
# THIS IS THE FIX:
# We instantiate our `CustomTrainer` instead of the default `Trainer`.
#################################################################
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset, # Use our pre-tokenized dataset
    tokenizer=tokenizer,
    data_collator=data_collator, # Pass the data collator
)

# --- 8. Train ---
print("\nüöÄ Starting fine-tuning...")
trainer.train()
print("‚úÖ Training complete!")

  trainer = CustomTrainer(



üöÄ Starting fine-tuning...


  return fn(*args, **kwargs)


Step,Training Loss
25,2.0524


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


‚úÖ Training complete!


In [14]:
model.save_pretrained(ADAPTER_NAME)
tokenizer.save_pretrained(ADAPTER_NAME)


('phi3-mini-course-suggester/tokenizer_config.json',
 'phi3-mini-course-suggester/special_tokens_map.json',
 'phi3-mini-course-suggester/chat_template.jinja',
 'phi3-mini-course-suggester/tokenizer.model',
 'phi3-mini-course-suggester/added_tokens.json',
 'phi3-mini-course-suggester/tokenizer.json')

In [15]:
import torch
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

# --- Configuration ---
BASE_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
ADAPTER_PATH = "phi3-mini-course-suggester" # The adapter you just saved

print("‚è≥ Loading quantized base model...")

# --- 1. Load the Quantized Base Model ---
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# --- 2. Load the Tokenizer ---
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"‚è≥ Loading adapter from {ADAPTER_PATH}...")

# --- 3. Apply the LoRA Adapter ---
model = PeftModel.from_pretrained(model, ADAPTER_PATH)
model.eval()

print("‚úÖ Model and adapter loaded. Starting chatbot...")
print("Type 'quit' or 'exit' to end the chat.")

# --- Chat Loop ---
chat_history = []

while True:
    user_input = input("You: ")
    if user_input.lower() in ["quit", "exit"]:
        break

    chat_history.append({"role": "user", "content": user_input})

    prompt_string = tokenizer.apply_chat_template(
        chat_history,
        tokenize=False,
        add_generation_prompt=True
    )

    inputs = tokenizer(
        prompt_string,
        return_tensors="pt",
        add_special_tokens=False
    ).to(model.device)

    print("\nAssistant: ", end="", flush=True)

    #################################################################
    # THIS IS THE FIX:
    # We add `use_cache=False` to avoid the 'seen_tokens' error.
    #################################################################
    generation_output = model.generate(
        **inputs,
        max_new_tokens=512,
        eos_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
        use_cache=False # <-- THE FIX IS HERE
    )

    new_tokens = generation_output[0][inputs['input_ids'].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    print(response)

    chat_history.append({"role": "assistant", "content": response})
    print("\n" + "-"*20 + "\n")

print("Chatbot session ended. Goodbye!")

‚è≥ Loading quantized base model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚è≥ Loading adapter from phi3-mini-course-suggester...
‚úÖ Model and adapter loaded. Starting chatbot...
Type 'quit' or 'exit' to end the chat.
You: tell me an easy course of  9 credits

Assistant: I recommend CE761 (CE650A) - CE650A: Mechanical Design (3-0). It's a 3-unit course with 3 lectures (T/P/D) and 2 practicals (P). It's taught by Prof. S. Anand. CE761 (CE650A) - CE650A: Mechanical Design (3-0) is a 3-unit course with 3 lectures (T/P/D) and 2 practicals (P). It's taught by Prof. S. Anand.

--------------------

You: single 9 credit courses

Assistant: I recommend CE761 (CE650A) - CE650A: Mechanical Design (3-0). It's a 3-unit course with 3 lectures (T/P/D) and 2 practicals (P). It's taught by Prof. S. Anand. CE761 (CE650A) - CE650A: Mechanical Design (3-0) is a 3-unit course with 3 lectures (T/P/D) and 2 practicals (P). It's taught by Prof. S. Anand.

--------------------

You: Is CE214 hard?

Assistant: 

KeyboardInterrupt: 

In [16]:
# --- 9. Save Adapter ---
print("‚úÖ Training complete!")
trainer.model.save_pretrained(ADAPTER_NAME)
tokenizer.save_pretrained(ADAPTER_NAME)
print(f"‚úÖ Adapter saved at: {ADAPTER_NAME}")

# --- 10. Inference Test ---
print("\nüß† Running inference test...")
del model
del trainer
torch.cuda.empty_cache()

# Reload the base model and apply the new adapters
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base_model, ADAPTER_NAME, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_NAME, local_files_only=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print("‚úÖ Fine-tuned model reloaded for inference.")

# Test Prompt
prompt = "Find me an easy CE course."
messages = [{"role": "user", "content": prompt}]
input_ids = tokenizer.apply_chat_template(
    messages,
    return_tensors="pt",
    add_generation_prompt=True
).to("cuda")

#################################################################
# THIS IS THE FIX for the 'AttributeError: 'DynamicCache' bug:
# We explicitly disable the cache during generation.
#################################################################
outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=False  # <-- THIS IS THE FIX
)

print("\nUser:", prompt)
response = outputs[0][input_ids.shape[-1]:]
print("Assistant (JSON Query):", tokenizer.decode(response, skip_special_tokens=True))

‚úÖ Training complete!
‚úÖ Adapter saved at: phi3-mini-course-suggester

üß† Running inference test...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


‚úÖ Fine-tuned model reloaded for inference.

User: Find me an easy CE course.
Assistant (JSON Query): Sure, here are a few options: CE378 (CE678) - CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678: CE678:


In [None]:
import json
import re

# --- 1. Load Database and Helper Tool ---
print("Loading database...")
try:
    with open('final_database.json', 'r', encoding='utf-8') as f:
        course_database = json.load(f)
    print(f"‚úÖ Database loaded with {len(course_database)} courses.")
except FileNotFoundError:
    print("Error: 'final_database.json' not found. Please upload it first.")
    raise

# Define the Database Search Tool (RAG Tool)
def query_course_database(query_json):
    print(f"üõ†Ô∏è  Tool: Searching for {query_json}")
    results = list(course_database)
    filtered_results = []

    for course in results:
        match = True
        for key, value in query_json.items():
            if key in ["sortBy", "sortOrder"]: continue
            if key == "TimeOfDay":
                if not check_time_of_day(course.get("LectureSchedule"), value):
                    match = False; break
                continue
            if key.endswith("_contains"):
                field = key.split('_')[0]
                course_value = str(course.get(field, ""))
                if value.lower() not in course_value.lower():
                    match = False; break
            elif key.endswith("_gte"):
                field = key.split('_')[0]
                course_value = course.get(field)
                if not (isinstance(course_value, (int, float)) and course_value >= value):
                    match = False; break
            elif key.endswith("_lte"):
                field = key.split('_')[0]
                course_value = course.get(field)
                if not (isinstance(course_value, (int, float)) and course_value <= value):
                    match = False; break
            elif key.endswith("_not"):
                field = key.split('_')[0]
                course_value = str(course.get(field, ""))
                if str(value).lower() == course_value.lower():
                    match = False; break
            elif key == "Analysis":
                course_value = course.get("Grading", {}).get("Analysis", "")
                if str(value).lower() != course_value.lower():
                    match = False; break
            else:
                course_value = str(course.get(key, ""))
                if str(value).lower() != course_value.lower():
                    match = False; break
        if match:
            filtered_results.append(course)

    sort_key = query_json.get("sortBy")
    sort_order = query_json.get("sortOrder", "asc")
    if sort_key == "AGP":
        reverse = (sort_order == "desc")
        filtered_results.sort(key=lambda x: x.get("AGP", 0 if reverse else float('inf')), reverse=reverse)
    return filtered_results[:5]

def check_time_of_day(schedule, time_of_day):
    if not schedule: return False
    start_times = re.findall(r"(\d{2}):\d{2}", schedule)
    for time_str in start_times:
        try:
            hour = int(time_str)
            if time_of_day == "Morning" and 8 <= hour < 12: return True
            if time_of_day == "Afternoon" and 12 <= hour < 17: return True
            if time_of_day == "Evening" and hour >= 17: return True
        except ValueError: continue
    return False

# --- 2. Define Agent Planner (Uses Fine-Tuned Model) ---
def get_json_query(prompt):
    """Planner: Uses the fine-tuned model to generate the JSON query."""
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True).to("cuda")

    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=False # <-- Fix for the 'DynamicCache' bug
    )
    response_str = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

    try:
        start = response_str.find('{')
        end = response_str.rfind('}') + 1
        return json.loads(response_str[start:end])
    except:
        print(f"Warning: Planner failed to generate valid JSON. Output: {response_str}")
        return None

# --- 3. Define Agent Executor (Main Loop) ---
def run_course_suggester_agent(user_prompt):
    """Executor: Runs the full cycle and synthesizes the final answer."""
    print(f"\n--- New Request: {user_prompt} ---")

    # 1. Plan
    print("ü§ñ Planner: Converting request to query...")
    json_query = get_json_query(user_prompt)
    if json_query is None:
        print("Status: Failed to parse query.")
        return "Sorry, I had trouble parsing your request into a valid query."
    print(f"üîç Generated Query: {json_query}")

    # 2. Execute
    course_results = query_course_database(json_query)

    # 3. Synthesize
    print("ü§ñ Executor: Synthesizing final answer...")
    synthesis_prompt = f"""
    You are a polite university course advisor. The user asked: "{user_prompt}"
    Your tool found the following results:
    {json.dumps(course_results, indent=2, ensure_ascii=False)}

    Based *only* on this data, provide a helpful, natural-language answer.
    If no results were found, state that clearly.
    """

    messages = [{"role": "user", "content": synthesis_prompt}]
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True).to("cuda")

    outputs = model.generate(
        input_ids,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=False # <-- Fix for the 'DynamicCache' bug
    )
    final_answer = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

    print(f"‚úÖ Final Response:\n{final_answer}")
    return final_answer

# --- 4. Run the Full Agent! ---
run_course_suggester_agent("What is the easiest 9-unit course in CE?")
print("\n" + "="*50 + "\n")
run_course_suggester_agent("Find a tough course in the afternoon.")

Loading database...
‚úÖ Database loaded with 66 courses.

--- New Request: What is the easiest 9-unit course in CE? ---
ü§ñ Planner: Converting request to query...
Status: Failed to parse query.



--- New Request: Find a tough course in the afternoon. ---
ü§ñ Planner: Converting request to query...


In [None]:
import json

print("üß† Starting Quantitative Evaluation...")

# NOTE: This test MUST be run in the same session as your agent
# It relies on the 'get_json_query' function and the loaded model.

# 1. Define the Evaluation Test Set (20 New Prompts)
evaluation_set = [
    {"prompt": "Show me all CE courses.", "expected": '{"Branch": "CE"}'},
    {"prompt": "What's the hardest 9-unit course in Civil Engineering?", "expected": '{"Branch": "CE", "Units_contains": "(9)", "sortBy": "AGP", "sortOrder": "asc"}'},
    {"prompt": "I need a class with a practical on Wednesday.", "expected": '{"PracticalSchedule_contains": "W"}'},
    {"prompt": "Find me a class that has 'HYDRAULICS' in the name.", "expected": '{"CourseName_contains": "HYDRAULICS"}'},
    {"prompt": "List all courses by 'SAUMYEN GUHA'.", "expected": '{"Instructor_contains": "SAUMYEN GUHA"}'},
    {"prompt": "What are the 'Generous Grading' courses?", "expected": '{"Analysis": "Generous Grading"}'},
    {"prompt": "Find me a class that meets at 2:00 PM.", "expected": '{"LectureSchedule_contains": "14:00"}'},
    {"prompt": "I need an evening class.", "expected": '{"TimeOfDay": "Evening"}'},
    {"prompt": "Show me a 4-unit course.", "expected": '{"Units_contains": "(4)"}'},
    {"prompt": "What's an easy course on a Thursday?", "expected": '{"LectureSchedule_contains": "Th", "sortBy": "AGP", "sortOrder": "desc"}'},
    {"prompt": "List all 'Tough Course' options.", "expected": '{"Analysis": "Tough Course"}'},
    {"prompt": "Find a class with a tutorial on Tuesday.", "expected": '{"TutorialSchedule_contains": "T"}'},
    {"prompt": "I need a class that is NOT 'DC/REGULAR'.", "expected": '{"CourseType_not": "DC/REGULAR"}'},
    {"prompt": "Show me all 'PRF' courses.", "expected": '{"CourseType_contains": "PRF"}'},
    {"prompt": "What does 'ANIMESH DAS' teach?", "expected": '{"Instructor_contains": "ANIMESH DAS"}'},
    {"prompt": "Find me an 11-unit course that is a 'Moderate Challenge'.", "expected": '{"Units_contains": "(11)", "Analysis": "Moderate Challenge"}'},
    {"prompt": "List all courses with 'DESIGN' in the name.", "expected": '{"CourseName_contains": "DESIGN"}'},
    {"prompt": "What is the easiest class taught by 'SUDHIR MISRA'?", "expected": '{"Instructor_contains": "SUDHIR MISRA", "sortBy": "AGP", "sortOrder": "desc"}'},
    {"prompt": "Show me a morning class with 'Fair Grading'.", "expected": '{"TimeOfDay": "Morning", "Analysis": "Fair Grading"}'},
    {"prompt": "Find me a class with an AGP under 7.5.", "expected": '{"AGP_lte": 7.5}'}
]

correct_predictions = 0
total_predictions = len(evaluation_set)
log_file_content = ""

print(f"Running {total_predictions} quantitative tests...")

for i, item in enumerate(evaluation_set):
    prompt = item["prompt"]
    expected_json_str = item["expected"]

    # 1. Get prediction from the fine-tuned model
    generated_json = get_json_query(prompt)

    # 2. Compare the string representations
    # We load/dump to normalize formatting (e.g., key order, spacing)
    try:
        generated_json_str = json.dumps(generated_json, sort_keys=True)
        expected_json_str_sorted = json.dumps(json.loads(expected_json_str), sort_keys=True)
    except Exception:
        generated_json_str = "" # Model failed to output JSON
        expected_json_str_sorted = json.dumps(json.loads(expected_json_str), sort_keys=True)


    test_result = "FAIL"
    if generated_json_str == expected_json_str_sorted:
        correct_predictions += 1
        test_result = "PASS"

    log_entry = f"--- Test {i+1}/{total_predictions} ---\n"
    log_entry += f"PROMPT: {prompt}\n"
    log_entry += f"EXPECTED: {expected_json_str_sorted}\n"
    log_entry += f"GENERATED: {generated_json_str}\n"
    log_entry += f"RESULT: {test_result}\n\n"

    print(f"Test {i+1}/{total_predictions}: {test_result}")
    log_file_content += log_entry

# 3. Calculate and print final accuracy
accuracy = (correct_predictions / total_predictions) * 100
log_entry = f"--- FINAL RESULT ---\n"
log_entry += f"Exact Match Accuracy: {correct_predictions}/{total_predictions} = {accuracy:.1f}%\n"
log_file_content += log_entry

print("\n" + "="*50)
print(f"Quantitative Evaluation Complete.")
print(f"Exact Match Accuracy: {correct_predictions}/{total_predictions} = {accuracy:.1f}%")
print("="*50 + "\n")

# Save the log to a file
with open("quantitative_evaluation_log.txt", "w", encoding="utf-8") as f:
    f.write(log_file_content)

print("‚úÖ Quantitative evaluation log saved to 'quantitative_evaluation_log.txt'")

In [None]:
print("üß† Starting Qualitative Evaluation...")
print("This will be your chat log for the report.\n")

log_content = ""

def run_and_log(prompt):
    global log_content
    response = run_course_suggester_agent(prompt)
    log_content += f"--- User Request ---\n{prompt}\n\n--- Agent Response ---\n{response}\n\n"
    print("\n" + "="*50 + "\n")

run_and_log("I need an easy morning class about 'Structures'.")
run_and_log("Find me a tough course in the afternoon that has a practical on a Monday.")
run_and_log("What are the 11-unit courses taught by 'SAUMYEN GUHA'?")
run_and_log("Show me all 'Generous Grading' courses that are NOT in the 'OE-1' slot.")
run_and_log("List all courses with 'Fair Grading' that have a lecture on Friday.")

# Save the qualitative log
with open("qualitative_evaluation_log.txt", "w", encoding="utf-8") as f:
    f.write(log_content)

print("‚úÖ Qualitative evaluation log saved to 'qualitative_evaluation_log.txt'")