In [1]:
# ==========================================
# PART 1: INSTALL LIBRARIES
# ==========================================
# We install transformers for the model and openai for data generation
!pip install transformers torch openai scikit-learn -q

import os
import torch
from transformers import pipeline
from openai import OpenAI

# ==========================================
# PART 2: SETUP GEN-AI (Data Generation)
# ==========================================
# Note: for the final project phase you will need a real API key.
# For this demo, I wrote a function that simulates the LLM's response so the code runs for you right away.

def generate_synthetic_student_answer(topic, error_type):
    """
    Return a canned student answer that stands in for an LLM-generated one.

    The function builds the prompt that would be sent to the LLM, prints it,
    and then returns a hard-coded answer instead of calling any API.

    Args:
        topic: Subject the student is answering about (e.g. "Gravity").
        error_type: Kind of mistake the answer should show
            (e.g. "Misconception").

    Returns:
        A fixed answer string for the two known (topic, error_type) pairs,
        otherwise a generic placeholder string.
    """
    # Prompt text the real implementation would send to the LLM.
    llm_prompt = f"Topic: {topic}. Generate a short student answer that demonstrates a '{error_type}'."

    print(f" Asking GenAI: '{llm_prompt}'...")

    # --- REAL CODE (Uncomment when you have API Key) ---
    # client = OpenAI(api_key="YOUR_KEY")
    # response = client.chat.completions.create(
    #     model="gpt-4",
    #     messages=[{"role": "user", "content": llm_prompt}]
    # )
    # return response.choices[0].message.content
    # ---------------------------------------------------

    # --- DEMO SIMULATION (For Presentation) ---
    # Guard clauses: each known pair returns its scripted answer right away.
    if topic == "Gravity" and error_type == "Misconception":
        return "Heavy objects fall faster than light objects because gravity pulls them harder."

    if topic == "Coding" and error_type == "Syntax Error":
        return "I used 'print x' without parentheses, but python 3 requires brackets."

    # Any other combination falls back to the generic placeholder.
    return "Generic synthetic answer generated by LLM."

# ==========================================
# PART 3: SETUP MODEL (Inference)
# ==========================================
# For the proposal phase, we use a Zero-Shot classifier as a baseline.
# Later, we will fine-tune a DeBERTa model.

# Build the zero-shot baseline once at module level; classify_response
# reads this `classifier` object on every call.
print(" Loading Baseline Model (Zero-Shot Classifier)...")
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

def classify_response(student_answer, candidate_labels=None):
    """
    Classify a student answer with the module-level zero-shot classifier.

    Args:
        student_answer: Text of the student's answer to classify.
        candidate_labels: Optional sequence of label names to score the
            answer against. When omitted, the project labels "Correct",
            "Partially Correct" and "Misconception" are used, so existing
            single-argument callers behave exactly as before.

    Returns:
        A (label, score) tuple taken from the first entry of the
        classifier's 'labels' and 'scores' outputs (the top prediction).
    """
    if candidate_labels is None:
        # These are our project labels
        candidate_labels = ["Correct", "Partially Correct", "Misconception"]

    # Run inference
    result = classifier(student_answer, candidate_labels)

    # Return top label and score
    return result['labels'][0], result['scores'][0]

# ==========================================
# PART 4: RUN THE DEMO (The Pipeline)
# ==========================================
print("\n--- STARTING PROJECT DEMO ---\n")

# Step 1 - produce a synthetic student answer (the project's novelty).
topic, error_type = "Gravity", "Misconception"
synthetic_data = generate_synthetic_student_answer(topic, error_type)
print(f" Generated Data: \"{synthetic_data}\"\n")

# Step 2 - feed the synthetic answer to the baseline classifier.
label, confidence = classify_response(synthetic_data)
print(f" Model Prediction: [{label}] (Confidence: {confidence:.2f})\n")

# Step 3 - classify a hand-written hard negative: the sentence uses the
# right keywords ("Gravity", "force") but states the wrong physics.
real_student_input = "Gravity is a force that pushes objects away from the earth."
print(f" Analyzing Real Student Input: \"{real_student_input}\"")
label, confidence = classify_response(real_student_input)
print(f" Model Prediction: [{label}] (Confidence: {confidence:.2f})")

print("\n--- DEMO COMPLETE ---")

 Loading Baseline Model (Zero-Shot Classifier)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0



--- STARTING PROJECT DEMO ---

 Asking GenAI: 'Topic: Gravity. Generate a short student answer that demonstrates a 'Misconception'.'...
 Generated Data: "Heavy objects fall faster than light objects because gravity pulls them harder."

 Model Prediction: [Partially Correct] (Confidence: 0.71)

 Analyzing Real Student Input: "Gravity is a force that pushes objects away from the earth."
 Model Prediction: [Partially Correct] (Confidence: 0.68)

--- DEMO COMPLETE ---
