In [1]:
"""
This script demonstrates how CodeLlama-7B-Python can be used for
student competence analysis in Python learning.

It includes:
  - Code embeddings for structural/semantic understanding
  - Code continuation (hint-style)
  - A feedback system to detect misconceptions or gaps
"""

import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [2]:
# Load CodeLlama

def load_codellama():
    """Fetch the CodeLlama-7B-Python tokenizer and model.

    Returns:
        tuple: ``(tokenizer, model)`` as produced by the two
        ``from_pretrained`` calls.
    """
    checkpoint = "codellama/CodeLlama-7b-Python-hf"
    print("Loading CodeLlama-7B-Python...")

    tok = AutoTokenizer.from_pretrained(checkpoint)

    # Request float16 weights and hand device selection to device_map="auto".
    load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
    llm = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)

    return tok, llm


# Loaded once at module level; generate_continuation() reads these two globals.
tokenizer, model = load_codellama()

Loading CodeLlama-7B-Python...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]



In [3]:
# Generate Continuation

def generate_continuation(code: str, max_length: int = 256):
    """Generate a sampled continuation of ``code`` to be shown as a hint.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        code: Source text used as the prompt.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
            Prompt tokens count towards this budget.

    Returns:
        str: The decoded output of ``model.generate`` (special tokens
        skipped) with every non-ASCII character removed. If the prompt alone
        already reaches ``max_length`` tokens, no generation is attempted and
        the ASCII-filtered prompt is returned instead.
    """
    inputs = tokenizer(code, return_tensors="pt").to(model.device)

    # With a prompt at or beyond the budget there is no room for new tokens;
    # generate() cannot honour max_length then (transformers warns or raises,
    # depending on the installed version), so return the prompt unchanged.
    prompt_tokens = inputs["input_ids"].shape[-1]
    if prompt_tokens >= max_length:
        return code.encode("ascii", errors="ignore").decode()

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,  # sampling => output differs between runs
            temperature=0.7,
            top_p=0.9
        )

    continuation = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Drop anything that is not plain ASCII before returning the text.
    continuation = continuation.encode("ascii", errors="ignore").decode()
    return continuation

In [4]:
# Feedback System

def detect_gaps(student_code: str):
    """Detect likely misconceptions in student Python code with text heuristics.

    The checks are purely textual (regular expressions on the raw source), so
    keywords inside comments or string literals are still counted.

    Checks performed, in order:
      1. A ``while`` with neither ``break`` nor ``return`` anywhere in the code.
      2. A division whose right-hand side starts with ``len(``.
      3. A ``while`` with no ``x = y +/- ...`` or ``x += ...`` / ``x -= ...``
         style update anywhere in the code.
      4. ``total`` and ``return`` both present but ``total`` is never updated
         from itself (``total = total +/- ...`` or ``total += / -= ...``).
      5. No function definition at all.

    Args:
        student_code: The student's source code as a single string.

    Returns:
        list[str]: One message per triggered check, in the order above. If
        nothing triggers, a single "looks good" message is returned, so the
        list is never empty.
    """
    suggestions = []

    def has_keyword(word: str) -> bool:
        # Whole-word match so identifiers such as "meanwhile" or "subtotal"
        # do not count as the keyword/name itself.
        return re.search(rf"\b{word}\b", student_code) is not None

    has_while = has_keyword("while")
    has_return = has_keyword("return")

    if has_while and not has_keyword("break") and not has_return:
        suggestions.append("Your loop may run forever. Do you have a stopping condition?")

    if re.search(r"/\s*len\(", student_code):
        suggestions.append("What happens if the list is empty? Division by zero might occur.")

    # Accept both `i = i + 1` style and augmented `i += 1` / `i -= 1` updates.
    if has_while and re.search(r"\w+\s*(?:[+\-]=|=\s*\w+\s*[+\-])", student_code) is None:
        suggestions.append("Are you updating your loop variable correctly inside the loop?")

    if has_keyword("total") and has_return:
        # `total += x` is as valid an accumulation as `total = total + x`.
        if not re.search(r"\btotal\s*(?:[+\-]=|=\s*total\s*[+\-])", student_code):
            suggestions.append("Does your return value represent an accumulated result, or just the last value?")

    if not re.search(r"\bdef\s", student_code):
        suggestions.append("Try wrapping your logic in a function for clarity and reusability.")

    if not suggestions:
        suggestions.append("No obvious conceptual gaps detected. Looks good!")

    return suggestions


def generate_feedback(code: str):
    """Print a three-part report for one piece of student code.

    Sections are written to stdout in this order: the code itself, the
    model continuation from ``generate_continuation`` (shown as a hint),
    and the messages from ``detect_gaps``. Nothing is returned.
    """
    # Echo the submission first so it is visible while generation runs.
    print("\n=== Student Code ===\n")
    print(code)

    hint_text = generate_continuation(code)
    print("\n=== CodeLlama Continuation (as hint) ===\n", hint_text, sep="\n")

    print("\n=== Feedback Suggestions ===")
    for message in detect_gaps(code):
        print(f"- {message}")

In [None]:
# Example Test Cases

if __name__ == "__main__":
    # Example 1: a `while True` loop that returns on its first iteration.
    student_code_1 = """
def divide(a, b):
    while True:
        return a / b
"""

    # Example 2: `total` is overwritten instead of accumulated, and the
    # result is divided by len(numbers).
    student_code_2 = """
def calculate_average(numbers):
    total = 0
    for i in range(len(numbers)):
        total = numbers[i]
    return total / len(numbers)
"""

    for example_code in (student_code_1, student_code_2):
        generate_feedback(example_code)

    # Interactive mode: read snippets until an empty submission or end of input.
    stdin_closed = False
    while not stdin_closed:
        # END terminates the current snippet; submitting no lines ends the session.
        print("\nPaste your Python code, then type END on its own line (END with no code quits):")
        lines = []
        while True:
            try:
                line = input()
            except EOFError:
                # stdin is closed or non-interactive: treat what was read so
                # far as the final snippet instead of crashing.
                stdin_closed = True
                break
            if line.strip().upper() == "END":
                break
            lines.append(line)

        if not lines:
            break

        student_code = "\n".join(lines)
        generate_feedback(student_code)


=== Student Code ===


def divide(a, b):
    while True:
        return a / b

