In [1]:
!pip install transformers accelerate sentencepiece



In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import json
import torch
from typing import List, Dict, Any

In [3]:
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
device = 0 if torch.cuda.is_available() else -1
print(f"Using device: {'GPU' if device==0 else 'CPU'}")

# Do not force model_max_length; load as-is from config
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto" if device == 0 else None,
    torch_dtype=torch.bfloat16 if device == 0 else torch.float32,
    low_cpu_mem_usage=True,
)

# Llama-style padding: set pad to eos
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=50,
)

Using device: GPU


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0


In [4]:
# Helper to render a chat with the official template
def render_chat(system, user):
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

In [5]:
SYSTEM_PROMPT = "You are a careful academic assistant. Be precise and give clear structured output (not JSON, not CSV, no files)."

In [6]:
def build_detection_prompt(submission: str, few_shots: list) -> str:
    shot_texts = []
    for s in few_shots:
        shot_texts.append(
            f'Submission: """{s.get("final_submission","")}"""\n'
            f'Your analysis (2–4 bullet points): <analysis>\n'
            f'Label: {s.get("label_type","")}\n'
        )
    examples_block = "\n\n".join(shot_texts) if shot_texts else "/* no examples available */"
    user_block = f"""You are an AI text-source classifier for academic integrity.
Decide whether the student submission is Human, AI, or Hybrid (AI-assisted).
Guidelines:
- Consider discourse features (specificity, subjectivity, personal context), style consistency, local/global coherence, repetitiveness, and cliché patterns.
- Hybrid = meaningful human writing with some AI assistance, or explicit admission of mixed use.
Examples:
{examples_block}
Now analyze the NEW submission and respond in plain text with the following structure:
Label: ...
Rationale:
- point 1
- point 2
Flags: ...
NEW submission:
\"\"\"{submission}\"\"\""""
    return render_chat(SYSTEM_PROMPT, user_block)

In [7]:
def build_feedback_prompt(domain: str, assignment_prompt: str, rubric_text: str, submission: str) -> str:
    user_block = f"""You are a supportive assessor. Provide actionable feedback aligned to the rubric.
Return plain structured text only (no JSON, no files).
Sections to include:
1) Overall Summary: 2–4 sentences on strengths and priorities.
2) Criteria Feedback: for each rubric criterion include:
   - Criterion
   - Rating (excellent, good, average, needs_improvement, poor)
   - Evidence (1–3 bullet points citing excerpts or behaviors)
   - Improvement Tip: one concrete step
3) Suggested Grade: short string (optional)
Context:
- Domain: {domain}
- Assignment prompt: {assignment_prompt}
Rubric (verbatim):
{rubric_text}
Student submission:
\"\"\"{submission}\"\"\""""
    return render_chat(SYSTEM_PROMPT, user_block)

In [8]:
# Rubric formatting helper
# -----------------------------
def format_rubric(rubric):
    formatted_rubric = f"Rubric ID: {rubric['rubric_id']}\n\nCriteria:\n"
    for item in rubric['criteria']:
        formatted_rubric += f"Criterion: {item['criterion_id']}\nName: {item['name']}\nDescription: {item['description']}\nPerformance Descriptors:\n"
        for key, val in item['performance_descriptors'].items():
            formatted_rubric += f"  - {key}: {val}\n"
    return formatted_rubric

In [9]:
# Load JSON data
# -----------------------------
with open("psychology.json") as f:
    data = json.load(f)

rubric_text = format_rubric(data['rubric'])
few_shots = data.get("few_shots", [])

# -----------------------------
# Loop through all submissions
# -----------------------------
for i, submission in enumerate(data['submissions'], 1):
    submission_text = submission['final_submission']
    label_type = submission.get("label_type", "Unknown")

    # Generate feedback
    feedback_prompt = build_feedback_prompt(
        domain=data['domain'],
        assignment_prompt=data.get("prompt", "Analyze student submission and provide detailed feedback"),
        rubric_text=rubric_text,
        submission=submission_text
    )
    feedback_outputs = chat_pipe(
        feedback_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    feedback_response = feedback_outputs[0]['generated_text'].strip()

    # Run AI Detection
    detection_prompt = build_detection_prompt(submission_text, few_shots)
    detection_outputs = chat_pipe(
        detection_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    detection_response = detection_outputs[0]['generated_text'].strip()

    # Print results
    print(f"\n--- SUBMISSION {i} (Label: {label_type}) ---")
    print("\n--- RUBRIC-ALIGNED FEEDBACK ---\n")
    print(feedback_response)
    print("\n--- ACADEMIC INTEGRITY DETECTION ---\n")
    print(detection_response)


This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.



--- SUBMISSION 1 (Label: AI) ---

--- RUBRIC-ALIGNED FEEDBACK ---

Here is a revised version of the rubric for the psychology assignment:

Criteria:
Criterion: c1
Name: Conceptual Understanding of Biases
Description: Demonstrates comprehension of selected biases, including theoretical foundations.
Performance Descriptors:
  - excellent: Shows precise and in-depth understanding of cognitive biases, with clear links to relevant psychological theories.
  - good: Understands biases well, though one may be less developed. Sound theoretical support.
  - average: Adequate explanation of at least two biases; minor inaccuracies present.
  - needs_improvement: Limited explanation with surface-level or confused understanding of biases.
  - poor: Major misunderstanding or misrepresentation of the biases or their definitions.
Criterion: c2
Name: Application to Real-World Scenarios
Description: Applies biases meaningfully to examples from everyday or professional contexts.
Performance Descriptors:


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



--- SUBMISSION 5 (Label: Hybrid) ---

--- RUBRIC-ALIGNED FEEDBACK ---

I am unable to provide actionable feedback based on the rubric provided. However, based on the rubric, the following is a suggestion for the evaluation of the student's work:

1) Overall Summary:
- A clear and concise summary of the student's work, highlighting the overall focus and purpose of the essay.
- A brief summary of the student's understanding of the cognitive biases discussed in the assignment, such as confirmation bias, anchoring, and the availability heuristic.

2) Criteria Feedback:
- A short string (optional) that provides specific feedback on the student's understanding and application of cognitive biases in their work.
- A list of criteria (c1-c4) and a grading scale that includes "excellent," "good," "average," "needs improvement," and "poor."
- For "criteria," the grading scale should include "excellent" for excellent performance, "good" for good performance, and "needs improvement" for areas that

In [10]:
# Load JSON data
# -----------------------------
with open("engineering.json") as f:
    data = json.load(f)

rubric_text = format_rubric(data['rubric'])
few_shots = data.get("few_shots", [])

# -----------------------------
# Loop through all submissions
# -----------------------------
for i, submission in enumerate(data['submissions'], 1):
    submission_text = submission['final_submission']
    label_type = submission.get("label_type", "Unknown")

    # Generate feedback
    feedback_prompt = build_feedback_prompt(
        domain=data['domain'],
        assignment_prompt=data.get("prompt", "Analyze student submission and provide detailed feedback"),
        rubric_text=rubric_text,
        submission=submission_text
    )
    feedback_outputs = chat_pipe(
        feedback_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    feedback_response = feedback_outputs[0]['generated_text'].strip()

    # Run AI Detection
    detection_prompt = build_detection_prompt(submission_text, few_shots)
    detection_outputs = chat_pipe(
        detection_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    detection_response = detection_outputs[0]['generated_text'].strip()

    # Print results
    print(f"\n--- SUBMISSION {i} (Label: {label_type}) ---")
    print("\n--- RUBRIC-ALIGNED FEEDBACK ---\n")
    print(feedback_response)
    print("\n--- ACADEMIC INTEGRITY DETECTION ---\n")
    print(detection_response)


--- SUBMISSION 1 (Label: AI) ---

--- RUBRIC-ALIGNED FEEDBACK ---

Here is a revised version of the rubric for the supportive assessor, with additional feedback and actionable guidance:

Criteria:
Criterion: c1
Name: Scope & Comprehensiveness
Description: Explains the entire process from initial design to final assembly, including all key phases.
Performance Descriptors:
  - excellent: Provides an exceptionally thorough and detailed account of all phases from design to assembly, expertly integrating equipment layout, efficiency metrics, and safety protocols.
  - good: Covers most essential phases of production line setup, addressing equipment, efficiency, and safety well, with minor areas for further detail.
  - average: Addresses most phases but with limited detail; some key aspects of design-to-assembly flow, efficiency, or safety may be omitted.
  - needs_improvement: Addresses some aspects of production line setup, but with significant omissions in key areas like design-to-assembl

In [11]:
# Load JSON data
# -----------------------------
with open("it.json") as f:
    data = json.load(f)

rubric_text = format_rubric(data['rubric'])
few_shots = data.get("few_shots", [])

# -----------------------------
# Loop through all submissions
# -----------------------------
for i, submission in enumerate(data['submissions'], 1):
    submission_text = submission['final_submission']
    label_type = submission.get("label_type", "Unknown")

    # Generate feedback
    feedback_prompt = build_feedback_prompt(
        domain=data['domain'],
        assignment_prompt=data.get("prompt", "Analyze student submission and provide detailed feedback"),
        rubric_text=rubric_text,
        submission=submission_text
    )
    feedback_outputs = chat_pipe(
        feedback_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    feedback_response = feedback_outputs[0]['generated_text'].strip()

    # Run AI Detection
    detection_prompt = build_detection_prompt(submission_text, few_shots)
    detection_outputs = chat_pipe(
        detection_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    detection_response = detection_outputs[0]['generated_text'].strip()

    # Print results
    print(f"\n--- SUBMISSION {i} (Label: {label_type}) ---")
    print("\n--- RUBRIC-ALIGNED FEEDBACK ---\n")
    print(feedback_response)
    print("\n--- ACADEMIC INTEGRITY DETECTION ---\n")
    print(detection_response)


--- SUBMISSION 1 (Label: AI) ---

--- RUBRIC-ALIGNED FEEDBACK ---

Title: Artificial Intelligence in Modern Cybersecurity: Application of Machine Learning to Threat Detection and Response

Section 1: Overall Summary

1) Overall Summary:
   - AI plays a pivotal role in modern cybersecurity, automating threat detection and enhancing incident response.
   - Machine learning algorithms can analyze massive datasets in real-time and identify anomalies or malicious patterns that traditional rule-based systems may overlook.

Section 2: Criteria Feedback

Criterion: c1

Name: Understanding of Topic

Description: Demonstrates clear and accurate understanding of AI and its impact on cybersecurity.
Performance Descriptors:
  - excellent: Displays comprehensive knowledge and insightful connections to the topic.
  - good: Shows clear understanding with minor errors or omissions.
  - average: Basic knowledge with some misunderstandings or generalizations.
  - needs_improvement: Limited grasp of the 

In [12]:
# Load JSON data
# -----------------------------
with open("teaching.json") as f:
    data = json.load(f)

rubric_text = format_rubric(data['rubric'])
few_shots = data.get("few_shots", [])

# -----------------------------
# Loop through all submissions
# -----------------------------
for i, submission in enumerate(data['submissions'], 1):
    submission_text = submission['final_submission']
    label_type = submission.get("label_type", "Unknown")

    # Generate feedback
    feedback_prompt = build_feedback_prompt(
        domain=data['domain'],
        assignment_prompt=data.get("prompt", "Analyze student submission and provide detailed feedback"),
        rubric_text=rubric_text,
        submission=submission_text
    )
    feedback_outputs = chat_pipe(
        feedback_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    feedback_response = feedback_outputs[0]['generated_text'].strip()

    # Run AI Detection
    detection_prompt = build_detection_prompt(submission_text, few_shots)
    detection_outputs = chat_pipe(
        detection_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    detection_response = detection_outputs[0]['generated_text'].strip()

    # Print results
    print(f"\n--- SUBMISSION {i} (Label: {label_type}) ---")
    print("\n--- RUBRIC-ALIGNED FEEDBACK ---\n")
    print(feedback_response)
    print("\n--- ACADEMIC INTEGRITY DETECTION ---\n")
    print(detection_response)


--- SUBMISSION 1 (Label: AI) ---

--- RUBRIC-ALIGNED FEEDBACK ---

You are a supportive assessor. Provide actionable feedback aligned to the rubric.

Sure, here's an actionable feedback based on the rubric:

Criteria:
Criterion: c1
Name: Understanding of Early Literacy Development
Description: Demonstrates knowledge of foundational early literacy skills (e.g., phonological awareness, vocabulary, print knowledge) and developmental milestones for ages 3 to 6 years.
Performance Descriptors:
  - excellent: Demonstrates deep understanding of multiple early literacy components and clearly explains developmental progression with relevant examples.
  - good: Demonstrates clear understanding of key early literacy components and typical development.
  - average: Demonstrates basic understanding; coverage of developmental aspects is general.
  - needs_improvement: Superficial or unclear explanation of literacy components or development.
  - poor: Fails to demonstrate understanding of early liter

In [13]:
# Load JSON data
# -----------------------------
with open("accounting.json") as f:
    data = json.load(f)

rubric_text = format_rubric(data['rubric'])
few_shots = data.get("few_shots", [])

# -----------------------------
# Loop through all submissions
# -----------------------------
for i, submission in enumerate(data['submissions'], 1):
    submission_text = submission['final_submission']
    label_type = submission.get("label_type", "Unknown")

    # Generate feedback
    feedback_prompt = build_feedback_prompt(
        domain=data['domain'],
        assignment_prompt=data.get("prompt", "Analyze student submission and provide detailed feedback"),
        rubric_text=rubric_text,
        submission=submission_text
    )
    feedback_outputs = chat_pipe(
        feedback_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    feedback_response = feedback_outputs[0]['generated_text'].strip()

    # Run AI Detection
    detection_prompt = build_detection_prompt(submission_text, few_shots)
    detection_outputs = chat_pipe(
        detection_prompt,
        return_full_text=False,
        eos_token_id=tokenizer.eos_token_id
    )
    detection_response = detection_outputs[0]['generated_text'].strip()

    # Print results
    print(f"\n--- SUBMISSION {i} (Label: {label_type}) ---")
    print("\n--- RUBRIC-ALIGNED FEEDBACK ---\n")
    print(feedback_response)
    print("\n--- ACADEMIC INTEGRITY DETECTION ---\n")
    print(detection_response)


--- SUBMISSION 1 (Label: AI) ---

--- RUBRIC-ALIGNED FEEDBACK ---

Criteria:
Criterion: c1
Name: Understanding of Blockchain Concepts
Description: Demonstrates knowledge of blockchain principles (decentralization, immutability, smart contracts) and their relevance to accounting.
Performance Descriptors:
  - excellent: Clearly explains multiple blockchain concepts and maps each to relevant accounting applications.
  - good: Accurately describes 1–2 blockchain concepts and gives examples.
  - average: Mentions blockchain concepts but with limited detail.
  - needs_improvement: Basic or unclear understanding of concepts.
  - poor: Fails to explain blockchain accurately.
Criterion: c2
Name: Analysis of Accounting Impact
Description: Evaluates how blockchain technology affects auditing, financial reporting, and data integrity.
Performance Descriptors:
  - excellent: Insightfully discusses enhanced transparency, automation, audit trails, and associated challenges.
  - good: Explains basic b