# NanoGPT

In [32]:
import requests
import json
# from google.colab import userdata
from typing import List, Dict, Any
import os
import random
import time

### API Access

In [None]:
# Root of the NanoGPT API; endpoint paths are appended to it.
BASE_URL = "https://nano-gpt.com/api/v1"

# Use a key that is already exported in the environment when there is one and
# only fall back to the placeholder otherwise. An unconditional assignment
# would clobber a real key and make the validation below unreachable.
# Never commit a real key to the notebook.
os.environ.setdefault("NANO_GPT_API_KEY", "NanoGPT-Key")
API_KEY = os.getenv("NANO_GPT_API_KEY")
if not API_KEY:
    raise ValueError("Missing API key! Please set it with os.environ['NANO_GPT_API_KEY']='your-key-here'")

# Headers shared by every request made in this notebook.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
    "Accept": "text/event-stream"
}

### Function to communicate with the model

In [19]:
def stream_chat_completion(messages, model="chatgpt-4o-latest", max_retries=5):
    """
    Stream a chat completion and yield the generated text piece by piece.

    Posts `messages` to `{BASE_URL}/chat/completions` with streaming enabled
    and reads the response line by line. For every line that parses as JSON,
    the non-empty `choices[0].delta.content` value is yielded. Reading stops
    at a `[DONE]` line or at the end of the stream.

    HTTP 429 responses and `requests` errors are retried with exponential
    backoff (1s, 2s, 4s, ...) for at most `max_retries` attempts. Failures
    are printed rather than raised, so the generator simply ends.

    Args:
        messages: Chat messages in the `{"role": ..., "content": ...}` format.
        model: Model identifier sent to the API.
        max_retries: Maximum number of request attempts.

    Yields:
        str: Successive fragments of the generated text.
    """
    data = {"model": model, "messages": messages, "stream": True}

    for attempt in range(max_retries):
        wait = 2 ** attempt
        is_last_attempt = attempt == max_retries - 1
        # True once any text from this attempt has been handed to the caller.
        emitted = False

        try:
            # The context manager closes the streamed connection on every exit
            # path, including when the consumer abandons the generator early.
            with requests.post(
                f"{BASE_URL}/chat/completions",
                headers=headers,
                json=data,
                stream=True,
                timeout=180
            ) as response:
                if response.status_code != 429:
                    response.raise_for_status()
                    for raw_line in response.iter_lines():
                        if not raw_line:
                            continue
                        line = raw_line.decode("utf-8")
                        # The space after "data:" is optional in event streams.
                        if line.startswith("data:"):
                            line = line[5:].strip()
                        if line == "[DONE]":
                            break
                        try:
                            content = json.loads(line)['choices'][0]['delta'].get('content')
                        except (json.JSONDecodeError, IndexError, KeyError,
                                TypeError, AttributeError):
                            # Not a content chunk (keep-alive, comment, odd payload).
                            continue
                        if content:
                            emitted = True
                            yield content
                    return
        except requests.exceptions.RequestException as e:
            if emitted:
                # Retrying would replay the answer from the start and the
                # caller would end up with duplicated text.
                print(f"Stream interrupted after partial output ({e}). Not retrying.")
                return
            if is_last_attempt:
                print(f"Failed after {max_retries} attempts: {e}")
                return
            print(f"Request failed ({e}). Retrying in {wait}s...")
            time.sleep(wait)
            continue

        # Only an HTTP 429 response reaches this point (connection already closed).
        if is_last_attempt:
            print(f"Failed after {max_retries} attempts: rate limit still in effect.")
            return
        print(f"Rate limit hit. Retrying in {wait}s...")
        time.sleep(wait)

In [20]:
def get_chat_completion(messages, model="chatgpt-4o-latest"):
    """
    Non-streaming chat completion request that returns the full response text.

    Args:
        messages: Chat messages in the `{"role": ..., "content": ...}` format.
        model: Model identifier sent to the API.

    Returns:
        str: The stripped `choices[0].message.content` of the response.
        On failure a fixed message is returned instead of raising:
        "Error generating response." for request errors and
        "Error parsing model response." when the payload does not have the
        expected shape.
    """
    data = {
        "model": model,
        "messages": messages,
        "stream": False
    }

    try:
        response = requests.post(
            f"{BASE_URL}/chat/completions",
            headers=headers,
            json=data,
            timeout=180
        )
        response.raise_for_status()
        result = response.json()
        return result['choices'][0]['message']['content'].strip()
    except requests.exceptions.RequestException as e:
        print(f"Error during API request: {e}")
        return "Error generating response."
    except (KeyError, IndexError, TypeError, AttributeError, ValueError):
        # TypeError/AttributeError: unexpected types in the payload, e.g. a
        # null `content`. ValueError: body that is not JSON on `requests`
        # versions whose decode error is not a RequestException.
        return "Error parsing model response."

### Prompts

In [21]:
SYSTEM_PROMPT = "You are a careful academic assistant. Be precise and give clear structured output (not JSON, not CSV, no files)."


def build_detection_prompt(submission: str, few_shots: List[Dict[str, Any]]) -> List[Dict[str, str]]:
    """
    Build the chat messages for the authorship-detection request.

    The user message asks the model to classify `submission` as Human, AI,
    or Hybrid and to answer in plain text with a label, a short rationale
    and flags. Each entry of `few_shots` is rendered as a worked example
    from its "final_submission" and "label_type" values (an empty string is
    used for a missing key); with no entries a placeholder line is inserted.

    Args:
        submission: Text of the submission to classify.
        few_shots: Example records used as few-shot demonstrations.

    Returns:
        A two-item list: the system message followed by the user message.
    """
    # One rendered example per few-shot record.
    rendered_shots = [
        f'Submission: """{shot.get("final_submission","")}"""\n'
        f'Your analysis (2–4 bullet points): <analysis>\n'
        f'Label: {shot.get("label_type","")}\n'
        for shot in few_shots
    ]
    if rendered_shots:
        examples_text = "\n\n".join(rendered_shots)
    else:
        examples_text = "/* no examples available */"

    user_content = f"""
You are an AI text-source classifier for academic integrity.
Decide whether the student submission is Human, AI, or Hybrid (AI-assisted).

Guidelines:
- Consider discourse features (specificity, subjectivity, personal context), style consistency, local/global coherence, repetitiveness, and cliché patterns.
- Hybrid = meaningful human writing with some AI assistance, or explicit admission of mixed use.

Examples:
{examples_text}

Now analyze the NEW submission and respond in plain text with the following structure:
Label: ...
Rationale:
- point 1
- point 2
Flags: ...
NEW submission:
\"\"\"{submission}\"\"\"\n
"""
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]


def build_feedback_prompt(domain: str, assignment_prompt: str, rubric_text: str, submission: str) -> List[Dict[str, str]]:
    """
    Build the chat messages for the rubric-aligned feedback request.

    The user message asks the model, in the role of a supportive assessor,
    for plain structured text with three sections: an overall summary,
    per-criterion feedback (criterion, rating, evidence, improvement tip)
    and an overall rating. The domain, the assignment prompt, the rubric
    text and the submission are embedded in the message as given.

    Args:
        domain: Subject area of the assignment.
        assignment_prompt: The task the student was given.
        rubric_text: Rubric, inserted verbatim.
        submission: Text of the student submission.

    Returns:
        A two-item list: the system message followed by the user message.
    """
    user_content = f"""
You are a supportive assessor. Provide actionable feedback aligned to the rubric.
Return plain structured text only (no JSON, no files).

Sections to include:
1) Overall Summary: 2–4 sentences on strengths and priorities.
2) Criteria Feedback: for each rubric criterion include:
   - Criterion
   - Rating (excellent, good, average, needs_improvement, poor)
   - Evidence (1–3 bullet points citing excerpts or behaviors)
   - Improvement Tip (one concrete step)
3) Overall Rating: Excellent | Good | Average | Needs Improvement | Poor

Context:
- Domain: {domain}
- Assignment prompt: {assignment_prompt}

Rubric (verbatim):
{rubric_text}

Student submission:
\"\"\"{submission}\"\"\"\n
"""
    system_message = dict(role="system", content=SYSTEM_PROMPT)
    user_message = dict(role="user", content=user_content)
    return [system_message, user_message]


### Loading the dataset

In [25]:
import json
import os

def load_multiple_json_files(file_paths: list) -> list:
    """
    Loads multiple JSON files and returns a list of loaded data dictionaries.

    A file is kept only when its top-level value is a JSON object containing
    both the 'domain' and 'submissions' keys. Files that are missing,
    unreadable, not valid JSON, or that lack those keys are reported on
    stdout and skipped, so one bad file never aborts the whole load.

    Args:
        file_paths (list): List of full file paths to JSON files.

    Returns:
        List of dictionaries, each representing the data from one JSON file,
        in the order of `file_paths`.
    """
    loaded_data = []
    for file_name in file_paths:
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # platform's default encoding.
            with open(file_name, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"ERROR: The file '{file_name}' was not found. Skipping.")
            continue
        except json.JSONDecodeError:
            print(f"ERROR: The file '{file_name}' is not a valid JSON. Skipping.")
            continue
        except (OSError, UnicodeDecodeError) as e:
            # E.g. a directory, a permission problem, or bytes that are not UTF-8.
            print(f"ERROR: The file '{file_name}' could not be read ({e}). Skipping.")
            continue

        # The isinstance check keeps lists and scalars away from the key lookups.
        if isinstance(data, dict) and 'domain' in data and 'submissions' in data:
            print(f"Loaded domain '{data['domain']}' with {len(data['submissions'])} submissions from '{file_name}'.")
            loaded_data.append(data)
        else:
            print(f"WARNING: File '{file_name}' missing 'domain' or 'submissions' keys. Skipping.")
    return loaded_data

# example usage
# One dataset file per domain, relative to the notebook's working directory.
file_list = [
    'Training_Data/accounting.json',
    'Training_Data/teaching.json',
    'Training_Data/psychology.json',
    'Training_Data/it.json',
    'Training_Data/engineering.json',
]

# Files that cannot be loaded are skipped, so the count may be below len(file_list).
all_domains_data = load_multiple_json_files(file_list)
print(f"\nTotal domains loaded: {len(all_domains_data)}")


Loaded domain 'Accounting' with 6 submissions from 'Training_Data/accounting.json'.
Loaded domain 'Teaching' with 6 submissions from 'Training_Data/teaching.json'.
Loaded domain 'Psychology' with 6 submissions from 'Training_Data/psychology.json'.
Loaded domain 'Information Technology' with 6 submissions from 'Training_Data/it.json'.
Loaded domain 'Manufacturing Engineering' with 6 submissions from 'Training_Data/engineering.json'.

Total domains loaded: 5


### Evaluation Function

In [26]:
# function to generate self-evaluation rating
def build_self_eval_prompt(rubric: Dict, essay: str, feedback: str) -> str:
    """
    Build the prompt that asks the model to rate generated feedback.

    The prompt shows the essay, the names of the rubric criteria and the
    feedback, and requests a single integer from 1 to 5.

    Args:
        rubric: Rubric dictionary; criterion names are read from the "name"
            key of each entry under "criteria". An entry without a name is
            shown as "Criterion"; with no criteria the text "the rubric"
            is used instead.
        essay: Text of the student submission.
        feedback: Feedback text to be rated.

    Returns:
        The complete prompt string, ending with "RATING (1-5): ".
    """
    criterion_names = []
    for criterion in rubric.get('criteria',[]):
        criterion_names.append(criterion.get('name','Criterion'))

    if criterion_names:
        crit_str = ", ".join(criterion_names)
    else:
        crit_str = "the rubric"

    sections = [
        "You are a strict but fair assessor. Rate how well the FEEDBACK addresses the rubric for the ESSAY.\n",
        "Rate on a 1-5 scale (integers only). Provide ONLY the number.\n\n",
        f"ESSAY:\n{essay}\n\n",
        f"RUBRIC CRITERIA: {crit_str}\n\n",
        f"FEEDBACK:\n{feedback}\n\n",
        "RATING (1-5): ",
    ]
    return "".join(sections)

### Functions to detect authorship and generate feedback from the training data and prompts, and to evaluate the model's performance.

In [27]:
# Standard class labels, defined globally. normalize_label uses this list as
# its default set of allowed values.
ALLOWED_LABELS = ["AI", "Human", "Hybrid"]

In [28]:
def normalize_label(label: str, allowed=ALLOWED_LABELS) -> str:
    """
    Normalize a label to one of the allowed values (AI, Human, Hybrid).

    Matching is case-insensitive and on whole words only, so the letters
    "ai" inside another word (e.g. "Uncertain") do not count as "AI".
    When several allowed values occur, the one that appears first in
    `label` wins: "Human (with AI assistance)" gives "Human".

    Args:
        label: Raw label text, e.g. a ground-truth value or the text that
            follows "Label:" in a model response.
        allowed: Canonical labels to look for.

    Returns:
        The matching canonical label, or "Unknown" if no match is found or
        `label` is not a string.
    """
    import re  # local import keeps this block self-contained

    if not isinstance(label, str):
        return "Unknown"

    best_position = None
    best_label = "Unknown"
    for candidate in allowed:
        # Lookarounds instead of \b so markdown such as "**AI**" or "_AI_"
        # still counts as a whole-word match.
        pattern = rf"(?<![A-Za-z0-9]){re.escape(candidate)}(?![A-Za-z0-9])"
        match = re.search(pattern, label, flags=re.IGNORECASE)
        if match and (best_position is None or match.start() < best_position):
            best_position = match.start()
            best_label = candidate
    return best_label

In [33]:
# Evaluation run: for every submission of every loaded domain
#   1. classify it (Human / AI / Hybrid) with a few-shot detection prompt,
#   2. generate rubric-aligned feedback,
#   3. ask the model to rate that feedback on a 1-5 scale,
# then print one confusion matrix per domain and one across all domains.
# One record per submission is collected in `results`.
results = []

from collections import defaultdict
import pandas as pd

# initialize overall counters
overall_true = []
overall_pred = []

for domain_data in all_domains_data:
    domain = domain_data['domain']
    assignment_prompt = domain_data['prompt']
    rubric_text = json.dumps(domain_data['rubric'], indent=2)
    submissions = domain_data['submissions']

    # domain-level counters
    domain_true = []
    domain_pred = []

    print(f"\n{'='*80}")
    print(f"=== STARTING EVALUATION FOR '{domain}' DOMAIN ===")
    print(f"{'='*80}\n")

    for i, submission_data in enumerate(submissions):
        submission_text = submission_data['final_submission']
        ground_truth_label = submission_data['label_type'].strip()

        # normalize
        ground_truth_label = normalize_label(ground_truth_label)

        print(f"{'-'*80}")
        print(f"Submission {i+1} (Ground Truth: {ground_truth_label})")
        print(f"{'-'*80}\n")

        # detection
        # Few-shot examples come from the same domain; the submission under
        # test is excluded so its own label is never shown to the model.
        available_shots = [s for j, s in enumerate(submissions) if i != j]

        few_shots = random.sample(
            available_shots,
            k=min(2, len(available_shots)))

        detection_messages = build_detection_prompt(submission_text, few_shots)
        full_detection = ""
        predicted_label = "Unknown"

        try:
            for chunk in stream_chat_completion(detection_messages):
                print(chunk, end='', flush=True)
                full_detection += chunk
        except Exception as e:
            print(f"Error generating detection: {str(e)}")
            full_detection = "Error generating detection."
        else:
            # Take the text after the first colon on the first "Label" line.
            # A "Label" line without a colon is skipped, so a formatting
            # quirk can no longer raise and wipe the collected output.
            for line in full_detection.splitlines():
                if "Label" in line and ":" in line:
                    predicted_label = line.split(":", 1)[1].strip()
                    break

            # normalize predicted label
            predicted_label = normalize_label(predicted_label)

        # Add to domain and overall lists
        domain_true.append(ground_truth_label)
        domain_pred.append(predicted_label)
        overall_true.append(ground_truth_label)
        overall_pred.append(predicted_label)

        print("\n")

        # Short pause between consecutive API calls.
        time.sleep(0.5)

        # feedback
        feedback_messages = build_feedback_prompt(
            domain=domain,
            assignment_prompt=assignment_prompt,
            rubric_text=rubric_text,
            submission=submission_text
        )
        full_feedback = ""
        try:
            for chunk in stream_chat_completion(feedback_messages):
                print(chunk, end='', flush=True)
                full_feedback += chunk
        except Exception as e:
            print(f"Error generating feedback: {str(e)}")
            full_feedback = "Error generating feedback."
        print("\n")

        # Self-Evaluation Rating
        print(">>> 3. Self-Evaluation Rating (1-5):")
        rating_prompt_text = build_self_eval_prompt(domain_data['rubric'], submission_text, full_feedback)
        rating_messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": rating_prompt_text},
        ]
        # Bound before the call so a failure can neither leave the name
        # undefined nor reuse the previous submission's rating.
        rating_response = "Error generating rating."
        try:
            rating_response = get_chat_completion(rating_messages)
            print(rating_response)
        except Exception as e:
            print(f"Error generating rating: {str(e)}")

        results.append({
            "Domain": domain,
            "Submission Index": i + 1,
            "Ground Truth": ground_truth_label,
            "Predicted": predicted_label,
            "Detection Output": full_detection,
            "Feedback": full_feedback,
            "Self-Eval Rating": rating_response
        })

    # domain-Level confusion matrix
    domain_cm = pd.crosstab(
        pd.Series(domain_true, name='Ground Truth'),
        pd.Series(domain_pred, name='Predicted'),
        rownames=['Ground Truth'],
        colnames=['Predicted'],
        dropna=False
    )
    print(f"\nConfusion Matrix for Domain '{domain}':")
    print(domain_cm)
    print(f"\n{'='*80}\n")

# overall confusion matrix
overall_cm = pd.crosstab(
    pd.Series(overall_true, name='Ground Truth'),
    pd.Series(overall_pred, name='Predicted'),
    rownames=['Ground Truth'],
    colnames=['Predicted'],
    dropna=False
)
print("\nOverall Confusion Matrix Across All Domains:")
print(overall_cm)



=== STARTING EVALUATION FOR 'Accounting' DOMAIN ===

--------------------------------------------------------------------------------
Submission 1 (Ground Truth: AI)
--------------------------------------------------------------------------------

Label: AI

Rationale:  
- The submission demonstrates highly polished and formalized writing with consistently neutral tone, advanced vocabulary, and concise phrasing, characteristics commonly seen in AI-generated outputs.  
- The structure is globally coherent, with a logical flow and well-segmented paragraphs, but lacks subjective depth, personal context, or evaluative nuance that would indicate a human writer reflecting on the topic.  
- The discourse includes repetitive patterns and clichés associated with general AI output, such as "transformative shift," "enhance data integrity and automation," and reiterative phrasing about "efficiency" and "trust."  
- Local coherence is precise, but appears over-optimized, with few transitions that 

**Improvement Tip:** Strengthen transitions between ideas to improve coherence and flow, particularly when shifting from theoretical concepts to practical examples.  

---

### Overall Rating: Good

>>> 3. Self-Evaluation Rating (1-5):
4
--------------------------------------------------------------------------------
Submission 3 (Ground Truth: Human)
--------------------------------------------------------------------------------

Label: Hybrid

Rationale:  
- **Presence of Personal Context**: The submission begins with a personal perspective from the student ("As a business student fascinated by tech"), a hallmark of human authorship that demonstrates subjectivity and individual context, rather than the objective, detached tone typical of purely AI-generated content.  
- **Structured Analysis and Varied Tone**: The essay alternates between technical explanations (e.g., blockchain’s cryptographic security) and subjective, evaluative reflections (e.g., “garbage in, garbage out,” “Accou

- **Criterion: Structure and Academic Style**  
  - **Rating**: Average  
  - **Evidence**:  
    - The essay opens conversationally (“As a business student fascinated by tech”), which is inconsistent with formal academic tone.  
    - Ideas are clear, but transitions between topics (e.g., from auditing benefits to challenges) are abrupt, reducing the cohesion of the argument.  
    - Some points combine multiple issues (e.g., GAAP-readiness and privacy) without clear separation.  
  - **Improvement Tip**: Adopt a more formal and objective tone throughout. Structure the essay with clear sections (e.g., introduction, blockchain's principles, impacts on auditing, impacts on reporting, challenges, conclusion). Use transition phrases to maintain logical flow.  

---

3) **Overall Rating**: Good  

>>> 3. Self-Evaluation Rating (1-5):
4
--------------------------------------------------------------------------------
Submission 5 (Ground Truth: Hybrid)
---------------------------------------

Provide a more detailed analysis of the challenges, supported by explanations of specific risks and the evolving standards required to mitigate them.  

---

#### Criterion: Use of Real-World Examples/Case Studies  
**Rating:** Average  
**Evidence:**  
- General references to blockchain applications (e.g., invoice settlement, audit transparency) without citing actual companies or scenarios.  
- Lacks case studies from firms or industries currently utilizing blockchain in auditing or financial reporting.  

**Improvement Tip:**  
Include at least one concrete example of a company or industry using blockchain (e.g., IBM’s blockchain solutions in supply chain finance) to strengthen your argument.  

---

#### Criterion: Structure and Academic Style  
**Rating:** Good  
**Evidence:**  
- Arguments are presented in a clear sequence with proper paragraphs.  
- Formal academic tone is consistent, though transitions between ideas could be smoother.  
- Concludes with a thoughtful summary of b

- **Evidence:**  
  - Identifies shared book reading and phonemic games as evidence-based approaches.  
  - Cites relevant research (e.g., "Whitehurst et al., 1994") to support claims about the benefits of interactive questioning and repeated reading.  
- **Improvement Tip:** Critically evaluate each strategy by discussing its limitations or effectiveness in specific contexts, supported by additional research evidence.  

**Criterion: Consideration of Diversity and Inclusion**  
- **Rating:** Good  
- **Evidence:**  
  - Acknowledges the importance of using "bilingual books" and "culturally familiar content" to enhance inclusivity.  
  - Notes that these strategies can "enhance relevance and motivation," indicating an understanding of diverse learners’ needs.  
- **Improvement Tip:** Provide more specific examples of how these strategies can be adapted for learners with varied developmental needs, such as children with disabilities or those in multilingual contexts.  

**Criterion: Res

The submission demonstrates a foundational understanding of early literacy development, with clear references to key skills such as phonological awareness, vocabulary, and print knowledge. However, it does not provide in-depth coverage of developmental milestones, nor does it address the full scope of the assignment, particularly regarding evidence-based approaches, diversity considerations, and research integration. Strengthening these areas would enhance the overall rigor and alignment with the rubric.

### Criteria Feedback

#### Criterion: Understanding of Early Literacy Development  
- **Rating:** Average  
- **Evidence:**  
  - Mentions foundational skills like phonological awareness, vocabulary, and print knowledge.  
  - Provides developmental examples, such as understanding rhymes around age 4 and recognizing letters before school.  
  - General descriptions lack deeper analysis of developmental progression or variability in milestones.  
- **Improvement Tip:** Expand the disc

- The submission shows a formal academic tone and coherence consistent with human writing, particularly in terms of structure and clarity. However, it lacks strong personal context or subjectivity, which might indicate AI assistance.  
- The phrasing and vocabulary (e.g., “key skills such as phonological awareness, vocabulary, and an understanding of print”) mirror AI-like tendencies in their generic phrasing, repetitiveness, and slightly reformulated information that overlaps significantly with prior examples.  
Flags: Repetition of concepts and phrasing across observed patterns in other submissions suggests potential rephrasing from AI or blended use.  

1) **Overall Summary:**
The submission demonstrates a foundational understanding of early literacy skills and developmental milestones, with some focus on key concepts like phonological awareness and print knowledge. However, the response lacks depth in evaluating evidence-based methods, integrating research, and addressing diverse l

- Proper referencing is utilized with citations clearly linked to the ideas presented.  
- Grammar, flow, and organization are unproblematic, enabling readability and engagement.  

**Improvement Tip:** Continue maintaining this strong academic tone and consider adding subheadings or transitions to explicitly structure the analysis.

---

### Overall Rating: Good

>>> 3. Self-Evaluation Rating (1-5):
4
--------------------------------------------------------------------------------
Submission 2 (Ground Truth: AI)
--------------------------------------------------------------------------------

Label: AI  
Rationale:  
- **Point 1: High coherence and specificity reflective of AI-generated text.** The submission demonstrates well-structured sentences, precise examples, and contextually integrated citations, aligning closely with patterns of AI-produced academic content. The use of examples (e.g., relationships, healthcare) appears broad and lacks personal context or unique insight typica

- **Improvement Tip**: Propose tailored mitigation strategies for each bias (e.g., using statistical aids for anchoring or structured decision-making processes for availability bias). Support these suggestions with research-backed evidence and discuss their potential limitations.  

**Criterion: Structure and Academic Writing**  
- **Rating**: Good  
- **Evidence**:  
  - The writing is clear, with no major grammatical errors or informal tone.  
  - References are correctly formatted and integrated, adding credibility to the work.  
- **Improvement Tip**: Focus on creating smoother transitions between concepts. Expand paragraphs to balance depth with clarity, and introduce each bias with a brief overview before diving into examples or theories.  

---

**Overall Rating**: Average

>>> 3. Self-Evaluation Rating (1-5):
3
--------------------------------------------------------------------------------
Submission 4 (Ground Truth: Human)
-----------------------------------------------------

   - **Improvement Tip**: Critically evaluate each strategy, providing specific examples or studies to show their real-world impact on mitigating biases (e.g., controlled experiments or case studies).

4. **Criterion: Structure and Academic Writing**  
   - **Rating**: Needs Improvement  
   - **Evidence**:  
     - Lacks an introduction and conclusion, limiting the flow and coherence of the analysis.  
     - Use of vague phrasing (“based on a lot of reasons”) and grammatical errors detract from clarity.  
     - Reference formatting is correct but insufficiently integrated into the discussion.  
   - **Improvement Tip**: Begin with a clear introduction outlining the purpose and scope of the analysis, and conclude with a concise summary. Proofread for grammar and clarity, and integrate references seamlessly to support arguments.

---

**Overall Rating**: Average 

>>> 3. Self-Evaluation Rating (1-5):
3
--------------------------------------------------------------------------------
Su

     - Lacks exploration of the implications for the cybersecurity landscape in the future.  
   - **Improvement Tip**: Introduce a balanced discussion by addressing challenges (e.g., false sense of security due to over-reliance on AI) or ethical issues (e.g., bias in algorithms). Explore potential future developments critically.  

4. **Criterion: Originality and Perspective**  
   - **Rating**: Needs Improvement  
   - **Evidence**:  
     - The submission is general, with no unique insights or personal reflections offered.  
     - Content largely reiterates well-known, introductory-level knowledge about AI in cybersecurity.  
   - **Improvement Tip**: Add a distinct perspective by connecting the discussion to personal experiences, novel insights, or a unique angle (e.g., how AI could evolve in cybersecurity beyond its current uses).  

5. **Criterion: Clarity and Communication**  
   - **Rating**: Good  
   - **Evidence**:  
     - The response communicates ideas clearly and concis

- Offers a unique perspective on how personal skepticism toward AI evolved.  
- Lacks broader exploration of unique viewpoints or deeper conceptual reflections beyond the anecdote.  
**Improvement Tip:** Build on your perspective by exploring future implications, such as how AI’s role in cybersecurity could evolve or integrate into larger IT frameworks.  

---

**Criterion:** Clarity and Communication  
**Rating:** Good  
**Evidence:**  
- Well-organized and clear narrative structure; easy to follow.  
- Some room for refinement to strengthen professional tone (e.g., the phrase "it’s not about" could be rephrased as "its role is not primarily to…").  
- Limited in scope and somewhat brief for the audience's expectations.  
**Improvement Tip:** Expand your response with additional points or examples while maintaining clarity and logical flow to enhance its impact for an IT audience.  

---

**3) Overall Rating:** Good  

>>> 3. Self-Evaluation Rating (1-5):
4
---------------------------

- **Improvement Tip:** Build on the personal experience by linking it to larger industry perspectives, such as how organizations balance automation and human intervention.  

---

**Criterion: Clarity and Communication**  
- **Rating:** Average  
- **Evidence:**  
  - The response communicates the key message but lacks a clear structure, such as an introduction, body, and conclusion.  
  - It uses conversational phrasing ("we had to tweak the model"), which may be slightly less formal than expected for an IT audience.  
- **Improvement Tip:** Adopt a more formal tone and present ideas in a structured format (e.g., topic sentences, logical progression of points).  

---

### Overall Rating: Good  

>>> 3. Self-Evaluation Rating (1-5):
4
--------------------------------------------------------------------------------
Submission 6 (Ground Truth: Hybrid)
--------------------------------------------------------------------------------

Label: Hybrid

Rationale:
- The narrative contains pers

- **Improvement Tip**: Include examples of specific challenges or scenarios for each phase (e.g., how takt time influenced equipment choice) to provide a more thorough, real-world perspective.  

**Criterion**: Technical Acumen  
- **Rating**: Excellent  
- **Evidence**:  
  - Terminology such as "cycle time," "OEE," and "line balancing strategies" is used accurately.  
  - Engineering-specific methodologies like CAD-based layout optimization and FMEA for hazard analysis are included.  
  - Advanced concepts like ergonomic design and lean practices are integrated.  
- **Improvement Tip**: While terminology is accurate, briefly defining less common terms (e.g., 5S audits) could help ensure clarity for a broader audience.  

**Criterion**: Clarity & Structure  
- **Rating**: Good  
- **Evidence**:  
  - The explanation follows a logical order from design validation to continuous improvement.  
  - Sections are cohesively linked (e.g., tools for workflow mapping are tied to specific outco

- The structure is globally coherent, but the writing occasionally sacrifices detail for clarity, as seen in several straightforward, repetitive sentences. This balance of simplicity with logical flow is characteristic of AI models tuned for readability.  

Flags:  
- Repetition and generic phrasing in several points (e.g., "how they fit together," "makes changes to fix it").  
- Lacks deeper academic elaboration, use of domain-specific terms, and nuanced insights often seen in human-dominated work.

**Actionable Feedback Aligned to Rubric**

---

### 1) **Overall Summary**  
The submission provides a straightforward overview of the production line setup process, touching on design, equipment placement, efficiency, and safety. However, it lacks depth in explaining the steps and reasoning behind decisions, illustrating areas for improvement in technical detail and analytical rigor.

---

### 2) **Criteria Feedback**

#### **Criterion: Scope & Comprehensiveness**  
**Rating:** Average  


- Minor improvements in formal phrasing (e.g., "talk with the designers" could be rephrased as "collaborates with design teams") would enhance readability and professionalism.  
**Improvement Tip:** Refine language to integrate professional tone and terminology, and review for occasional informal phrasing.  

---

**Overall Rating:** Average  

>>> 3. Self-Evaluation Rating (1-5):
3
--------------------------------------------------------------------------------
Submission 5 (Ground Truth: Hybrid)
--------------------------------------------------------------------------------

Label: AI

Rationale:
- **Discourse features**: The submission is highly formal, technical, and generalized with no personal context or subjective insights, which aligns strongly with AI-generated content. It reads as an optimized explanation without human idiosyncrasies or nuances.
- **Style consistency**: The text is uniformly polished and detailed throughout. It uses advanced terminology such as "Gantt charts

- Effectively uses foundational engineering principles to describe logical layout grouping and performance evaluation (e.g., testing for bottlenecks).  
**Improvement Tip:** Consider briefly touching on advanced tools (e.g., CNC programming optimization or Lean Six Sigma integration) to demonstrate even deeper technical breadth.

---

**Criterion: Clarity & Structure**  
**Rating:** Excellent  
**Evidence:**  
- The explanation is logically sequenced, moving fluidly from design to testing phases.  
- Ideas are cohesively linked, with each phase of the production setup building on the previous one.  
**Improvement Tip:** No major issues; maintain this level of clear organization in future work.

---

**Criterion: Depth of Analysis**  
**Rating:** Good  
**Evidence:**  
- Provides considerations such as selecting machines based on “precision, capacity, and compatibility” and embedding safety measures into layouts.  
- Lacks specific examples or deeper analysis of key design choices, such

In [35]:
# all submission confusion matrix
print(overall_cm)

Predicted     AI  Human  Hybrid  Unknown
Ground Truth                            
AI             5      0       5        0
Human          1      1       7        1
Hybrid         5      0       5        0
