# Claude 3.7 Sonnet - AI vs Human Classifier
This notebook uses the Claude API to classify whether each text in a dataset was written by a human or generated by AI.

In [None]:
# Make sure the `anthropic` package is installed in the kernel's environment
# %pip install anthropic

In [None]:
# Standard imports
import os
import csv
import anthropic
import pandas as pd

In [None]:
# 🔐 Read the API key from the environment instead of hardcoding it in the
# notebook, so the secret never ends up in a saved or committed file.
# CLAUDE_KEY is checked first, then ANTHROPIC_API_KEY (the variable the
# Anthropic SDK itself looks for). If neither is set the result is an empty
# string. Surrounding whitespace is stripped because it is easy to pick up
# when the value is copy-pasted into a shell or .env file.
api_key = (os.environ.get("CLAUDE_KEY") or os.environ.get("ANTHROPIC_API_KEY") or "").strip()

In [None]:
# Fail fast when the key is missing/empty or lacks the expected "sk-ant-" prefix
key_looks_valid = bool(api_key) and api_key.startswith("sk-ant-")
if not key_looks_valid:
    raise ValueError("❌ CLAUDE_KEY inválida. A chave tem de começar por sk-ant-.")

In [None]:
# Build the Anthropic SDK client, authenticated with the key set above;
# the request cell further down calls `client.messages.create(...)` on it.
client = anthropic.Anthropic(
    api_key=api_key,
)

In [None]:
# Base classification prompt sent to Claude.
# It defines the task (label each text as "Human" or "AI"), the required
# output format (one `ID;Label` line per record, no header, no explanations),
# and a small input/output example. A later cell appends the dataset to it.
# NOTE: everything between the triple quotes is sent to the model as-is,
# including the trailing spaces on some lines; `\"` yields a plain `"` in the
# resulting string.
prompt = """
You are an advanced AI content detection system, designed to distinguish between texts written by humans and those generated by artificial intelligence.  
You will act as an automated evaluator similar to tools like GPTZero, analyzing the linguistic patterns, structure, and writing style of each passage to determine its most likely origin: Human or AI.

Instructions:
- Human: if the text is written by a human.
- AI: if the text is generated by an AI.
- Ignore the ID when analyzing the text.
- Output strictly in CSV format: ID;Label
- Use exactly \"Human\" or \"AI\" as labels.
- No explanations. No headers. No extra formatting.

-Example Input:
ID;Text  
E0-1;The use of statistical tools in climate modeling has evolved significantly over time.  
E0-2;Unlock the power of the universe with our AI-driven magic story generator.
-Example Output:
E0-1;Human  
E0-2;AI
"""

In [None]:
# Load the dataset (tab-separated, with a header row: ID<TAB>Text) and append
# it to the prompt as one "ID;Text" line per record.
DATASET_SECTION_HEADER = "\n### Input Dataset:\n"

dataset_lines = []
# newline='' lets the csv module do its own line-ending handling, as the
# csv documentation recommends for files passed to csv.reader.
with open("data/submission3_inputs.csv", mode='r', encoding='utf-8', newline='') as file:
    reader = csv.reader(file, delimiter='\t')
    next(reader, None)  # Skip header; the default avoids StopIteration on an empty file
    for row in reader:
        if not row:
            continue  # csv.reader yields an empty list for blank lines
        if len(row) < 2:
            # Report the offending line instead of failing with a bare IndexError
            raise ValueError(
                f"Malformed row at line {reader.line_num}: "
                f"expected ID and Text columns, got {row!r}"
            )
        dataset_lines.append(f"{row[0]};{row[1]}\n")

# Join once at the end rather than growing a string inside the loop
fileContent = "".join(dataset_lines)

# Drop any dataset section left by a previous run of this cell before adding
# the fresh one, so re-running the cell never sends the dataset twice.
prompt = prompt.split(DATASET_SECTION_HEADER, 1)[0] + DATASET_SECTION_HEADER + fileContent

In [None]:
# Send the full prompt to Claude in a single user message.
# temperature=0.0 keeps the labelling as repeatable as the API allows.
MAX_OUTPUT_TOKENS = 4000

message = client.messages.create(
    model="claude-3-7-sonnet-20250219",
    max_tokens=MAX_OUTPUT_TOKENS,
    temperature=0.0,
    top_p=1,
    messages=[{"role": "user", "content": prompt}]
)

# When the reply hits the output-token budget the API stops mid-answer and
# reports stop_reason == "max_tokens". The trailing rows would then be missing
# from the predictions, so make that visible instead of failing silently.
if message.stop_reason == "max_tokens":
    print(
        f"⚠️ Response was truncated at max_tokens={MAX_OUTPUT_TOKENS}; "
        "some IDs may be missing from the predictions. "
        "Raise the limit or send the dataset in smaller batches."
    )

In [None]:
# Parse and structure the model response.
# Each useful line is expected to look like "ID;Label". Anything else the
# model may emit (an echoed "ID;Label" header, stray commentary containing a
# semicolon, labels outside the two allowed values) is skipped so it cannot
# end up as a bogus row in the output.
CANONICAL_LABELS = {"human": "Human", "ai": "AI"}

results = message.content[0].text.strip().split('\n')
parsed = []
for row in results:
    if ';' not in row:
        continue
    # Strip every field: the prompt's own example output has trailing spaces,
    # so the model may reproduce them (and '\r' on CRLF replies).
    fields = [field.strip() for field in row.split(';')]
    record_id = fields[0]
    # Normalise casing ("human", "AI ", ...) to the exact labels required
    label = CANONICAL_LABELS.get(fields[1].lower())
    if not record_id or label is None:
        continue
    parsed.append([record_id, label])

ids = [row[0] for row in parsed]
labels = [row[1] for row in parsed]
output_df = pd.DataFrame({"ID": ids, "Label": labels})

In [None]:
# Write the predictions to disk as tab-separated values, without the
# DataFrame index, then confirm the destination to the user.
output_path = "submissao3-grupo5-s1.csv"
output_df.to_csv(output_path, sep="\t", index=False)
print(f"✅ Resultados guardados com sucesso em '{output_path}'")