# Setup

In [None]:
# Mount Google Drive
# Makes Drive contents reachable under /content/drive, which is the root of the
# dataset paths used in the following cells.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# View dataset
# Switch the working directory to the shared dataset folder. Later cells that
# open files by bare filename (e.g. "text_requests.jsonl") therefore read and
# write inside this folder.
%cd "/content/drive/Shareddrives/CS152 Project/dataset/text"
# Same folder as an absolute prefix (note the trailing slash): file names are
# appended to it by plain string concatenation further down.
MY_FOLDER_PATH = "/content/drive/Shareddrives/CS152 Project/dataset/text/"
# List the folder contents as a quick check that the mount and path are right.
%ls

In [None]:
# Packages to Install
!pip install -q google-generativeai

In [None]:
# Imports
import pandas as pd
import google.generativeai as genai
import json
import re
from tqdm import tqdm
from google.genai import Client
from google.genai.types import CreateBatchJobConfig
from datetime import datetime
import fsspec
import time
from google.colab import auth

In [None]:
# Auth
# Sign the current Colab user in to Google.
# NOTE(review): presumably these are the credentials the Vertex AI client,
# gsutil and the gs:// reads further down rely on — confirm.
auth.authenticate_user()

In [None]:
# Configure API Key without ever storing it in the notebook source.
# The key is taken from the GEMINI_API_KEY environment variable when it is set;
# otherwise you are asked for it with a hidden prompt. Contact Andrea for access
# if you need the key.
import os
from getpass import getpass

API_KEY = os.environ.get("GEMINI_API_KEY") or getpass("Gemini API key: ")
genai.configure(api_key=API_KEY)

# Basic Text Detection

In [None]:
# AI Text Detection PROMPT
# Instruction text that is prepended to every caption sent to the model.
# The "Classification:", "Confidence Score:" and "Brief Justification:" labels
# requested in the output format are the anchors the response-parsing cell
# searches for, so keep the two in sync if this wording changes.

PROMPT = """
You're an expert in detecting AI-generated content, especially social media captions or prompts used to generate images with tools like DALL·E, Midjourney, Imagen, or Stable Diffusion.

Analyze the caption below and determine if it contains any of the following:
- A prompt-like structure (e.g., "portrait of a girl in ultra HD", "3D render of...")
- Mentions of generative AI tools or hashtags (e.g., #dalle2, #midjourney, #stablediffusion)
- Language that appears to be describing an AI-generated image (not a personal experience)


---

**Output Format**

Classification: [0 / 1 / Don't know]
- 0 = Human-written, non-AI-related caption
- 1 = Likely associated with AI-generated content
- Don't know = Caption is ambiguous or lacks clear indicators

Confidence Score: [X]%
- Provide a percentage (0-100%) indicating your confidence in the binary classification.

Brief Justification:
- In 2–3 concise sentences, explain the most significant reasons for your classification. Focus on structural, linguistic, or hashtag clues. Do not just restate the task description.

---

**Important Guidance**:
- If confidence is low or evidence is unclear, prefer “Don't know”.
- Weigh multiple subtle AI indicators more strongly than a single obvious one.
- Be cautious: some real captions may use odd phrasing without being AI-related.
- Prioritize linguistic patterns, keyword usage, and formatting common in AI prompts.

"""

# Batch Job Input (df)

In [None]:
# Read the caption table, discard rows that have no caption body, and collect
# the remaining bodies (in file order) as the list of texts to classify
csv_path = f"{MY_FOLDER_PATH}df_text_for_llm.csv"
df = pd.read_csv(csv_path).dropna(subset=["body"])
captions = df["body"].tolist()

In [None]:
def _build_request(caption):
    """Wrap one caption in a single-turn user request for the batch job.

    The message text is the shared PROMPT followed by the caption enclosed in
    triple double-quotes; generation uses temperature 0.4.
    """
    message = f"{PROMPT}\n\nCaption:\n\"\"\"{caption}\"\"\""
    user_turn = {"role": "user", "parts": [{"text": message}]}
    return {
        "request": {
            "contents": [user_turn],
            "generationConfig": {"temperature": 0.4},
        }
    }


# One request record per caption, in the same order as `captions`
records = list(map(_build_request, captions))

# Serialise as JSON Lines: one JSON object per line
with open("text_requests.jsonl", "w", encoding="utf-8") as fout:
    for rec in records:
        fout.write(f"{json.dumps(rec)}\n")



In [None]:
# Upload the request file written by the previous cell to the cs152_text
# Cloud Storage bucket, where the batch job cell reads its input
# (gs://cs152_text/text_requests.jsonl).
!gsutil -m cp text_requests.jsonl gs://cs152_text/

In [None]:
# Batch job settings: which model to run, where the uploaded requests live,
# and the Cloud Storage prefix the predictions are written under
MODEL_ID = "gemini-2.0-flash-001"
INPUT_DATA = "gs://cs152_text/text_requests.jsonl"
BUCKET_URI = "gs://cs152_text/text_output"

# Gen AI client in Vertex AI mode for the project/region hosting the job
client = Client(
    vertexai=True,
    project="gen-lang-client-0780203024",
    location="us-central1",
)

# Submit the batch job; the returned handle is polled in the next cell
batch_config = CreateBatchJobConfig(dest=BUCKET_URI)
gcs_batch_job = client.batches.create(model=MODEL_ID, src=INPUT_DATA, config=batch_config)

In [None]:
# Wait for the batch job to leave every "still in progress" state.
# Besides pending/running, a job can also be queued, cancelling or updating;
# treating only the first two as unfinished would let a freshly queued job fall
# straight through and be reported as finished.
IN_PROGRESS_JOB_STATES = (
    "JOB_STATE_QUEUED",
    "JOB_STATE_PENDING",
    "JOB_STATE_RUNNING",
    "JOB_STATE_CANCELLING",
    "JOB_STATE_UPDATING",
)
POLL_INTERVAL_SECONDS = 10

while gcs_batch_job.state in IN_PROGRESS_JOB_STATES:
    print(f"Waiting... current job state: {gcs_batch_job.state}")
    time.sleep(POLL_INTERVAL_SECONDS)
    # Re-fetch the job to get its current state
    gcs_batch_job = client.batches.get(name=gcs_batch_job.name)

print(f"Job finished with state: {gcs_batch_job.state}")

# Make a non-successful end state hard to miss: the cells below would otherwise
# go on to read whatever predictions already exist in the bucket.
if gcs_batch_job.state != "JOB_STATE_SUCCEEDED":
    print(f"WARNING: batch job did not succeed; error details: {getattr(gcs_batch_job, 'error', None)}")

In [None]:
# Testing
fs = fsspec.filesystem("gcs")

# Each job run writes a predictions.jsonl inside its own sub-folder of
# BUCKET_URI. Sort explicitly so that "[-1]" does not depend on the order the
# listing happens to come back in.
# NOTE(review): picking the lexicographically last folder presumably selects
# the most recent run (folder names look timestamped) — confirm.
file_paths = sorted(fs.glob(f"{BUCKET_URI}/*/predictions.jsonl"))
if not file_paths:
    raise FileNotFoundError(
        f"No predictions.jsonl found under {BUCKET_URI}; has the batch job finished successfully?"
    )
df_out = pd.read_json(f"gs://{file_paths[-1]}", lines=True)

def extract_text_response(resp):
    """Return the text of the first part of the first candidate in a response.

    Args:
        resp: One entry of the predictions ``response`` column, expected to be
            a nested structure shaped like
            ``{'candidates': [{'content': {'parts': [{'text': ...}]}}]}``.

    Returns:
        The text string, or None when the response is missing (e.g. None/NaN)
        or does not have that shape (no candidates, no parts, missing keys).
    """
    try:
        return resp['candidates'][0]['content']['parts'][0]['text']
    except (KeyError, IndexError, TypeError):
        # Only "wrong shape / missing value" errors mean "no usable text".
        # A bare except would also swallow KeyboardInterrupt and SystemExit,
        # making a long .apply() impossible to interrupt cleanly.
        return None

# Pull the model's reply text out of each raw response (None when unreadable)
df_out['response_text'] = df_out['response'].apply(extract_text_response)

# Patterns for the three fields requested in the prompt's output format.
# They accept the plain form ("Classification: 1", "Confidence Score: 85%"),
# the bracketed form the prompt itself shows ("[Don't know]", "[85]%"), and a
# markdown-bold label ("**Classification:** 1"). Brackets sit outside the
# capture groups, so no post-hoc stripping is needed.
CLASSIFICATION_PATTERN = r"Classification:\**\s*\[?\s*(\d+|Don['’]t know)"
CONFIDENCE_PATTERN = r"Confidence Score:\**\s*\[?\s*(\d+(?:\.\d+)?)\s*\]?\s*%"
JUSTIFICATION_PATTERN = r"Brief Justification:\**\s*(.*)"

df_out['Classification'] = (
    df_out['response_text']
    .str.extract(CLASSIFICATION_PATTERN, expand=False)
    # Normalise a typographic apostrophe so the value is always "Don't know"
    .str.replace("’", "'", regex=False)
)
df_out['Confidence'] = df_out['response_text'].str.extract(CONFIDENCE_PATTERN, expand=False)
# DOTALL lets the justification run across line breaks to the end of the reply
df_out['Justification'] = (
    df_out['response_text']
    .str.extract(JUSTIFICATION_PATTERN, flags=re.DOTALL, expand=False)
    .str.strip()
)

# Write next to the dataset using the same absolute prefix the reload cell
# reads from, instead of relying on the current working directory.
df_out[['Classification', 'Confidence', 'Justification']].to_csv(MY_FOLDER_PATH + "text_with_gemini.csv", index=False)

# True Positives

In [None]:
# Reload both files to ensure clean merge
text_csv_path = MY_FOLDER_PATH + "df_text_for_llm.csv"
gemini_csv_path = MY_FOLDER_PATH + "text_with_gemini.csv"

# Same filtering as when the requests were built: rows without a body were
# never sent to the model.
df_text = pd.read_csv(text_csv_path).dropna(subset=["body"])
# Read Classification as text. If no row happens to contain "Don't know",
# pandas would otherwise infer a numeric column, and the string-keyed mapping
# ("0"/"1"/"Don't know") applied later would turn every label into NaN.
df_gemini = pd.read_csv(gemini_csv_path, dtype={"Classification": str})

# Both frames get a fresh 0..n-1 index so they can be paired row by row.
# NOTE(review): pairing by position assumes the predictions file lists
# responses in the same order as the requests — confirm.
df_text = df_text.reset_index(drop=True)
df_gemini = df_gemini.reset_index(drop=True)

assert len(df_text) == len(df_gemini), "Mismatch in rows between Gemini results and input text."

In [None]:
# Build the merged table as a new frame (df_text itself is left untouched),
# attaching Gemini's three output fields row by row; confidence is coerced to
# a number, with unparseable values becoming NaN
df_merged = df_text.assign(
    gemini_classification=df_gemini["Classification"],
    gemini_confidence=pd.to_numeric(df_gemini["Confidence"], errors="coerce"),
    gemini_justification=df_gemini["Justification"],
)

# Preview the caption, its source label and Gemini's verdict side by side
preview_columns = ["body", "ai_service", "gemini_classification", "gemini_confidence", "gemini_justification"]
df_merged[preview_columns].head()

In [None]:
# Ground-truth label: a row counts as AI (1) when it has an ai_service value
# and as human (0) when that field is missing
has_ai_service = ~df_merged["ai_service"].isna()
df_merged["is_ai"] = has_ai_service.astype(int)

In [None]:
# Translate Gemini's textual verdicts into numbers. "Don't know" gets the
# sentinel 0.5, and any verdict not in this table (including missing) is NaN
LABEL_BY_VERDICT = {"0": 0, "1": 1, "Don't know": 0.5}
df_merged["gemini_label"] = df_merged["gemini_classification"].map(LABEL_BY_VERDICT)

# Evaluation subset: keep only rows with a definite verdict, i.e. drop both
# the "Don't know" sentinel and rows without a usable label
is_undecided = df_merged["gemini_label"] == 0.5
is_unlabelled = df_merged["gemini_label"].isna()
df_eval = df_merged[~(is_undecided | is_unlabelled)]

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Fixed label set and display names. Passing the labels explicitly keeps the
# report and the 2x2 matrix well-formed even if one class never occurs in the
# evaluated rows (otherwise the report raises on the target_names mismatch and
# the matrix shrinks below the tick labels).
CLASS_IDS = [0, 1]
CLASS_NAMES = ["Human", "AI"]

# Ground truth vs. Gemini's definite verdicts (df_eval has no "Don't know" rows)
y_true = df_eval["is_ai"]
y_pred = df_eval["gemini_label"].astype(int)

# Print classification report
print("=== Gemini vs. Ground Truth ===")
print(classification_report(y_true, y_pred, labels=CLASS_IDS, target_names=CLASS_NAMES))

# Plot confusion matrix (rows = actual, columns = predicted)
cm = confusion_matrix(y_true, y_pred, labels=CLASS_IDS)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Gemini Text Classification Confusion Matrix")
plt.show()

In [None]:
# Persist the full merged table (captions, ground truth and Gemini outputs)
# next to the source dataset, without the row index
merged_csv_path = f"{MY_FOLDER_PATH}gemini_labeled_dataset.csv"
df_merged.to_csv(merged_csv_path, index=False)