# AI Multimodal Content Moderation Agent

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate the full path of every file mounted under the read-only
# Kaggle input directory, then print them one per line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# ==============================================================================
# Section 1: Environment Setup (Installations, Imports, Device Check)
# ==============================================================================
# Run this cell FIRST to prepare the environment.

# --- 1.1: Install required libraries ---

print("[Setup] Installing required libraries...")

# Using -q for quieter installation.
# faiss-cpu works on both CPU and GPU instances in Kaggle.
# bitsandbytes & accelerate help with loading/running larger models efficiently.
!pip install -q transformers sentence-transformers faiss-cpu accelerate bitsandbytes librosa soundfile Pillow torch
print("[Setup] Libraries installation attempted.")

# Uncomment the line below if you specifically need to interact with the OpenAI API and have set up secrets
# !pip install -q openai

# --- 1.2: Import libraries ---

print("\n[Setup] Importing libraries...")
import os
import torch
import numpy as np
import faiss
import librosa
import soundfile as sf
from PIL import Image
import random # For mocking AIGC detection
import json # For structured output examples
from pathlib import Path
import gc # Garbage collector

# Import transformers library itself and specific components
import transformers
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import accelerate
import bitsandbytes

# Import necessary for OpenAI API calls (if uncommented later)
# import openai
# from kaggle_secrets import UserSecretsClient

print("[Setup] Libraries imported.")

# --- 1.3: Check Environment and Set Device ---
# Picks the first CUDA device when one is visible to PyTorch, otherwise falls
# back to the CPU. The resulting `device` string is consumed by later cells.
print("\n[Setup] Checking environment and setting device...")

print(f"   PyTorch Version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
device = "cuda:0" if cuda_ready else "cpu"
print("   CUDA Available: " + ("Yes" if cuda_ready else "No"))
print(f"   Setting device: {device}")

if not cuda_ready:
    print("   WARNING: GPU not detected or configured.")
    print("      >>> Ensure 'Accelerator' in Kaggle's settings is set to a GPU (e.g., T4 x2) <<<")
    print("      >>> You MUST RESTART the session after changing the accelerator <<<")
else:
    # The GPU details are informational only; a failure here must not abort setup.
    try:
        print(f"      CUDA Version (reported by PyTorch): {torch.version.cuda}")
        cudnn_backend = torch.backends.cudnn
        if not cudnn_backend.is_available():
            print("      cuDNN backend not available.")
        else:
            print(f"      cuDNN Version: {cudnn_backend.version()}")
            print(f"      cuDNN Enabled: {cudnn_backend.enabled}")
        print(f"      GPU Device Name: {torch.cuda.get_device_name(0)}")
        total_gib = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        print(f"      GPU Memory Total: {total_gib:.2f} GB")
    except Exception as e:
        print(f"      Could not retrieve all GPU details: {e}")

# --- 1.4: Final Verification ---
# Echo the library version and the chosen device so the cell output records
# the exact environment the rest of the notebook ran in.
verification_lines = [
    "\n--- [Setup] Verification ---",
    f"   Transformers Version: {transformers.__version__}",
    f"   Device variable set to: '{device}'",
    "--- [Setup] Environment setup complete ---",
]
print("\n".join(verification_lines))

[Setup] Installing required libraries...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m8.4 MB/s[0m eta [36m0:00:0

2025-04-19 18:07:16.619531: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745086036.938678      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745086036.995720      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


[Setup] Libraries imported.

[Setup] Checking environment and setting device...
   PyTorch Version: 2.5.1+cu124
   CUDA Available: Yes
   Setting device: cuda:0
      CUDA Version (reported by PyTorch): 12.4
      cuDNN Version: 90100
      cuDNN Enabled: True
      GPU Device Name: Tesla T4
      GPU Memory Total: 14.74 GB

--- [Setup] Verification ---
   Transformers Version: 4.51.1
   Device variable set to: 'cuda:0'
--- [Setup] Environment setup complete ---


# AI Multimodal Content Moderation Agent

This notebook simulates the core concepts of an advanced AI agent designed for content moderation across multiple modalities (text, image, audio), leveraging key Generative AI capabilities. 

It explicitly addresses the novelty, impact, and suitability of Gen AI for this challenging use case, structured for a competition submission.

# 1. Introduction: The Content Moderation Crisis & The AI Agent Solution

## 1.1 The Problem: An Overwhelming Challenge
Online platforms face an immense challenge moderating user-generated content due to:
* **Volume:** Billions of daily uploads make manual review impossible.
* **Multimodality:** Harmful content spans text, images, videos, and audio, often combined.
* **Complexity & Evolution:** Harmful content adapts rapidly (slang, coded language, deepfakes, nuanced misinformation).
* **Consistency & Speed:** Manual moderation can be inconsistent and slow.
* **Legal & Brand Risk:** Ineffective moderation leads to fines, loss of trust, and brand damage.

## 1.2 The Solution: An AI Multimodal Content Moderation Agent
This notebook proposes and simulates an AI-powered Multimodal Content Moderation Agent designed for scalable, fast, and context-aware moderation using advanced AI.

## 1.3 Use Case: Impact, Novelty & Innovation
**Impact:** This use case is highly impactful, directly addressing major societal issues (hate speech, misinformation, CSAM), enhancing user safety, protecting platform integrity, and aiding regulatory compliance for billions of users globally.

**Novelty & Innovation:** This agent concept innovates by:
* **True Multimodal Integration:** Analyzing the interplay between text, image, and audio (simulated).
* **Context-Awareness via RAG:** Uniquely using Retrieval-Augmented Generation (RAG) to consult platform policies and historical context during decision-making.
* **Rapid Adaptation via Few-Shot Learning:** Incorporating few-shot prompting to quickly adapt to new harmful trends without full retraining.
* **Agentic Orchestration:** Using function calling (simulated via Python functions) to manage analysis tools and actions.

**Creativity:** The creative aspect lies in synthesizing these Gen AI techniques specifically to overcome limitations of prior moderation methods.

## 1.4 Suitability of Generative AI Capabilities
The chosen Gen AI capabilities are extremely well-suited:
* **Multimodal Understanding (Image, Audio, Document):** Essential for analyzing diverse content formats.
* **Embeddings & Vector Search:** Foundation for semantic understanding needed for RAG.
* **RAG (Retrieval Augmented Generation) & Grounding:** Crucial for providing policy context and evidence (grounding) for decisions, improving accuracy and explainability.
* **Few-Shot Prompting:** Ideal for adapting to dynamic harmful content (new slang, spam tactics).
* **Function Calling & Agents:** Perfect for orchestrating the complex workflow (invoking analysis, RAG, actions), providing modularity.
* **Structured Output:** Necessary for logging, reporting, and reliable decision communication.

## 1.5 Notebook Goal & Limitations
* **Focus:** Simulate the decision-making flow using key Gen AI components within Kaggle's environment.
* **Limitations:** Uses relatively small models for feasibility. Simulates actions and AIGC detection. Full video analysis is omitted. Relies on user-provided sample files in Kaggle Datasets.

# 2. Simulate Input Data & Setup Paths

In a real system, content streams in. Here, we define sample text data directly and specify paths to sample image and audio files that you need to upload to Kaggle Datasets.

**Action Required:**
* Create a Kaggle Dataset (e.g., image-contentmoderation) and upload your sample image file (e.g., IMGS3.jpg).
* Create another Kaggle Dataset (e.g., aud-contentmoderation) and upload your sample audio file (e.g., AUDS1.mp3).
* Update the image_dataset_slug, sample_image_filename, audio_dataset_slug, and sample_audio_filename variables in the next code cell to match your dataset names and filenames.


In [3]:
# ==============================================================================
# Section 2: Multimodal Analysis (Load Data, Analyze, Store Results)
# ==============================================================================
# This section loads sample data, performs analysis on each modality using
# pre-trained models, stores the results, and cleans up model resources.

section2_rule = "=" * 60
print(f"\n{section2_rule}")
print("Section 2: Starting Multimodal Analysis & Data Loading")
print(f"{section2_rule}\n")

# --- 2.1: Define Sample Text Data ---
# Each row is (comment id, comment text); the rows are expanded into the
# {"id": ..., "content": ...} records that the analysis steps iterate over.
_sample_comment_rows = (
    ("txt001", "This video is amazing, thanks for sharing!"),
    ("txt002", "Go back where you came from, you don't belong here! #HateSpeechExample"),
    ("txt003", "Check out my profile for FREE stuff >>> my-spam-link.com"),
    ("txt004", "The election was clearly rigged, watch this to see the proof."),
    ("txt005", "I disagree with the points made in this video, but the discussion is interesting."),
)
sample_comments = [
    {"id": comment_id, "content": comment_text}
    for comment_id, comment_text in _sample_comment_rows
]
print("--- 2.1: Sample text data defined.")

# --- 2.2: Define and Verify Paths to User-Uploaded Image/Audio Files ---
# *** IMPORTANT: Replace values below with YOUR dataset slugs and filenames ***
image_dataset_slug = "image-contentmoderation" # <-- REPLACE with your image dataset slug
sample_image_filename = "IMGS3.jpg"          # <-- REPLACE with your image filename

audio_dataset_slug = "aud-contentmoderation"   # <-- REPLACE with your audio dataset slug
sample_audio_filename = "AUDS1.mp3"          # <-- REPLACE with your audio filename
# *** --- End of user replacement section --- ***

base_input_dir = "/kaggle/input"


def find_sample_file(dataset_slug, filename, kind, input_dir="/kaggle/input"):
    """Locate one user-uploaded sample file inside a dataset folder.

    Args:
        dataset_slug: Name of the dataset directory under ``input_dir``.
        filename: File name expected inside that dataset directory.
        kind: Modality label used in the log messages (e.g. "image", "audio").
        input_dir: Root directory that contains the dataset directories.

    Returns:
        A ``(candidate_path, found_path)`` tuple. ``candidate_path`` is always
        the joined path; ``found_path`` is the same path when it is an existing
        regular file and ``None`` otherwise (a warning is printed in that case).
    """
    candidate_path = os.path.join(input_dir, dataset_slug, filename)
    # isfile() is already False for a missing path or a directory, so a
    # separate exists() check adds nothing.
    if os.path.isfile(candidate_path):
        print(f"--- 2.2: Found sample {kind} file: {candidate_path}")
        return candidate_path, candidate_path
    print(f"--- 2.2: WARNING: Sample {kind} file NOT FOUND at '{candidate_path}'.")
    print(f"       Check dataset slug ('{dataset_slug}') and filename ('{filename}'). {kind.capitalize()} analysis will be skipped.")
    return candidate_path, None


# Resolve both modalities through the same helper so their checks and
# messages cannot drift apart. sample_*_path stays None when the file is
# missing, which makes the later analysis steps skip that modality.
potential_image_path, sample_image_path = find_sample_file(
    image_dataset_slug, sample_image_filename, "image", base_input_dir
)
potential_audio_path, sample_audio_path = find_sample_file(
    audio_dataset_slug, sample_audio_filename, "audio", base_input_dir
)

# --- 2.3: Initialize Dictionary for Analysis Results ---
# "text" is keyed by comment id and filled in by the text-analysis step; the
# image/audio entries are single records because one sample file is used each.
all_analysis_results = {
    "text": {},
    "image": {"path": sample_image_path, "analysis": None, "error": None},
    "audio": {"path": sample_audio_path, "transcript": None, "analysis": None, "error": None}
}
print("--- 2.3: Initialized dictionary to store analysis results.")

# ------------------------------------------------------------------------------
# --- 2.4 Text Analysis (Toxicity) ---
# ------------------------------------------------------------------------------
# Scores every sample comment with the 'unitary/toxic-bert' classifier and
# stores the result under all_analysis_results["text"][<comment id>].
# Failures are isolated per comment so one bad input cannot discard the
# results of the others.
print("\n--- 2.4: Text Analysis (Toxicity) ---")
text_classifier = None # Initialize variable outside try block
try:
    text_classifier = pipeline("text-classification",
                               model="unitary/toxic-bert",
                               device=device)
    print(f"Text classifier loaded on device: {text_classifier.device}")

    for comment in sample_comments:
        print(f"  Analyzing text ID: {comment['id']}")
        try:
            # truncation=True keeps over-long comments within the model's
            # maximum sequence length instead of raising inside the model.
            result = text_classifier(comment['content'], truncation=True)
            comment_record = {
                "content": comment['content'],
                "analysis": result # result is typically a list e.g., [{'label': 'toxic', 'score': 0.99}]
            }
        except Exception as e:
            print(f"  ERROR analyzing text ID {comment['id']}: {e}")
            comment_record = {"content": comment['content'], "analysis": None, "error": str(e)}
        all_analysis_results["text"][comment['id']] = comment_record
    print("Text analysis complete.")

except Exception as e:
    # Reached when the classifier itself could not be loaded (or another
    # unexpected failure outside the per-comment handling occurred).
    print(f"ERROR during Text Analysis setup or execution: {e}")
    # Record the failure for every comment that has no entry yet
    for comment in sample_comments:
        if comment['id'] not in all_analysis_results["text"]:
            all_analysis_results["text"][comment['id']] = {"content": comment['content'], "analysis": None, "error": str(e)}

finally:
    # Clean up GPU memory: drop the model weights first, then the pipeline.
    if text_classifier is not None and hasattr(text_classifier, 'model'):
        del text_classifier.model
    # Rebind to None (rather than deleting the name) so a later reference
    # fails with a clear None check instead of a NameError.
    text_classifier = None
    gc.collect() # Explicitly call garbage collector
    if device != 'cpu': torch.cuda.empty_cache()
    print("Text classifier resources released.")

# ------------------------------------------------------------------------------
# --- 2.5 Image Analysis (Zero-Shot Classification) ---
# ------------------------------------------------------------------------------
# Classifies the sample image against a fixed set of moderation-relevant labels
# with CLIP and stores the ranked label list (or the error text) under
# all_analysis_results["image"]. Skipped when no sample image was found.
print("\n--- 2.5: Image Analysis (Zero-Shot Classification) ---")
image_classifier = None # Initialize variable
image = None
if sample_image_path:
    try:
        print(f"Processing image file: {sample_image_path}")
        image_classifier = pipeline("zero-shot-image-classification",
                                    model="openai/clip-vit-base-patch32",
                                    device=device)
        print(f"Image classifier loaded on device: {image_classifier.device}")

        candidate_labels = ["hate symbol", "weapon", "graphic violence", "nudity", "neutral object", "person", "text", "advertisement", "safe content"]
        # The context manager closes the underlying file handle as soon as
        # classification is done, even if the classifier raises.
        with Image.open(sample_image_path) as image:
            print(f"  Classifying image against labels: {candidate_labels}")
            result = image_classifier(image, candidate_labels=candidate_labels)
        all_analysis_results["image"]["analysis"] = result # result is list of dicts e.g. [{'score': 0.8, 'label': 'neutral object'}, ...]
        print(f"Image Analysis Results (Top match): {result[0]}")

    except Exception as e:
        print(f"ERROR processing image: {e}")
        all_analysis_results["image"]["error"] = str(e) # Store error info

    finally:
        # Clean up GPU memory: drop the model weights first, then the pipeline.
        if image_classifier is not None and hasattr(image_classifier, 'model'):
            del image_classifier.model
        image_classifier = None
        image = None # Drop the reference to the (already closed) PIL image
        gc.collect()
        if device != 'cpu': torch.cuda.empty_cache()
        print("Image classifier resources released.")
else:
    print("Skipping image analysis as sample image file was not found or specified.")


# ------------------------------------------------------------------------------
# --- 2.6 Audio Analysis (Transcription + Text Analysis) ---
# ------------------------------------------------------------------------------
# Transcribes the sample audio with Whisper, then scores the transcript with the
# same toxicity classifier used for text. Results (or the error text) are stored
# under all_analysis_results["audio"]. Skipped when no sample audio was found.
print("\n--- 2.6: Audio Analysis (Transcription + Text Analysis) ---")
asr_model = None
asr_processor = None
text_classifier_for_audio = None

if sample_audio_path:
    # Pre-bind every large intermediate so the finally block can always release
    # it, no matter where in the pipeline an exception is raised.
    audio_input = input_features = predicted_ids = None
    try:
        # --- Part 1: Transcription (ASR) ---
        print("Loading Whisper model for ASR ('openai/whisper-base')...")
        asr_model_id = "openai/whisper-base"
        asr_processor = AutoProcessor.from_pretrained(asr_model_id)
        asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(asr_model_id).to(device)
        asr_model.eval()
        print(f"Whisper model loaded on device: {asr_model.device}")

        print(f"Loading and processing audio file: {sample_audio_path}")
        # Load audio, ensuring resampling to 16kHz for Whisper
        audio_input, sample_rate = librosa.load(sample_audio_path, sr=16000)
        audio_seconds = len(audio_input) / sample_rate
        print(f"  Audio loaded. Duration: {audio_seconds:.2f} seconds")

        # Whisper's feature extractor pads/truncates its input to one fixed
        # window (30 s by default), so anything beyond that is never seen by
        # the model. Surface this instead of silently moderating a partial clip.
        asr_window_seconds = getattr(getattr(asr_processor, "feature_extractor", None), "chunk_length", 30)
        if audio_seconds > asr_window_seconds:
            print(f"  WARNING: Audio is longer than {asr_window_seconds}s; only the first {asr_window_seconds}s will be transcribed and analyzed.")

        print("Transcribing audio...")
        input_features = asr_processor(audio_input, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)

        with torch.no_grad():
            predicted_ids = asr_model.generate(input_features, max_length=256) # Adjust max_length if needed

        audio_transcript = asr_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        all_analysis_results["audio"]["transcript"] = audio_transcript
        print(f"  Audio Transcript: '{audio_transcript}'")

        # Release the ASR model before loading the classifier so both models
        # are never resident on the device at the same time (transcript is kept).
        asr_model = asr_processor = None
        audio_input = input_features = predicted_ids = None
        gc.collect()
        if device != 'cpu': torch.cuda.empty_cache()
        print("Whisper model resources released.")

        # --- Part 2: Text Analysis of Transcript ---
        if audio_transcript and audio_transcript.strip():
            print("Analyzing audio transcript for toxicity ('unitary/toxic-bert')...")
            # Initialize the text classifier again (needed for the transcript)
            text_classifier_for_audio = pipeline("text-classification",
                                                 model="unitary/toxic-bert",
                                                 device=device)
            print(f"Text classifier (for audio) loaded on device: {text_classifier_for_audio.device}")

            # truncation=True keeps a long transcript within the classifier's
            # maximum sequence length instead of raising inside the model.
            result = text_classifier_for_audio(audio_transcript, truncation=True)
            all_analysis_results["audio"]["analysis"] = result # result is typically a list e.g., [{'label': 'toxic', 'score': 0.99}]
            print(f"  Audio Transcript Analysis: {result}")

        else:
            print("Audio transcript is empty or only whitespace, skipping analysis.")
            all_analysis_results["audio"]["analysis"] = "Skipped (Empty Transcript)"

    except Exception as e:
        print(f"ERROR processing audio: {e}")
        all_analysis_results["audio"]["error"] = str(e)

    finally:
        # Single cleanup path for success and failure alike: drop the classifier
        # weights, clear every reference to models/tensors, then free the cache.
        classifier_was_loaded = text_classifier_for_audio is not None
        if classifier_was_loaded and hasattr(text_classifier_for_audio, 'model'):
            del text_classifier_for_audio.model
        asr_model = asr_processor = text_classifier_for_audio = None
        audio_input = input_features = predicted_ids = None
        gc.collect()
        if device != 'cpu': torch.cuda.empty_cache()
        if classifier_was_loaded:
            print("Text classifier (for audio) resources released.")
else:
    print("Skipping audio analysis as sample audio file was not found or specified.")


# ==============================================================================
# Section 2: Multimodal Analysis Summary
# ==============================================================================
summary_rule = "=" * 60
print(f"\n{summary_rule}")
print("Section 2: Multimodal Analysis Summary")
print(summary_rule)
# Serialize everything collected so far as indented JSON for easy reading
analysis_summary_json = json.dumps(all_analysis_results, indent=2)
print(analysis_summary_json)


Section 2: Starting Multimodal Analysis & Data Loading

--- 2.1: Sample text data defined.
       Check dataset slug ('image-contentmoderation') and filename ('IMGS3.jpg'). Image analysis will be skipped.
       Check dataset slug ('aud-contentmoderation') and filename ('AUDS1.mp3'). Audio analysis will be skipped.
--- 2.3: Initialized dictionary to store analysis results.

--- 2.4: Text Analysis (Toxicity) ---


config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/174 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0


Text classifier loaded on device: cuda:0
  Analyzing text ID: txt001
  Analyzing text ID: txt002
  Analyzing text ID: txt003
  Analyzing text ID: txt004
  Analyzing text ID: txt005
Text analysis complete.
Text classifier resources released.

--- 2.5: Image Analysis (Zero-Shot Classification) ---
Skipping image analysis as sample image file was not found or specified.

--- 2.6: Audio Analysis (Transcription + Text Analysis) ---
Skipping audio analysis as sample audio file was not found or specified.

Section 2: Multimodal Analysis Summary
{
  "text": {
    "txt001": {
      "content": "This video is amazing, thanks for sharing!",
      "analysis": [
        {
          "label": "toxic",
          "score": 0.0005700007895939052
        }
      ]
    },
    "txt002": {
      "content": "Go back where you came from, you don't belong here! #HateSpeechExample",
      "analysis": [
        {
          "label": "toxic",
          "score": 0.7442686557769775
        }
      ]
    },
    "txt003

# 3. Component: Policy RAG (Retrieval-Augmented Generation)

This section demonstrates Document Understanding, Embeddings, Vector Search/DB, RAG, and Grounding.

We encode policy documents into numerical vectors (embeddings), store them in an efficient vector database (FAISS), and define a function to retrieve relevant policies based on descriptions of content being moderated. 
This grounds the agent's decisions in explicit platform rules.

In [4]:
# ==============================================================================
# Section 3: Policy RAG Implementation
# ==============================================================================
section3_rule = "=" * 60
print(f"\n{section3_rule}")
print("Section 3: Setting up Policy RAG")
print(f"{section3_rule}\n")

# --- 3.1 Define Sample Policy Documents ---
# The agent's 'knowledge base'. Keys are the policy IDs; insertion order fixes
# each policy's position in `policy_docs`, so position N corresponds to ID P0(N+1).
_policy_text_by_id = {
    "P01": "Policy A (Hate Speech): Hate speech targeting individuals or groups based on attributes like race, religion, gender, sexual orientation, disability, or origin is strictly prohibited. This includes slurs, derogatory stereotypes, and calls for violence or exclusion.",
    "P02": "Policy B (Violence & Graphic Content): Content depicting realistic graphic violence, gore, or promoting dangerous acts that could lead to serious harm is not allowed. Exceptions may exist for newsworthy or educational content with context.",
    "P03": "Policy C (Misinformation): Spreading verifiably false information that can cause real-world harm (e.g., concerning elections, health crises, public safety) is subject to removal. Satire or opinion should be clearly distinguishable.",
    "P04": "Policy D (Spam & Scams): Commercial spam, including unsolicited promotions, repetitive posting, deceptive links, phishing attempts, or fraudulent schemes, is forbidden.",
    "P05": "Policy E (Harassment & Bullying): Targeted harassment, threats, intimidation, or bullying of individuals is not tolerated. This includes unwanted sexual advances and sharing private information.",
    "P06": "Policy F (Copyright): Content violating copyright laws will be removed upon valid notice from the rights holder.",
    "P07": "Policy G (AI-Generated Content Disclosure): AI-generated content must be disclosed if it realistically impersonates real individuals or depicts fabricated events as real, especially in sensitive contexts like politics or news.",
}
policy_docs = list(_policy_text_by_id.values())
print(f"--- 3.1: Loaded {len(policy_docs)} sample policy documents.")

# --- 3.2 Create Embeddings and Vector Store (FAISS) ---
# A SentenceTransformer turns each policy into an embedding vector; the vectors
# are stored in a flat FAISS index that searches by L2 (Euclidean) distance.
print("\n--- 3.2: Creating Policy Embeddings and FAISS Index ---")
encoder, index, policy_embeddings = None, None, None
try:
    encoder = SentenceTransformer('all-MiniLM-L6-v2', device=device) # Load encoder model
    print("Encoding policy documents into vectors...")
    # Embeddings are normalized here, and queries are normalized the same way
    policy_embeddings = encoder.encode(
        policy_docs,
        show_progress_bar=True,
        normalize_embeddings=True,
    )

    embedding_dim = policy_embeddings.shape[1]
    index = faiss.IndexFlatL2(embedding_dim)
    # FAISS requires float32 input
    index.add(policy_embeddings.astype(np.float32))
    print(f"FAISS index created with {index.ntotal} policy vectors (dimension: {embedding_dim}).")

except Exception as e:
    print(f"ERROR setting up RAG encoder or FAISS index: {e}")

# Note: the encoder is intentionally left loaded because the retrieval function
# needs it for every query. Loading/unloading it per call would save memory at
# the cost of extra latency on each RAG lookup.

# --- 3.3 Define Retrieval Function ---
def retrieve_relevant_policies(query_text, k=2):
    """
    Look up the k policy snippets closest to a free-text query.

    Relies on the module-level 'encoder', 'index' and 'policy_docs' objects.

    Args:
        query_text: Description of the content being moderated.
        k: Maximum number of policies to return.

    Returns:
        A list of dicts with keys "policy_id" (P01, P02, ...), "policy_text"
        and "distance" (as returned by the index search; lower means closer).
        An empty list is returned when the query is empty, the encoder/index
        is not initialized, or the lookup raises.
    """
    if not (query_text and encoder is not None and index is not None):
        print("Warning: RAG query is empty or encoder/index not initialized.")
        return []
    try:
        # The query is normalized the same way the policy embeddings were,
        # and cast to float32 before being handed to the index.
        query_vector = encoder.encode([query_text], normalize_embeddings=True).astype(np.float32)
        hit_distances, hit_positions = index.search(query_vector, k)

        # A position of -1 marks an empty result slot and is dropped.
        return [
            {
                "policy_id": f"P{position+1:02d}", # Generate ID (P01, P02, etc.)
                "policy_text": policy_docs[position],
                "distance": float(distance),
            }
            for distance, position in zip(hit_distances[0], hit_positions[0])
            if position != -1
        ]
    except Exception as e:
        print(f"ERROR during RAG retrieval: {e}")
        return []

# --- 3.4 Example RAG Query ---
print("\n--- 3.4: Example RAG Query ---")
# Build a query from one of the sample comments and show which policies the
# retriever returns as grounding context for it.
test_comment_id_rag = "txt002"
_rag_matching_contents = [c['content'] for c in sample_comments if c['id'] == test_comment_id_rag]
if not _rag_matching_contents:
    print(f"Test comment ID '{test_comment_id_rag}' not found for RAG example.")
else:
    test_comment_content_rag = _rag_matching_contents[0]
    # A richer query could also fold in multimodal signals, e.g.
    # "Hateful text comment about origins combined with image showing X"
    rag_query = f"Content Analysis Query: Text says '{test_comment_content_rag}'"
    print(f"Querying RAG with: '{rag_query}'")

    rag_results_example = retrieve_relevant_policies(rag_query)
    print("\nRetrieved Policies (Grounding Context):")
    print(json.dumps(rag_results_example, indent=2))

# Note: The FAISS index and encoder remain in memory for use by the agent later.


Section 3: Setting up Policy RAG

--- 3.1: Loaded 7 sample policy documents.

--- 3.2: Creating Policy Embeddings and FAISS Index ---


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Encoding policy documents into vectors...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

FAISS index created with 7 policy vectors (dimension: 384).

--- 3.4: Example RAG Query ---
Querying RAG with: 'Content Analysis Query: Text says 'Go back where you came from, you don't belong here! #HateSpeechExample''


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Retrieved Policies (Grounding Context):
[
  {
    "policy_id": "P06",
    "policy_text": "Policy F (Copyright): Content violating copyright laws will be removed upon valid notice from the rights holder.",
    "distance": 1.3733962774276733
  },
  {
    "policy_id": "P01",
    "policy_text": "Policy A (Hate Speech): Hate speech targeting individuals or groups based on attributes like race, religion, gender, sexual orientation, disability, or origin is strictly prohibited. This includes slurs, derogatory stereotypes, and calls for violence or exclusion.",
    "distance": 1.4621390104293823
  }
]


# 4. Component: Few-Shot Prompting (Simulation)

This section demonstrates the **Few-Shot Prompting** capability. 

This allows the agent to potentially learn new rules or classifications quickly from just a few examples (shots), which is useful for adapting to rapidly evolving slang, spam tactics, or policy nuances without needing extensive fine-tuning.

* **Simulation:** We primarily simulate this by constructing a prompt and using simple keyword logic.
* **Optional LLM Call:** The code includes commented-out sections to use the OpenAI API (requires openai library and an API key configured via Kaggle Secrets). Large Language Models (LLMs) excel at few-shot tasks.
* **Alternatives:** You could adapt the LLM call to other APIs (Google Gemini, Anthropic Claude) or experiment with smaller open-source instruction-tuned models from Hugging Face (like Flan-T5, Gemma), though their few-shot performance might vary.
* **Security:** Never paste API keys directly into notebook code. Use Kaggle Secrets.

In [5]:
# ==============================================================================
# Section 4: Few-Shot Prompting Simulation
# ==============================================================================
# Section banner so this cell's output is easy to spot in the run log
section4_rule = "=" * 60
print(f"\n{section4_rule}")
print("Section 4: Simulating Few-Shot Prompting")
print(f"{section4_rule}\n")

# --- 4.1: OpenAI API Setup (Optional - Requires Secret & Uncommenting) ---
# import openai
# from kaggle_secrets import UserSecretsClient
# openai_available = False
# try:
#     user_secrets = UserSecretsClient()
#     openai_api_key = user_secrets.get_secret("OPENAI_API_KEY") # Secret name MUST match exactly
#     openai.api_key = openai_api_key
#     print("OpenAI API Key loaded from secrets. API calls might be possible.")
#     openai_available = True
# except Exception as e:
#     print("OpenAI API Key secret not found or error loading. Using basic simulation ONLY.")
#     # print(f"Error details (if any): {e}") # Uncomment for debugging secrets issues
#     openai_available = False
# --- End OpenAI Setup ---

# --- 4.2: Few-Shot Classification Function ---
def _keyword_in_text(keyword, text):
    """
    Return True when `keyword` occurs in `text` starting at a word boundary.

    Plain substring matching makes short keywords fire inside unrelated words
    (e.g. "win" inside "showing"). Keywords that begin with a word character
    must therefore not be preceded by another word character. Keywords that
    begin with punctuation (".com", ".io") are still matched as substrings,
    and suffixes are allowed so "fraud" keeps matching "fraudulent".
    """
    import re  # local import: the notebook's import cell is not guaranteed to provide `re`

    pattern = re.escape(keyword)
    if re.match(r"\w", keyword):
        pattern = r"(?<!\w)" + pattern
    return re.search(pattern, text) is not None


def classify_with_few_shot(comment_content, rule_description, examples, use_openai=False):
    """
    Simulates few-shot classification of a comment against a moderation rule.

    A few-shot prompt is always built from `examples` and returned for
    inspection. The actual OpenAI call is kept commented out, so the verdict
    comes from a keyword-based simulation selected by the rule description.

    Args:
        comment_content (str): The comment text to classify. None is treated as "".
        rule_description (str): Human-readable rule; "spam" or
            "election misinformation" (case-insensitive) selects the keyword set.
        examples (list[dict]): Few-shot examples, each with 'text' and 'label'
            keys. None is treated as an empty list.
        use_openai (bool): Requests the LLM path. While the API block below is
            commented out this only produces a notice and the simulation runs.

    Returns:
        dict: {"classification": str, "prompt_used": str}. The classification is
        "VIOLATION", "NO VIOLATION", or "NO VIOLATION (Simulation Default)" when
        the rule is not recognised by the simulation.
    """
    comment_content = "" if comment_content is None else comment_content
    rule_description = "" if rule_description is None else rule_description
    examples = examples or []

    print("\n--- Performing Few-Shot Classification ---")
    print(f"Rule: {rule_description}")
    print(f"Comment to classify: '{comment_content}'")

    # Construct the prompt with examples
    prompt_parts = [
        f"Determine if the following comment violates the rule: '{rule_description}'. "
        "Respond with only 'VIOLATION' or 'NO VIOLATION'.\n\nExamples:\n"
    ]
    for ex in examples:
        prompt_parts.append(f"- Comment: '{ex['text']}'\n  Result: {ex['label']}\n")
    # Trailing "Result: " asks the model to complete the label
    prompt_parts.append(f"\nNew Comment to Classify:\n- Comment: '{comment_content}'\nResult: ")
    prompt = "".join(prompt_parts)

    print("\nConstructed Prompt (for simulation or LLM):")
    print("-" * 20)
    print(prompt)
    print("-" * 20)

    classification_result = "UNDETERMINED"  # Default

    # --- Actual LLM Call (Optional & Commented Out) ---
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 interface of the
    # openai package; confirm the installed version before enabling this block.
    # if use_openai and openai_available:
    #    print("\nAttempting classification via OpenAI API...")
    #    try:
    #        response = openai.ChatCompletion.create(
    #            model="gpt-3.5-turbo", # Or gpt-4 if available/needed
    #            messages=[{"role": "user", "content": prompt}],
    #            max_tokens=10, # Need just enough for VIOLATION/NO VIOLATION
    #            temperature=0.0, # Low temperature for deterministic classification
    #        )
    #        llm_output = response.choices[0].message['content'].strip().upper()
    #        print(f"Raw LLM Output: {llm_output}")
    #        # "NO VIOLATION" must be tested first: it contains "VIOLATION" as a
    #        # substring, so the reverse order could never report NO VIOLATION.
    #        if "NO VIOLATION" in llm_output:
    #            classification_result = "NO VIOLATION"
    #        elif "VIOLATION" in llm_output:
    #            classification_result = "VIOLATION"
    #        else:
    #            classification_result = "UNCLEAR LLM OUTPUT"
    #    except Exception as e:
    #        print(f"  Error calling OpenAI API: {e}")
    #        classification_result = "API ERROR"
    #    print(f"OpenAI Classification Result: {classification_result}")
    # else: # Runs if use_openai is False OR openai_available is False

    if use_openai:
        # Make the fallback explicit instead of silently ignoring the flag.
        print("\nNote: use_openai=True was requested, but the OpenAI call is commented out; "
              "falling back to the basic simulation.")

    # --- Simulation (Fallback if API not used/available) ---
    print("\nPerforming basic simulation (keyword/rule-based)...")
    rule_lower = rule_description.lower()
    comment_lower = comment_content.lower()

    # Keyword sets mirror the *intent* of the example scenarios in this notebook.
    if "spam" in rule_lower:
        keywords = ["free", "click here", "profile", "link", ".com", ".net", ".io", "earn $", "win"]
    elif "election misinformation" in rule_lower:
        keywords = ["rigged", "stolen", "fraud", "proof", "hackers"]
    else:
        keywords = None

    if keywords is None:
        # Default simulation if rule is unknown
        classification_result = "NO VIOLATION (Simulation Default)"
    elif any(_keyword_in_text(keyword, comment_lower) for keyword in keywords):
        classification_result = "VIOLATION"
    else:
        classification_result = "NO VIOLATION"
    print(f"Simulated Classification Result: {classification_result}")

    # Return structured output
    return {"classification": classification_result, "prompt_used": prompt}

# --- 4.3: Example Few-Shot Scenario 1: Spam ---
print("\n--- 4.3: Few-Shot Example - Spam ---")
new_spam_rule = "Promoting external dubious links or using spam tactics"
spam_examples = [
    dict(text='Great video!', label='NO VIOLATION'),
    dict(text='Visit my-cool-site.io for more', label='VIOLATION'),
    dict(text='Earn $1000 fast click here shady-rewards.net', label='VIOLATION'),
    dict(text='I agree with this point.', label='NO VIOLATION'),
]
# Look up the sample comment that the spam rule is demonstrated on (first match by id).
comment_to_classify_spam_id = "txt003"
comment_to_classify_spam_content = next(
    (entry['content'] for entry in sample_comments if entry['id'] == comment_to_classify_spam_id),
    None,
)

if not comment_to_classify_spam_content:
    print(f"Comment ID '{comment_to_classify_spam_id}' not found for spam classification.")
    few_shot_spam_result = None  # Keep the name defined for later cells
else:
    # Pass use_openai=True ONLY once the API key secret is configured AND the API call block is uncommented
    few_shot_spam_result = classify_with_few_shot(
        comment_to_classify_spam_content,
        new_spam_rule,
        spam_examples,
        use_openai=False,
    )

# --- 4.4: Example Few-Shot Scenario 2: Misinformation ---
print("\n--- 4.4: Few-Shot Example - Misinformation ---")
election_misinfo_rule = "Election Misinformation Claims"
misinfo_examples = [
    dict(text='I voted yesterday.', label='NO VIOLATION'),
    dict(text='Proof the election was stolen by hackers!', label='VIOLATION'),
    dict(text='The results are surprising.', label='NO VIOLATION'),
    dict(text='Widespread fraud confirmed, the numbers were rigged.', label='VIOLATION'),
]
comment_to_classify_misinfo_id = "txt004"
comment_to_classify_misinfo_content = next(
    (entry['content'] for entry in sample_comments if entry['id'] == comment_to_classify_misinfo_id),
    None,
)

if not comment_to_classify_misinfo_content:
    print(f"Comment ID '{comment_to_classify_misinfo_id}' not found for misinformation classification.")
    few_shot_misinfo_result = None  # Keep the name defined for later cells
else:
    few_shot_misinfo_result = classify_with_few_shot(
        comment_to_classify_misinfo_content,
        election_misinfo_rule,
        misinfo_examples,
        use_openai=False,
    )

# Store results globally so the agent can access them later
# (These are already global as they were defined outside a function)


Section 4: Simulating Few-Shot Prompting


--- 4.3: Few-Shot Example - Spam ---

--- Performing Few-Shot Classification ---
Rule: Promoting external dubious links or using spam tactics
Comment to classify: 'Check out my profile for FREE stuff >>> my-spam-link.com'

Constructed Prompt (for simulation or LLM):
--------------------
Determine if the following comment violates the rule: 'Promoting external dubious links or using spam tactics'. Respond with only 'VIOLATION' or 'NO VIOLATION'.

Examples:
- Comment: 'Great video!'
  Result: NO VIOLATION
- Comment: 'Visit my-cool-site.io for more'
  Result: VIOLATION
- Comment: 'Earn $1000 fast click here shady-rewards.net'
  Result: VIOLATION
- Comment: 'I agree with this point.'
  Result: NO VIOLATION

New Comment to Classify:
- Comment: 'Check out my profile for FREE stuff >>> my-spam-link.com'
Result: 
--------------------

Performing basic simulation (keyword/rule-based)...
Simulated Classification Result: VIOLATION

--- 4.4: Few-Shot Exa

# 5. Component: Mock AI-Generated Content (AIGC) Detection

Detecting AI-generated content (especially images and audio) is a complex and evolving field, often requiring specialized models. For this simulation, we will mock this capability.

In a real system, dedicated AIGC detection models would analyze the image/audio data and provide a probability score. 

Here, we'll just generate a random result for demonstration purposes; the agent logic in Section 6.2 consumes it. The agent also checks whether RAG retrieved a relevant policy (like Policy G) to see if disclosure might be required.

In [6]:
# ==============================================================================
# Section 5: Mock AI-Generated Content (AIGC) Detection
# ==============================================================================
# Section banner: blank line, 60-character rule, title, rule, blank line.
print(f"\n{'=' * 60}")
print("Section 5: Mocking AIGC Detection")
print(f"{'=' * 60}\n")

def mock_aigc_check(modality="unknown", rng=None):
    """
    Simulates the output of an AIGC detection model.

    Args:
        modality (str): Label for the kind of content being "checked"
            (e.g. "image", "audio"); copied into the result unchanged.
        rng (random.Random, optional): Random source to draw from. Pass a seeded
            `random.Random(seed)` to get reproducible results; when omitted the
            module-level `random` functions are used, as before.

    Returns:
        dict: Keys 'modality', 'is_ai_generated' (bool), 'confidence'
        (float rounded to 2 decimals) and 'model_used'.
    """
    source = random if rng is None else rng

    # Two of three outcomes are False, skewing the simulation towards non-AI content.
    is_ai = source.choice([True, False, False])
    # Confidence is drawn from [0.5, 0.99] for AI verdicts and [0.01, 0.49] otherwise.
    confidence = source.uniform(0.5, 0.99) if is_ai else source.uniform(0.01, 0.49)
    result = {
        "modality": modality,
        "is_ai_generated": is_ai,
        "confidence": round(confidence, 2),
        "model_used": "mock_detector_v0.1"
    }
    print(f"  Mock AIGC Check ({modality}): Result = {result['is_ai_generated']}, Confidence = {result['confidence']}")
    return result

# Simulate checks for the sample image and audio (if they exist)
print("--- Running Mock AIGC Checks ---")

# Image: stays None unless a sample image path is available.
aigc_image_check = None
if not sample_image_path:
    print("  Skipping mock AIGC check for image (no path).")
else:
    aigc_image_check = mock_aigc_check(modality="image")

# Audio: same pattern as the image check above.
aigc_audio_check = None
if not sample_audio_path:
    print("  Skipping mock AIGC check for audio (no path).")
else:
    aigc_audio_check = mock_aigc_check(modality="audio")

# Results are stored globally (aigc_image_check, aigc_audio_check) for the agent.


Section 5: Mocking AIGC Detection

--- Running Mock AIGC Checks ---
  Skipping mock AIGC check for image (no path).
  Skipping mock AIGC check for audio (no path).


# 6. Component: Function Calling & Agent Logic (Simulation)

This section demonstrates Function Calling and Agent concepts.

* **Function Calling:** We define simple Python functions (func_delete_content, func_flag_for_review, func_approve_content) that simulate the actions an agent would take by calling specific platform APIs (e.g., delete a post, send to human review queue, approve). These functions return structured JSON-like dictionaries confirming the action.
* **Agent:** The main moderate_content_item function acts as the orchestrator or "agent." It takes a content item, calls various analysis functions (using pre-computed results from Section 2, RAG results from Section 3, Few-Shot results from Section 4, AIGC mock results from Section 5), aggregates the evidence, applies decision logic (based on a simple scoring system), and then "calls" the appropriate action function.

This structure provides modularity and mimics how real-world agent systems might operate. Structured Output is used throughout for clarity and machine readability.

In [7]:
# ==============================================================================
# Section 6.1: Define Simulated Platform Action Functions
# ==============================================================================
# Section banner: blank line, 60-character rule, title, rule, blank line.
print(f"\n{'=' * 60}")
print("Section 6.1: Defining Simulated Platform Action Functions")
print(f"{'=' * 60}\n")

# These functions simulate interacting with a platform's backend API.
# They return structured confirmation messages.

def func_delete_content(content_id, reason_codes, explanation):
    """
    Simulates a platform API call that deletes a piece of content.

    Prints the action payload as indented JSON and returns it wrapped in a
    success envelope: {"status": "success", "details": <payload>}.
    """
    # De-duplicate the codes, then order them so the payload is stable.
    unique_codes = sorted(set(reason_codes))

    payload = {}
    payload["action"] = "DELETE"
    payload["content_id"] = content_id
    payload["reason_codes"] = unique_codes
    payload["explanation"] = explanation
    # pandas yields a timezone-aware ISO-8601 timestamp
    payload["timestamp"] = pd.Timestamp.now(tz='UTC').isoformat()

    print("\n>>> ACTION CALLED: Delete Content <<<")
    # default=str stringifies any value json cannot serialise natively
    print(json.dumps(payload, indent=2, default=str))
    return dict(status="success", details=payload)

def func_flag_for_review(content_id, reason_codes, explanation, confidence_score):
    """
    Simulates a platform API call that queues content for human review.

    Prints the action payload as indented JSON and returns it wrapped in a
    success envelope: {"status": "success", "details": <payload>}.
    """
    # De-duplicate the codes, then order them so the payload is stable.
    unique_codes = sorted(set(reason_codes))

    payload = {}
    payload["action"] = "FLAG_FOR_REVIEW"
    payload["content_id"] = content_id
    payload["reason_codes"] = unique_codes
    payload["explanation"] = explanation
    payload["ai_confidence"] = float(confidence_score)  # coerce to a plain float
    payload["timestamp"] = pd.Timestamp.now(tz='UTC').isoformat()

    print("\n>>> ACTION CALLED: Flag for Human Review <<<")
    print(json.dumps(payload, indent=2, default=str))
    return dict(status="success", details=payload)

def func_approve_content(content_id):
    """
    Simulates a platform API call that approves a piece of content.

    Prints the action payload as indented JSON and returns it wrapped in a
    success envelope: {"status": "success", "details": <payload>}.
    """
    payload = {}
    payload["action"] = "APPROVE"
    payload["content_id"] = content_id
    payload["timestamp"] = pd.Timestamp.now(tz='UTC').isoformat()

    print("\n>>> ACTION CALLED: Approve Content <<<")
    print(json.dumps(payload, indent=2, default=str))
    return dict(status="success", details=payload)

# Confirmation line so the cell output shows that the definitions above were executed.
print("Action functions defined.")


Section 6.1: Defining Simulated Platform Action Functions

Action functions defined.


## 6.2 Central Decision Logic (The Agent Core)

This is the core function moderate_content_item that orchestrates the entire process for a single piece of content.

**Dependencies:** 
This function relies on several variables and functions defined in previous cells being available in the global scope:

* all_analysis_results: Dictionary containing pre-calculated analysis from Section 2.
* retrieve_relevant_policies: The RAG function defined in Section 3.
* few_shot_spam_result, few_shot_misinfo_result: Results from few-shot checks in Section 4.
* comment_to_classify_spam_id, comment_to_classify_misinfo_id: IDs used in few-shot checks.
* aigc_image_check, aigc_audio_check: Mocked AIGC results from Section 5.
* func_delete_content, func_flag_for_review, func_approve_content: Action functions from Section 6.1.
* Image from PIL (for potential re-analysis if needed, though currently using stored results).
* os (for path checks if needed).

**Note:** To manage memory, some analysis models (like text classifier) were deleted earlier. The agent currently relies on the stored results in all_analysis_results. If direct re-analysis were needed within this function, the relevant pipelines would need to be re-initialized (potentially done in a later cell before calling this function).

In [8]:
# ==============================================================================
# Section 6.2: Central Decision Logic - The Agent Core Function
# ==============================================================================
# Section banner: blank line, 60-character rule, title, rule, blank line.
print(f"\n{'=' * 60}")
print("Section 6.2: Defining the 'moderate_content_item' Agent Function")
print(f"{'=' * 60}\n")

def moderate_content_item(content_id, text_content=None): # Simplified signature - relies on global results for media
    """
    Processes a single content item through the simulated moderation pipeline.

    Acts as the central agent: it collects pre-calculated analysis results,
    retrieves policy context via RAG, applies the few-shot verdicts and the
    mocked AIGC checks, accumulates a violation score, and finally calls one of
    the platform action functions (delete / flag for review / approve).

    The function reads these module-level names (checked on entry):
    all_analysis_results, retrieve_relevant_policies, few_shot_spam_result,
    few_shot_misinfo_result, comment_to_classify_spam_id,
    comment_to_classify_misinfo_id, aigc_image_check, aigc_audio_check,
    func_delete_content, func_flag_for_review, func_approve_content.

    Args:
        content_id (str): The unique ID of the content item (e.g., "txt001", "media001").
            IDs that do not start with "txt" are treated as media items.
        text_content (str, optional): The text content associated with the ID. Used for text items.

    Returns:
        dict: {"content_id", "decision", "evidence"}. "decision" is whatever the
        chosen action function returned, or a {"status": "error", ...} dict when
        a required global is missing or the action function raised.
    """
    print(f"\n{'='*15} Moderating Content ID: {content_id} {'='*15}")

    # Ensure the global results dictionary is accessible and other dependencies exist
    required_globals = [
        'all_analysis_results', 'retrieve_relevant_policies',
        'few_shot_spam_result', 'few_shot_misinfo_result',
        'comment_to_classify_spam_id', 'comment_to_classify_misinfo_id',
        'aigc_image_check', 'aigc_audio_check',
        'func_delete_content', 'func_flag_for_review', 'func_approve_content'
    ]
    for var in required_globals:
        if var not in globals():
            print(f"FATAL ERROR: Required global variable/function '{var}' not found. Aborting moderation.")
            return {"content_id": content_id, "decision": {"status": "error", "details": f"Missing global: {var}"}, "evidence": {}}

    # Initialize tracking variables
    violation_score = 0.0
    reasons = [] # Human-readable reasons
    reason_codes = [] # Machine-readable codes (e.g., policy IDs, rule IDs)
    evidence = {"content_id": content_id} # Store analysis results & steps for explainability

    # --- Step 1: Gather Pre-Calculated Multimodal Features ---
    print("\n[Agent Step 1: Gathering Pre-calculated Analysis Results]")
    current_text_analysis_data = None

    # Text Analysis (from all_analysis_results)
    if content_id in all_analysis_results["text"]:
        current_text_analysis_data = all_analysis_results["text"][content_id]
        evidence['text_analysis_raw'] = current_text_analysis_data # Store raw result
        print(f"  Retrieved Text Analysis for {content_id}")
        analysis_list = current_text_analysis_data.get('analysis')
        # Check if analysis is a list and not empty
        if isinstance(analysis_list, list) and analysis_list:
            text_analysis = analysis_list[0] # Get the first dictionary e.g. {'label': 'toxic', 'score': 0.99}
            evidence['text_analysis_processed'] = text_analysis
            # Simple Scoring based on toxicity label and score
            if text_analysis.get('label', '').lower() == 'toxic':
                score = text_analysis.get('score', 0)
                if score > 0.9:
                    violation_score += 3.0
                    reasons.append(f"High confidence toxic text (score: {score:.2f})")
                    reason_codes.append("TEXT_TOXIC_HIGH")
                elif score > 0.6:
                    violation_score += 1.0
                    reasons.append(f"Moderate confidence toxic text (score: {score:.2f})")
                    reason_codes.append("TEXT_TOXIC_MOD")
        elif current_text_analysis_data.get('error'):
            print(f"    Text analysis error noted: {current_text_analysis_data['error']}")
            evidence['text_analysis_processed'] = {"error": current_text_analysis_data['error']}
        else:
            print(f"    No valid text analysis result found for {content_id}.")
            evidence['text_analysis_processed'] = "No analysis found"

    # Image Analysis (Check if this content ID involves an image)
    # We assume a single image/audio for this simulation tied to 'media001' or similar ID
    # Modify this logic if multiple media items are possible
    is_media_item = not content_id.startswith("txt") # Simple check if it's not just text
    current_image_analysis_summary = None
    image_results = all_analysis_results["image"] if is_media_item else {}
    image_analysis_data = image_results.get("analysis")
    image_error = image_results.get("error")
    if image_analysis_data:
        evidence['image_analysis_raw'] = image_analysis_data
        print(f"  Retrieved Image Analysis for {content_id}")
        if isinstance(image_analysis_data, list):
            # Find the highest scoring label (entries without a score count as 0)
            top_match = max(image_analysis_data, key=lambda item: item.get('score', 0))
            top_label = top_match.get('label', 'unknown')
            top_score = top_match.get('score', 0)
            current_image_analysis_summary = f"Image top match: '{top_label}' (score: {top_score:.2f})"
            evidence['image_analysis_processed'] = top_match
            # Score based on potentially harmful labels
            harmful_img_labels = ["hate symbol", "weapon", "graphic violence", "nudity"]
            if top_label in harmful_img_labels and top_score > 0.7:
                violation_score += 2.0
                reasons.append(current_image_analysis_summary)
                reason_codes.append(f"IMAGE_{top_label.upper().replace(' ', '_')}")
        elif image_error:
            print(f"    Image analysis error noted: {image_error}")
            evidence['image_analysis_processed'] = {"error": image_error}
        else:
            print(f"    No valid image analysis result found for {content_id}.")
            evidence['image_analysis_processed'] = "No analysis found"
    elif image_error:
        # A failed analysis usually leaves "analysis" empty; still surface the error
        # in the evidence instead of silently dropping it.
        print(f"    Image analysis error noted: {image_error}")
        evidence['image_analysis_processed'] = {"error": image_error}

    # Audio Analysis (Check if this content ID involves audio)
    audio_results = all_analysis_results["audio"] if is_media_item else {}
    audio_analysis_data = audio_results.get("analysis")
    audio_transcript = audio_results.get("transcript")
    audio_error = audio_results.get("error")
    if audio_analysis_data or audio_transcript:
        evidence['audio_transcript'] = audio_transcript if audio_transcript else "N/A"
        evidence['audio_analysis_raw'] = audio_analysis_data if audio_analysis_data else "N/A"
        print(f"  Retrieved Audio Analysis/Transcript for {content_id}")

        if isinstance(audio_analysis_data, list) and audio_analysis_data:
            audio_text_analysis = audio_analysis_data[0] # Analysis of the transcript
            evidence['audio_analysis_processed'] = audio_text_analysis
            # Score based on toxicity of transcript
            if audio_text_analysis.get('label', '').lower() == 'toxic':
                score = audio_text_analysis.get('score', 0)
                if score > 0.8:
                    violation_score += 2.0
                    reasons.append(f"Toxic audio transcript (score: {score:.2f})")
                    reason_codes.append("AUDIO_TOXIC")
        elif isinstance(audio_analysis_data, str): # Handle "Skipped..." case
            print(f"    Audio analysis was skipped or transcript was empty.")
            evidence['audio_analysis_processed'] = audio_analysis_data
        elif audio_error:
            print(f"    Audio analysis error noted: {audio_error}")
            evidence['audio_analysis_processed'] = {"error": audio_error}
        else:
            print(f"    No valid audio analysis result found for {content_id}.")
            evidence['audio_analysis_processed'] = "No analysis found"
    elif audio_error:
        # Same as for images: report the failure even when nothing was produced.
        print(f"    Audio analysis error noted: {audio_error}")
        evidence['audio_analysis_processed'] = {"error": audio_error}

    # Few-shot verdicts are resolved up front so that the RAG scoring below can
    # skip policy bonuses already covered by a few-shot hit. (The few-shot reason
    # codes themselves are only appended in Step 3, after RAG has run.)
    spam_few_shot_hit = bool(
        few_shot_spam_result
        and content_id == comment_to_classify_spam_id
        and few_shot_spam_result.get('classification') == "VIOLATION"
    )
    misinfo_few_shot_hit = bool(
        few_shot_misinfo_result
        and content_id == comment_to_classify_misinfo_id
        and few_shot_misinfo_result.get('classification') == "VIOLATION"
    )

    # --- Step 2: Contextualization via RAG ---
    print("\n[Agent Step 2: Retrieving Policy Context (RAG)]")
    # Construct a query based on available content and initial analysis results
    rag_query_parts = []
    if text_content:
        rag_query_parts.append(f"Text: {text_content[:100]}...") # Use provided text_content for text items
    elif current_text_analysis_data:
        stored_text = current_text_analysis_data.get('content', '') or ''
        rag_query_parts.append(f"Text: {stored_text[:100]}...") # Or from stored data
    if current_image_analysis_summary:
        rag_query_parts.append(current_image_analysis_summary)
    if audio_transcript:
        rag_query_parts.append(f"Audio transcript: {audio_transcript[:100]}...")

    rag_query = " ".join(rag_query_parts) if rag_query_parts else f"Generic content item {content_id}"
    evidence['rag_query'] = rag_query
    print(f"  RAG Query: '{rag_query}'")

    rag_policies = []
    try:
        # `or []` keeps Step 4 safe if the retriever returns None
        rag_policies = retrieve_relevant_policies(rag_query, k=2) or []
        evidence['rag_policies_retrieved'] = rag_policies
        print("  Relevant Policies Found:")
        if rag_policies:
            for p in rag_policies:
                print(f"    - ID: {p['policy_id']}, Dist: {p['distance']:.3f}, Text: {p['policy_text'][:80]}...")
                # Adjust score based on relevant policies (example logic)
                if p['distance'] < 0.9: # Use a threshold for relevance (lower is better)
                    policy_text_lower = p['policy_text'].lower()
                    policy_id_code = p['policy_id'] # Use the retrieved policy ID as the code
                    # Add policy code to reasons if not already covered by specific checks
                    if policy_id_code not in reason_codes:
                        if "hate speech" in policy_text_lower and "TEXT_TOXIC_HIGH" not in reason_codes:
                            violation_score += 0.5
                            reason_codes.append(policy_id_code)
                            reasons.append(f"Matches {policy_id_code} (Hate Speech context)")
                        if "misinformation" in policy_text_lower and not misinfo_few_shot_hit:
                            violation_score += 0.5
                            reason_codes.append(policy_id_code)
                            reasons.append(f"Matches {policy_id_code} (Misinfo context)")
                        if "spam" in policy_text_lower and not spam_few_shot_hit:
                            violation_score += 1.0
                            reason_codes.append(policy_id_code)
                            reasons.append(f"Matches {policy_id_code} (Spam context)")
                        # Add other policy matches as needed
        else:
            print("    No relevant policies found via RAG.")
    except Exception as e:
        print(f"    Error during RAG execution: {e}")
        evidence['rag_policies_retrieved'] = {"error": str(e)}

    # --- Step 3: Apply Specific Rules (Few-Shot Results) ---
    print("\n[Agent Step 3: Checking Specific Rules (Few-Shot Results)]")
    applied_few_shot = False
    # Check if the pre-calculated few-shot spam result applies to this content_id
    if few_shot_spam_result and content_id == comment_to_classify_spam_id:
        evidence['few_shot_spam_check'] = few_shot_spam_result
        if spam_few_shot_hit:
            print(f"  Applying Few-Shot Spam Rule: VIOLATION")
            violation_score += 2.0 # Strong signal from specific check
            reasons.append("Flagged by Few-Shot Spam Rule")
            reason_codes.append("FEWSHOT_SPAM")
            applied_few_shot = True
        else:
            print(f"  Applying Few-Shot Spam Rule: NO VIOLATION")
    # Check misinfo result
    if few_shot_misinfo_result and content_id == comment_to_classify_misinfo_id:
        evidence['few_shot_misinfo_check'] = few_shot_misinfo_result
        if misinfo_few_shot_hit:
            print(f"  Applying Few-Shot Misinfo Rule: VIOLATION")
            violation_score += 2.0
            reasons.append("Flagged by Few-Shot Misinformation Rule")
            reason_codes.append("FEWSHOT_MISINFO")
            applied_few_shot = True
        else:
            print(f"  Applying Few-Shot Misinfo Rule: NO VIOLATION")

    if not applied_few_shot:
        print("  No specific few-shot rules triggered or applied for this item.")

    # --- Step 4: Check for AI Generation (Mocked Results) ---
    print("\n[Agent Step 4: Checking for AI Generation (Mocked Results)]")
    evidence['aigc_checks'] = {'image': aigc_image_check, 'audio': aigc_audio_check}
    aigc_flagged = False

    # Example Rule: Flag if AI generated AND related to sensitive topic (e.g., politics, based on RAG)
    # Check if Misinfo (P03) or AIGC Disclosure (P07) policy was retrieved with low distance
    is_sensitive_topic_context = any(p['policy_id'] in ["P03", "P07"] for p in rag_policies if p['distance'] < 1.0)
    if is_sensitive_topic_context:
        print("  Sensitive topic context detected via RAG (P03/P07).")

    # Image and audio follow the same rule; only the label in messages/codes differs.
    for media_label, aigc_check in (("Image", aigc_image_check), ("Audio", aigc_audio_check)):
        if not (is_media_item and aigc_check and aigc_check.get('is_ai_generated')):
            continue
        conf = aigc_check.get('confidence', 0)
        code_prefix = f"AIGC_{media_label.upper()}"
        print(f"  Mock AIGC {media_label} detected (Conf: {conf:.2f}). Sensitive context: {is_sensitive_topic_context}")
        if is_sensitive_topic_context:
            reasons.append(f"Potential Sensitive AIGC {media_label} (conf: {conf:.2f})")
            reason_codes.append(f"{code_prefix}_SENSITIVE")
            violation_score += 0.5 # Moderate increase, might warrant review or disclosure check
            aigc_flagged = True
        elif conf > 0.8: # High confidence generic AIGC might still be flagged depending on policy
            reasons.append(f"High Confidence AIGC {media_label} (conf: {conf:.2f})")
            reason_codes.append(f"{code_prefix}_HIGH_CONF")
            violation_score += 0.2
            aigc_flagged = True

    if not aigc_flagged:
        print("  No concerning AIGC signals flagged based on current rules.")

    # --- Step 5: Final Decision & Function Call ---
    print(f"\n[Agent Step 5: Final Decision for ID: {content_id}]")
    # Ensure reason codes are unique and sorted for consistency
    reason_codes = sorted(set(reason_codes))
    final_explanation = "; ".join(sorted(set(reasons))) # Unique, sorted reasons

    print(f"  Final Violation Score: {violation_score:.2f}")
    print(f"  Reason Codes: {reason_codes}")
    print(f"  Explanation: {final_explanation}")

    final_decision_details = {}
    # Define thresholds (these would be carefully tuned in production)
    DELETE_THRESHOLD = 4.0
    FLAG_THRESHOLD = 1.0

    try:
        if violation_score >= DELETE_THRESHOLD:
            final_decision_details = func_delete_content(content_id, reason_codes, final_explanation)
        elif violation_score >= FLAG_THRESHOLD:
            # Map score to a 0-1 confidence for flagging (simple linear mapping here)
            confidence_for_flag = min(1.0, max(0.0, (violation_score - FLAG_THRESHOLD) / (DELETE_THRESHOLD - FLAG_THRESHOLD)))
            final_decision_details = func_flag_for_review(content_id, reason_codes, final_explanation, confidence_for_flag)
        else:
            final_decision_details = func_approve_content(content_id)
    except Exception as e:
        print(f"    Error executing final action function: {e}")
        final_decision_details = {"status": "error", "details": {"action": "ACTION_FAILED", "error": str(e)}}

    print(f"{'='*30}")

    # Return structured output including the final action and all collected evidence
    return {"content_id": content_id, "decision": final_decision_details, "evidence": evidence}

# Confirmation line so the cell output shows that the agent function above was defined.
print("Agent function 'moderate_content_item' defined.")


Section 6.2: Defining the 'moderate_content_item' Agent Function

Agent function 'moderate_content_item' defined.


# 7. Putting It All Together: Workflow Execution

Now, we'll run the moderate_content_item agent function on our sample content items.

**Preparation:** Because we released resources (like the text classifier model) earlier to save memory, we need to quickly re-initialize any components that the moderate_content_item function might directly call if it were designed to re-analyze instead of just using stored results.

In our current moderate_content_item implementation, it relies primarily on the pre-calculated results stored in all_analysis_results. However, if we had designed it to re-run, say, text classification on the fly, we would reload the text_classifier here. For this version, reloading isn't strictly necessary based on the current agent code, but it's good practice to include this step conceptually if the agent were more complex or needed live re-analysis.

In [9]:
# ==============================================================================
# Section 7.1: Re-initialize Moderation Components (If Needed)
# ==============================================================================
# This cell is a placeholder or for actual re-initialization if the agent function
# was designed to call pipelines directly instead of only using stored results.
# In our current setup, `moderate_content_item` uses `all_analysis_results`.

# Section banner: blank line, 60-character rule, title, rule, blank line.
print(f"\n{'=' * 60}")
print("Section 7.1: Preparing for Workflow Execution")
print(f"{'=' * 60}\n")

# Example: Re-initializing the text classifier if the agent needed it directly.
# We'll keep it commented out as our agent uses stored results, but this shows where it would go.

# print("Checking if text_classifier needs re-initialization...")
# if 'text_classifier' not in globals() or globals()['text_classifier'] is None:
#     print("Re-initializing Text Classifier ('unitary/toxic-bert')...")
#     try:
#         if 'device' not in locals(): raise NameError("'device' not defined.")
#         text_classifier = pipeline("text-classification", model="unitary/toxic-bert", device=device)
#         print(f"Text classifier re-initialized successfully on device: {text_classifier.device}")
#     except Exception as e:
#         print(f"ERROR re-initializing text classifier: {e}")
#         text_classifier = None # Ensure it's None if failed
# else:
#     print("Text classifier already seems to be initialized (or not needed directly by agent).")

# Add similar blocks for image_classifier, asr_model etc. IF the agent called them directly.

print("--- Preparation complete. Ready to run moderation tasks. ---")


Section 7.1: Preparing for Workflow Execution

--- Preparation complete. Ready to run moderation tasks. ---


In [10]:
# ==============================================================================
# Section 7.2: Run Moderation Workflow on Sample Content
# ==============================================================================
# Runs `moderate_content_item` over the sample text comments and the combined
# media item, collecting every result in `all_moderation_results` (keyed by
# content ID). Per-item results are read from that dict, e.g.
# `all_moderation_results["txt002"]`.
print("\n" + "="*60)
print("Section 7.2: Running Moderation Tasks")
print("="*60 + "\n")


def _find_comment_text(comment_id):
    """Return the 'content' of the first entry in `sample_comments` whose 'id'
    equals `comment_id`, or None when no entry matches."""
    return next((c['content'] for c in sample_comments if c['id'] == comment_id), None)


def _final_action(result):
    """Extract a one-word outcome from an agent result for the summary table.

    Prefers result['decision']['details']['action']; falls back to
    result['decision']['status'] when details are missing or empty, and to
    "Unknown Action" when there is no usable decision at all.
    """
    decision = result.get('decision') if result else None
    if not decision:
        return "Unknown Action"
    details = decision.get('details')
    if details:
        return details.get('action', 'Action Not Found')
    return decision.get('status', 'Status Unknown')  # Fallback if details missing


# Text comments to moderate, in execution order. The expectations describe what
# each sample is meant to demonstrate; the actual outcome depends on the stored
# analysis results and the agent's thresholds.
text_ids_to_moderate = [
    # Potentially hateful comment.
    # Expected: High score, likely DELETE action triggered by toxic text and RAG match for Policy A.
    "txt002",
    # Potentially spammy comment.
    # Expected: Moderate/High score, likely DELETE/FLAG triggered by Few-Shot Spam rule and RAG match for Policy D.
    "txt003",
    # Potentially misinformation comment.
    # Expected: Moderate/High score, likely DELETE/FLAG triggered by Few-Shot Misinfo rule and RAG match for Policy C.
    "txt004",
    # Neutral comment. Expected: Low score, APPROVE action.
    "txt001",
    # Benign comment. Expected: Low score, APPROVE action.
    "txt005",
]

all_moderation_results = {}

for text_id in text_ids_to_moderate:
    text = _find_comment_text(text_id)
    # Missing and empty text are both skipped (truthiness check), so the agent
    # is never invoked without any text for a text-only item.
    if text:
        all_moderation_results[text_id] = moderate_content_item(content_id=text_id, text_content=text)
    else:
        print(f"Skipping {text_id}, content not found.")


# --- Run moderation combining image and potentially audio (media001) ---
# Outcome depends heavily on the content of your sample image/audio, the analysis results
# from Section 2, and the mocked AIGC results from Section 5.
# This assumes the image/audio analysis results in `all_analysis_results` correspond to this ID.
combined_id = "media001"
print(f"\n--- Running moderation on combined media item: {combined_id} ---")
# No text_content is passed: the agent is expected to rely on the stored media analysis.
result_media001 = moderate_content_item(content_id=combined_id)
all_moderation_results[combined_id] = result_media001


# ==============================================================================
# Section 7.3: Display All Moderation Results
# ==============================================================================
print("\n" + "="*60)
print("Section 7.3: Summary of All Moderation Decisions")
print("="*60 + "\n")

# Print a one-line summary of the decision for every moderated item
for content_id, result in all_moderation_results.items():
    print(f"Content ID: {content_id} -> Final Action: {_final_action(result)}")

# Optional: Print full results for detailed inspection (can be verbose)
# print("\nFull Moderation Results Details:")
# print(json.dumps(all_moderation_results, indent=2, default=str))


Section 7.2: Running Moderation Tasks



[Agent Step 1: Gathering Pre-calculated Analysis Results]
  Retrieved Text Analysis for txt002

[Agent Step 2: Retrieving Policy Context (RAG)]
  RAG Query: 'Text: Go back where you came from, you don't belong here! #HateSpeechExample...'


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  Relevant Policies Found:
    - ID: P01, Dist: 1.404, Text: Policy A (Hate Speech): Hate speech targeting individuals or groups based on att...
    - ID: P06, Dist: 1.476, Text: Policy F (Copyright): Content violating copyright laws will be removed upon vali...

[Agent Step 3: Checking Specific Rules (Few-Shot Results)]
  No specific few-shot rules triggered or applied for this item.

[Agent Step 4: Checking for AI Generation (Mocked Results)]
  No concerning AIGC signals flagged based on current rules.

[Agent Step 5: Final Decision for ID: txt002]
  Final Violation Score: 1.00
  Reason Codes: ['TEXT_TOXIC_MOD']
  Explanation: Moderate confidence toxic text (score: 0.74)

>>> ACTION CALLED: Flag for Human Review <<<
{
  "action": "FLAG_FOR_REVIEW",
  "content_id": "txt002",
  "reason_codes": [
    "TEXT_TOXIC_MOD"
  ],
  "explanation": "Moderate confidence toxic text (score: 0.74)",
  "ai_confidence": 0.0,
  "timestamp": "2025-04-19T18:07:46.378083+00:00"
}


[Agent Step 1: Gathering

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  Relevant Policies Found:
    - ID: P04, Dist: 1.091, Text: Policy D (Spam & Scams): Commercial spam, including unsolicited promotions, repe...
    - ID: P06, Dist: 1.528, Text: Policy F (Copyright): Content violating copyright laws will be removed upon vali...

[Agent Step 3: Checking Specific Rules (Few-Shot Results)]
  Applying Few-Shot Spam Rule: VIOLATION

[Agent Step 4: Checking for AI Generation (Mocked Results)]
  No concerning AIGC signals flagged based on current rules.

[Agent Step 5: Final Decision for ID: txt003]
  Final Violation Score: 2.00
  Reason Codes: ['FEWSHOT_SPAM']
  Explanation: Flagged by Few-Shot Spam Rule

>>> ACTION CALLED: Flag for Human Review <<<
{
  "action": "FLAG_FOR_REVIEW",
  "content_id": "txt003",
  "reason_codes": [
    "FEWSHOT_SPAM"
  ],
  "explanation": "Flagged by Few-Shot Spam Rule",
  "ai_confidence": 0.3333333333333333,
  "timestamp": "2025-04-19T18:07:46.397579+00:00"
}


[Agent Step 1: Gathering Pre-calculated Analysis Results]
  Retriev

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  Relevant Policies Found:
    - ID: P03, Dist: 1.557, Text: Policy C (Misinformation): Spreading verifiably false information that can cause...
    - ID: P07, Dist: 1.597, Text: Policy G (AI-Generated Content Disclosure): AI-generated content must be disclos...

[Agent Step 3: Checking Specific Rules (Few-Shot Results)]
  Applying Few-Shot Misinfo Rule: VIOLATION

[Agent Step 4: Checking for AI Generation (Mocked Results)]
  No concerning AIGC signals flagged based on current rules.

[Agent Step 5: Final Decision for ID: txt004]
  Final Violation Score: 2.00
  Reason Codes: ['FEWSHOT_MISINFO']
  Explanation: Flagged by Few-Shot Misinformation Rule

>>> ACTION CALLED: Flag for Human Review <<<
{
  "action": "FLAG_FOR_REVIEW",
  "content_id": "txt004",
  "reason_codes": [
    "FEWSHOT_MISINFO"
  ],
  "explanation": "Flagged by Few-Shot Misinformation Rule",
  "ai_confidence": 0.3333333333333333,
  "timestamp": "2025-04-19T18:07:46.415718+00:00"
}


[Agent Step 1: Gathering Pre-calculate

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  Relevant Policies Found:
    - ID: P06, Dist: 1.635, Text: Policy F (Copyright): Content violating copyright laws will be removed upon vali...
    - ID: P07, Dist: 1.744, Text: Policy G (AI-Generated Content Disclosure): AI-generated content must be disclos...

[Agent Step 3: Checking Specific Rules (Few-Shot Results)]
  No specific few-shot rules triggered or applied for this item.

[Agent Step 4: Checking for AI Generation (Mocked Results)]
  No concerning AIGC signals flagged based on current rules.

[Agent Step 5: Final Decision for ID: txt001]
  Final Violation Score: 0.00
  Reason Codes: []
  Explanation: 

>>> ACTION CALLED: Approve Content <<<
{
  "action": "APPROVE",
  "content_id": "txt001",
  "timestamp": "2025-04-19T18:07:46.432952+00:00"
}


[Agent Step 1: Gathering Pre-calculated Analysis Results]
  Retrieved Text Analysis for txt005

[Agent Step 2: Retrieving Policy Context (RAG)]
  RAG Query: 'Text: I disagree with the points made in this video, but the discussion is 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  Relevant Policies Found:
    - ID: P07, Dist: 1.634, Text: Policy G (AI-Generated Content Disclosure): AI-generated content must be disclos...
    - ID: P03, Dist: 1.676, Text: Policy C (Misinformation): Spreading verifiably false information that can cause...

[Agent Step 3: Checking Specific Rules (Few-Shot Results)]
  No specific few-shot rules triggered or applied for this item.

[Agent Step 4: Checking for AI Generation (Mocked Results)]
  No concerning AIGC signals flagged based on current rules.

[Agent Step 5: Final Decision for ID: txt005]
  Final Violation Score: 0.00
  Reason Codes: []
  Explanation: 

>>> ACTION CALLED: Approve Content <<<
{
  "action": "APPROVE",
  "content_id": "txt005",
  "timestamp": "2025-04-19T18:07:46.450973+00:00"
}

--- Running moderation on combined media item: media001 ---


[Agent Step 1: Gathering Pre-calculated Analysis Results]

[Agent Step 2: Retrieving Policy Context (RAG)]
  RAG Query: 'Generic content item media001'


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  Relevant Policies Found:
    - ID: P06, Dist: 1.590, Text: Policy F (Copyright): Content violating copyright laws will be removed upon vali...
    - ID: P02, Dist: 1.658, Text: Policy B (Violence & Graphic Content): Content depicting realistic graphic viole...

[Agent Step 3: Checking Specific Rules (Few-Shot Results)]
  No specific few-shot rules triggered or applied for this item.

[Agent Step 4: Checking for AI Generation (Mocked Results)]
  No concerning AIGC signals flagged based on current rules.

[Agent Step 5: Final Decision for ID: media001]
  Final Violation Score: 0.00
  Reason Codes: []
  Explanation: 

>>> ACTION CALLED: Approve Content <<<
{
  "action": "APPROVE",
  "content_id": "media001",
  "timestamp": "2025-04-19T18:07:46.468587+00:00"
}

Section 7.3: Summary of All Moderation Decisions

Content ID: txt002 -> Final Action: FLAG_FOR_REVIEW
Content ID: txt003 -> Final Action: FLAG_FOR_REVIEW
Content ID: txt004 -> Final Action: FLAG_FOR_REVIEW
Content ID: txt001 -> Fi

# 8. Implemented Gen AI Capabilities & Discussion

## 8.1 Implemented Capabilities Summary:
This notebook demonstrably implements or simulates the following key Gen AI capabilities relevant to the content moderation use case:


1. **Multimodal Understanding (Image):** Implemented using CLIP (openai/clip-vit-base-patch32) via Hugging Face pipeline for zero-shot classification based on moderation-relevant labels.
2. **Multimodal Understanding (Audio):** Implemented using Whisper (openai/whisper-base) for transcription (ASR) followed by text analysis (unitary/toxic-bert) on the transcript.
3. **Document Understanding:** Implicitly used by RAG when processing and encoding the policy_docs.
4. **Embeddings:** Implemented using sentence-transformers (all-MiniLM-L6-v2) to create vector representations of policy documents and queries.
5. **Vector Search/Vector Store/Vector Database:** Implemented using FAISS (IndexFlatL2) for efficient similarity search over policy embeddings.
6. **Retrieval Augmented Generation (RAG):** Core component implemented (retrieve_relevant_policies function) to fetch relevant policy documents based on content analysis, providing grounding context.
7. **Grounding:** Explicitly achieved via the RAG component, linking moderation decisions back to specific retrieved policy snippets (evidence included in the agent's output).
8. **Few-Shot Prompting:** Simulated via prompt construction (classify_with_few_shot function) and keyword-based logic, with optional (commented-out) integration with an LLM API (OpenAI) for true few-shot learning.
9. **Function Calling:** Simulated via standard Python functions (func_delete_content, func_flag_for_review, func_approve_content) that mimic API calls for platform actions.
10. **Agents:** The moderate_content_item function acts as a basic agent, orchestrating the workflow: gathering multimodal signals, querying RAG, applying rules (few-shot, AIGC), making a decision based on aggregated evidence, and triggering the appropriate action function.
11. **Structured Output / JSON Mode / Controlled Generation:** Used extensively in the return values of analysis pipelines, RAG retrieval, few-shot classification, action function confirmations, and the final output of the agent (moderate_content_item), ensuring clear, machine-readable results.

(Capabilities not explicitly implemented in this simulation include: Long context handling beyond model limits, Context caching for efficiency, rigorous Gen AI evaluation metrics, MLOps pipelines for deployment/monitoring, and full Video Understanding.)


## 8.2 Connecting to External Databases in Kaggle:
Directly connecting Kaggle notebooks to arbitrary external databases (e.g., your own PostgreSQL, MySQL server) is generally not feasible due to Kaggle's sandboxed environment and network restrictions.

**Viable Alternatives within Kaggle:**
* **Kaggle Datasets:** The most common method. Upload database dumps or relevant data files (e.g., CSV, JSON, Parquet, SQLite .db files) as a Kaggle Dataset. You can then access these files directly from your notebook using standard libraries (pandas, sqlite3, etc.). This is suitable for many use cases where the data doesn't need real-time external updates during the notebook run.
* **Google BigQuery Integration:** Kaggle offers built-in integration with Google BigQuery. You can query public BigQuery datasets or your own tables if you have a GCP project linked and configured. This requires GCP setup.
* **Cloud Service APIs (Limited):** If your database or data source provides a public web API (REST, GraphQL), you might be able to interact with it using Python's requests library. However, this depends on the API's authentication method and Kaggle's network egress rules. You must use Kaggle Secrets to store any required API keys or credentials securely.

**For this Prototype:** We used simple in-memory Python lists (policy_docs) and a FAISS index (also in memory, though could be saved/loaded from disk within /kaggle/working/). This approach is self-contained and suitable for demonstrating the RAG concept within Kaggle's limitations.

## 8.3 Limitations & Production Considerations:
This notebook is a **simulation** and proof-of-concept. A production-grade system would require significant enhancements:

* **Infrastructure:** Real-time, scalable stream processing (e.g., Kafka, Pub/Sub), microservices architecture (e.g., Kubernetes), distributed databases (e.g., Spanner, Cassandra), dedicated model serving infrastructure (e.g., Vertex AI Endpoints, SageMaker).
* **Models:** Use of larger, potentially fine-tuned multimodal models, more sophisticated and robust AIGC detectors, powerful LLMs for reasoning and few-shot tasks, and potentially specialized models for specific violation types.
* **RAG:** Advanced retrieval strategies (hybrid search, re-ranking), larger and dynamically updated knowledge bases (policies, case law, historical decisions), potentially integrating knowledge graphs.
* **Human-in-the-Loop:** Critical workflow integration with human review tools for handling flagged content, resolving ambiguous cases, providing feedback for model retraining (active learning), and quality assurance.
* **MLOps:** Rigorous processes for model monitoring (drift detection, performance tracking), automated retraining and evaluation pipelines, version control, and safe deployment strategies (canary releases, A/B testing).
* **Latency & Cost:** Aggressive optimization for low-latency inference (quantization, model distillation, hardware acceleration) and cost management (model selection, autoscaling).
* **Resilience & Error Handling:** Robust error handling, retries, and failover mechanisms for all components.

# 9. Conclusion & AI Agent Overview

## 9.1 Conclusion
This notebook successfully simulated an AI Multimodal Content Moderation Agent, demonstrating the integration and potential of various Generative AI capabilities to address the complex challenges of online content safety. By combining multimodal analysis, Retrieval-Augmented Generation (RAG) for policy grounding, few-shot prompting for adaptability, and agentic orchestration with function calling, we illustrated a path towards more scalable, context-aware, and efficient moderation systems. While limitations exist within the Kaggle environment simulation, the core concepts highlight the significant promise of advanced AI in creating safer online environments.

## 9.2 AI Agent Overview
**What is it?** This AI agent is a simulated system designed to automatically analyze user-generated content across multiple modalities (text, image, audio) to identify potential violations of platform policies.
**How is it useful?** It addresses the critical need for scalable, fast, and consistent content moderation on online platforms. By automating the initial review process, it can:
* Handle vast volumes of content infeasible for manual review alone.
* Speed up the detection and removal of harmful material (hate speech, spam, misinformation, etc.).
* Improve consistency by applying policies programmatically.
* Reduce the burden on human moderators, allowing them to focus on complex edge cases and appeals.
* Provide audit trails and evidence (grounding) for moderation decisions.
  
**What actions does it perform?** Based on its analysis and internal logic, the agent simulates taking one of the following primary actions:
1. **DELETE:** If the content is deemed highly likely to violate policies (based on a high violation score derived from multiple signals like toxicity, policy matches, specific rules).
2. **FLAG_FOR_REVIEW:** If the content is borderline or potentially violates policies but doesn't meet the high threshold for automatic deletion (moderate violation score). It sends the content to a human review queue with context (reason codes, AI confidence).
3. **APPROVE:** If the content is deemed unlikely to violate policies (low violation score).

The agent uses a combination of specialized models (toxicity, image classification, ASR), RAG for policy context, few-shot learning simulations for adaptability, and mocked AIGC detection to gather evidence before making its final decision and triggering the appropriate simulated action.

# 10. Adding More Image/Audio Data

To test the agent with different images or audio files:

1. Upload Files: Upload your new image (.jpg, .png, etc.) and audio (.mp3, .wav, etc.) files to your Kaggle Datasets (you can add them to the existing datasets created earlier or create new ones).
2. Update Paths: Go back to the Multimodal Analysis code cell (Section 2). Modify the following variables to point to your new files:

* image_dataset_slug: The slug (name) of the dataset containing your image.
* sample_image_filename: The exact filename of your image within that dataset.
* audio_dataset_slug: The slug of the dataset containing your audio.
* sample_audio_filename: The exact filename of your audio within that dataset.

3. Re-run Cells: Re-run the Multimodal Analysis cell (Section 2) to perform the analysis on the new files and store the results. Then re-run the Mock AIGC cell (Section 5), the Agent Definition cell (Section 6.2) if necessary, the Preparation cell (Section 7.1), and finally the Workflow Execution cell (Section 7.2), paying attention to the output for the media001 item (or adapt the ID if needed).
   
The agent's decision on media001 will now be based on the analysis results of your newly provided image and audio files.

Together, these sections form a comprehensive, end-to-end notebook that explains the concepts clearly for a competition setting. Remember to configure your sample files and, if you choose to use the optional OpenAI integration, the OpenAI API key.