<a href="https://colab.research.google.com/github/SiphiweAI/IsAI/blob/main/IsAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Scope
- Use pretrained models to review video, text, and/or image content and classify it as AI-generated or not.
## Inputs
- For Text - Hugging Face Transformers
- For Images -
- For Videos -
## Outputs
- Three separate models for text, image, and video detection, each trained and optimized independently.

- A wrapper API or service that routes incoming data to the appropriate model based on content type.

- Unified output format that reports whether the input is AI-generated, with modality-specific confidence scores.

# Save and Restore Session

In [None]:
# Save the current notebook session to disk with dill so it can be restored later.
# NOTE(review): the target is an absolute, user-specific Windows path; it will
# presumably not exist on Colab or on another machine -- adjust before running.
import dill
dill.dump_session(r"C:\Users\siphi\OneDrive\Documents\Working Notebooks\notebook_session.db")

In [None]:
# Restore a previously saved notebook session from the same file that the
# save cell writes to.
# NOTE(review): dill session files are pickle-based, so loading one can execute
# arbitrary code -- only load a session file you created yourself.
import dill
dill.load_session(r"C:\Users\siphi\OneDrive\Documents\Working Notebooks\notebook_session.db")

# Text Model

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

In [None]:
# Checkpoints that make up the text-detection ensemble.
#   name    -- identifier passed to ``from_pretrained``
#   sigmoid -- selects how predict_ave converts that model's logits
#              into a probability
models_info = [
    dict(name="roberta-base-openai-detector", sigmoid=True),
    dict(name="prasoonmhwr/ai_detection_model", sigmoid=True),
    dict(name="openai-community/roberta-large-openai-detector", sigmoid=True),
]

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Registry of loaded detectors. The loading loop appends one dict per model
# with the keys "tokenizer", "model" and "sigmoid"; predict_ave iterates it.
models = []

In [None]:
# Empty the registry first so that re-running this cell replaces the ensemble
# instead of appending a second copy of every model (which would also keep
# the previously loaded copies referenced and resident in memory).
models.clear()

# Load the tokenizer and classification model for every configured checkpoint.
for m in models_info:
    tok = AutoTokenizer.from_pretrained(m["name"])
    mdl = AutoModelForSequenceClassification.from_pretrained(m["name"]).to(device)
    mdl.eval()  # evaluation mode: the models are only used for inference
    models.append({
        "tokenizer": tok,
        "model": mdl,
        # Carried over from models_info so predict_ave knows how to read logits.
        "sigmoid": m["sigmoid"],
    })

In [None]:
def predict_ave(text: str) -> float:
    """
    Return the mean AI-generated probability of ``text`` across the ensemble.

    Every entry of the module-level ``models`` list is run on the same text
    (truncated to 512 tokens) and the per-model probabilities are averaged
    with equal weight.

    Args:
        text (str): Input text to classify.

    Returns:
        float: Average AI-generated probability (0 to 1).

    Raises:
        RuntimeError: If ``models`` is empty, i.e. no detector has been
            loaded yet.
    """
    # Fail with an actionable message instead of a bare ZeroDivisionError
    # from the average at the end.
    if not models:
        raise RuntimeError(
            "No detection models are loaded; run the model-loading cell "
            "before calling predict_ave()."
        )

    probs = []
    with torch.no_grad():  # inference only, no gradients needed
        for entry in models:
            tokenizer = entry["tokenizer"]
            model = entry["model"]

            inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            # Move every input tensor to the device the models live on.
            inputs = {k: v.to(device) for k, v in inputs.items()}

            logits = model(**inputs).logits

            if entry["sigmoid"]:
                if logits.shape[-1] == 1:
                    # Single-logit head: sigmoid yields the probability directly.
                    prob = torch.sigmoid(logits)[0][0].item()
                else:
                    # Multi-logit head: an element-wise sigmoid of one logit is
                    # not a normalised class probability, so normalise across
                    # the classes and read the same index (0) as before.
                    # NOTE(review): index 0 is assumed to be the AI-generated
                    # class for these checkpoints -- confirm against each
                    # model's config.id2label.
                    prob = F.softmax(logits, dim=-1)[0][0].item()
            else:
                # Two-class head where index 1 is taken as the AI-generated class.
                prob = F.softmax(logits, dim=1)[0][1].item()

            probs.append(prob)

    return sum(probs) / len(probs)

In [None]:
# Smoke test: score one sample sentence with the ensemble and print the
# averaged value returned by predict_ave.
text = "In this paper, we explore..."
result = predict_ave(text)
print(result)

# Image Model

# Video Model