In [4]:
!pip install transformers requests




In [11]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
import numpy as np


In [13]:
# Load tokenizer and model from Hugging Face
model_name = "openai-community/roberta-base-openai-detector"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


Some weights of the model checkpoint at openai-community/roberta-base-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [19]:
# Step 1: Imports
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Step 2: Load tokenizer and model
model_name = "openai-community/roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Step 3: Device setup (CPU or MPS for Mac)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)

# Step 4: Prediction function
def predict_text_class(text):
    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = F.softmax(logits, dim=1)
        pred_class = torch.argmax(probs, dim=1).item()
        confidence = probs[0][pred_class].item()

    # Map labels (same as your project)
    label_map = {
        0: "Human-Generated",
        1: "AI-Generated"
    }

    print(f" Prediction: {label_map[pred_class]} (confidence: {confidence:.2f})")


Some weights of the model checkpoint at openai-community/roberta-base-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [34]:
predict_text_class("I grabbed a coffee, walked around the block, and felt way better after that.") 
predict_text_class("I just started journaling, and it’s surprisingly therapeutic.")
predict_text_class("Cooked dinner for the family today and didn’t burn anything. Proud moment!")
predict_text_class("My dog literally understands everything I say, I swear.")
predict_text_class("Got stuck in traffic for 2 hours and missed my class—ugh!")


 Prediction: AI-Generated (confidence: 0.67)
 Prediction: AI-Generated (confidence: 0.75)
 Prediction: AI-Generated (confidence: 1.00)
 Prediction: AI-Generated (confidence: 0.65)
 Prediction: AI-Generated (confidence: 0.98)


In [53]:
predict_text_class("Guess what? I finally submitted the assignment at 11:58 PM")
predict_text_class("Maaaann that movie ending hit different... I’m not okay")
predict_text_class("Tell me why I walked into the room and forgot what I needed")
predict_text_class("I swear my brain just randomly decides to stop functioning after 8pm")
predict_text_class("My mom really just called me lazy while I’m on my third breakdown this week")
predict_text_class("Me: gonna be productive today. Also me: *watches 6 hours of YouTube")
predict_text_class("I texted them first... again. I'm retiring from this friendship")

 Prediction: Human-Generated (confidence: 0.74)
 Prediction: Human-Generated (confidence: 0.84)
 Prediction: Human-Generated (confidence: 0.89)
 Prediction: Human-Generated (confidence: 0.64)
 Prediction: Human-Generated (confidence: 0.94)
 Prediction: Human-Generated (confidence: 0.59)
 Prediction: Human-Generated (confidence: 0.62)


In [62]:
## testing with my model its givin accurate results...
from transformers import BertTokenizer, BertForSequenceClassification
import torch


load_path = "saved_bert_model" 
tokenizer = BertTokenizer.from_pretrained(load_path)
model = BertForSequenceClassification.from_pretrained(load_path)

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)

def predict_from_saved_model(text):
    model.eval() 
    encoded_input = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
    encoded_input = {key: val.to(device) for key, val in encoded_input.items()}
    
    with torch.no_grad():
        outputs = model(**encoded_input)
        probs = torch.softmax(outputs.logits, dim=1)
        pred = torch.argmax(probs, dim=1).item()
        confidence = probs[0][pred].item()

    label = "AI-Generated" if pred == 1 else "Human-Generated"
    return f" Prediction: {label} (confidence: {confidence:.2f})"


In [74]:
sample_text = "This essay provides an in-depth analysis of renewable energy sources."
print(predict_from_saved_model(sample_text))

 Prediction: AI-Generated (confidence: 0.68)


In [76]:
samples = [
    "Guess what? I finally submitted the assignment at 11:58 PM",
    "Maaaann that movie ending hit different... I’m not okay",
    "Tell me why I walked into the room and forgot what I needed ",
    "I swear my brain just randomly decides to stop functioning after 8pm",
    "My mom really just called me lazy while I’m on my third breakdown this week",
    "I texted them first... again. I'm retiring from this friendship"
]

# Run predictions
for i, text in enumerate(samples, 1):
    print(f"Sample {i}: {predict_from_saved_model(text)}")

Sample 1:  Prediction: Human-Generated (confidence: 0.70)
Sample 2:  Prediction: Human-Generated (confidence: 0.71)
Sample 3:  Prediction: Human-Generated (confidence: 0.52)
Sample 4:  Prediction: Human-Generated (confidence: 0.72)
Sample 5:  Prediction: Human-Generated (confidence: 0.66)
Sample 6:  Prediction: Human-Generated (confidence: 0.55)
