# AI Text Detection Model Evaluation

This notebook evaluates multiple models on a test dataset and compares their performance.

In [1]:
import numpy as np
import pandas as pd
import pickle
import os
import sys
import time
import re
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

sys.path.append('../')
np.random.seed(2025)

from models.numpyModels.dnn.neuralnet import NeuralNetwork
from models.numpyModels.rnn.rnn import RNN
from models.numpyModels.rnn.optimizers import AdamOptimizer

os.makedirs('results', exist_ok=True)

## Load Dataset

In [2]:
file_path = "../datasets/dataset2_inputs.csv"

with open(file_path, 'r') as file:
    csv_content = file.read()

rows = []
lines = csv_content.split('\n')
for line in lines:
    if not line.strip():
        continue
        
    parts = line.split(';', 1)
    if len(parts) >= 2:
        id_val = parts[0].strip()
        text_val = parts[1].strip()
        rows.append({'ID': id_val, 'Text': text_val})

df = pd.DataFrame(rows[1:])
print(f"Successfully read {len(df)} rows")
print(df)


Successfully read 100 rows
        ID                                               Text
0     D2-1  The Solar System faces a dramatic future over ...
1     D2-2  Spermidine is an aliphatic polyamine. Spermidi...
2     D2-3  The feasibility of extraterrestrial life is a ...
3     D2-4  Many cross sectional and prospective studies h...
4     D2-5  There were observations of spectral lines. Tha...
..     ...                                                ...
95   D2-96  Though a part of the continent of North Americ...
96   D2-97  There has been a steady increase in the number...
97   D2-98  Plasticizers like phthalates were thought to b...
98   D2-99  The main causes of lung cancer are multifacete...
99  D2-100  It is an approximation useful in chemistry, bu...

[100 rows x 2 columns]


## Load Models

In [3]:
class DatasetWrapper:
    def __init__(self, X, y=None):
        self.X = X
        self.y = y

def load_model(model_type):
    if model_type == "dnn":
        model = NeuralNetwork()
        model.load("../trained_models/numpy/dnn_weights.npz")
        
        with open("../preprocessed/vectorizer.pkl", "rb") as f:
            vectorizer = pickle.load(f)
        
        return model, vectorizer, None, None
    
    elif model_type == "rnn":
        embedding_matrix = np.load("../preprocessed/embedding_matrix.npy")
        
        model = RNN(n_units=64, embedding_matrix=embedding_matrix)
        model.initialize(AdamOptimizer())
        model.load("../trained_models/numpy/rnn_weights.npz")
        
        with open("../preprocessed/word_to_idx.pkl", "rb") as f:
            word_to_idx = pickle.load(f)
        
        return model, None, word_to_idx, embedding_matrix

models = {}
vectorizers = {}
word_to_idxs = {}
embedding_matrices = {}

for model_type in ["dnn", "rnn"]:
    try:
        model, vectorizer, word_to_idx, embedding_matrix = load_model(model_type)
        models[model_type] = model
        vectorizers[model_type] = vectorizer
        word_to_idxs[model_type] = word_to_idx
        embedding_matrices[model_type] = embedding_matrix
        print(f"Successfully loaded {model_type} model")
    except Exception as e:
        print(f"Error loading {model_type} model: {str(e)}")

Successfully loaded dnn model
Successfully loaded rnn model


## Define Prediction Function

In [4]:
nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)

def clean_text(text):
    text = text.lower()
    text = re.sub(f"[{string.punctuation}]", "", text)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words("english"))
    tokens = [word for word in tokens if word not in stop_words]
    return tokens

def predict_text(model, vectorizer, word_to_idx, embedding_matrix, text, model_type):
    tokens = clean_text(text)
    
    if model_type == "rnn":
        max_seq_length = 100
        sequence = np.zeros((1, max_seq_length), dtype=int)
        
        for j, word in enumerate(tokens[:max_seq_length]):
            if word in word_to_idx:
                sequence[0, j] = word_to_idx[word]
        
        predictions = model.forward_propagation(sequence)
        probability = predictions[0, -1, 0]
    
    else:
        joined_text = " ".join(tokens)
        X_new = vectorizer.transform([joined_text]).toarray()
        
        class DatasetWrapper:
            def __init__(self, X):
                self.X = X
        probability = model.predict(DatasetWrapper(X_new))[0][0]

    prediction = "AI" if probability >= 0.5 else "Human"
    return prediction, probability

## Make Predictions

In [5]:
results = {}
for model_type in models.keys():
    results[model_type] = pd.DataFrame(columns=['ID', 'Label'])

total_rows = len(df)
print(f"Making predictions on {total_rows} samples...")

start_time = time.time()
update_interval = max(1, total_rows // 20)

for index, row in df.iterrows():
    if index % update_interval == 0 or index == total_rows - 1:
        elapsed = time.time() - start_time
        progress = (index + 1) / total_rows * 100
        print(f"Progress: {progress:.1f}% ({index+1}/{total_rows}) - Elapsed: {elapsed:.1f}s")
    
    id_val = row['ID']
    print(row)
    text = row['Text']
    
    for model_type in models.keys():
        try:
            model = models[model_type]
            vectorizer = vectorizers[model_type]
            word_to_idx = word_to_idxs[model_type]
            embedding_matrix = embedding_matrices[model_type]
            
            prediction, _ = predict_text(model, vectorizer, word_to_idx, embedding_matrix, text, model_type)
            new_row = pd.DataFrame({'ID': [id_val], 'Label': [prediction]})
            results[model_type] = pd.concat([results[model_type], new_row], ignore_index=True)
        except Exception as e:
            print(f"Error with {model_type} model on text {id_val}: {str(e)}")
            new_row = pd.DataFrame({'ID': [id_val], 'Label': ['Error']})
            results[model_type] = pd.concat([results[model_type], new_row], ignore_index=True)

print(f"Predictions completed in {time.time() - start_time:.1f} seconds")

Making predictions on 100 samples...
Progress: 1.0% (1/100) - Elapsed: 0.0s
ID                                                   D2-1
Text    The Solar System faces a dramatic future over ...
Name: 0, dtype: object
ID                                                   D2-2
Text    Spermidine is an aliphatic polyamine. Spermidi...
Name: 1, dtype: object
ID                                                   D2-3
Text    The feasibility of extraterrestrial life is a ...
Name: 2, dtype: object
ID                                                   D2-4
Text    Many cross sectional and prospective studies h...
Name: 3, dtype: object
ID                                                   D2-5
Text    There were observations of spectral lines. Tha...
Name: 4, dtype: object
Progress: 6.0% (6/100) - Elapsed: 0.0s
ID                                                   D2-6
Text    The active quest for extra-solar planets has o...
Name: 5, dtype: object
ID                                                 

## Save Results to CSV

In [6]:
count = 1
for model_type, df in results.items():
    output_path = f"results/submissao1-grupo011-s{count}.csv"
    df.to_csv(output_path, sep='\t', index=False)
    print(f"Saved predictions for {model_type} model to {output_path}")
    count += 1

Saved predictions for dnn model to results/submissao1-grupo011-s1.csv
Saved predictions for rnn model to results/submissao1-grupo011-s2.csv
