In [1]:
# Import necessary libraries
import os
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import accuracy_score, classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define the paths.
# The base directory can be overridden with the ESSAY_MODEL_BASE_PATH
# environment variable so the notebook is not tied to one user's machine;
# when the variable is unset, the original location is used unchanged.
base_path = os.environ.get(
    "ESSAY_MODEL_BASE_PATH",
    "/Users/home/Library/CloudStorage/GoogleDrive-mdiener.md@gmail.com/My Drive/ColabData/MyModel",
)
# Everything below is derived from base_path: the saved model directory and
# the two semicolon-delimited essay CSV files.
trained_model_path = f"{base_path}/trained_model"
ai_data_path = f"{base_path}/ai_essay.csv"
human_data_path = f"{base_path}/human_essay.csv"

In [3]:
# Function to read and preprocess the data
def read_data(file_path):
    """Load the essay texts from a semicolon-delimited CSV file.

    Args:
        file_path: Path to a CSV file that uses ';' as the delimiter and
            contains an 'essays' column.

    Returns:
        list[str]: The non-missing values of the 'essays' column, in file
        order, each converted to ``str``.

    Raises:
        KeyError: If the file has no 'essays' column.
    """
    data = pd.read_csv(file_path, delimiter=';')
    # Empty cells are parsed as float NaN, which is not valid text input for
    # the tokenizer used downstream, so rows without an essay are dropped.
    essays = data['essays'].dropna()
    # A column holding only numeric-looking text is parsed as numbers; coerce
    # to str so callers always receive a list of strings.
    return essays.astype(str).tolist()

In [4]:
# Function to perform predictions
def predict(texts, model, tokenizer, batch_size=32):
    """Predict a class index for each text, processing the texts in batches.

    Tokenizing and running the whole dataset in one forward pass makes the
    memory footprint grow with the dataset size, so the texts are processed
    ``batch_size`` at a time and the per-batch results are concatenated.

    Args:
        texts: A list of strings (a single string is treated as one text).
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object with a ``logits`` tensor.
        tokenizer: Callable that turns a list of strings into a mapping of
            tensors (called with padding, truncation to 512 tokens and
            ``return_tensors="pt"``).
        batch_size: Number of texts per forward pass. Defaults to 32.

    Returns:
        numpy.ndarray: One predicted class index (argmax over the logits)
        per input text, in input order. Empty if ``texts`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Keep the single-string convenience: one string is one text.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        return torch.empty(0, dtype=torch.long).numpy()

    # If the model exposes the device it lives on, the inputs are moved
    # there so a model placed on an accelerator still receives matching
    # tensors; models without a ``device`` attribute are fed as-is.
    device = getattr(model, "device", None)

    batch_predictions = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(batch, padding=True, truncation=True, max_length=512, return_tensors="pt")
            if device is not None:
                inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
            outputs = model(**inputs)
            # Move each batch result to the CPU right away so nothing
            # accumulates on the accelerator.
            batch_predictions.append(torch.argmax(outputs.logits, dim=1).cpu())
    return torch.cat(batch_predictions).numpy()

In [5]:
# Restore the saved tokenizer and sequence classifier from trained_model_path
tokenizer = AutoTokenizer.from_pretrained(trained_model_path)
model = AutoModelForSequenceClassification.from_pretrained(trained_model_path)

# Load both essay sets; only the first 400 human essays are kept
# NOTE(review): the reason for the 400 cap is not stated here - confirm
human_texts = read_data(human_data_path)[:400]
ai_texts = read_data(ai_data_path)

# Build the evaluation set: human essays first (label 0), then AI essays (label 1)
texts = [*human_texts, *ai_texts]
labels = [0 for _ in human_texts] + [1 for _ in ai_texts]

In [6]:
# Run the classifier over every essay in the combined evaluation set
predictions = predict(texts=texts, model=model, tokenizer=tokenizer)

In [7]:
# Score the predictions against the ground-truth labels, then show the results.
# Label 0 is reported as 'Human' and label 1 as 'AI'.
report = classification_report(
    y_true=labels,
    y_pred=predictions,
    target_names=['Human', 'AI'],
)
accuracy = accuracy_score(y_true=labels, y_pred=predictions)
print(f"Accuracy: {accuracy}")
print(report)

Accuracy: 0.8866666666666667
              precision    recall  f1-score   support

       Human       0.87      0.98      0.92       400
          AI       0.95      0.69      0.80       200

    accuracy                           0.89       600
   macro avg       0.91      0.84      0.86       600
weighted avg       0.89      0.89      0.88       600



In [8]:
# Persist the accuracy line and the classification report under base_path
metrics_path = base_path + "/prediction_metrics.txt"
with open(metrics_path, 'w') as f:
    # Accuracy goes on the first line, followed by the full report text
    f.writelines([f"Accuracy: {accuracy}\n", report])

print(f"Metrics saved to {metrics_path}")

Metrics saved to /Users/home/Library/CloudStorage/GoogleDrive-mdiener.md@gmail.com/My Drive/ColabData/MyModel/prediction_metrics.txt
