<a href="https://colab.research.google.com/github/Di9mar/ada4b/blob/main/Evaluation_Multiple_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Install required packages
!pip install datasets transformers[torch] --upgrade

from google.colab import drive
from datasets import load_dataset
import pandas as pd
import torch
import numpy as np
import json
import os
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, roc_auc_score

Successfully installed accelerate-0.26.1 datasets-2.16.1 dill-0.3.7 multiprocess-0.70.15 transformers-4.36.2


In [7]:
# Import the 'drive' module from the 'google.colab' library
# This module allows you to mount your Google Drive in the Colab environment.
# Make sure you have the necessary authorization to access your Drive.
# If not already installed, you may need to install the 'google-colab' package.
from google.colab import drive

# Mount Google Drive to '/content/drive'
# This will make your Google Drive files accessible from within the Colab environment.
# You'll be prompted to authenticate and grant necessary permissions.
drive.mount('/content/drive')

Mounted at /content/drive


In [31]:
# Define paths
current_model = ['wiki', 'essay', 'poetry', 'story']
data_file = "val_dataset"

# Define paths based on your original code
base_path = "/content/drive/My Drive/ColabData"
model_paths = []
for element in current_model:
  model_paths.append(f"{base_path}/{element}")

csv_path = f"{base_path}/{data_file}.csv"
logs_path = f"{base_path}/logs"

In [32]:
# Load the trained model and tokenizer
model_list = []
tokenizer_list = []  # To store the tokenizers as well
for element in model_paths:
    print(f"Loading '{element}' model")
    model = AutoModelForSequenceClassification.from_pretrained(element)
    tokenizer = AutoTokenizer.from_pretrained(element)

    model_list.append(model)
    tokenizer_list.append(tokenizer)

    # Print the model configuration for reference
    print(f"Model Configuration:\n{model.config}")

Loading '/content/drive/My Drive/ColabData/wiki' model
Model Configuration:
DistilBertConfig {
  "_name_or_path": "/content/drive/My Drive/ColabData/wiki",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "single_label_classification",
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
  "vocab_size": 30522
}

Loading '/content/drive/My Drive/ColabData/essay' model
Model Configuration:
DistilBertConfig {
  "_name_or_path": "/content/drive/My Drive/ColabData/essay",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1

In [33]:
# Define dataset class
class TextDataset(torch.utils.data.Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, idx):
        text = self.texts[idx]

        # Tokenize the text on-the-fly
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')

        # Convert the encoding to a format suitable for PyTorch
        item = {key: val.squeeze(0) for key, val in encoding.items()}  # Squeeze is used to remove batch dimension
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

    def get_labels(self):
        return self.labels


# Function to Calculate Metrics
def calculate_evaluation_metrics(predictions, true_labels):
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='binary')
    conf_matrix = confusion_matrix(true_labels, predictions)
    roc_auc = roc_auc_score(true_labels, predictions)  # For binary classification

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'F1_score': f1,
        'confusion_matrix': conf_matrix.tolist(),
        'ROC_AUC': roc_auc
    }

In [None]:
# Load new data
try:
    df = pd.read_csv(csv_path, delimiter=';')
    load_success = True
except Exception as e:
    load_success = False
    df = None
    error_message = str(e)

load_success, df if df is not None else error_message

In [38]:
# Remove excess newline characters
df['human'] = df['human'].str.replace(r'\n+', '\n')
df['ai'] = df['ai'].str.replace(r'\n+', '\n')

# Prepare the data
labels = [0] * len(df['human']) + [1] * len(df['ai'])  # Adjust columns as per your data
texts = df['human'].tolist() + df['ai'].tolist()  # Adjust columns as per your data
dataset = TextDataset(texts, labels, tokenizer)

trainers = []
for model, tokenizer in zip(model_list, tokenizer_list):
    trainer = Trainer(
        model=model,
        tokenizer=tokenizer,
    )
    trainers.append(trainer)

  df['human'] = df['human'].str.replace(r'\n+', '\n')
  df['ai'] = df['ai'].str.replace(r'\n+', '\n')


In [39]:
successful_count = 0
error_count = 0

for idx, row in df.iterrows():
    try:
        # Tokenize the text
        encoding = tokenizer(row['human'], row['ai'], truncation=True, padding='max_length', max_length=512, return_tensors='pt')
        successful_count += 1

    except Exception as e:
        error_count += 1
        # Print the error message and the problematic texts
        print(f"Error in row {idx}: {str(e)}")
        print(f"Problematic 'human' text (row {idx}):\n{row['human']}\n")
        print(f"Problematic 'ai' text (row {idx}):\n{row['ai']}\n")

# Print the summary at the end
print(f"Total rows processed: {successful_count + error_count}")
print(f"Successful tokenizations: {successful_count}")
print(f"Tokenization errors: {error_count}")

Total rows processed: 15000
Successful tokenizations: 15000
Tokenization errors: 0


In [41]:
all_predictions = []
all_metrics = []

for trainer in trainers:
    # Predict on the new dataset
    print(f"Predicting {trainer}")
    predictions = trainer.predict(dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Store predictions
    all_predictions.append(predicted_labels)

    # Evaluate the model
    metrics = calculate_evaluation_metrics(predicted_labels, dataset.get_labels())
    all_metrics.append(metrics)

    # Optionally, print the metrics for each model
    print(metrics)

Predicting <transformers.trainer.Trainer object at 0x7e047a4f3a00>


KeyboardInterrupt: 

In [44]:
print("Final Evaluation Metrics:")
for i, metrics in enumerate(all_metrics):
    print(f"Model {i+1} Metrics:")
    print("Accuracy:", metrics.get('accuracy', 'Not available'))
    print("Precision:", metrics.get('precision', 'Not available'))
    print("Recall:", metrics.get('recall', 'Not available'))
    print("F1 Score:", metrics.get('F1_score', 'Not available'))
    print("Confusion Matrix:\n", metrics.get('confusion_matrix', 'Not available'))
    print("ROC AUC:", metrics.get('ROC_AUC', 'Not available'))
    print("---------------------------------------")

Final Evaluation Metrics:


In [None]:
# Define the base name for the data file without extension
data_file_base = os.path.basename(data_file).split('.')[0]

# Save the evaluation metrics for each model
for model_name, metrics in zip(current_model, all_metrics):
    # Define the file name for the metrics using model_name and data_file
    metrics_file = f"{logs_path}/evaluation_metrics_{model_name}_{data_file_base}.json"

    # Save the evaluation metrics
    try:
        with open(metrics_file, 'w') as file:
            json.dump(metrics, file, indent=4)
        print(f"Evaluation metrics for '{model_name}' saved in {metrics_file}")
    except Exception as e:
        print(f"An error occurred while saving the metrics for '{model_name}': {str(e)}")

Evaluation metrics saved in /content/drive/My Drive/TEST/logs/evaluation_metrics_essay_story_data.json
