<a href="https://colab.research.google.com/github/Di9mar/ada4b/blob/main/Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install required packages
!pip install datasets transformers[torch] --upgrade

from google.colab import drive
from datasets import load_dataset
import pandas as pd
import torch
import numpy as np
import json
import os
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, roc_auc_score

Collecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting transformers[torch]
  Downloading transformers-4.36.2-py3-none-any.whl (8.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.26.1-py3-none-any.whl (270 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━

In [3]:
# Import the 'drive' module from the 'google.colab' library
# This module allows you to mount your Google Drive in the Colab environment.
# Make sure you have the necessary authorization to access your Drive.
# If not already installed, you may need to install the 'google-colab' package.
from google.colab import drive

# Mount Google Drive to '/content/drive'
# This will make your Google Drive files accessible from within the Colab environment.
# You'll be prompted to authenticate and grant necessary permissions.
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Define paths
current_model = "wiki"
data_file = "story_data"

# Define paths based on your original code
base_path = "/content/drive/My Drive/ColabData"
model_path = f"{base_path}/{current_model}"
csv_path = f"{base_path}/{data_file}.csv"
logs_path = f"{base_path}/logs"

In [8]:
# Load the trained model and tokenizer
print(f"Loading '{current_model}' model")
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Print the model configuration for reference
print(f"Model Configuration:\n{model.config}")

Loading 'wiki' model
Model Configuration:
DistilBertConfig {
  "_name_or_path": "/content/drive/My Drive/ColabData/wiki",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "single_label_classification",
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
  "vocab_size": 30522
}



In [9]:
# Define dataset class
class TextDataset(torch.utils.data.Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, idx):
        text = self.texts[idx]

        # Tokenize the text on-the-fly
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')

        # Convert the encoding to a format suitable for PyTorch
        item = {key: val.squeeze(0) for key, val in encoding.items()}  # Squeeze is used to remove batch dimension
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

    def get_labels(self):
        return self.labels


# Function to Calculate Metrics
def calculate_evaluation_metrics(predictions, true_labels):
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='binary')
    conf_matrix = confusion_matrix(true_labels, predictions)
    roc_auc = roc_auc_score(true_labels, predictions)  # For binary classification

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'F1_score': f1,
        'confusion_matrix': conf_matrix.tolist(),
        'ROC_AUC': roc_auc
    }

In [10]:
# Load new data
try:
    df = pd.read_csv(csv_path, delimiter=';')
    load_success = True
except Exception as e:
    load_success = False
    df = None
    error_message = str(e)

load_success, df if df is not None else error_message

(True,
                                                  human  \
 0    Chapter Text\n\n\nThey’d just fired him.\n\n\n...   
 1    Stu wakes up in a coffin and thinks, fuck. Las...   
 2    It was the holiday season in Bound Arlyn, and ...   
 3    His eyes were so warm and intense on me that I...   
 4    Ada: Salazar Castle: Bedroom:\n\n \nI sat perc...   
 ..                                                 ...   
 173  A letter laid in his hand. A date not even rea...   
 174  Chapter Text\n\nDistrict One: Female- Valentin...   
 175  Intak always keep an eye on Jiung. How he sudd...   
 176  Once upon a time, in a small mountain town cal...   
 177  “I don’t need you anymore.”\n\n\n\n I repeat t...   
 
                                                     ai  
 0    In a world where soulmates actually exists, wh...  
 1    So i suddenly came up with this idea, what if ...  
 2    Once upon a time it was an average day in the ...  
 3    Bella sat there waiting, watching the clock. 

In [11]:
# Remove excess newline characters
df['human'] = df['human'].str.replace(r'\n+', '\n')
df['ai'] = df['ai'].str.replace(r'\n+', '\n')

# Prepare the data
labels = [0] * len(df['human']) + [1] * len(df['ai'])  # Adjust columns as per your data
texts = df['human'].tolist() + df['ai'].tolist()  # Adjust columns as per your data
dataset = TextDataset(texts, labels, tokenizer)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    tokenizer=tokenizer
)

  df['human'] = df['human'].str.replace(r'\n+', '\n')
  df['ai'] = df['ai'].str.replace(r'\n+', '\n')


In [21]:
successful_count = 0
error_count = 0

for idx, row in df.iterrows():
    try:
        # Tokenize the text
        encoding = tokenizer(row['human'], row['ai'], truncation=True, padding='max_length', max_length=512, return_tensors='pt')
        successful_count += 1

    except Exception as e:
        error_count += 1
        # Print the error message and the problematic texts
        print(f"Error in row {idx}: {str(e)}")
        print(f"Problematic 'human' text (row {idx}):\n{row['human']}\n")
        print(f"Problematic 'ai' text (row {idx}):\n{row['ai']}\n")

# Print the summary at the end
print(f"Total rows processed: {successful_count + error_count}")
print(f"Successful tokenizations: {successful_count}")
print(f"Tokenization errors: {error_count}")

Total rows processed: 178
Successful tokenizations: 178
Tokenization errors: 0


In [15]:
# Predict on the new dataset
predictions = trainer.predict(dataset)
predicted_labels = np.argmax(predictions.predictions, axis=1)

# Evaluate the model
metrics = calculate_evaluation_metrics(predicted_labels, dataset.get_labels())

In [16]:
# Print the final evaluation metrics
print("Final Evaluation Metrics:")
print("Accuracy:", metrics['accuracy'])
print("Precision:", metrics['precision'])
print("Recall:", metrics['recall'])
print("F1 Score:", metrics['F1_score'])
print("Confusion Matrix:")
print(metrics['confusion_matrix'])
print("ROC AUC:", metrics['ROC_AUC'])

Final Evaluation Metrics:
Accuracy: 0.7696629213483146
Precision: 0.9137931034482759
Recall: 0.5955056179775281
F1 Score: 0.7210884353741497
Confusion Matrix:
[[168, 10], [72, 106]]
ROC AUC: 0.7696629213483145


In [17]:
# Define the file name for the metrics using current_model and data_file
metrics_file = f"{logs_path}/evaluation_metrics_{current_model}_{os.path.basename(data_file).split('.')[0]}.json"

# Save the evaluation metrics
try:
    with open(metrics_file, 'w') as file:
        json.dump(metrics, file, indent=4)
    print(f"Evaluation metrics saved in {metrics_file}")
except Exception as e:
    print(f"An error occurred while saving the metrics: {str(e)}")

Evaluation metrics saved in /content/drive/My Drive/ColabData/logs/evaluation_metrics_wiki_story_data.json
