## Imports & Libraries

In [2]:
!pip install -U -q mlflow datasets>=2.14.5 nlp 2>/dev/null
!pip install -q accelerate -U
!pip install -q -U transformers

In [3]:
!pip install -q transformers
!pip install -q peft
!pip install -q evaluate

In [4]:
import pandas as pd
import gc  # For garbage collection to manage memory
import re  # For regular expressions
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from peft import LoraConfig, TaskType
from peft import get_peft_model
import evaluate

import torch  
from torch import nn
from transformers import AutoModel, AutoTokenizer 
from transformers import TextDataset, LineByLineTextDataset, DataCollatorForLanguageModeling, \
pipeline, Trainer, TrainingArguments, DataCollatorWithPadding  
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification, RobertaForSequenceClassification  

from nlp import Dataset  
from imblearn.over_sampling import RandomOverSampler  
import datasets 
from datasets import Dataset, Image, ClassLabel  
from transformers import pipeline  
from bs4 import BeautifulSoup  

import matplotlib.pyplot as plt  
import itertools  
from sklearn.metrics import ( 
    accuracy_score,  
    roc_auc_score,  
    confusion_matrix,  
    classification_report,  
    f1_score  
)

from datasets import load_metric  #

from tqdm import tqdm 
tqdm.pandas()  

2024-05-10 13:29:16.923804: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-10 13:29:16.923921: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-10 13:29:17.091453: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
!conda install -q -y gdown

Retrieving notices: ...working... done
done
Solving environment: ...working... done

## Package Plan ##

  environment location: /opt/conda

  added / updated specs:
    - gdown


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    filelock-3.14.0            |     pyhd8ed1ab_0          16 KB  conda-forge
    gdown-5.1.0                |     pyhd8ed1ab_0          21 KB  conda-forge
    openssl-3.3.0              |       hd590300_0         2.8 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.8 MB

The following NEW packages will be INSTALLED:

  filelock           conda-forge/noarch::filelock-3.14.0-pyhd8ed1ab_0 
  gdown              conda-forge/noarch::gdown-5.1.0-pyhd8ed1ab_0 

The following packages will be UPDATED:

  openssl                                  3.2.1-hd590300_1 --> 3.3.0-hd590300_0 




In [6]:
!gdown --id 1GcERbF9WcgXHhBBnv4KCRZCPCa7CWIzn

Downloading...
From (original): https://drive.google.com/uc?id=1GcERbF9WcgXHhBBnv4KCRZCPCa7CWIzn
From (redirected): https://drive.google.com/uc?id=1GcERbF9WcgXHhBBnv4KCRZCPCa7CWIzn&confirm=t&uuid=2f003289-4cac-4fe0-94c0-3e5f33bee9ce
To: /kaggle/working/subtaskB_train.jsonl
100%|████████████████████████████████████████| 155M/155M [00:02<00:00, 60.6MB/s]


## HyperParameters

In [7]:
# Fraction of the dataset used for training, the rest will be used for validation
train_fraction = 0.9

# Number of training epochs
num_train_epochs = 3

# Pre-trained BERT model to be used
# BERT_MODEL = "bert-base-cased"
BERT_MODEL = "distilroberta-base"

# Directory where the model output will be saved
output_dir = "ai-generated-essay-detection-bert"

## Data

In [8]:
%%time
# Read the json file into a DataFrame
df = pd.read_json("subtaskB_train.jsonl", lines=True)

CPU times: user 916 ms, sys: 523 ms, total: 1.44 s
Wall time: 1.45 s


In [9]:
df.head()

Unnamed: 0,text,model,source,label,id
0,Forza Motorsport is a popular racing game that...,chatGPT,wikihow,1,0
1,Buying Virtual Console games for your Nintendo...,chatGPT,wikihow,1,1
2,Windows NT 4.0 was a popular operating system ...,chatGPT,wikihow,1,2
3,How to Make Perfume\n\nPerfume is a great way ...,chatGPT,wikihow,1,3
4,How to Convert Song Lyrics to a Song'\n\nConve...,chatGPT,wikihow,1,4


In [10]:
len(df.text)

71027

The computational capacity is not enough for 6 classes

In [11]:
df = df[df.model != 'bloomz']
df = df[df.model != 'dolly']

In [12]:
np.unique(df[['label']])

array([0, 1, 2, 3])

In [16]:
len(df.text)

47327

In [13]:
# Import the necessary library to compute class weights.
from sklearn.utils.class_weight import compute_class_weight

# Identify the unique classes in the training data.
class_names = np.unique(df[['model']])

# Calculate class weights using the 'balanced' option, which automatically adjusts for class imbalance.
weights_imb = compute_class_weight(class_weight=None, classes=class_names, y=df['model'])

# Create a dictionary mapping each class to its respective class weight.
class_weights_imb = dict(zip(class_names, weights_imb))

# Print the computed class weights to the console.
print(class_weights_imb)

{'chatGPT': 1.0, 'cohere': 1.0, 'davinci': 1.0, 'human': 1.0}


In [14]:
# Calculate class weights using the 'balanced' option, which automatically adjusts for class imbalance.
weights = compute_class_weight(class_weight='balanced', classes=class_names, y=df['model'])

# Create a dictionary mapping each class to its respective class weight.
class_weights = dict(zip(class_names, weights))

# Print the computed class weights to the console.
print(class_weights)

{'chatGPT': 0.9863901625677366, 'cohere': 1.0437323570924488, 'davinci': 0.9860613384448704, 'human': 0.986225723097441}


In [71]:
spl = lambda a : a.split()
l = lambda b : len(b)
np.mean(list(map(l, (map(spl, df.text)))))

399.57857037209203

In [53]:
word_mean = np.mean(list(map(l, df.text[0].split())))
word_mean

4.457002457002457

In [52]:
w_avg = []
for t in df.text:
    w_avg.append(np.mean(list(map(l, t.split()))))
print(np.mean(w_avg))

5.069463658538628


In [76]:
df_ai = df[df.model == 'chatGPT']
w_avg = []
for t in df_ai.text:
    w_avg.append(np.mean(list(map(l, t.split()))))
print(np.mean(w_avg))

5.172536376009882


In [77]:
df_coh = df[df.model == 'cohere']
for t in df_coh.text:
    w_avg.append(np.mean(list(map(l, t.split()))))
print(np.mean(w_avg))

5.028730991955361


In [78]:
df_dav = df[df.model == 'davinci']
for t in df_dav.text:
    w_avg.append(np.mean(list(map(l, t.split()))))
print(np.mean(w_avg))

5.06493706871457


In [79]:
df_human = df[df.model == 'human']
for t in df_human.text:
    w_avg.append(np.mean(list(map(l, t.split()))))
print(np.mean(w_avg))

5.069463658538628


In [56]:
# Create a list of unique labels
labels_list = class_names

# Initialize empty dictionaries to map labels to IDs and vice versa
label2id, id2label = dict(), dict()

# Iterate over the unique labels and assign each label an ID, and vice versa
for i, label in enumerate(labels_list):
    label2id[label] = i  # Map the label to its corresponding ID
    id2label[i] = label  # Map the ID to its corresponding label

# Print the resulting dictionaries for reference
print("Mapping of IDs to Labels:", id2label, '\n')
print("Mapping of Labels to IDs:", label2id)

Mapping of IDs to Labels: {0: 'chatGPT', 1: 'cohere', 2: 'davinci', 3: 'human'} 

Mapping of Labels to IDs: {'chatGPT': 0, 'cohere': 1, 'davinci': 2, 'human': 3}


In [57]:
ordered_weigths = [class_weights[x] for x in id2label.values()]
ordered_weigths

[0.9863901625677366, 1.0437323570924488, 0.9860613384448704, 0.986225723097441]

In [58]:
dataset = Dataset.from_pandas(df)

In [59]:
# Creating classlabels to match labels to IDs
ClassLabels = ClassLabel(num_classes=len(labels_list), names=labels_list.tolist())
print(ClassLabels)

# # Mapping labels to IDs
def map_label2id(example):
    example['label'] = ClassLabels.str2int(example['label'])
    return example

dataset = dataset.map(map_label2id, batched=True)

# Casting label column to ClassLabel Object
dataset = dataset.cast_column('label', ClassLabels)

# Splitting the dataset into training and testing sets using the predefined train/test split ratio.
dataset = dataset.train_test_split(test_size=1 - train_fraction, shuffle=True, stratify_by_column="label")

# Extracting the training data from the split dataset.
df_train = dataset['train']

# Extracting the testing data from the split dataset.
df_test = dataset['test']

ClassLabel(names=['chatGPT', 'cohere', 'davinci', 'human'], id=None)


Map:   0%|          | 0/47327 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/47327 [00:00<?, ? examples/s]

In [60]:
# Deleting the DataFrame 'df'
del df

# Performing garbage collection to free up memory
gc.collect()

1676

## Data preprocessing

In [61]:
tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL, use_fast=True, low_cpu_mem_usage=False)

In [62]:
def preprocess_function(examples):
    # The tokenizer is applied to each 'text' in the examples.

    return tokenizer(examples["text"], truncation=True, padding=True)

In [63]:
df_train = df_train.map(preprocess_function, batched=True)
df_test = df_test.map(preprocess_function, batched=True)

Map:   0%|          | 0/42594 [00:00<?, ? examples/s]

Map:   0%|          | 0/4733 [00:00<?, ? examples/s]

In [64]:
# Now we can remove the 'text' column from the training/test datasets.
df_train = df_train.remove_columns(['text'])
df_test = df_test.remove_columns(['text'])

In [65]:
tokenizer.decode(df_train[0]['input_ids'])

'<s>An outlaw is a person declared as outside the protection of the law. In pre-modern societies, all legal protection was withdrawn from the criminal, so that anyone was legally empowered to persecute or kill them. Outlawry was thus one of the harshest penalties in the legal system. In early Germanic law, the death penalty is conspicuously absent, and outlawing is the most extreme punishment, presumably amounting to a death sentence in practice. The concept is known from Roman law, as the status of homo sacer, and persisted throughout the Middle Ages.\n\nIn the common law of England, a "Writ of Outlawry" made the pronouncement Caput lupinum ("Let his be a wolf\'s head", literally "May he bear a wolfish head") with respect to its subject, using "head" to refer to the entire person (cf. "per capita") and equating that person with a wolf in the eyes of the law: not only was the subject deprived of all legal rights, being outside the "law", but others could kill him on sight as if he were

In [66]:
def plot_confusion_matrix(cm, classes, title='Confusion Matrix', cmap=plt.cm.Blues, figsize=(10, 8), is_norm=True):
    """
    This function plots a confusion matrix.

    Parameters:
        cm (array-like): Confusion matrix as returned by sklearn.metrics.confusion_matrix.
        classes (list): List of class names, e.g., ['Class 0', 'Class 1'].
        title (str): Title for the plot.
        cmap (matplotlib colormap): Colormap for the plot.
    """
    # Create a figure with a specified size
    plt.figure(figsize=figsize)

    # Display the confusion matrix as an image with a colormap
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # Define tick marks and labels for the classes on the axes
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    if is_norm:
        fmt = '.3f'
    else:
        fmt = '.0f'
    # Add text annotations to the plot indicating the values in the cells
    thresh = cm.max() / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    # Label the axes
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    # Ensure the plot layout is tight
    plt.tight_layout()
    # Display the plot
    plt.show()
    plt.savefig(title, format='pgf')

In [67]:
def evaluation(y_true, y_pred, name):
  # Calculate accuracy and F1 score
  accuracy = accuracy_score(y_true, y_pred)
  f1 = f1_score(y_true, y_pred, average='macro')

  # Display accuracy and F1 score
  print(f"Accuracy: {accuracy:.4f}")
  print(f"F1 Score: {f1:.4f}")

  # Get the confusion matrix if there are a relatively small number of labels
  if len(labels_list) <= 120:
      # Compute the confusion matrix
      cm = confusion_matrix(y_true, y_pred, normalize='true')

      # Plot the confusion matrix using the defined function
      plot_confusion_matrix(cm, labels_list, name, figsize=(8, 6))

  # Finally, display classification report
  print()
  print("Classification report:")
  print()
  print(classification_report(y_true, y_pred, target_names=labels_list, digits=4))

# Simple BERT for ai text classification


## Loading the Model

In [68]:
model = AutoModelForSequenceClassification.from_pretrained(
    BERT_MODEL,
    num_labels=len(labels_list),
    output_attentions=False,  
    output_hidden_states=False  
)

# Configure the mapping of class labels to their corresponding indices for later reference.
model.config.id2label = id2label  # Mapping from label indices to class labels.
model.config.label2id = label2id  # Mapping from class labels to label indices.

# Calculate and print the number of trainable parameters in millions for the model.
print(model.num_parameters(only_trainable=True) / 1e6)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


82.121476


In [69]:
training_args = TrainingArguments(
    output_dir="test_trainer", 
    evaluation_strategy="epoch",
    num_train_epochs=num_train_epochs,)

In [70]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [71]:
train_simple_bert = df_train
test_simple_bert = df_test

In [72]:
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_simple_bert,
    eval_dataset=test_simple_bert,
)

In [73]:
trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
2024/05/10 13:16:49 ERROR mlflow.utils.async_logging.async_logging_queue: Run Id 75d782d6c8a049e9a0766935af34fbfa: Failed to log run data: Exception: Changing param values is not allowed. Param with key='logging_dir' was already logged with value='test_trainer/runs/May10_12-19-55_ec36a89908bb' for run ID='75d782d6c8a049e9a0766935af34fbfa'. Attempted logging new value 'test_trainer/runs/May10_13-16-49_ec36a89908bb'.
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
trainer.evaluate()

In [None]:
predictions = trainer.predict(df_test)

## Confusion matrix

**For model's quality evaluation**

In [None]:
# Extract the true labels from the model outputs
y_true = predictions.label_ids

# Predict the labels by selecting the class with the highest probability
y_pred = predictions.predictions.argmax(1)

In [None]:
def answer(text, model):
    device = torch.device('cpu')
    model = model.to(device)
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_class_id = logits.argmax().item()
    return model.config.id2label[predicted_class_id]

In [None]:
text = "Emile Durkheim’s theories on collective effervescence and social solidarity can be used to explain what happens at a sporting event in the United States. Collective effervescence refers to the emotional excitement and energy that is generated when people come together in a group, and this can be seen at a sporting event as fans come together to support their team. This shared enthusiasm creates a sense of unity and belonging among the fans, and can lead to a heightened sense of social solidarity. This solidarity is strengthened by the shared emotions, values and beliefs of the fans, and the shared goal of supporting their team to victory."
print(answer(text, model))

In [None]:
evaluation(y_true, y_pred, "Confusion Matrix: BERT")

# Bert with Lora for every class

## Loading pretrained model

In [None]:
model_lora4 = AutoModelForSequenceClassification.from_pretrained(
    BERT_MODEL,
    torch_dtype=torch.float32,
    num_labels=4
)

## Data

In [None]:
train_lora4 = df_train
test_lora4 = df_test

## Finally, Lora

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [None]:
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=5,
    lora_alpha=1,
    lora_dropout=0.1
)

model_lora4 = get_peft_model(model_lora4, lora_config)
print_trainable_parameters(model_lora4)

## Training & evaluating the model

In [None]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer",
                                  evaluation_strategy="epoch",
                                  num_train_epochs=num_train_epochs,)

trainer = Trainer(
    model=model_lora4,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_lora4,
    eval_dataset=test_lora4,
)

In [None]:
trainer.train()

In [None]:
predictions = trainer.predict(test_lora4)

# Extract the true labels from the model outputs
y_true = predictions.label_ids

# Predict the labels by selecting the class with the highest probability
y_pred = predictions.predictions.argmax(1)
evaluation(y_true, y_pred)

In [None]:
y_pred_pt = torch.from_numpy(predictions.predictions)
probs = nn.functional.softmax(y_pred_pt, dim=-1)

text = "ChatGPT is a large language model developed by OpenAI. It is trained on a massive dataset of text and is able to generate human-like responses to a wide range of prompts. It can be used for a variety of tasks such as language translation, text summarization, and conversation generation. It has been trained on a diverse set of internet text and is capable of understanding and generating text in a variety of languages and styles."
print(answer(text, model_lora4))

# Bert with Lora for binary classification three times

## Model & Lora

In [None]:
tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL, use_fast=True, low_cpu_mem_usage=False)
def tokenize_function(examples, tokenizer):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments(
    output_dir="test_trainer", 
    evaluation_strategy="epoch",
    num_train_epochs=num_train_epochs,)

## ChatGPT vs Humans

In [None]:
model1 = AutoModelForSequenceClassification.from_pretrained(
    BERT_MODEL,
    torch_dtype=torch.float32,
    num_labels=2
)
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=5,
    lora_alpha=1,
    lora_dropout=0.1
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model1.to(device)
model1 = get_peft_model(model1, lora_config)
print_trainable_parameters(model1)

In [None]:
df = pd.read_json("subtaskB_train.jsonl", lines=True)

df = df.where((df['model'] == 'chatGPT') | (df['model'] == 'human'))
df = df.dropna()
dataset = Dataset.from_pandas(df)

In [None]:
labels_list = np.unique(df[['model']])
ClassLabels = ClassLabel(num_classes=len(labels_list), names=labels_list.tolist())
dataset = dataset.cast_column('label', ClassLabels)
dataset = dataset.train_test_split(test_size=1 - train_fraction, shuffle=True, stratify_by_column="label")
df_train = dataset['train']
df_test = dataset['test']

In [None]:
df_train = df_train.map(lambda x: tokenize_function(x, tokenizer), batched=True)
df_test = df_test.map(lambda x: tokenize_function(x, tokenizer), batched=True)

In [None]:
trainer_1 = Trainer(
    model=model1,
    args=training_args,
    train_dataset=df_train,
    eval_dataset=df_test,
    compute_metrics=compute_metrics,
)

In [None]:
trainer_1.train()

In [None]:
predictions_1 = trainer_1.predict(df_test)
y_true_1 = predictions_1.label_ids
y_pred_1 = predictions_1.predictions.argmax(1)

evaluation(y_true_1, y_pred_1)

In [None]:
y_pred_pt = torch.from_numpy(predictions_1.predictions)
probs_1 = nn.functional.softmax(y_pred_pt, dim=-1)

text_human = "Are not the sane and the insane equal at night as the sane lie a dreaming? Are not all of us outside this hospital, who dream, more or less in the condition of those inside it, every night of our lives?"
text = "Once in a village, a wise old woman named Abigail united the villagers to overcome a great storm. With her guidance, they worked together, rebuilt, and emerged stronger, fostering a spirit of unity and hope. Abigail's wisdom and kindness were immortalized, and the village thrived."
print(answer(text, model1))
print(answer(text_human, model1))

## Cohere vs Humans

In [None]:
model2 = AutoModelForSequenceClassification.from_pretrained(
    BERT_MODEL,
    torch_dtype=torch.float32,
    num_labels=2
)
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=5,
    lora_alpha=1,
    lora_dropout=0.1
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model2.to(device)
model2 = get_peft_model(model2, lora_config)
print_trainable_parameters(model2)

In [None]:
df = pd.read_json("subtaskB_train.jsonl", lines=True)

df = df.where((df['model'] == 'cohere') | (df['model'] == 'human'))
df = df.dropna()
dataset = Dataset.from_pandas(df)

labels_list = np.unique(df[['model']])

# Creating classlabels to match labels to IDs
ClassLabels = ClassLabel(num_classes=len(labels_list), names=labels_list.tolist())

# Casting label column to ClassLabel Object
dataset = dataset.class_encode_column('label')

# Splitting the dataset into training and testing sets using the predefined train/test split ratio.
dataset = dataset.train_test_split(test_size=1 - train_fraction, shuffle=True, stratify_by_column="label")

# Extracting the training data from the split dataset.
df_train = dataset['train']

# Extracting the testing data from the split dataset.
df_test = dataset['test']

In [None]:
df_train = df_train.map(lambda x: tokenize_function(x, tokenizer), batched=True)
df_test = df_test.map(lambda x: tokenize_function(x, tokenizer), batched=True)

In [None]:
trainer_2 = Trainer(
    model=model2,
    args=training_args,
    train_dataset=df_train,
    eval_dataset=df_test,
    compute_metrics=compute_metrics,
)

In [None]:
trainer_2.train()

In [None]:
predictions_2 = trainer_2.predict(df_test)
y_true_2 = predictions_2.label_ids
y_pred_2 = predictions_2.predictions.argmax(1)
evaluation(y_true_2, y_pred_2)

In [None]:
y_pred_pt = torch.from_numpy(predictions_2.predictions)
probs_2 = nn.functional.softmax(y_pred_pt, dim=-1)

text_human = "Are not the sane and the insane equal at night as the sane lie a dreaming? Are not all of us outside this hospital, who dream, more or less in the condition of those inside it, every night of our lives?"
text = "Once in a village, a wise old woman named Abigail united the villagers to overcome a great storm. With her guidance, they worked together, rebuilt, and emerged stronger, fostering a spirit of unity and hope. Abigail's wisdom and kindness were immortalized, and the village thrived."
print(answer(text, model2))
print(answer(text_human, model2))

## Davinci vs Humans

In [None]:
model3 = AutoModelForSequenceClassification.from_pretrained(
    BERT_MODEL,
    torch_dtype=torch.float32,
    num_labels=2
)
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=5,
    lora_alpha=1,
    lora_dropout=0.1
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model3.to(device)
model3 = get_peft_model(model3, lora_config)
print_trainable_parameters(model3)

In [None]:
df = pd.read_json("subtaskB_train.jsonl", lines=True)

df = df.where((df['model'] == 'davinci') | (df['model'] == 'human'))
df = df.dropna()
dataset = Dataset.from_pandas(df)

labels_list = np.unique(df[['model']])

# Creating classlabels to match labels to IDs
ClassLabels = ClassLabel(num_classes=len(labels_list), names=labels_list.tolist())

# Casting label column to ClassLabel Object
dataset = dataset.class_encode_column('label')

# Splitting the dataset into training and testing sets using the predefined train/test split ratio.
dataset = dataset.train_test_split(test_size=1 - train_fraction, shuffle=True, stratify_by_column="label")

# Extracting the training data from the split dataset.
df_train = dataset['train']

# Extracting the testing data from the split dataset.
df_test = dataset['test']

In [None]:
df_train = df_train.map(lambda x: tokenize_function(x, tokenizer), batched=True)
df_test = df_test.map(lambda x: tokenize_function(x, tokenizer), batched=True)

In [None]:
trainer_3 = Trainer(
    model=model3,
    args=training_args,
    train_dataset=df_train,
    eval_dataset=df_test,
    compute_metrics=compute_metrics,
)

In [None]:
trainer_3.train()

In [None]:
predictions_3 = trainer_3.predict(df_test)
y_true_3 = predictions_3.label_ids
y_pred_3 = predictions_3.predictions.argmax(1)
evaluation(y_true_3, y_pred_3)

In [None]:
y_pred_pt = torch.from_numpy(predictions_3.predictions)
probs_3 = nn.functional.softmax(y_pred_pt, dim=-1)

text_human = "Are not the sane and the insane equal at night as the sane lie a dreaming? Are not all of us outside this hospital, who dream, more or less in the condition of those inside it, every night of our lives?"
text = "Once in a village, a wise old woman named Abigail united the villagers to overcome a great storm. With her guidance, they worked together, rebuilt, and emerged stronger, fostering a spirit of unity and hope. Abigail's wisdom and kindness were immortalized, and the village thrived."
print(answer(text, model3))
print(answer(text_human, model3))

In [None]:
def answer_all(text, model1, model2, model3):
    device = torch.device('cpu')
    model1 = model1.to(device)
    model2 = model2.to(device)
    model3 = model3.to(device)
    
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits1 = model1(**inputs).logits
        logits2 = model2(**inputs).logits
        logits3 = model3(**inputs).logits
        
    max_logits = max(logits1.max(), logits2.max(), logits3.max())
    predicted_class_id = max_logits.argmax().item()

    if (logits1.max() >= logits2.max() and logits1.max() >= logits3.max()):
        return (model1.config.id2label[predicted_class_id], 'model1')
    elif (logits2.max() >= logits1.max() and logits2.max() >= logits3.max()):
        return (model2.config.id2label[predicted_class_id], 'model2')
    return (model3.config.id2label[predicted_class_id], 'model3')

In [None]:
text_human = "Are not the sane and the insane equal at night as the sane lie a dreaming? Are not all of us outside this hospital, who dream, more or less in the condition of those inside it, every night of our lives?"
text = "Once in a village, a wise old woman named Abigail united the villagers to overcome a great storm. With her guidance, they worked together, rebuilt, and emerged stronger, fostering a spirit of unity and hope. Abigail's wisdom and kindness were immortalized, and the village thrived."
print(answer_all(text, model1, model2, model3)[0], answer_all(text, model1, model2, model3)[1])