In [None]:
!pip install -q transformers==4.28.0

In [None]:
import os
import random

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go

import spacy
import spacy.cli
from spacy.lang.en.stop_words import STOP_WORDS

# for spaCy Text Categorizer
from spacy.util import minibatch, compounding
from spacy.training.example import Example

from wordcloud import WordCloud # See : https://www.kaggle.com/aashita/word-clouds-of-various-shapes

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix

from transformers import BertTokenizer, BertForSequenceClassification

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
import torch.nn.functional as F # for softmax in Eval

from datetime import datetime
import pytz # timezone

from tqdm import trange # used to make our loops show a smart progress meter

# from autocorrect import spell # (TODO: do we really need this?)

#torch.manual_seed(0)

print("Using PyTorch version", torch.__version__)

In [None]:
# Load the scored responses from a hard-coded Google Drive location.
# NOTE(review): no drive-mount call is visible in this notebook, so the path
# must already be mounted before this cell runs - confirm.
df = pd.read_csv('/content/drive/MyDrive/Cheating Detection for AI generated/df_score_predictor.csv')


In [None]:
# (Disabled) Collapse 'final_AP_score' into five classes: 0, 1, 2 -> 0; 3 -> 1; 4 -> 2; 5 -> 3; 6 -> 4
'''
df['final_AP_score'] = df['final_AP_score'].replace([0, 1, 2], 0)
df['final_AP_score'] = df['final_AP_score'].replace([3], 1)
df['final_AP_score'] = df['final_AP_score'].replace([4], 2)
df['final_AP_score'] = df['final_AP_score'].replace([5], 3)
df['final_AP_score'] = df['final_AP_score'].replace([6], 4)
'''

In [None]:
# Notebook-level switches.
# NOTE(review): only `is_small_spacy` is read in the visible notebook (it picks
# the small vs. large English spaCy model); the other three flags are not
# referenced anywhere in the cells shown here.
is_auto_eda = False # run sweetviz report
is_small_spacy = True
is_sample_df = True
sample_fraction = 0.1

In [None]:

# Choose the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # Report how many GPUs are visible and which one will be used.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    # (TODO: change "cpu" to "device error"?)
    print('No GPU available, using the CPU instead.')

device

In [None]:
import re,string,unicodedata
import nltk
from nltk import pos_tag
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.porter import PorterStemmer
from nltk.corpus import wordnet, stopwords

In [None]:

# Pick the English spaCy pipeline according to the size flag.
spacy_model_name = "en_core_web_sm" if is_small_spacy else "en_core_web_lg"

# Download only when the package is not installed yet, so re-running the
# notebook does not fetch the model again every time.
if not spacy.util.is_package(spacy_model_name):
    spacy.cli.download(spacy_model_name)

nlp = spacy.load(spacy_model_name)

In [None]:
def extract_text(df):
    """Extract the Part A answer paragraph from each row's ``response``.

    The pattern captures the contents of the first ``<p>...</p>`` that
    directly follows the ``<p>----Response for Part A----</p>`` marker;
    ``re.DOTALL`` lets the capture span line breaks. Rows without a match
    end up as missing values.

    Args:
        df: DataFrame with a string column ``response``.

    Returns:
        The same DataFrame object, with a new ``extracted_text`` column
        added in place.
    """
    part_a_regex = r"<p>----Response for Part A----</p><p>(.*?)</p>"
    part_a_text = df['response'].str.extract(part_a_regex, flags=re.DOTALL)
    df['extracted_text'] = part_a_text
    return df

In [None]:
extract_text(df)
df

In [None]:
df['extracted_text'].isna().sum()

In [None]:
# Replace missing extractions with a placeholder sentence so every row holds a
# string. The filled copy is stored under 'extract_text' (not 'extracted_text'),
# which is the column the spaCy step reads.
df['extract_text'] = df['extracted_text'].fillna('No Response from the Student')

In [None]:
# Run every filled-in response through the spaCy pipeline in one streamed pass;
# each cell of 'preprocessed_text' holds the object nlp.pipe yields for that row
# (a spaCy Doc), which the cleaning functions below iterate token by token.
df['preprocessed_text'] = list(nlp.pipe(df['extract_text']))

In [None]:
def clean_doc(doc):
    """Return the tokens of ``doc`` that carry content, as a list.

    A token is dropped when it is punctuation, a currency symbol, a digit
    string, whitespace, a stop word, number-like, URL-like, or when its text
    contains ``@`` or ``|``. Surviving tokens keep their original order.
    """
    def _is_noise(token):
        # Same checks, in the same order, as one "drop this token" predicate.
        return (
            token.is_punct
            or token.is_currency
            or token.is_digit
            or token.is_space
            or token.is_stop
            or token.like_num
            or token.like_url
            or '@' in token.text
            or '|' in token.text
        )

    return [token for token in doc if not _is_noise(token)]

# Lemmatizing + to lower case
def lemma_text(doc):
    """Map each token to its lower-cased, stripped lemma.

    Tokens whose lemma is the ``-PRON-`` placeholder fall back to their
    lower-cased surface form (``token.lower_``) instead.

    Returns:
        A list of strings, one per token, in the original order.
    """
    return [
        token.lower_ if token.lemma_ == "-PRON-" else token.lemma_.lower().strip()
        for token in doc
    ]

#create_string
def create_string(doc):
    """Join the string form of every item in ``doc`` with single spaces."""
    return ' '.join(map(str, doc))

In [None]:
def preprocess_all(df):
    """Turn the 'preprocessed_text' column into cleaned, lemmatized strings.

    The column is rewritten in place three times, once per stage:
    token filtering (``clean_doc``), lemmatization (``lemma_text``) and
    joining back into one string (``create_string``).

    Returns:
        The same DataFrame object that was passed in.
    """
    column = 'preprocessed_text'
    for stage in (clean_doc, lemma_text, create_string):
        df[column] = df[column].apply(stage)
    return df

In [None]:
df_preprocessed = preprocess_all(df)

In [None]:
df_preprocessed

In [None]:
from torch.utils.data import Dataset
from transformers import (ElectraForSequenceClassification,RobertaTokenizer, RobertaForSequenceClassification,
                          AutoTokenizer, EvalPrediction, InputFeatures,AutoTokenizer, AutoModelForSequenceClassification,
                          Trainer, TrainingArguments, glue_compute_metrics)


In [None]:
!pip install -q sentencepiece

In [None]:
tokenizer = AutoTokenizer.from_pretrained('bert-large-uncased')

In [None]:
!pip install --upgrade safetensors

In [None]:
from transformers import AutoTokenizer, XLMRobertaXLForSequenceClassification, XLMRobertaForSequenceClassification

In [None]:
df['final_AP_score'].value_counts()

In [None]:
# Load RoBERTa-large with a 6-output classification head.
# NOTE(review): `model` is reassigned to "bert-large-uncased" in the next cell,
# so this checkpoint is loaded but is not the one handed to the Trainer.
# NOTE(review): the tokenizer loaded earlier is 'bert-large-uncased', which does
# not correspond to a RoBERTa checkpoint - confirm before relying on this cell.
# NOTE(review): problem_type is multi-label here, while the datasets are built
# from the integer 'final_AP_score' column - presumably a leftover; verify.
model = AutoModelForSequenceClassification.from_pretrained('roberta-large',
                                                           num_labels=6,
                                                           problem_type="multi_label_classification",
                                                           ignore_mismatched_sizes=True
                                                           )

In [None]:
# Load BERT-large (uncased) with a 6-output classification head. Rebinding
# `model` here replaces the RoBERTa model loaded in the previous cell, so this
# is the model the Trainer cells below receive.
model = AutoModelForSequenceClassification.from_pretrained("bert-large-uncased", num_labels=6, ignore_mismatched_sizes=True)


In [None]:
class TrainerDataset(Dataset):
    """Map-style dataset that serves pre-tokenized texts as ``InputFeatures``.

    All texts are tokenized once in ``__init__`` (truncated to 256 tokens and
    padded to a common length), so ``__getitem__`` only slices the cached
    encodings.

    Args:
        inputs: list of raw text strings.
        targets: sequence of labels, aligned index-by-index with ``inputs``.
        tokenizer: callable tokenizer returning a mapping that contains
            ``input_ids`` and ``attention_mask`` and, optionally,
            ``token_type_ids``.
    """

    def __init__(self, inputs, targets, tokenizer):
        self.inputs = inputs
        self.targets = targets
        self.tokenizer = tokenizer

        # Tokenize the input
        self.tokenized_inputs = tokenizer(inputs, padding=True, max_length=256, add_special_tokens=True, truncation=True)

    def __len__(self):
        """Number of examples (one per input text)."""
        return len(self.inputs)

    def _encoded_fields(self, idx):
        """Return the keyword arguments used to build example ``idx``."""
        encoded = self.tokenized_inputs
        # Some tokenizers (e.g. RoBERTa's) emit no segment ids; pass None in
        # that case instead of failing with a KeyError.
        if 'token_type_ids' in encoded:
            segment_ids = encoded['token_type_ids'][idx]
        else:
            segment_ids = None
        return dict(
            input_ids=encoded['input_ids'][idx],
            token_type_ids=segment_ids,
            attention_mask=encoded['attention_mask'][idx],
            label=self.targets[idx],
        )

    def __getitem__(self, idx):
        """Return example ``idx`` wrapped in an ``InputFeatures`` record."""
        return InputFeatures(**self._encoded_fields(idx))

In [None]:
# Stratified 80/20 split on the score column. The fixed random_state makes the
# split identical on every run, so reported metrics are comparable across runs.
X_train, X_valid = train_test_split(df, test_size=0.2, stratify=df['final_AP_score'], random_state=42)

In [None]:
df['preprocessed_text'].isna().sum()

In [None]:
X_train

In [None]:

from sklearn.preprocessing import OneHotEncoder

# OneHotEncoder expects a 2-D array, so turn each 1-D label vector into a column.
y_train = X_train["final_AP_score"].values.reshape(-1, 1)
y_valid = X_valid["final_AP_score"].values.reshape(-1, 1)

# Fix the category list so both splits get the same six columns even if a
# class is absent from one of them.
categories = [[0, 1, 2, 3, 4, 5]]

# The `sparse=` keyword was renamed to `sparse_output=` in newer scikit-learn
# releases; keeping the default sparse output and densifying with .toarray()
# works on both old and new versions.
encoder = OneHotEncoder(categories=categories)

# Fit on the training labels only, then reuse the fitted encoder for validation.
y_train_encoded = encoder.fit_transform(y_train).toarray()
y_valid_encoded = encoder.transform(y_valid).toarray()



In [None]:
y_train_encoded

In [None]:
# Build the tokenized train / validation datasets from the raw 'response' text
# and the integer 'final_AP_score' labels.
# NOTE(review): the model input is the raw 'response' column, not the
# 'preprocessed_text' column produced earlier - confirm this is intended.
train_dataset = TrainerDataset(X_train["response"].tolist(),
                               X_train["final_AP_score"].tolist(), tokenizer)
eval_dataset = TrainerDataset(X_valid["response"].tolist(),
                               X_valid["final_AP_score"].tolist(), tokenizer)

In [None]:
import gc

# Collect unreachable Python objects first so any tensors they still hold are
# released, then hand PyTorch's cached CUDA blocks back to the driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score

def compute_metrics(pred):
    """Compute classification metrics for a ``Trainer`` evaluation pass.

    Args:
        pred: object exposing ``predictions`` (class scores with classes on
            the last axis, or a tuple whose first item holds them) and
            ``label_ids`` (integer class ids, or one-hot rows).

    Returns:
        dict with ``accuracy``, weighted ``precision`` / ``recall`` / ``f1``
        and quadratic-weighted Cohen's kappa under ``qwk``.
    """
    scores = pred.predictions
    if isinstance(scores, tuple):
        # Some models return extra outputs; the class scores come first.
        scores = scores[0]
    y_pred = np.asarray(scores).argmax(-1)

    y_true = np.asarray(pred.label_ids)
    if y_true.ndim > 1:
        # One-hot labels: reduce to class ids so they match y_pred.
        y_true = y_true.argmax(-1)

    # zero_division=0 keeps the score at 0 for classes that are never
    # predicted, without emitting a warning on every evaluation.
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'f1': f1_score(y_true, y_pred, average='weighted', zero_division=0),
        'qwk': cohen_kappa_score(y_true, y_pred, weights='quadratic'),
    }


In [None]:
# Minimal Trainer setup: library defaults apart from the output directory and
# batch size of 8, with compute_metrics attached.
# NOTE(review): both `training_args` and `trainer` are reassigned in the next
# cell before `trainer.train()` is called, so this configuration is not the
# one that gets trained.
training_args = TrainingArguments(
    output_dir='.',
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics
)

In [None]:
# Set seed for reproducibility (Python, NumPy and PyTorch generators)
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Fine-tuning configuration: evaluate and checkpoint once per epoch and keep
# at most two checkpoints on disk.
training_args = TrainingArguments(
    output_dir="./models/model_electra2_prompt_4",
    num_train_epochs=4,  # 1 (1 epoch gives slightly lower accuracy)
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    lr_scheduler_type='linear',
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=5e-06,
    weight_decay=0.01,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    save_total_limit=2,
    save_strategy='epoch',
    load_best_model_at_end=False,
)

# Instantiate the Trainer class
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    # Without a metrics callback the per-epoch evaluation reports only the
    # loss; attach the notebook's metric function so accuracy / F1 / QWK are
    # logged each epoch as well.
    compute_metrics=compute_metrics,
)

In [None]:
trainer.train()

In [None]:
results_df = pd.read_csv('/content/drive/MyDrive/Cheating Detection for AI generated/Results/result.csv')

In [None]:
# Function to print and store evaluation results
def print_and_store_results(y_test, y_pred, model_name, results_df):
    """Print evaluation metrics, save a confusion-matrix plot, record a row.

    Args:
        y_test: true class ids.
        y_pred: predicted class ids, aligned with ``y_test``.
        model_name: label used in the printout, the plot title, the saved
            PNG file name and the 'Model' column of the new row.
        results_df: DataFrame of earlier results to extend.

    Returns:
        A new DataFrame equal to ``results_df`` plus one row holding this
        model's accuracy, weighted precision / recall / F1 and
        quadratic-weighted kappa. ``results_df`` itself is not modified.

    Side effects:
        Writes ``<model_name>_confusion_matrix.png`` into the hard-coded
        Google Drive results folder.
    """
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    kappa = cohen_kappa_score(y_test, y_pred, weights='quadratic')

    # Derive the class list from the data so the matrix size and the tick
    # labels always agree, however many distinct scores actually occur.
    class_labels = np.unique(np.concatenate([y_test.ravel(), y_pred.ravel()]))
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    print(f"Model: {model_name}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"Quadratic Kappa: {kappa:.4f}")

    # DataFrame.append no longer exists in pandas 2.x; build a one-row frame
    # and concatenate it instead.
    new_row = pd.DataFrame([{
        'Model': model_name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-score': f1,
        'Quadratic Kappa': kappa,
    }])
    results_df = pd.concat([results_df, new_row], ignore_index=True)

    # Plot confusion matrix as heatmap
    fig, ax = plt.subplots(figsize=(6, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=class_labels, yticklabels=class_labels, ax=ax)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title(model_name)
    fig.savefig(f'/content/drive/MyDrive/Cheating Detection for AI generated/Results/{model_name}_confusion_matrix.png')

    return results_df

In [None]:
probs = trainer.predict(test_dataset=eval_dataset).predictions

In [None]:
y_actual = trainer.predict(test_dataset=eval_dataset)

In [None]:
# label_ids is 1-D when the dataset carries integer class ids (as the datasets
# built above do) and 2-D only for one-hot labels; argmax over axis 1 is valid
# only in the 2-D case, so apply it conditionally.
y_true = np.asarray(y_actual.label_ids)
if y_true.ndim > 1:
    y_true = y_true.argmax(axis=1)

In [None]:
pred_labesl = np.argmax(probs, axis=1)

In [None]:
# Record the metrics for the predictions computed above.
# NOTE(review): this row is stored as 'roberta_large', but `model` was last
# assigned from "bert-large-uncased" before the Trainer was built - confirm
# the row name matches the model that was actually trained.
results_df = print_and_store_results(y_true,pred_labesl, 'roberta_large',results_df )

In [None]:
# Another prediction pass over eval_dataset (it is also predicted twice in the
# cells above); the output is unpacked into scores and labels, and the third
# element is discarded.
pred, actual, _ = trainer.predict(eval_dataset)
# Highest-scoring class per example.
pred_labels = pred.argmax(-1)

# Adds a second row, named 'bert_large', for the same trainer and eval set.
results_df = print_and_store_results(actual,pred_labels, 'bert_large',results_df )

In [None]:
results_df

In [None]:
results_df.to_csv('/content/drive/MyDrive/Cheating Detection for AI generated/Results/result.csv', index = False)