## Connect to your Google Drive or any filesystem you are going to use.

In [2]:
import sys
from pathlib import Path

IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    from google.colab import drive

    drive.mount("/content/gdrive")
base_path = (
    Path("your_path") if IN_COLAB else Path(".")
)

#Fix if you want to use a different path
output_path = base_path / "output"
data_path = base_path / "helm"
output_path.mkdir(exist_ok=True)

##Put here the file system where you are going to work. However you should have there the cloned repositories of:

- https://github.com/RUCAIBox/HaluEval.git

You should start inside the HaluEval folder if you do not want to change more things. However is enough with having access to the data folder of the HaluEvalRepository.

## These are the libraries needed to run this notebook

## Deep Learning Installations

In [3]:
%pip install transformers datasets evaluate rouge_score
%pip install --upgrade huggingface_hub
%pip install accelerate -U
%pip install transformers[torch]
%pip install sentencepiece
%pip install google
%pip install protobuf

You should consider upgrading via the '/data/yeroj/.venv/hallu/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/data/yeroj/.venv/hallu/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
Collecting accelerate
  Using cached accelerate-0.29.2-py3-none-any.whl (297 kB)
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.21.0
    Uninstalling accelerate-0.21.0:
      Successfully uninstalled accelerate-0.21.0
Successfully installed accelerate-0.29.2
You should consider upgrading via the '/data/yeroj/.venv/hallu/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/data/yeroj/.venv/hallu/bin/python -m pip install 

## Libraries

In [4]:
from datasets import load_dataset
from datasets import Dataset
from transformers import AutoTokenizer
from transformers import DataCollatorForSeq2Seq
from transformers import pipeline
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM
import numpy as np
import pandas as pd
import json
import evaluate

#Reading Dataset

`The method loadDataset receieves the path where the datasets json files of the HaluEval repository are. You just need to pass your path and the name of the dataset you are going to use.`

##Dataset Names:
- summarization
- dialogue
- qa
- general


In [5]:
## As a recomendation keep these two with the same naming if you do not want to change many things
#datasetNames = ['falcon40b','gptj7b','llamabase7b','llamachat13b', 'mpt7b', 'opt7b'] llamachat7b
datasetNames = ['llamachat7b', 'falcon40b','mpt7b','llamachat13b', 'llamabase7b', 'opt7b', 'gptj7b']
#datasetNames = ['llamabase7b','llamachat13b']
#datasetNames = ['llamabase7b']

In [6]:
def loadDataset(datasetNames):
    sentences_list = []
    labels_list = []
    prompt_list = []

    for datasetName in datasetNames:
        path = data_path / f"./data/{datasetName}"

        with open(path / "data.json", "r") as file:
            data = json.load(file)

            # Iterate through each entry in the JSON
            for key, value in data.items():
                prompt = value["prompt"]
                for sentence_data in value["sentences"]:
                    sentence = sentence_data["sentence"]
                    label = sentence_data["label"]

                    # Append data to lists
                    sentences_list.append(sentence)
                    labels_list.append(label)
                    prompt_list.append(prompt)

    df = pd.DataFrame(
        {"Prompt": prompt_list, "Sentence": sentences_list, "Label": labels_list}
    )
    return df

## For this particular example we are loading the qa_data.json since is the one that takes the less time to process in case you want to test quickly how it works.

In [7]:
test_idx = 0
train_dataset_names = datasetNames[:test_idx] + datasetNames[test_idx + 1 :]
test_dataset = datasetNames[test_idx : test_idx + 1]
assert test_dataset[0] not in train_dataset_names
train_data = loadDataset(train_dataset_names)
test_data = loadDataset(test_dataset)

In [8]:
print(f'{train_dataset_names=}')
print(f'{test_dataset=}')

train_dataset_names=['falcon40b', 'mpt7b', 'llamachat13b', 'llamabase7b', 'opt7b', 'gptj7b']
test_dataset=['llamachat7b']


In [9]:
train_data.head()

Unnamed: 0,Prompt,Sentence,Label
0,This is a Wikipedia passage about Phillips ' S...,Phillips'Sound Recording Services was a studio...,0
1,This is a Wikipedia passage about Phillips ' S...,The studio was used by The Beatles to record t...,1
2,This is a Wikipedia passage about Phillips ' S...,The Beatles were introduced to Phillips by his...,1
3,This is a Wikipedia passage about Phillips ' S...,"The Beatles recorded ""My Bonnie"" and ""The Sain...",1
4,This is a Wikipedia passage about Best Day Eve...,It is the first episode of the series to be wr...,1


In [10]:
len(train_data)

3239

#Setting Device to use the GPU

We use the T4 GPU in Colab since the heaviest computation for us is the inference of the LLM-Evaluator. Therefore, T4 seem as the better fit.

In [11]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA L40S


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

## Generic LLMModel class to reuse the functionality of extracting the features.


In [13]:
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import BartForConditionalGeneration, PegasusForConditionalGeneration
from transformers import LEDForConditionalGeneration, LEDTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from transformers import LlamaForCausalLM, LlamaTokenizer

import torch
import torch.nn.functional as F


class LLMModel:
    def __init__(self):
        # if self.model_name != 'google/gemma-7b-it':
        self.model = self.model.to(device)
        pass

    def getName(self) -> str:
        return self.model_name

    def getSanitizedName(self) -> str:
        return self.model_name.replace("/", "__")

    def generate(self, inpt):
        pass

    ##Move in future commits this method to an utils.py
    def truncate_string_by_len(self, s, truncate_len):
        words = s.split()
        truncated_words = words[:-truncate_len] if truncate_len > 0 else words
        return " ".join(truncated_words)

    # Method to get the vocabulary probabilities of the LLM for a given token on the generated text from LLM-Generator
    def getVocabProbsAtPos(self, pos, token_probs):
        sorted_probs, sorted_indices = torch.sort(token_probs[pos, :], descending=True)
        return sorted_probs

    def getMaxLength(self):
        return self.model.config.max_position_embeddings

    # By default knowledge is the empty string. If you want to add extra knowledge you can do it like in the cases of the qa_data.json and dialogue_data.json
    def extractFeatures(
        self,
        knowledge="",
        conditionted_text="",
        generated_text="",
        features_to_extract={},
    ):
        self.model.eval()

        """Keep in mind that this truncate only works if the long text is the knowledge or the conditioned_text, but not both.
        The case of both does not exist on the HaluEval benchmark, but be aware of it if use in other datasets. """

        # Also in the case of the LED model, there is no need to truncate the text in the context of this dataset.
        total_len = len(knowledge) + len(conditionted_text) + len(generated_text)
        truncate_len = min(total_len - self.tokenizer.model_max_length, 0)

        # Truncate knowledge in case is too large
        knowledge = self.truncate_string_by_len(knowledge, truncate_len // 2)
        # Truncate text_A in case is too large
        conditionted_text = self.truncate_string_by_len(
            conditionted_text, truncate_len - (truncate_len // 2)
        )

        inputs = self.tokenizer(
            [knowledge + conditionted_text + generated_text],
            return_tensors="pt",
            max_length=self.getMaxLength(),
            truncation=True,
        )

        for key in inputs:
            inputs[key] = inputs[key].to(device)

        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits

        probs = F.softmax(logits, dim=-1)
        probs = probs.to(device)

        tokens_generated_length = len(self.tokenizer.tokenize(generated_text))
        start_index = logits.shape[1] - tokens_generated_length
        conditional_probs = probs[0, start_index :]

        token_ids_generated = inputs["input_ids"][0, start_index :].tolist()
        token_probs_generated = [
            conditional_probs[i, tid].item()
            for i, tid in enumerate(token_ids_generated)
        ]

        tokens_generated = self.tokenizer.convert_ids_to_tokens(token_ids_generated)

        minimum_token_prob = min(token_probs_generated)
        average_token_prob = sum(token_probs_generated) / len(token_probs_generated)

        maximum_diff_with_vocab = -1
        minimum_vocab_extreme_diff = 100000000000

        if features_to_extract["MDVTP"] == True or features_to_extract["MMDVP"] == True:
            size = len(token_probs_generated)
            for pos in range(size):
                vocabProbs = self.getVocabProbsAtPos(pos, conditional_probs)
                maximum_diff_with_vocab = max(
                    [
                        maximum_diff_with_vocab,
                        self.getDiffVocab(vocabProbs, token_probs_generated[pos]),
                    ]
                )
                minimum_vocab_extreme_diff = min(
                    [
                        minimum_vocab_extreme_diff,
                        self.getDiffMaximumWithMinimum(vocabProbs),
                    ]
                )

        # allFeatures = [minimum_token_prob, average_token_prob, maximum_diff_with_vocab, minimum_vocab_extreme_diff]

        allFeatures = {
            "mtp": minimum_token_prob,
            "avgtp": average_token_prob,
            "MDVTP": maximum_diff_with_vocab,
            "MMDVP": minimum_vocab_extreme_diff,
        }

        selectedFeatures = {}
        for key, feature in features_to_extract.items():
            if feature == True:
                selectedFeatures[key] = allFeatures[key]

        return selectedFeatures

    def getDiffVocab(self, vocabProbs, tprob):
        return (vocabProbs[0] - tprob).item()

    def getDiffMaximumWithMinimum(self, vocabProbs):
        return (vocabProbs[0] - vocabProbs[-1]).item()

## Definition of the specific Models

In [14]:
"""For now there is code repetition, but it helps to understand the details of each model as separate. However, will make
everything with better programming practices by using the AutoModel alternatives of HuggingFace."""


class Gemma(LLMModel):
    def __init__(self):
        self.model_name = "google/gemma-7b-it"
        # self.model_name = "chavinlo/alpaca-native"
        # self.model = AutoModelForCausalLM.from_pretrained(
        #     self.model_name,
            # low_cpu_mem_usage=True,
            # torch_dtype=torch.float16,
            # load_in_4bit=True,
        # )

        # quantization_config = BitsAndBytesConfig(load_in_4bit=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name
        )  # , #torch_dtype=torch.float16,# quantization_config=quantization_config)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        super().__init__()


    def generate(self, inpt):
        inputs = self.tokenizer(
            [inpt], max_length=self.getMaxLength(), return_tensors="pt", truncation=True
        )
        summary_ids = self.model.generate(inputs["input_ids"])

        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return summary


class LLama(LLMModel):
    def __init__(self):
        self.model_name = "meta-llama/Llama-2-7b-chat-hf"
        # self.model_name = "chavinlo/alpaca-native"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            # low_cpu_mem_usage=True,
            # torch_dtype=torch.float16,
            # load_in_4bit=True,
        )
        # self.model = LlamaForCausalLM.from_pretrained(
        #     self.model_name
        #     # ,torch_dtype=torch.float16
        # )
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        super().__init__()


    def generate(self, inpt):
        inputs = self.tokenizer(
            [inpt], max_length=1024, return_tensors="pt", truncation=True
        )
        summary_ids = self.model.generate(inputs["input_ids"])

        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return summary


class Opt(LLMModel):
    def __init__(self):
        self.model_name = "facebook/opt-6.7b"
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        super().__init__()
 
 
    def generate(self, inpt):
        inputs = self.tokenizer(
            [inpt], max_length=self.getMaxLength(), return_tensors="pt", truncation=True
        )
        summary_ids = self.model.generate(inputs["input_ids"])
 
        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
        return summary


class Gptj(LLMModel):
    def __init__(self):
        self.model_name = "EleutherAI/gpt-j-6B"
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        super().__init__()
 
 
    def generate(self, inpt):
        inputs = self.tokenizer(
            [inpt], max_length=self.getMaxLength(), return_tensors="pt", truncation=True
        )
        summary_ids = self.model.generate(inputs["input_ids"])
 
        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
        return summary


class BartCNN(LLMModel):
    def __init__(self):
        self.model_name = "facebook/bart-large-cnn"
        self.model = BartForConditionalGeneration.from_pretrained(self.model_name)
        self.tokenizer = BartTokenizer.from_pretrained(self.model_name)
        super().__init__()


    def generate(self, inpt):
        inputs = self.tokenizer(
            [inpt], max_length=self.getMaxLength(), return_tensors="pt", truncation=True
        )
        summary_ids = self.model.generate(inputs["input_ids"])

        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return summary


class BartCNNLong(LLMModel):
    def __init__(self):
        self.model_name = "ccdv/lsg-bart-base-16384-arxiv"
        self.model = BartForConditionalGeneration.from_pretrained(self.model_name)
        self.tokenizer = BartTokenizer.from_pretrained(self.model_name)
        super().__init__()


    def generate(self, inpt):
        inputs = self.tokenizer([inpt], return_tensors="pt", truncation=True)
        summary_ids = self.model.generate(inputs["input_ids"])

        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return summary


class GPT2Generator(LLMModel):
    def __init__(self):
        self.model_name = "gpt2-large"
        self.model = GPT2LMHeadModel.from_pretrained(self.model_name)
        self.tokenizer = GPT2Tokenizer.from_pretrained(self.model_name)
        super().__init__()

    def generate(self, inpt):
        inputs = self.tokenizer.encode(
            inpt, return_tensors="pt", max_length=self.getMaxLength(), truncation=True
        )
        output_ids = self.model.generate(
            inputs, max_length=1024, num_return_sequences=1
        )
        output = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        return output


class LED(LLMModel):
    def __init__(self):
        self.model_name = "allenai/led-large-16384-arxiv"
        self.model = LEDForConditionalGeneration.from_pretrained(self.model_name)
        self.tokenizer = LEDTokenizer.from_pretrained(self.model_name)
        super().__init__()

    def generate(self, inpt):
        inputs = self.tokenizer(
            [inpt], max_length=self.getMaxLength(), return_tensors="pt", truncation=True
        )
        summary_ids = self.model.generate(inputs["input_ids"])

        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return summary

# The Dictionary `features_to_extract` defines which features will be use in this experiment.

## Features Meaning:

- `mtp` : Take the minimum of the probabilities that the LLM_E gives to the tokens on the generated-text.
- `avgtp` : Take the average of the probabilities that the LLM_E
gives to the tokens on the generated-text.
- `MDVTP` : Take the maximum from all the differences
between the token with the highest probability
according to LLM_E at position i and the
assigned probability from LLM_E to the token at position i in the generated_text.
- `MMDVP` : Take the maximum from all the differences between the token with the highest probability according to $LLM_E$ at position $i$ ($v^*$) and the token with the lowest probability according to $LLM_E$ at position $i$ ($v^-$).


In [15]:
feature_to_extract = 'all'

available_features_to_extract = ["mtp", "avgtp", "MDVTP", "MMDVP"]
if feature_to_extract == 'all':
    features_to_extract = {
        feature: True for feature in available_features_to_extract
    }
else:
    features_to_extract = {
        feature: True if feature == feature_to_extract else False
        for feature in available_features_to_extract
    }

features_to_extract

## This cell is to instantiate the model you intend to use for the experiment

In [16]:
%pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2
%pip install "huggingface-hub>=0.17.1"

You should consider upgrading via the '/data/yeroj/.venv/hallu/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/data/yeroj/.venv/hallu/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [17]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [18]:
# model = BartCNN()
# model = BrioBart()
# model = LED()
# model = GPT2Generator()
model = LLama()
# model = Gemma()
# model = Opt()
# model = Gptj()

##Cleaning Cache on GPU to save memory

In [19]:
import torch
torch.cuda.empty_cache()

#This cell creates the dataset separation of `10%` for training and `90%` for testing depending on what task you are addressing. The following explanation is what happens if summarization is the task used. But the same explanation applies to all tasks and also you cand pass as parameter how many data points you want to include in training.

## Example: The data is separated on 2000 (1000 of document with right summary and 1000 with the same document but with the hallucinated summary). The rest which is 18000 is used to for testing.

### As expected from previous cells the task string expected are:
- `summarization`
- `qa`
- `dialogue`
- `general`

In [32]:
includeConditioned = True

In [33]:
import random


def adaptDataset(
    train_data: pd.DataFrame, test_data: pd.DataFrame, includeConditioned: bool
):

    dataset_train = []
    dataset_test = []
    for _, row in train_data.iterrows():
        prompt, text, hallu = row["Prompt"], row["Sentence"], row["Label"]
        dataset_train.append((prompt, text, hallu))

    for _, row in test_data.iterrows():
        prompt, text, hallu = row["Prompt"], row["Sentence"], row["Label"]
        dataset_test.append((prompt, text, hallu))

    random.shuffle(dataset_train)
    random.shuffle(dataset_test)

    X_train = [(p if includeConditioned else "", t) for p, t, _ in dataset_train]
    Y_train = [y for _, _, y, in dataset_train]

    X_test = [(p if includeConditioned else "", t) for p, t, _ in dataset_test]
    Y_test = [y for _, _, y, in dataset_test]

    return X_train, Y_train, [], [], X_test, Y_test

In [34]:
X_train, Y_train, X_val, Y_val, X_test, Y_test = adaptDataset(train_data, test_data, includeConditioned)

In [23]:
print(len(X_train), len(Y_train))
print(len(X_val), len(Y_val))
print(len(X_test), len(Y_test)) #verify the sizes look right

3239 3239
0 0
497 497


In [24]:
print(len(X_train), len(Y_train))
print(len(X_val), len(Y_val))
print(len(X_test), len(Y_test)) #verify the sizes look right

3239 3239
0 0
497 497


In [24]:
X_test[0]

('This is a Wikipedia passage about Moaning Lisa. " Moaning Lisa " is the sixth episode of The Simpsons \' first season .',
 'It originally aired on the Fox network in the United States on November 25, 1989 .\nlabel：0\ntext: "1989"\n\nThe episode was written by Conan O\'Brien and John Swartzwelder and directed by Rich Moore .')

In [25]:
Y_test[0]

1

## Extracting the features for the Training Data

In [26]:
import torch
from tqdm import tqdm


def extract_features(
    knowledge: str,
    conditioned_text: str,
    generated_text: str,
    features_to_extract: dict[str, bool],
):
    return model.extractFeatures(
        knowledge, conditioned_text, generated_text, features_to_extract
    )

X_train_features_maps = []
# i = 0

for conditioned_text, generated_text in tqdm(X_train, desc="Processing"):
    # print("Extracting: ", i)
    X_train_features_maps.append(
        extract_features(
            "", conditioned_text, generated_text, features_to_extract
        )
    )
    torch.cuda.empty_cache()  # Clean cache in every step for memory saving.
    # i += 1

Processing:   0%|          | 0/3239 [00:00<?, ?it/s]

Processing:   1%|▏         | 43/3239 [03:30<4:21:03,  4.90s/it]  


KeyboardInterrupt: 

In [27]:
len(X_train_features_maps)

3170

In [28]:
X_train_features_maps[0]

{'mtp': 2.7901327914747753e-09,
 'avgtp': 5.7490163297047623e-05,
 'MDVTP': 0.9977340698242188,
 'MMDVP': 0.07783844321966171}

In [29]:
X_train_features = [list(dic.values()) for dic in X_train_features_maps]

In [30]:
len(X_train_features)

3170

In [31]:
X_train_features[0]

[2.7901327914747753e-09,
 5.7490163297047623e-05,
 0.9977340698242188,
 0.07783844321966171]

## Training Logistic Regression

In [32]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(verbose=1)
clf.fit(X_train_features, Y_train)

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  6.93147D-01    |proj g|=  7.69127D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5      9     12      1     0     0   3.212D-05   6.686D-01
  F =  0.66863515233775161     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            


 This problem is unconstrained.


## Evaluate accuracy of Logistic Regression on the training set

In [33]:
from sklearn.metrics import accuracy_score

Y_Pred = clf.predict(X_train_features)

accuracy = accuracy_score(Y_train, Y_Pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 58.93%


In [34]:
log_odds = clf.coef_[0]
odds = np.exp(clf.coef_[0])
lr_features_log = {k: v for k, v in zip(X_train_features_maps[0].keys(), log_odds)}
lr_features_no_log = {k: v for k, v in zip(X_train_features_maps[0].keys(), odds)}

print("log", lr_features_log)
print("no_log", lr_features_no_log)

Unnamed: 0,coef
MDVTP,0.030913
mtp,-3.4e-05
avgtp,-0.178024
MMDVP,-6.314978


## Extracting the Features of the Validation Set

In [36]:
X_val_features_map = []
# i = 0

for conditioned_text, generated_text in tqdm(X_val, desc="Processing"):
    # print("Extracting: ", i)
    X_val_features_map.append(
        extract_features("", conditioned_text, generated_text, features_to_extract)
    )
    torch.cuda.empty_cache()
    # i += 1

Processing: 0it [00:00, ?it/s]


In [37]:
X_val_features = [list(dic.values()) for dic in X_val_features_map]

In [38]:
# from sklearn.metrics import accuracy_score

# Y_Pred = clf.predict(X_val_features)

# accuracy = accuracy_score(Y_val, Y_Pred)
# print(f"Accuracy: {accuracy * 100:.2f}%")

## Extracting the Features of the Test Set

In [39]:
from tqdm import tqdm

X_test_features_map = []
# i = 0

for conditioned_text, generated_text in tqdm(X_test, desc="Processing"):
    # print("Extracting: ", i)
    X_test_features_map.append(
        extract_features(
            "", conditioned_text, generated_text, features_to_extract
        )
    )
    torch.cuda.empty_cache()
    # i += 1

Processing: 100%|██████████| 566/566 [00:44<00:00, 12.62it/s]


In [40]:
X_test_features = [list(dic.values()) for dic in X_test_features_map]

## Evaluate accuracy of the LogisticRegression on the testing set

In [41]:
from sklearn.metrics import accuracy_score

Y_Pred = clf.predict(X_test_features)

lr_accuracy = accuracy_score(Y_test, Y_Pred)
print(f"Accuracy: {lr_accuracy * 100:.2f}%")

Accuracy: 66.25%


In [42]:
log_odds = clf.coef_[0]
pd.DataFrame(log_odds,
             X_train_features_maps[0].keys(),
             columns=['coef'])\
            .sort_values(by='coef', ascending=False)

Unnamed: 0,coef
MDVTP,0.030913
mtp,-3.4e-05
avgtp,-0.178024
MMDVP,-6.314978


In [43]:
odds = np.exp(clf.coef_[0])
pd.DataFrame(odds,
             X_train_features_maps[0].keys(),
             columns=['coef'])\
            .sort_values(by='coef', ascending=False)

Unnamed: 0,coef
MDVTP,1.031396
mtp,0.999967
avgtp,0.836922
MMDVP,0.001809


In [44]:
import torch.nn as nn

class SimpleDenseNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim=1, dropout_prob=0.3):
        super(SimpleDenseNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

In [45]:
denseModel = SimpleDenseNet(input_dim=len(list(features_to_extract.keys())), hidden_dim=512).to(device)

#Code declaring and computing all the metrics to measure

In [46]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    roc_auc_score,
    precision_recall_curve,
    auc
)


def compute_metrics(model, input_tensor, true_labels):
    with torch.no_grad():
        outputs = model(input_tensor)
        predicted_probs = torch.sigmoid(outputs).cpu().numpy()
        predicted = (outputs > 0.5).float().cpu().numpy()

        true_labels = true_labels.cpu().numpy()

        acc = accuracy_score(true_labels, predicted)
        precision = precision_score(true_labels, predicted)
        recall = recall_score(true_labels, predicted)
        f1 = f1_score(true_labels, predicted)

        precision_negative = precision_score(true_labels, predicted, pos_label=0)
        recall_negative = recall_score(true_labels, predicted, pos_label=0)
        f1_negative = f1_score(true_labels, predicted, pos_label=0)

        tn, fp, fn, tp = confusion_matrix(true_labels, predicted).ravel()
        roc_auc = roc_auc_score(true_labels, predicted_probs)

        P, R, _ = precision_recall_curve(true_labels, predicted, pos_label=1)
        pr_auc = auc(R, P)

        roc_auc_negative = roc_auc_score(
            true_labels, 1 - predicted_probs
        )  # If predicted_probs is the probability of the positive class
        P_neg, R_neg, _ = precision_recall_curve(true_labels, predicted, pos_label=0)
        pr_auc_negative = auc(R_neg, P_neg)

        return {
            "Accuracy": acc,
            "Precision": precision,
            "Recall": recall,
            "F1": f1,
            "TP": tp,
            "TN": tn,
            "FP": fp,
            "FN": fn,
            "ROC AUC": roc_auc,
            "PR AUC": pr_auc,
            "Precision-Negative": precision_negative,
            "Recall-Negative": recall_negative,
            "F1-Negative": f1_negative,
            "ROC AUC-Negative": roc_auc_negative,
            "PR AUC-Negative": pr_auc_negative,
        }

## Code for training the Dense Model and getting the result of all metrics corresponding to the Testing Set.

In [47]:
def compute_accuracy(model, input_tensor, true_labels):
    with torch.no_grad():
        outputs = model(input_tensor)
        predicted = (outputs > 0.5).float()
        correct = (predicted == true_labels).float().sum()
        accuracy = correct / len(true_labels)
        return accuracy.item()


X_train_tensor = torch.tensor(X_train_features, dtype=torch.float32).to(device)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32).view(-1, 1).to(device)

print(X_train_tensor.shape, Y_train_tensor.shape)

# Define loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(denseModel.parameters(), lr=0.001)

bestValAcc = 0
# Training loop
num_epochs = 10000
for epoch in range(num_epochs):
    denseModel.train()
    optimizer.zero_grad()
    outputs = denseModel(X_train_tensor)
    loss = criterion(outputs, Y_train_tensor)
    loss.backward()
    optimizer.step()

    # Compute training accuracy
    train_accuracy = compute_accuracy(denseModel, X_train_tensor, Y_train_tensor)

    # Uncomment this if you want to see how the accuracy of testing improves during the training process.
    ##Compute testing accuracy
    # X_val_tensor = torch.tensor(X_val_features, dtype=torch.float32).to(device)
    # Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32).view(-1, 1).to(device)

    # val_accuracy = compute_accuracy(denseModel, X_val_tensor, Y_val_tensor)

    # if bestValAcc < val_accuracy:
    #     bestValAcc = val_accuracy
    #     print(f'Saving model with best validation accuracy ...')
    #     torch.save(denseModel.state_dict(), 'llama-' + task + '-best-model')

    if (epoch + 1) % 10 == 0:
        print(
            f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Training Accuracy: {train_accuracy:.4f}"
        )
        # "Validation Accuracy": {val_accuracy:.4f}')

torch.Size([3170, 4]) torch.Size([3170, 1])
Epoch [10/10000], Loss: 0.6880, Training Accuracy: 0.5558
Epoch [20/10000], Loss: 0.6770, Training Accuracy: 0.5921
Epoch [30/10000], Loss: 0.6661, Training Accuracy: 0.5927
Epoch [40/10000], Loss: 0.6599, Training Accuracy: 0.5959
Epoch [50/10000], Loss: 0.6599, Training Accuracy: 0.5924
Epoch [60/10000], Loss: 0.6591, Training Accuracy: 0.5924
Epoch [70/10000], Loss: 0.6588, Training Accuracy: 0.5927
Epoch [80/10000], Loss: 0.6586, Training Accuracy: 0.5937
Epoch [90/10000], Loss: 0.6584, Training Accuracy: 0.5931
Epoch [100/10000], Loss: 0.6582, Training Accuracy: 0.5937
Epoch [110/10000], Loss: 0.6580, Training Accuracy: 0.5950
Epoch [120/10000], Loss: 0.6577, Training Accuracy: 0.5950
Epoch [130/10000], Loss: 0.6575, Training Accuracy: 0.5953
Epoch [140/10000], Loss: 0.6572, Training Accuracy: 0.5959


Epoch [150/10000], Loss: 0.6569, Training Accuracy: 0.5968
Epoch [160/10000], Loss: 0.6566, Training Accuracy: 0.5943
Epoch [170/10000], Loss: 0.6563, Training Accuracy: 0.5959
Epoch [180/10000], Loss: 0.6562, Training Accuracy: 0.5991
Epoch [190/10000], Loss: 0.6556, Training Accuracy: 0.6013
Epoch [200/10000], Loss: 0.6553, Training Accuracy: 0.5975
Epoch [210/10000], Loss: 0.6550, Training Accuracy: 0.6016
Epoch [220/10000], Loss: 0.6548, Training Accuracy: 0.5968
Epoch [230/10000], Loss: 0.6542, Training Accuracy: 0.5978
Epoch [240/10000], Loss: 0.6535, Training Accuracy: 0.6003
Epoch [250/10000], Loss: 0.6532, Training Accuracy: 0.6000
Epoch [260/10000], Loss: 0.6541, Training Accuracy: 0.5968
Epoch [270/10000], Loss: 0.6525, Training Accuracy: 0.5981
Epoch [280/10000], Loss: 0.6518, Training Accuracy: 0.6038
Epoch [290/10000], Loss: 0.6515, Training Accuracy: 0.6032
Epoch [300/10000], Loss: 0.6510, Training Accuracy: 0.6044
Epoch [310/10000], Loss: 0.6506, Training Accuracy: 0.60

#Compute the metrics using the model on the Test Set.

In [48]:
X_test_tensor = torch.tensor(X_test_features, dtype=torch.float32).to(device)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).view(-1, 1).to(device)

test_metrics = compute_metrics(denseModel, X_test_tensor, Y_test_tensor)

print(
    f"Testing - Accuracy: {test_metrics['Accuracy']:.4f}, Precision: {test_metrics['Precision']:.4f}, Recall: {test_metrics['Recall']:.4f}, F1: {test_metrics['F1']:.4f}, ROC AUC: {test_metrics['ROC AUC']:.4f}, PR AUC: {test_metrics['PR AUC']:.4f}"
)
print(
    f"Testing - Negative: {test_metrics['Accuracy']:.4f}, Precision-Negative: {test_metrics['Precision-Negative']:.4f}, Recall-Negative: {test_metrics['Recall-Negative']:.4f}, F1-Negative: {test_metrics['F1-Negative']:.4f}, ROC AUC-Negative: {test_metrics['ROC AUC-Negative']:.4f}, PR AUC-Negative: {test_metrics['PR AUC-Negative']:.4f}"
)

Testing - Accuracy: 0.5901, Precision: 0.8430, Recall: 0.4883, F1: 0.6184, ROC AUC: 0.7250, PR AUC: 0.8397
Testing - Negative: 0.5901, Precision-Negative: 0.4257, Recall-Negative: 0.8066, F1-Negative: 0.5573, ROC AUC-Negative: 0.2750, PR AUC-Negative: 0.5501


## Save the results on a CSV if you want.

In [49]:
model_dataframe = pd.DataFrame(
    columns=[
        "features",
        "model_name",
        "feature_to_extract",
        "method",
        "accuracy",
        "precision",
        "recall",
        "roc auc",
        "pr auc",
        "negative",
        "precision-negative",
        "recall-negative",
        "negative f1",
        "lr_accuracy",
        "lr_features_log",
        "lr_features_no_log",
    ]
)

In [50]:
d = {
    "features": features_to_extract,
    "model_name": str(model.getName()),
    "feature_to_extract": feature_to_extract,
    "method": "TEST",
    "accuracy": test_metrics["Accuracy"],
    "precision": test_metrics["Precision"],
    "recall": test_metrics["Recall"],
    "f1": test_metrics["F1"],
    "pr auc": test_metrics["PR AUC"],
    "precision-negative": test_metrics["Precision-Negative"],
    "recall-negative": test_metrics["Recall-Negative"],
    "negative-f1": test_metrics["F1-Negative"],
    "lr_accuracy": lr_accuracy,
    "lr_features_log": lr_features_log,
    "lr_features_no_log": lr_features_no_log,
}

model_dataframe.loc[len(model_dataframe.index)] = d

In [51]:
model_dataframe.head()

Unnamed: 0,features,model_name,model,method,accuracy,precision,recall,roc auc,pr auc,negative,precision-negative,recall-negative,negative f1,lr_accuracy
0,"{'mtp': True, 'avgtp': True, 'MDVTP': True, 'M...",EleutherAI/gpt-j-6B,<__main__.Gptj object at 0x1553c2f1c430>,TEST,0.590106,0.843049,0.488312,,0.839709,,0.425656,0.80663,,0.662544


In [52]:
csv_name = f"{model.getSanitizedName()}_mind_test={test_dataset[0]}_{includeConditioned=}_{'_'.join([f'{k}={v}' for k, v in features_to_extract.items()])}.csv"
model_dataframe.to_csv(output_path / csv_name, index=False)