In [1]:
!pip install transformers
!pip install bert-score



In [96]:
from transformers import BertTokenizer, BertForMaskedLM, BertModel
from bert_score import BERTScorer

# Example texts: a reference/candidate pair that differs in a single word,
# used below to demonstrate the BERTScore helper.
reference = "This is a reference text example."
candidate = "This is a candidate text example."
# BERTScore calculation
# One scorer per model type, created lazily and reused across calls.
_BERT_SCORERS = {}


def bert_score(reference, candidate, model_type='bert-base-uncased'):
    """
    Score a candidate text against a reference text with BERTScore.

    The precision / recall / F1 line is printed as a side effect.

    :param reference: String, the reference text.
    :param candidate: String, the text being evaluated.
    :param model_type: String, model name handed to BERTScorer.
    :return: Tuple (precision, recall, f1), each rounded to 4 decimals.
    """
    # Building a BERTScorer is the expensive part, so keep one instance per
    # model type instead of constructing a new one on every call.
    scorer = _BERT_SCORERS.get(model_type)
    if scorer is None:
        scorer = _BERT_SCORERS[model_type] = BERTScorer(model_type=model_type)

    # score() takes parallel lists: candidates first, references second.
    P, R, F1 = scorer.score([candidate], [reference])
    print(f"BERTScore Precision: {P.mean():.4f}, Recall: {R.mean():.4f}, F1: {F1.mean():.4f}")

    return round(P.item(), 4), round(R.item(), 4), round(F1.item(), 4)


# Score the example pair; the returned (precision, recall, F1) tuple is shown as the cell result.
bert_score(reference,candidate)

BERTScore Precision: 0.9258, Recall: 0.9258, F1: 0.9258


(0.9258, 0.9258, 0.9258)

In [None]:
# Step 1: Import the required libraries
from transformers import BertTokenizer, BertModel
import torch
import numpy as np

# Step 2: Load the pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

# Step 3: Define the two texts to compare
text1 = "This is a reference text example."
text2 = "This is a candidate text example."

# Step 4: Prepare the texts for BERT
inputs1 = tokenizer(text1, return_tensors="pt", padding=True, truncation=True)
inputs2 = tokenizer(text2, return_tensors="pt", padding=True, truncation=True)

# Step 5: Feed the texts to the BERT model
# Inference only: no_grad() skips autograd bookkeeping, which saves memory and
# means the outputs can be converted to NumPy without detaching first.
with torch.no_grad():
    outputs1 = model(**inputs1)
    outputs2 = model(**inputs2)

# Step 6: Obtain the representation vectors (mean over the token dimension)
embeddings1 = outputs1.last_hidden_state.mean(dim=1).numpy()
embeddings2 = outputs2.last_hidden_state.mean(dim=1).numpy()

# Step 7: Calculate cosine similarity
similarity = np.dot(embeddings1, embeddings2.T) / (np.linalg.norm(embeddings1) * np.linalg.norm(embeddings2))

# Step 8: Print the result
print("Similarity between the texts: {:.4f}".format(similarity[0][0]))

### Output: Similarity between the texts: 0.9000

### Hallucination Checker

In [4]:
!pip install nltk



In [5]:
import nltk
# Sentence tokenizer data: older NLTK releases read 'punkt', recent ones (3.9+)
# read 'punkt_tab'. Fetch both so sent_tokenize works on either.
nltk.download('punkt')
nltk.download('punkt_tab')

def extract_sentences_nltk(text):
    """
    Split a piece of text into its sentences with NLTK.

    :param text: String, the text to segment.
    :return: List with one string per detected sentence.
    """
    # Hand the text straight to NLTK's sentence tokenizer
    return nltk.sent_tokenize(text)

# Demo paragraph: three short sentences about the Eiffel Tower
text = "The Eiffel Tower is in Paris. It was built in 1889. It's one of the most famous landmarks in the world."

# Split the paragraph, then list each sentence with its 1-based position
sentences = extract_sentences_nltk(text)

for position, sentence in enumerate(sentences, start=1):
    print(f"Sentence {position}: {sentence}")


Sentence 1: The Eiffel Tower is in Paris.
Sentence 2: It was built in 1889.
Sentence 3: It's one of the most famous landmarks in the world.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lakie\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [49]:
from transformers import AutoModelForSequenceClassification

pairs = [ # Test data: List[Tuple[str, str]]; each tuple is (input text, summary/answer checked against that input)
    ("The capital of France is Berlin.", "The capital of France is Paris."), # answer is true in the real world but contradicts the input
    ('I am in California', 'I am in United States.'), # consistent: the answer follows from the input
    ('I am in United States', 'I am in California.'), # hallucinated: the answer is more specific than the input supports
    ("A person on a horse jumps over a broken down airplane.", "A person is outdoors, on a horse."),
    ("A boy is jumping on skateboard in the middle of a red bridge.", "The boy skates down the sidewalk on a red bridge"),
    ("A man with blond-hair, and a brown shirt drinking out of a public water fountain.", "A blond man wearing a brown shirt is reading a book."),
    ("Mark Wahlberg was a fan of Manny.", "Manny was a fan of Mark Wahlberg.")
]
# Loaded evaluation models keyed by name, so repeated calls reuse one instance.
_HALLUCINATION_MODELS = {}


def _get_hallucination_model(model_name='vectara/hallucination_evaluation_model'):
    """Return the evaluation model for ``model_name``, loading it on first use only."""
    if model_name not in _HALLUCINATION_MODELS:
        # NOTE(security): trust_remote_code=True runs Python code shipped with the
        # model repository; only use it with repositories you trust.
        _HALLUCINATION_MODELS[model_name] = AutoModelForSequenceClassification.from_pretrained(
            model_name, trust_remote_code=True)
    return _HALLUCINATION_MODELS[model_name]


def hallucination_checker(input, threshold=0.5):
    """
    Print a hallucination verdict for every (input text, answer) pair.

    A pair whose score is below ``threshold`` is reported as hallucinated; for
    such pairs each sentence of the answer is scored on its own against the
    input text and the sentences that also fall below the threshold are listed.

    :param input: List of (input text, answer) tuples. The name shadows the
                  builtin but is kept so existing callers keep working.
    :param threshold: Number, scores strictly below it count as hallucinated.
    :return: None; all results are printed.
    """
    # Nothing to score: skip loading the model altogether.
    if len(input) == 0:
        return

    # Step 1: Load the model (cached after the first call)
    model = _get_hallucination_model()

    # Step 2: Use the model to predict
    scores = model.predict(input)  # note the predict() method. Do not do model(pairs).

    for text, score in zip(input, scores):
        is_hallucinated = score < threshold
        print(f"\nText: {text}")
        print(f"Hallucination: {'Yes' if is_hallucinated else 'No'}, Score: {score:.4f}")
        if not is_hallucinated:
            continue

        source, answer = text[0], text[1]
        hallucination_sent = []
        # Score each answer sentence separately to locate the unsupported ones.
        for sent in extract_sentences_nltk(answer):
            # Separate name so the pair-level score is not overwritten.
            sent_score = model.predict([(source, sent)])[0]
            if sent_score < threshold:
                hallucination_sent.append(sent)
        print("Hallucination sentences ", hallucination_sent, "\n")

In [50]:
# Run the checker over the sample pairs defined above; verdicts are printed, nothing is returned.
hallucination_checker(pairs)

You are using a model of type HHEMv2Config to instantiate a model of type HHEMv2. This is not supported for all configurations of models and can yield errors.



Text: ('The capital of France is Berlin.', 'The capital of France is Paris.')
Hallucination: Yes, Score: 0.0111
Hallucination sentences  ['The capital of France is Paris.'] 


Text: ('I am in California', 'I am in United States.')
Hallucination: No, Score: 0.6474

Text: ('I am in United States', 'I am in California.')
Hallucination: Yes, Score: 0.1290
Hallucination sentences  ['I am in California.'] 


Text: ('A person on a horse jumps over a broken down airplane.', 'A person is outdoors, on a horse.')
Hallucination: No, Score: 0.8969

Text: ('A boy is jumping on skateboard in the middle of a red bridge.', 'The boy skates down the sidewalk on a red bridge')
Hallucination: Yes, Score: 0.1846
Hallucination sentences  ['The boy skates down the sidewalk on a red bridge'] 


Text: ('A man with blond-hair, and a brown shirt drinking out of a public water fountain.', 'A blond man wearing a brown shirt is reading a book.')
Hallucination: Yes, Score: 0.0050
Hallucination sentences  ['A blond m

In [51]:
# Case 1: every summary sentence is taken from the input text (the first one is a
# shortened form of the input's opening sentence).
input_text = """ There are many techniques available to generate extractive summarization to keep it simple, I will be using an unsupervised learning approach to find the sentences similarity and rank them. Summarization can be defined as a task of producing a concise and fluent summary while preserving key information and overall meaning. One benefit of this will be, you don’t need to train and build a model prior start using it for your project. It’s good to understand Cosine similarity to make the best use of the code you are going to see. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them. Its measures cosine of the angle between vectors. The angle will be 0 if sentences are similar."""
summary = """There are many techniques available to generate extractive summarization. Summarization can be defined as a task of producing a concise and fluent summary while preserving key information and overall meaning. One benefit of this will be, you don’t need to train and build a model prior start using it for your project. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them."""
pairs = [(input_text,summary)]
hallucination_checker(pairs)

You are using a model of type HHEMv2Config to instantiate a model of type HHEMv2. This is not supported for all configurations of models and can yield errors.



Text: (' There are many techniques available to generate extractive summarization to keep it simple, I will be using an unsupervised learning approach to find the sentences similarity and rank them. Summarization can be defined as a task of producing a concise and fluent summary while preserving key information and overall meaning. One benefit of this will be, you don’t need to train and build a model prior start using it for your project. It’s good to understand Cosine similarity to make the best use of the code you are going to see. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them. Its measures cosine of the angle between vectors. The angle will be 0 if sentences are similar.', 'There are many techniques available to generate extractive summarization. Summarization can be defined as a task of producing a concise and fluent summary while preserving key information and overall meaning

In [54]:
# Case 2: same input text, but the summary now starts with three sentences that
# have no support in the input.
input_text = """ There are many techniques available to generate extractive summarization to keep it simple, I will be using an unsupervised learning approach to find the sentences similarity and rank them. Summarization can be defined as a task of producing a concise and fluent summary while preserving key information and overall meaning. One benefit of this will be, you don’t need to train and build a model prior start using it for your project. It’s good to understand Cosine similarity to make the best use of the code you are going to see. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them. Its measures cosine of the angle between vectors. The angle will be 0 if sentences are similar."""
summary = """I am an Indian. I love India. I live in Bengaluru. There are many techniques available to generate extractive summarization. Summarization can be defined as a task of producing a concise and fluent summary while preserving key information and overall meaning. One benefit of this will be, you don’t need to train and build a model prior start using it for your project. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them."""
pairs = [(input_text,summary)]
hallucination_checker(pairs)

You are using a model of type HHEMv2Config to instantiate a model of type HHEMv2. This is not supported for all configurations of models and can yield errors.



Text: (' There are many techniques available to generate extractive summarization to keep it simple, I will be using an unsupervised learning approach to find the sentences similarity and rank them. Summarization can be defined as a task of producing a concise and fluent summary while preserving key information and overall meaning. One benefit of this will be, you don’t need to train and build a model prior start using it for your project. It’s good to understand Cosine similarity to make the best use of the code you are going to see. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them. Its measures cosine of the angle between vectors. The angle will be 0 if sentences are similar.', 'I am an Indian. I love India. I live in Bengaluru. There are many techniques available to generate extractive summarization. Summarization can be defined as a task of producing a concise and fluent summary w

### SUPERT: scoring a summary against a pseudo-reference extracted from the source text

In [99]:
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize
import math

# Download required NLTK datasets (only needed once).
# Older NLTK releases read 'punkt'; recent ones (3.9+) read 'punkt_tab'.
nltk.download('punkt')
nltk.download('punkt_tab')

# 1. Preprocess the text and tokenize it into sentences
def preprocess_sentences(text):
    """Return the sentences of ``text`` as produced by NLTK's sent_tokenize."""
    return sent_tokenize(text)

# 2. Build a similarity matrix for the sentences
def build_similarity_matrix(sentences):
    """
    Compute pairwise cosine similarities between sentences.

    Each sentence is turned into a TF-IDF vector (English stop words are
    ignored) and the vectors are compared with cosine_similarity.

    :param sentences: Sequence of sentence strings.
    :return: The value returned by cosine_similarity for the TF-IDF sentence vectors.
    """
    tfidf = TfidfVectorizer(stop_words='english')
    return cosine_similarity(tfidf.fit_transform(sentences))

# 3. Rank the sentences based on their centrality (sum of similarities)
def rank_sentences(sentences, similarity_matrix):
    """
    Order sentences by centrality, highest first.

    A sentence's score is the sum of its row in ``similarity_matrix``.

    :param sentences: Sequence of sentence strings, aligned with the matrix rows.
    :param similarity_matrix: Object supporting ``.sum(axis=1)``, one row per sentence.
    :return: List of (score, sentence) tuples sorted by descending score.
    """
    # Row totals act as the per-sentence scores
    row_totals = similarity_matrix.sum(axis=1)

    scored = list(zip(row_totals, sentences))
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return scored

# 4. Generate the summary by extracting the top-ranked sentences
def generate_summary(text, perc = 10):
    """
    Build an extractive summary from the highest-ranked sentences of ``text``.

    :param text: String, the document to summarise.
    :param perc: Number, percentage of the document's sentences to keep
                 (rounded up). Values outside 0-100 are clamped.
    :return: String, the selected sentences in descending rank order joined by
             single spaces; "" when there is nothing to select.
    """
    # Tokenize once. Strip layout whitespace (newlines, indentation) that can stay
    # attached to a sentence, and drop anything that ends up empty.
    sentences = [s.strip() for s in preprocess_sentences(text)]
    sentences = [s for s in sentences if s]
    if not sentences:
        return ''

    # Multiply before dividing: 10 * (70 / 100) evaluates to 7.000000000000001,
    # which ceil() would turn into 8 instead of 7.
    num_sentences = math.ceil(len(sentences) * perc / 100)
    # Clamp so perc > 100 cannot index past the ranked list and perc < 0 yields nothing.
    num_sentences = max(0, min(num_sentences, len(sentences)))
    if num_sentences == 0:
        return ''

    # Build a similarity matrix for the sentences and rank them by centrality
    similarity_matrix = build_similarity_matrix(sentences)
    ranked_sentences = rank_sentences(sentences, similarity_matrix)

    # Keep the top-ranked sentences for the summary
    return ' '.join(sentence for _, sentence in ranked_sentences[:num_sentences])

# Example usage
if __name__ == "__main__":
    # Short fox/dog story used to demonstrate the summarizer
    text = """
    The quick brown fox jumps over the lazy dog. The dog barked back at the fox. The fox ran away into the forest.
    In the forest, the fox met other animals. They all wondered what the fox was doing there. The fox told them about the lazy dog.
    Meanwhile, the dog went back to sleep. It was a peaceful day for the dog, but the fox had a lot to do in the forest.
    """

    # Keep half of the sentences and show the result under a heading
    summary = generate_summary(text, perc=50)
    print("Extractive Summary:", summary, sep="\n")


Extractive Summary:
The fox told them about the lazy dog. The dog barked back at the fox. It was a peaceful day for the dog, but the fox had a lot to do in the forest. 
    The quick brown fox jumps over the lazy dog.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lakie\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [100]:
def supert(input_text, summary, perc=10):
    """
    Score a summary without a human reference.

    A pseudo-reference is extracted from ``input_text`` with generate_summary
    and ``summary`` is compared against it with bert_score.

    :param input_text: String, the source document.
    :param summary: String, the summary to evaluate.
    :param perc: Number, percentage of source sentences used for the pseudo-reference.
    :return: Whatever bert_score returns for (pseudo-reference, summary).
    """
    reference_summary = generate_summary(input_text, perc=perc)
    # Pass the scores back to the caller instead of discarding them.
    return bert_score(reference_summary, summary)

In [101]:
# SUPERT demo on the fox/dog passage: the hand-written summary is scored against
# a pseudo-reference that supert extracts from input_text.
input_text = """
    The quick brown fox jumps over the lazy dog. The dog barked back at the fox. The fox ran away into the forest.
    In the forest, the fox met other animals. They all wondered what the fox was doing there. 
    The fox told them about the lazy dog.
    Meanwhile, the dog went back to sleep. It was a peaceful day for the dog, but the fox had a lot to do in the forest.
    """
summary =""" Dog had a peaceful day but fox was threatend by dog"""
supert(input_text,summary)

BERTScore Precision: 0.4808, Recall: 0.5014, F1: 0.4909
