In [2]:
import torch
from transformers import BartTokenizer, BartForConditionalGeneration

# Directory holding the saved BART checkpoint and tokenizer files
# (edit this to point at your local copy).
model_dir = "Facebook-Bert-20241016T112414Z-001/Facebook-Bert"
tokenizer = BartTokenizer.from_pretrained(model_dir)
model = BartForConditionalGeneration.from_pretrained(model_dir)

# Prefer CUDA when it is present; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
print(f"device : {device}")

device : cuda


In [13]:
import pandas as pd

In [14]:
# Read the CNN/DailyMail test split from disk and preview its first rows.
test_dataset = pd.read_csv(filepath_or_buffer="dataset/cnn_dailymail/test.csv")
test_dataset.head()

Unnamed: 0,id,article,highlights
0,92c514c913c0bdfe25341af9fd72b29db544099b,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,caabf9cbdf96eb1410295a673e953d304391bfbb,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."


In [15]:
# Preview the leading five rows of the loaded test split.
test_dataset.head(n=5)

Unnamed: 0,id,article,highlights
0,92c514c913c0bdfe25341af9fd72b29db544099b,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,caabf9cbdf96eb1410295a673e953d304391bfbb,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."


In [16]:
def summarize_text(model, tokenizer, text, max_length=150, min_length=30):
    """Generate a summary of ``text`` with a sequence-to-sequence model.

    Args:
        model: Generation model exposing ``.device`` and ``.generate(...)``.
        tokenizer: Tokenizer matching ``model``; called on a one-element batch
            and used to decode the generated ids.
        text: The document to summarize.
        max_length: Upper bound passed to ``generate`` for the summary length.
        min_length: Lower bound passed to ``generate`` for the summary length.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # Tokenize as a batch of one; inputs longer than 1024 tokens are truncated.
    inputs = tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)

    # Place the inputs on the device of the model that was passed in, rather
    # than relying on a notebook-level ``device`` variable being defined.
    inputs = inputs.to(model.device)

    # Beam search with 4 beams. The tokenizer's attention mask (when it
    # provides one) is forwarded so generate() does not have to infer it.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_length=max_length,
        min_length=min_length,
        num_beams=4,
        early_stopping=True,
    )

    # Only one sequence was submitted, so decode the first (only) result.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [17]:
!pip install rouge_score sacrebleu



In [20]:
from rouge_score import rouge_scorer
import sacrebleu

# Shared ROUGE scorer (unigram, bigram and longest-common-subsequence variants,
# with stemming enabled); reused by the evaluation function below.
scorer = rouge_scorer.RougeScorer(
    ["rouge1", "rouge2", "rougeL"],
    use_stemmer=True,
)

def evaluate_model_on_test_data(model, tokenizer, test_data, num_samples=100):
    """Score generated summaries against reference highlights and print averages.

    For up to ``num_samples`` leading rows of ``test_data``, the article is
    summarized with ``summarize_text`` and compared with the reference summary
    using ROUGE-1/2/L F-measures (from the module-level ``scorer``) and BLEU
    (``sacrebleu.corpus_bleu`` applied to each hypothesis/reference pair).
    The per-sample scores are averaged and printed.

    Args:
        model: Model forwarded to ``summarize_text``.
        tokenizer: Tokenizer forwarded to ``summarize_text``.
        test_data: pandas DataFrame expected to hold ``article`` and
            ``highlights`` columns.
        num_samples: Maximum number of leading rows to evaluate.

    Returns:
        None. All results are reported with ``print``.
    """
    # Keys double as the labels used in the printed report (order matters).
    per_sample = {"ROUGE-1": [], "ROUGE-2": [], "ROUGE-L": [], "BLEU": []}

    # Show the first row to help verify the structure. Guarded so that an empty
    # frame falls through to the "no valid samples" message instead of raising.
    if len(test_data) > 0:
        print("Sample Data Structure:", test_data.iloc[0])

    # The column check does not depend on the row, so do it once up front.
    if 'article' in test_data.columns and 'highlights' in test_data.columns:
        articles = test_data['article']
        references = test_data['highlights']
        sample_count = min(num_samples, len(test_data))
    else:
        print("Skipping evaluation: data does not contain the expected 'article' and 'highlights' fields.")
        sample_count = 0

    for i in range(sample_count):
        article = articles.iloc[i]
        reference_summary = references.iloc[i]

        # Missing values are not strings (e.g. NaN from read_csv) and cannot be
        # tokenized or scored, so skip such rows.
        if not isinstance(article, str) or not isinstance(reference_summary, str):
            print(f"Skipping sample {i}: Data does not contain expected fields.")
            continue

        # Generate the summary
        generated_summary = summarize_text(model, tokenizer, article)

        # ROUGE F-measures for this pair
        scores = scorer.score(reference_summary, generated_summary)
        per_sample["ROUGE-1"].append(scores['rouge1'].fmeasure)
        per_sample["ROUGE-2"].append(scores['rouge2'].fmeasure)
        per_sample["ROUGE-L"].append(scores['rougeL'].fmeasure)

        # BLEU for this single pair; the reported figure is the mean of these
        # per-pair scores, not one corpus-level BLEU over all pairs.
        bleu = sacrebleu.corpus_bleu([generated_summary], [[reference_summary]])
        per_sample["BLEU"].append(bleu.score)

    # All four lists grow together, so checking one of them is sufficient.
    if per_sample["ROUGE-1"]:
        for label, values in per_sample.items():
            print(f"Average {label} Score: {sum(values) / len(values):.4f}")
    else:
        print("No valid samples found for evaluation.")


In [21]:
# Score the loaded model on the test split, using the function's default sample count.
evaluate_model_on_test_data(model=model, tokenizer=tokenizer, test_data=test_dataset)

Sample Data Structure: id                     92c514c913c0bdfe25341af9fd72b29db544099b
article       Ever noticed how plane seats appear to be gett...
highlights    Experts question if  packed out planes are put...
Name: 0, dtype: object


  attn_output = torch.nn.functional.scaled_dot_product_attention(


Average ROUGE-1 Score: 0.4200
Average ROUGE-2 Score: 0.1914
Average ROUGE-L Score: 0.2933
Average BLEU Score: 12.7138
