In [4]:
!pip install evaluate
!pip install rouge_score
!pip install bert_score



In [5]:
# import pandas as pd
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# from evaluate import load
# import os

# # Load evaluation metrics
# bertscore = load("bertscore")
# rouge = load("rouge")

# # Load annotations
# def load_annotations(file_path):
#     data = []
#     with open(file_path, 'r', encoding='utf-8') as f:
#         for line in f:
#             fields = line.strip().split('\t')
#             if len(fields) >= 4:
#                 article_id, narrative, subnarrative, explanation = fields
#                 data.append([article_id, narrative, subnarrative, explanation])
#     return pd.DataFrame(data, columns=["article_id", "narrative", "subnarrative", "explanation"])

# # Generate grounded explanations
# def generate_grounded_explanations(articles_dir, annotations_df, tokenizer, model):
#     explanations = []
#     for _, row in annotations_df.iterrows():
#         article_path = os.path.join(articles_dir, row['article_id'])
#         if os.path.exists(article_path):
#             with open(article_path, 'r', encoding='utf-8') as f:
#                 article_text = f.read()

#             prompt = f"Summarize the key points supporting the claim: {row['narrative']} - {row['subnarrative']}. Ground your explanation in the text."
#             inputs = tokenizer(prompt + article_text, return_tensors="pt", truncation=True, max_length=512)
#             outputs = model.generate(inputs["input_ids"], max_length=80, min_length=20, num_beams=4, early_stopping=True)
#             explanation = tokenizer.decode(outputs[0], skip_special_tokens=True)
#             explanations.append(explanation)
#         else:
#             explanations.append("")
#     return explanations

# # Evaluate explanations
# def evaluate_explanations(annotations_df, generated_explanations):
#     references = annotations_df['explanation'].tolist()
#     results = {}

#     # ROUGE Scores
#     rouge_scores = rouge.compute(predictions=generated_explanations, references=references)
#     results['rouge1'] = rouge_scores['rouge1']
#     results['rouge2'] = rouge_scores['rouge2']
#     results['rougeL'] = rouge_scores['rougeL']

#     # BERTScore
#     bert_scores = bertscore.compute(predictions=generated_explanations, references=references, model_type="distilbert-base-uncased")
#     results['bert_precision'] = sum(bert_scores['precision']) / len(bert_scores['precision'])
#     results['bert_recall'] = sum(bert_scores['recall']) / len(bert_scores['recall'])
#     results['bert_f1'] = sum(bert_scores['f1']) / len(bert_scores['f1'])

#     return results

# # Main script
# annotations_file = 'EN/subtask-3-annotations.txt'
# articles_dir = 'EN/raw-documents'

# # Load tokenizer and model
# model_name = "facebook/bart-large-cnn"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# # Load annotations
# annotations_df = load_annotations(annotations_file)

# # Generate grounded explanations
# annotations_df['generated_explanation'] = generate_grounded_explanations(articles_dir, annotations_df, tokenizer, model)

# # Evaluate generated explanations
# evaluation_results = evaluate_explanations(annotations_df, annotations_df['generated_explanation'].tolist())

# # Print evaluation results
# print(f"Evaluation Results: {evaluation_results}")

# # # Save results
# # annotations_df.to_csv('/mnt/data/subtask3_results.csv', index=False)


In [6]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from evaluate import load
from tqdm import tqdm
import os
import torch

# Load evaluation metrics
bertscore = load("bertscore")
rouge = load("rouge")

# Load annotations
def load_annotations(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.strip().split('\t')
            if len(fields) >= 4:
                article_id, narrative, subnarrative, explanation = fields
                data.append([article_id, narrative, subnarrative, explanation])
    return pd.DataFrame(data, columns=["article_id", "narrative", "subnarrative", "explanation"])

# Generate grounded explanations with GPU and batch processing
def generate_grounded_explanations(articles_dir, annotations_df, tokenizer, model, batch_size=8, max_length=80):
    """
    Generate explanations in batches using GPU for efficiency.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    explanations = []

    for i in tqdm(range(0, len(annotations_df), batch_size), desc="Generating Explanations"):
        batch_data = annotations_df.iloc[i:i + batch_size]
        batch_prompts = []
        for _, row in batch_data.iterrows():
            article_path = os.path.join(articles_dir, row['article_id'])
            if os.path.exists(article_path):
                with open(article_path, 'r', encoding='utf-8') as f:
                    article_text = f.read()
                prompt = (
                    f"Summarize the key points supporting the claim: {row['narrative']} - {row['subnarrative']}. "
                    "Ground your explanation in the text. "
                    f"{article_text}"
                )
                batch_prompts.append(prompt)
            else:
                batch_prompts.append("Missing article content.")

        # Tokenize batch
        inputs = tokenizer(
            batch_prompts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(device)

        # Generate explanations
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=max_length,
                num_beams=2,
                early_stopping=True
            )

        # Decode generated text
        explanations.extend([tokenizer.decode(output, skip_special_tokens=True) for output in outputs])

    return explanations

# Evaluate explanations
def evaluate_explanations(annotations_df, generated_explanations):
    references = annotations_df['explanation'].tolist()
    results = {}

    # ROUGE Scores
    rouge_scores = rouge.compute(predictions=generated_explanations, references=references)
    results['rouge1'] = rouge_scores['rouge1']
    results['rouge2'] = rouge_scores['rouge2']
    results['rougeL'] = rouge_scores['rougeL']

    # BERTScore
    bert_scores = bertscore.compute(predictions=generated_explanations, references=references, model_type="distilbert-base-uncased")
    results['bert_precision'] = sum(bert_scores['precision']) / len(bert_scores['precision'])
    results['bert_recall'] = sum(bert_scores['recall']) / len(bert_scores['recall'])
    results['bert_f1'] = sum(bert_scores['f1']) / len(bert_scores['f1'])

    return results


annotations_file = 'BG/subtask-3-annotations.txt'
articles_dir = 'BG/raw-documents'

# Load tokenizer and model
# model_name = "facebook/bart-large-cnn"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# from transformers import MBartForConditionalGeneration, MBartTokenizer

# model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50")
# tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-50")

model_name = "google/mt5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# model_name = "bigscience/bloomz-7b1"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_name)



# Load annotations
annotations_df = load_annotations(annotations_file)

# Generate grounded explanations
print("Generating explanations...")
annotations_df['generated_explanation'] = generate_grounded_explanations(
    articles_dir,
    annotations_df,
    tokenizer,
    model,
    batch_size=8,  # Adjust batch size based on GPU memory
    max_length=80
)

# Evaluate generated explanations
print("Evaluating explanations...")
evaluation_results = evaluate_explanations(annotations_df, annotations_df['generated_explanation'].tolist())

# Print evaluation results
print(f"Evaluation Results: {evaluation_results}")

# # Save results
# results_path = '/mnt/data/subtask3_results.csv'
# annotations_df.to_csv(results_path, index=False)
# print(f"Results saved to {results_path}")




Generating explanations...


Generating Explanations: 100%|██████████| 45/45 [04:47<00:00,  6.40s/it]


Evaluating explanations...
Evaluation Results: {'rouge1': 0.0017191689915329361, 'rouge2': 0.0, 'rougeL': 0.0015153518318960003, 'bert_precision': 0.5744952647959819, 'bert_recall': 0.639760310993809, 'bert_f1': 0.6029345385667657}
