# In [32]:
from collections import Counter

def ngram_counts(tokens, n):
    """Return every contiguous n-gram of ``tokens`` as a list of tuples.

    Despite the name, this returns the n-grams themselves (with repeats,
    in order of appearance), not a frequency table.
    """
    grams = []
    last_start = len(tokens) - n
    for start in range(last_start + 1):
        grams.append(tuple(tokens[start:start + n]))
    return grams

def rouge_n(hypothesis, reference, n=1):
    """Compute the ROUGE-N F1 score between two token sequences.

    The overlap is the clipped (multiset) n-gram match count: an n-gram that
    occurs k times in the hypothesis and m times in the reference contributes
    min(k, m) matches. Precision and recall divide that count by the total
    number of n-grams in the hypothesis and reference respectively.

    Args:
        hypothesis: sequence of hashable tokens for the candidate text.
        reference: sequence of hashable tokens for the reference text.
        n: n-gram order.

    Returns:
        F1 in [0.0, 1.0]; 0.0 when either side has no n-grams of order ``n``.
    """
    def _gram_counter(tokens):
        # Frequency table of all contiguous n-grams in `tokens`.
        return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

    hyp_counts = _gram_counter(hypothesis)
    ref_counts = _gram_counter(reference)
    hyp_total = sum(hyp_counts.values())
    ref_total = sum(ref_counts.values())
    if hyp_total == 0 or ref_total == 0:
        return 0.0

    # Counter & Counter keeps the minimum count per key, i.e. clipped matches.
    # Using set intersection here would undercount repeated n-grams while the
    # denominators still count them, deflating both precision and recall.
    overlap = sum((hyp_counts & ref_counts).values())
    prec = overlap / hyp_total
    rec = overlap / ref_total
    return (2 * prec * rec) / (prec + rec) if prec + rec > 0 else 0.0

def lcs_length(x, y):
    """Return the length of the longest common subsequence of ``x`` and ``y``.

    Uses the classic dynamic programme but keeps only the previous and the
    current row, so memory is proportional to the shorter input and no NumPy
    array is needed. The result is a plain ``int``.

    Args:
        x: first sequence (elements compared with ``==``).
        y: second sequence.

    Returns:
        Length of the LCS; 0 when either sequence is empty.
    """
    # LCS is symmetric; iterate rows over the longer input so rows stay short.
    if len(y) > len(x):
        x, y = y, x

    previous = [0] * (len(y) + 1)
    for x_item in x:
        current = [0]
        for j, y_item in enumerate(y, start=1):
            if x_item == y_item:
                current.append(previous[j - 1] + 1)
            else:
                current.append(max(previous[j], current[j - 1]))
        previous = current
    return previous[len(y)]

def rouge_l(hypothesis, reference):
    """Compute the ROUGE-L F1 score from the longest common subsequence.

    Precision is LCS length over hypothesis length, recall is LCS length over
    reference length; an empty side contributes 0 for its ratio.
    """
    common = lcs_length(hypothesis, reference)

    precision = 0
    if hypothesis:
        precision = common / len(hypothesis)

    recall = 0
    if reference:
        recall = common / len(reference)

    total = precision + recall
    if total > 0:
        return (2 * precision * recall) / total
    return 0.0

def bleu_score(hypothesis, reference, max_n=4):
    """Compute a sentence-level BLEU score with clipped n-gram precisions.

    For each order 1..max_n the clipped precision is computed; a precision of
    zero (including the case of no hypothesis n-grams) is replaced by 1e-9 so
    the geometric mean stays defined. The result is the geometric mean times
    the brevity penalty. Returns 0.0 for an empty hypothesis.
    """
    hyp_len, ref_len = len(hypothesis), len(reference)
    if hyp_len == 0:
        return 0.0

    log_precisions = []
    for order in range(1, max_n + 1):
        hyp_grams = ngram_counts(hypothesis, order)
        # Each reference n-gram can be matched at most as often as it occurs.
        available = Counter(ngram_counts(reference, order))
        matched = 0
        for gram in hyp_grams:
            if available[gram] > 0:
                matched += 1
                available[gram] -= 1
        precision = matched / len(hyp_grams) if hyp_grams else 0
        if not precision > 0:
            precision = 1e-9  # smoothing
        log_precisions.append(np.log(precision))

    geo_mean = np.exp(np.mean(log_precisions))
    if hyp_len > ref_len:
        brevity_penalty = 1.0
    else:
        brevity_penalty = np.exp(1 - ref_len / hyp_len)
    return brevity_penalty * geo_mean


# In [None]:
import os
import numpy as np
import random
import time
import pickle
from collections import defaultdict, Counter
import re

# Seed Python's `random` module and NumPy's global generator so that
# shuffling, weight initialisation and dropout masks repeat across runs.
random.seed(42)
np.random.seed(42)

def load_data(file_name):
    """Read a UTF-8 text file and return its non-blank lines, stripped.

    Lines that are empty or whitespace-only are dropped; surrounding
    whitespace is removed from the rest. Errors from ``open`` (for example
    ``FileNotFoundError``) propagate to the caller.
    """
    # With a single argument os.path.join hands the path back as given.
    full_path = os.path.join(file_name)
    lines = []
    with open(full_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                lines.append(cleaned)
    return lines

def create_dummy_data():
    """Populate the module-level dataset globals with a tiny built-in corpus.

    Sets ``train_src``/``train_tgt`` (4 pairs), ``val_src``/``val_tgt``
    (2 pairs) and ``test_src``/``test_tgt`` (2 pairs) as lists of strings,
    then prints a notice. Returns nothing.
    """
    global train_src, train_tgt, val_src, val_tgt, test_src, test_tgt

    # Each entry is (article, reference summary).
    train_pairs = [
        (
            "This is the first sentence of the article. It's about a dog named Max. The dog is very happy and playful. Max loves to run in the park. He enjoys playing fetch with his owner. This is the last sentence about Max's adventures.",
            "Max is a happy, playful dog who loves running and playing fetch in the park.",
        ),
        (
            "Another article about a different topic entirely. This one discusses recent scientific discoveries. It talks about a breakthrough in quantum computing. The discovery could revolutionize technology. Scientists are very excited about the implications. This represents a major step forward.",
            "Scientists made a quantum computing breakthrough that could revolutionize technology.",
        ),
        (
            "Climate change is affecting ecosystems worldwide. Many species are struggling to adapt. Arctic ice is melting at unprecedented rates. Ocean levels are rising steadily. Governments must take immediate action. The future of our planet depends on it.",
            "Climate change threatens ecosystems globally, requiring immediate government action.",
        ),
        (
            "Artificial intelligence continues to advance rapidly. Machine learning models are becoming more sophisticated. They can now perform complex tasks autonomously. However, ethical concerns remain important. Researchers emphasize responsible development. The technology holds great promise.",
            "AI advances rapidly with sophisticated models, but ethical development remains crucial.",
        ),
    ]
    val_pairs = [
        (
            "Space exploration has entered a new era of innovation. Private companies are launching missions. Mars colonization plans are becoming realistic. Technology advances enable longer journeys. The universe holds many secrets waiting to be discovered.",
            "Private companies drive space exploration innovation toward realistic Mars colonization.",
        ),
        (
            "Renewable energy sources are becoming more efficient. Solar panels now convert more sunlight. Wind turbines generate substantial electricity. Governments invest heavily in clean technology. The transition away from fossil fuels accelerates.",
            "Renewable energy efficiency improves as governments accelerate the clean technology transition.",
        ),
    ]
    test_pairs = [
        (
            "Medical research has achieved remarkable breakthroughs recently. New treatments target previously incurable diseases. Gene therapy shows promising results. Clinical trials demonstrate safety and efficacy. Patients now have renewed hope for recovery.",
            "Medical breakthroughs in gene therapy offer new treatments and hope for patients.",
        ),
        (
            "Education systems worldwide are embracing digital transformation. Online learning platforms expand access. Interactive technologies engage students effectively. Teachers adapt to new methodologies. The future of education looks bright.",
            "Digital transformation expands educational access through online platforms and interactive technologies.",
        ),
    ]

    # Transpose each list of pairs into a sources list and a targets list.
    train_src, train_tgt = (list(column) for column in zip(*train_pairs))
    val_src, val_tgt = (list(column) for column in zip(*val_pairs))
    test_src, test_tgt = (list(column) for column in zip(*test_pairs))

    print("Using enhanced dummy data for demonstration.")

# Prefer the on-disk dataset; if any split file is missing, fall back to the
# built-in sample corpus for every split.
try:
    train_src, train_tgt, val_src, val_tgt, test_src, test_tgt = (
        load_data("train.source.txt"),
        load_data("train.target.txt"),
        load_data("val.source.txt"),
        load_data("val.target.txt"),
        load_data("test.source.txt"),
        load_data("test.target.txt"),
    )
except FileNotFoundError:
    create_dummy_data()

# Report split sizes, one line per split.
print(
    f"Loaded {len(train_src)} training samples",
    f"Loaded {len(val_src)} validation samples",
    f"Loaded {len(test_src)} test samples",
    sep="\n",
)

class TextPreprocessor:
    """Lower-casing word tokenizer with a frequency-capped vocabulary.

    Indices 0-3 are reserved for '<PAD>', '<UNK>', '<START>' and '<END>';
    regular words start at index 4 in descending frequency order.
    """

    def __init__(self, vocab_size=10000):
        self.vocab_size = vocab_size
        self.word_to_idx = {}
        self.idx_to_word = {}
        self.word_counts = Counter()

    def build_vocabulary(self, texts):
        """Count words in ``texts`` and rebuild both lookup tables.

        Counts accumulate in ``self.word_counts`` across calls. At most
        ``vocab_size - 4`` regular words are kept. Returns ``self``.
        """
        print("Building vocabulary...")

        for document in texts:
            self.word_counts.update(self.tokenize(document))

        # Special tokens take the first four slots, in this order.
        vocabulary = ['<PAD>', '<UNK>', '<START>', '<END>']
        reserved = len(vocabulary)
        vocabulary.extend(
            word for word, _ in self.word_counts.most_common(self.vocab_size - reserved)
        )

        self.word_to_idx = {word: position for position, word in enumerate(vocabulary)}
        self.idx_to_word = dict(enumerate(vocabulary))

        print(f"Built vocabulary with {len(self.word_to_idx)} words")
        return self

    def tokenize(self, text):
        """Lower-case ``text`` and return its runs of word characters."""
        return re.findall(r'\b\w+\b', text.lower())

    def text_to_indices(self, text):
        """Map each token of ``text`` to its index; unknown words map to 1."""
        lookup = self.word_to_idx.get
        return [lookup(token, 1) for token in self.tokenize(text)]

    def indices_to_text(self, indices):
        """Join the words for ``indices`` with spaces.

        Index 0 (padding) is skipped; indices missing from the vocabulary
        are rendered as '<UNK>'.
        """
        return ' '.join(
            self.idx_to_word.get(index, '<UNK>') for index in indices if index != 0
        )

def split_into_sentences(text, max_length=50):
    """Split ``text`` into sentence strings on runs of '.', '!' or '?'.

    Empty fragments are discarded. A sentence with more than ``max_length``
    whitespace-separated words is broken into consecutive chunks of at most
    ``max_length`` words, each re-joined with single spaces; shorter
    sentences are returned stripped but otherwise unchanged.
    """
    pieces = []
    for fragment in re.split(r'[.!?]+', text):
        fragment = fragment.strip()
        if not fragment:
            continue

        words = fragment.split()
        if len(words) <= max_length:
            pieces.append(fragment)
            continue

        for start in range(0, len(words), max_length):
            chunk = ' '.join(words[start:start + max_length]).strip()
            if chunk:
                pieces.append(chunk)

    return pieces

# Improved ROUGE and BLEU implementations
def ngram_counts(tokens, n):
    """Return all contiguous n-grams of ``tokens`` as a list of tuples.

    The list preserves order and repeats; it is not a frequency table.
    """
    window_count = len(tokens) - n + 1
    windows = (tokens[start:start + n] for start in range(window_count))
    return list(map(tuple, windows))

def rouge_n(hypothesis, reference, n=1):
    """Compute the ROUGE-N F1 score between two token sequences.

    The overlap is the clipped (multiset) n-gram match count: an n-gram that
    occurs k times in the hypothesis and m times in the reference contributes
    min(k, m) matches. Precision and recall divide that count by the total
    number of n-grams in the hypothesis and reference respectively.

    Args:
        hypothesis: sequence of hashable tokens for the candidate text.
        reference: sequence of hashable tokens for the reference text.
        n: n-gram order.

    Returns:
        F1 in [0.0, 1.0]; 0.0 when either input is empty or has no n-grams
        of order ``n``.
    """
    if not hypothesis or not reference:
        return 0.0

    def _gram_counter(tokens):
        # Frequency table of all contiguous n-grams in `tokens`.
        return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

    hyp_counts = _gram_counter(hypothesis)
    ref_counts = _gram_counter(reference)
    hyp_total = sum(hyp_counts.values())
    ref_total = sum(ref_counts.values())
    if hyp_total == 0 or ref_total == 0:
        return 0.0

    # Counter & Counter keeps the minimum count per key, i.e. clipped matches.
    # Intersecting de-duplicated sets would undercount repeated n-grams while
    # the denominators still count them, deflating precision and recall.
    overlap = sum((hyp_counts & ref_counts).values())
    precision = overlap / hyp_total
    recall = overlap / ref_total

    return (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

def lcs_length(x, y):
    """Return the length of the longest common subsequence of ``x`` and ``y``.

    Uses the standard dynamic programme but keeps only the previous and the
    current row, so memory is proportional to the shorter input and the
    per-cell work uses plain Python lists instead of element-wise NumPy
    indexing. The result is a plain ``int``.

    Args:
        x: first sequence (elements compared with ``==``).
        y: second sequence.

    Returns:
        Length of the LCS; 0 when either sequence is empty.
    """
    # LCS is symmetric; iterate rows over the longer input so rows stay short.
    if len(y) > len(x):
        x, y = y, x

    previous = [0] * (len(y) + 1)
    for x_item in x:
        current = [0]
        for j, y_item in enumerate(y, start=1):
            if x_item == y_item:
                current.append(previous[j - 1] + 1)
            else:
                current.append(max(previous[j], current[j - 1]))
        previous = current
    return previous[len(y)]

def rouge_l(hypothesis, reference):
    """Compute the ROUGE-L F1 score from the longest common subsequence.

    Returns 0.0 when either sequence is empty or when they share no
    subsequence.
    """
    if hypothesis and reference:
        common = lcs_length(hypothesis, reference)
        precision = common / len(hypothesis)
        recall = common / len(reference)
        denominator = precision + recall
        if denominator > 0:
            return (2 * precision * recall) / denominator
    return 0.0

def bleu_score(hypothesis, reference, max_n=4):
    """Compute a sentence-level BLEU score with clipped n-gram precisions.

    For each order 1..max_n the clipped precision is computed; a precision of
    zero, or an order for which the hypothesis has no n-grams, is replaced by
    1e-9 so the geometric mean stays defined. The result is the geometric
    mean multiplied by the brevity penalty. Returns 0.0 for an empty
    hypothesis.
    """
    hyp_len, ref_len = len(hypothesis), len(reference)
    if not hyp_len:
        return 0.0

    floor = 1e-9  # smoothing value for empty or zero precisions
    log_precisions = []
    for order in range(1, max_n + 1):
        hyp_grams = ngram_counts(hypothesis, order)
        ref_grams = ngram_counts(reference, order)

        precision = floor
        if hyp_grams:
            # Multiset intersection gives the clipped match count: each
            # reference n-gram is matched at most as often as it occurs.
            matched = sum((Counter(hyp_grams) & Counter(ref_grams)).values())
            ratio = matched / len(hyp_grams)
            if ratio > 0:
                precision = ratio
        log_precisions.append(np.log(precision))

    geo_mean = np.exp(np.mean(log_precisions))
    if hyp_len > ref_len:
        brevity_penalty = 1.0
    else:
        brevity_penalty = np.exp(1 - ref_len / hyp_len)

    return brevity_penalty * geo_mean

def compute_sentence_similarity(sent_tokens, summary_tokens):
    """Score a sentence against the summary using unigram ROUGE (``rouge_n`` with n=1).

    Returns 0.0 without calling ``rouge_n`` when either token list is empty.
    """
    if sent_tokens and summary_tokens:
        return rouge_n(sent_tokens, summary_tokens, n=1)
    return 0.0

def create_extractive_labels(article_text, summary_text, preprocessor, top_ratio=0.3):
    """Derive per-sentence 0/1 extraction labels from a reference summary.

    Each article sentence is scored against the summary with
    ``compute_sentence_similarity``; the ``max(1, int(n * top_ratio))``
    highest-scoring sentences are labelled 1, the rest 0.

    Returns:
        ``(sentence_indices, labels)`` where ``sentence_indices`` is a list of
        token-index lists (one per sentence) and ``labels`` is an int NumPy
        array of the same length. Articles with fewer than two sentences
        yield ``([], [])``.
    """
    sentences = split_into_sentences(article_text)
    if len(sentences) < 2:
        return [], []

    summary_tokens = preprocessor.text_to_indices(summary_text)

    # From here on there are at least two sentences, so the score list is
    # never empty.
    sentence_indices = [preprocessor.text_to_indices(sentence) for sentence in sentences]
    scores = np.array(
        [compute_sentence_similarity(tokens, summary_tokens) for tokens in sentence_indices]
    )

    keep = max(1, int(len(sentences) * top_ratio))
    ranking = np.argsort(scores)  # ascending, so the best scores are last

    labels = np.zeros(len(sentences), dtype=int)
    labels[ranking[-keep:]] = 1

    return sentence_indices, labels

class ImprovedRNNEncoder:
    """Word-level tanh RNN encoder with hand-written forward and backward passes.

    Holds an embedding table plus input-to-hidden, hidden-to-hidden and bias
    parameters as float32 NumPy arrays. ``backward`` both computes gradients
    and applies a plain gradient-descent update in the same call.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, dropout=0.1):
        """Create randomly initialised parameters and zeroed gradient buffers.

        Args:
            vocab_size: number of rows in the embedding table.
            embed_dim: embedding width.
            hidden_dim: hidden-state width.
            dropout: drop probability used in ``forward`` when training.
        """
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        
        # Initialize parameters from zero-mean Gaussians whose std is scaled by
        # the layer sizes: sqrt(2/embed_dim), sqrt(2/(embed_dim+hidden_dim)) and
        # sqrt(2/hidden_dim). Only W_ih uses the fan-in + fan-out form.
        self.embedding = np.random.normal(0, np.sqrt(2.0/embed_dim), 
                                        (vocab_size, embed_dim)).astype(np.float32)
        self.W_ih = np.random.normal(0, np.sqrt(2.0/(embed_dim + hidden_dim)), 
                                   (hidden_dim, embed_dim)).astype(np.float32)
        self.W_hh = np.random.normal(0, np.sqrt(2.0/hidden_dim), 
                                   (hidden_dim, hidden_dim)).astype(np.float32)
        self.b_h = np.zeros(hidden_dim).astype(np.float32)
        
        # Gradient buffers, one per parameter, same shape and dtype
        self.grad_embedding = np.zeros_like(self.embedding)
        self.grad_W_ih = np.zeros_like(self.W_ih)
        self.grad_W_hh = np.zeros_like(self.W_hh)
        self.grad_b_h = np.zeros_like(self.b_h)

    def forward(self, sequence, training=True):
        """Run the RNN over a sequence of token indices.

        Args:
            sequence: iterable of integer token indices.
            training: when True and ``self.dropout > 0``, inverted dropout is
                applied to each input embedding and each new hidden state.

        Returns:
            A 4-tuple ``(h, hidden_states, embeddings, sequence)``: the final
            hidden state, the per-step hidden states (after dropout), the
            per-step input vectors (after dropout) and the input sequence
            itself. For an empty sequence: a zero vector and three empty lists.
        """
        if len(sequence) == 0:
            return np.zeros(self.hidden_dim).astype(np.float32), [], [], []
        
        h = np.zeros(self.hidden_dim).astype(np.float32)
        hidden_states = []
        embeddings = []
        
        for token_idx in sequence:
            # Only the upper bound is checked; negative indices are not remapped.
            if token_idx >= self.vocab_size:
                token_idx = 1  # UNK token
                
            x = self.embedding[token_idx].copy()
            
            # Apply inverted dropout to the input (scaled by 1/(1-p))
            if training and self.dropout > 0:
                dropout_mask = np.random.binomial(1, 1-self.dropout, x.shape).astype(np.float32)
                x = x * dropout_mask / (1-self.dropout)
            
            embeddings.append(x)
            
            # RNN computation
            h_new = np.tanh(np.dot(self.W_ih, x) + np.dot(self.W_hh, h) + self.b_h)
            
            # Apply inverted dropout to the hidden state; the dropped-out state
            # is what feeds the next step and what gets cached below
            if training and self.dropout > 0:
                h_dropout_mask = np.random.binomial(1, 1-self.dropout, h_new.shape).astype(np.float32)
                h_new = h_new * h_dropout_mask / (1-self.dropout)
            
            h = h_new
            hidden_states.append(h.copy())
        
        return h, hidden_states, embeddings, sequence

    def backward(self, grad_output, hidden_states, embeddings, sequence, learning_rate=0.001):
        """Backpropagate through time and immediately update the parameters.

        Args:
            grad_output: gradient of the loss w.r.t. the final hidden state.
            hidden_states: per-step hidden states returned by ``forward``.
            embeddings: per-step input vectors returned by ``forward``.
            sequence: the token indices passed to ``forward``.
            learning_rate: step size for the gradient-descent update.

        Returns:
            None. Gradient buffers are reset at the start of every call, so
            gradients do not accumulate across calls.
        """
        if len(sequence) == 0:
            return
            
        # Reset gradients
        self.grad_embedding.fill(0)
        self.grad_W_ih.fill(0)
        self.grad_W_hh.fill(0)
        self.grad_b_h.fill(0)
        
        grad_h = grad_output.copy()
        
        # Backward through time
        for t in reversed(range(len(sequence))):
            token_idx = sequence[t]
            if token_idx >= self.vocab_size:
                token_idx = 1
                
            h_prev = hidden_states[t-1] if t > 0 else np.zeros(self.hidden_dim)
            x = embeddings[t]
            
            # Gradient through tanh
            # NOTE(review): hidden_states[t] is the post-dropout state cached by
            # forward, so when dropout was active 1 - h**2 is not the derivative
            # of the tanh output, and the dropout masks are not re-applied here.
            grad_h_raw = grad_h * (1 - hidden_states[t]**2)
            
            # Gradients for parameters
            self.grad_W_ih += np.outer(grad_h_raw, x)
            self.grad_W_hh += np.outer(grad_h_raw, h_prev)
            self.grad_b_h += grad_h_raw
            
            # Gradient for embedding
            grad_x = np.dot(self.W_ih.T, grad_h_raw)
            self.grad_embedding[token_idx] += grad_x
            
            # Gradient for previous hidden state
            if t > 0:
                grad_h = np.dot(self.W_hh.T, grad_h_raw)
        
        # Clip gradients to prevent exploding gradients.
        # np.clip bounds every element to [-max_norm, max_norm]; despite the
        # variable name this is value clipping, not norm-based clipping.
        max_norm = 5.0
        self.grad_embedding = np.clip(self.grad_embedding, -max_norm, max_norm)
        self.grad_W_ih = np.clip(self.grad_W_ih, -max_norm, max_norm)
        self.grad_W_hh = np.clip(self.grad_W_hh, -max_norm, max_norm)
        self.grad_b_h = np.clip(self.grad_b_h, -max_norm, max_norm)
        
        # Update parameters in place (plain gradient descent)
        self.embedding -= learning_rate * self.grad_embedding
        self.W_ih -= learning_rate * self.grad_W_ih
        self.W_hh -= learning_rate * self.grad_W_hh
        self.b_h -= learning_rate * self.grad_b_h

class ImprovedSentenceEncoder:
    """Document-level tanh RNN that runs over per-sentence encodings.

    Each sentence is first encoded by the shared ``word_encoder``; the
    resulting vectors are then fed, in order, through a second RNN whose
    hidden width equals the word encoder's ``hidden_dim``.
    """

    def __init__(self, word_encoder):
        """Create document-level parameters sized from ``word_encoder.hidden_dim``.

        Args:
            word_encoder: encoder exposing ``hidden_dim``, ``dropout``,
                ``forward`` and ``backward`` as used below.
        """
        self.word_encoder = word_encoder
        hidden_dim = word_encoder.hidden_dim
        
        # Initialize parameters (zero-mean Gaussian, std sqrt(2/hidden_dim); zero bias)
        self.W_ih_sent = np.random.normal(0, np.sqrt(2.0/hidden_dim), 
                                        (hidden_dim, hidden_dim)).astype(np.float32)
        self.W_hh_sent = np.random.normal(0, np.sqrt(2.0/hidden_dim), 
                                        (hidden_dim, hidden_dim)).astype(np.float32)
        self.b_h_sent = np.zeros(hidden_dim).astype(np.float32)
        
        # Gradient buffers, one per parameter
        self.grad_W_ih_sent = np.zeros_like(self.W_ih_sent)
        self.grad_W_hh_sent = np.zeros_like(self.W_hh_sent)
        self.grad_b_h_sent = np.zeros_like(self.b_h_sent)

    def forward(self, sentences, training=True):
        """Encode a document given as a list of token-index sequences.

        Args:
            sentences: list of sentences, each a sequence of token indices.
            training: forwarded to the word encoder; also enables dropout on
                the document-level hidden state when the word encoder's
                ``dropout`` is positive.

        Returns:
            ``(contextual_sentence_reps, cache)`` where ``cache`` is
            ``(sentence_data, doc_hidden_states, sentence_representations)``
            for use by ``backward``. For an empty input returns ``([], [])``.
        """
        if not sentences:
            return [], []
        
        sentence_representations = []
        sentence_data = []  # Store data needed for backprop
        
        # Encode each sentence with the word-level encoder
        for sentence in sentences:
            sent_rep, hidden_states, embeddings, sequence = self.word_encoder.forward(sentence, training)
            sentence_representations.append(sent_rep)
            sentence_data.append((hidden_states, embeddings, sequence))
        
        # Document-level RNN
        h_doc = np.zeros(self.word_encoder.hidden_dim).astype(np.float32)
        contextual_sentence_reps = []
        doc_hidden_states = []
        
        for sent_rep in sentence_representations:
            h_doc = np.tanh(np.dot(self.W_ih_sent, sent_rep) + 
                          np.dot(self.W_hh_sent, h_doc) + self.b_h_sent)
            
            if training and self.word_encoder.dropout > 0:
                # NOTE(review): unlike the word encoder, this mask is not cast to
                # float32, so h_doc is presumably promoted to float64 here - confirm.
                dropout_mask = np.random.binomial(1, 1-self.word_encoder.dropout, h_doc.shape)
                h_doc = h_doc * dropout_mask / (1-self.word_encoder.dropout)
            
            # Both lists receive separate copies of the same post-dropout state
            contextual_sentence_reps.append(h_doc.copy())
            doc_hidden_states.append(h_doc.copy())
        
        return contextual_sentence_reps, (sentence_data, doc_hidden_states, sentence_representations)

    def backward(self, grad_outputs, forward_data, learning_rate=0.001):
        """Backpropagate through the document RNN and into the word encoder.

        Args:
            grad_outputs: per-sentence gradients w.r.t. the contextual
                sentence representations returned by ``forward``.
            forward_data: the cache tuple returned by ``forward``.
            learning_rate: step size used here and passed to the word encoder.

        Returns:
            None. Document-level parameters are updated once at the end; the
            word encoder's ``backward`` is called once per sentence.
        """
        sentence_data, doc_hidden_states, sentence_representations = forward_data
        
        # Reset gradients
        self.grad_W_ih_sent.fill(0)
        self.grad_W_hh_sent.fill(0)
        self.grad_b_h_sent.fill(0)
        
        grad_h_doc = np.zeros(self.word_encoder.hidden_dim)
        
        # Backward through document-level RNN
        for t in reversed(range(len(grad_outputs))):
            # Combine the direct gradient for step t with the gradient flowing
            # back from step t+1. doc_hidden_states[t] is the post-dropout state.
            grad_h_raw = (grad_outputs[t] + grad_h_doc) * (1 - doc_hidden_states[t]**2)
            
            h_prev = doc_hidden_states[t-1] if t > 0 else np.zeros(self.word_encoder.hidden_dim)
            sent_rep = sentence_representations[t]
            
            # Accumulate parameter gradients
            self.grad_W_ih_sent += np.outer(grad_h_raw, sent_rep)
            self.grad_W_hh_sent += np.outer(grad_h_raw, h_prev)
            self.grad_b_h_sent += grad_h_raw
            
            # Gradient for sentence representation
            grad_sent_rep = np.dot(self.W_ih_sent.T, grad_h_raw)
            
            # Backpropagate to word encoder
            # NOTE(review): each call below resets, clips and applies the word
            # encoder's gradients, so its weights are stepped once per sentence
            # while the cached activations come from the pre-update forward pass.
            hidden_states, embeddings, sequence = sentence_data[t]
            self.word_encoder.backward(grad_sent_rep, hidden_states, embeddings, sequence, learning_rate)
            
            # Gradient for previous doc hidden state
            if t > 0:
                grad_h_doc = np.dot(self.W_hh_sent.T, grad_h_raw)
        
        # Clip (element-wise to [-max_norm, max_norm]) and update parameters
        max_norm = 5.0
        self.grad_W_ih_sent = np.clip(self.grad_W_ih_sent, -max_norm, max_norm)
        self.grad_W_hh_sent = np.clip(self.grad_W_hh_sent, -max_norm, max_norm)
        self.grad_b_h_sent = np.clip(self.grad_b_h_sent, -max_norm, max_norm)
        
        self.W_ih_sent -= learning_rate * self.grad_W_ih_sent
        self.W_hh_sent -= learning_rate * self.grad_W_hh_sent
        self.b_h_sent -= learning_rate * self.grad_b_h_sent

class ImprovedBinaryClassifier:
    """Single-unit logistic classifier applied independently to each vector."""

    def __init__(self, input_dim):
        self.input_dim = input_dim
        weight_std = np.sqrt(2.0/input_dim)
        self.W_class = np.random.normal(0, weight_std, (1, input_dim)).astype(np.float32)
        self.b_class = np.zeros(1).astype(np.float32)

    def forward(self, representations):
        """Return one sigmoid probability per representation.

        Logits are clipped to [-10, 10] before the sigmoid. An empty input
        list yields an empty list; otherwise a NumPy array is returned.
        """
        if not representations:
            return []

        def _probability(vector):
            logit = np.dot(self.W_class, vector) + self.b_class
            bounded = np.clip(logit, -10, 10)
            return (1.0 / (1.0 + np.exp(-bounded)))[0]

        return np.array([_probability(vector) for vector in representations])

    def backward(self, grad_outputs, representations, learning_rate=0.001):
        """Update the weights and return the gradient for each input vector.

        ``grad_outputs`` holds one scalar per representation, treated as the
        gradient w.r.t. that representation's logit. Parameter gradients are
        summed over all items, clipped element-wise to [-5, 5] and applied
        with plain gradient descent. The returned input gradients are computed
        with the weights as they were before the update.
        """
        weight_grad = np.zeros_like(self.W_class)
        bias_grad = np.zeros_like(self.b_class)
        grad_representations = []

        # The weights do not change inside the loop, so flatten them once.
        flat_weights = self.W_class.flatten()
        for grad_out, vector in zip(grad_outputs, representations):
            weight_grad += grad_out * vector.reshape(1, -1)
            bias_grad += grad_out
            grad_representations.append(grad_out * flat_weights)

        limit = 5.0
        self.W_class -= learning_rate * np.clip(weight_grad, -limit, limit)
        self.b_class -= learning_rate * np.clip(bias_grad, -limit, limit)

        return grad_representations

class ImprovedExtractiveRNNSummarizer:
    """Extractive summarizer: word encoder -> sentence encoder -> binary classifier."""

    def __init__(self, vocab_size, embed_dim=128, hidden_dim=256):
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        # Construction order matters: each component draws its initial
        # weights from NumPy's global random generator.
        self.word_encoder = ImprovedRNNEncoder(vocab_size, embed_dim, hidden_dim)
        self.sentence_encoder = ImprovedSentenceEncoder(self.word_encoder)
        self.classifier = ImprovedBinaryClassifier(hidden_dim)

    def forward(self, sentences, training=True):
        """Score every sentence of a document.

        Returns ``(probabilities, cache)`` where ``cache`` is what
        ``backward`` expects. When the sentence encoder produces no
        representations the result is ``(np.array([]), None)``.
        """
        encoded, encoder_cache = self.sentence_encoder.forward(sentences, training)
        if encoded:
            return self.classifier.forward(encoded), (encoded, encoder_cache)
        return np.array([]), None

    def backward(self, loss_gradients, forward_data, learning_rate=0.001):
        """Propagate ``loss_gradients`` through the classifier, then the encoders.

        Each component updates its own parameters as part of its backward call.
        """
        encoded, encoder_cache = forward_data
        encoded_grads = self.classifier.backward(loss_gradients, encoded, learning_rate)
        self.sentence_encoder.backward(encoded_grads, encoder_cache, learning_rate)

def compute_loss_and_gradients(model, sentences, labels, learning_rate=0.001):
    """Run one training step on a single document and return its loss.

    Performs a training-mode forward pass, computes the mean binary
    cross-entropy against ``labels`` and calls ``model.backward`` with
    ``(probs - labels) / N``. Returns 0.0 without a backward pass when there
    is nothing to score or the loss is not finite.
    """
    if not sentences or len(labels) == 0:
        return 0.0

    probs, forward_data = model.forward(sentences, training=True)
    if len(probs) == 0:
        return 0.0

    # Score only the positions present in both arrays.
    usable = min(len(probs), len(labels))
    targets = labels[:usable]
    # Keep probabilities away from 0 and 1 so the logs stay finite.
    clipped = np.clip(probs[:usable], 1e-8, 1-1e-8)

    log_likelihood = targets * np.log(clipped) + (1 - targets) * np.log(1 - clipped)
    loss = -np.mean(log_likelihood)
    if not np.isfinite(loss):
        return 0.0

    step_gradients = (clipped - targets) / len(targets)
    model.backward(step_gradients, forward_data, learning_rate)

    return loss

def evaluate_model(model, data, preprocessor, max_samples=None):
    """Evaluate sentence classification and summary quality on ``data``.

    Args:
        model: object providing ``forward(sentences, training=False)``.
        data: indexable collection of ``(article, summary)`` pairs.
        preprocessor: tokenizer providing ``text_to_indices``.
        max_samples: optional cap on the number of leading samples evaluated;
            a falsy value means all samples.

    Returns:
        ``(avg_loss, accuracy, precision, recall, f1, rouge1, rouge2, rougel,
        bleu)``. If no sample yields a finite loss, the loss is ``inf`` and
        every other entry is 0.0.
    """
    if max_samples:
        eval_count = min(len(data), max_samples)
    else:
        eval_count = len(data)

    loss_sum = 0.0
    counted = 0
    predicted, gold = [], []
    r1_scores, r2_scores, rl_scores, bleu_scores = [], [], [], []

    for position in range(eval_count):
        article, summary = data[position]
        sentences, labels = create_extractive_labels(article, summary, preprocessor)
        if not sentences or len(labels) == 0:
            continue

        probs, _ = model.forward(sentences, training=False)
        if len(probs) == 0:
            continue

        # Classification loss over the positions present in both arrays.
        usable = min(len(probs), len(labels))
        clipped = np.clip(probs[:usable], 1e-8, 1-1e-8)
        targets = labels[:usable]
        loss = -np.mean(targets * np.log(clipped) +
                        (1 - targets) * np.log(1 - clipped))

        if np.isfinite(loss):
            loss_sum += loss
            counted += 1
            predicted.extend((clipped > 0.5).astype(int))
            gold.extend(targets)

        # Summary-quality metrics are recorded even if the loss was not finite.
        summary_text = ' '.join(generate_summary(model, article, preprocessor))
        hyp_tokens = preprocessor.text_to_indices(summary_text)
        ref_tokens = preprocessor.text_to_indices(summary)

        r1_scores.append(rouge_n(hyp_tokens, ref_tokens, n=1))
        r2_scores.append(rouge_n(hyp_tokens, ref_tokens, n=2))
        rl_scores.append(rouge_l(hyp_tokens, ref_tokens))
        bleu_scores.append(bleu_score(hyp_tokens, ref_tokens))

    if counted == 0:
        return float('inf'), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

    avg_loss = loss_sum / counted

    # Confusion-matrix counts over all scored sentences.
    predicted = np.array(predicted)
    gold = np.array(gold)
    predicted_positive = predicted == 1
    predicted_negative = predicted == 0
    gold_positive = gold == 1
    gold_negative = gold == 0

    tp = np.sum(predicted_positive & gold_positive)
    fp = np.sum(predicted_positive & gold_negative)
    fn = np.sum(predicted_negative & gold_positive)
    tn = np.sum(predicted_negative & gold_negative)

    total = tp + fp + fn + tn
    accuracy = (tp + tn) / total if total > 0 else 0.0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if (precision + recall) > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    def _mean_or_zero(scores):
        # Average as a plain float; 0.0 for an empty list.
        return float(np.mean(scores)) if scores else 0.0

    return (
        avg_loss,
        accuracy,
        precision,
        recall,
        f1,
        _mean_or_zero(r1_scores),
        _mean_or_zero(r2_scores),
        _mean_or_zero(rl_scores),
        _mean_or_zero(bleu_scores),
    )

def generate_summary(model, article_text, preprocessor, max_sentences=3, threshold=0.5):
    """Pick up to ``max_sentences`` sentences of the article as its summary.

    Sentences whose predicted probability exceeds ``threshold`` are selected;
    if none do, the highest-scoring sentences are used instead. When too many
    qualify, only the highest-scoring ``max_sentences`` are kept. The result
    is a list of sentence strings in their original document order. If the
    model returns no probabilities, the leading sentences are returned.
    """
    sentences_text = split_into_sentences(article_text)
    if not sentences_text:
        return []

    encoded = [preprocessor.text_to_indices(sentence) for sentence in sentences_text]

    probs, _ = model.forward(encoded, training=False)
    if len(probs) == 0:
        return sentences_text[:max_sentences]

    chosen = [position for position, prob in enumerate(probs) if prob > threshold]

    # Nothing cleared the threshold: fall back to the best-scoring sentences.
    if not chosen:
        chosen = sorted(np.argsort(probs)[-max_sentences:])

    # Too many candidates: keep the most probable ones, then restore order.
    if len(chosen) > max_sentences:
        ranked = sorted(
            ((position, probs[position]) for position in chosen),
            key=lambda pair: pair[1],
            reverse=True,
        )
        chosen = sorted(position for position, _ in ranked[:max_sentences])

    return [sentences_text[position] for position in chosen
            if position < len(sentences_text)]

def train_extractive_summarizer(train_data, val_data, preprocessor,
                                n_epochs=10, learning_rate=0.001, patience=3):
    """Train the extractive summarizer with per-sample gradient updates.

    Args:
        train_data: iterable of ``(article, summary)`` pairs. It is copied
            once, so the caller's collection is never reordered.
        val_data: indexable collection of ``(article, summary)`` pairs used
            for validation after every epoch (at most the first 100).
        preprocessor: tokenizer with a built vocabulary; its ``vocab_size``
            sizes the model.
        n_epochs: maximum number of epochs.
        learning_rate: step size passed to every backward pass.
        patience: stop after this many consecutive epochs without a new best
            validation loss.

    Returns:
        ``(model, train_losses, val_losses)`` with one loss entry per
        completed epoch. The model holds the weights from the last epoch
        run, not from the epoch with the best validation loss.
    """
    print("Initializing improved model...")
    model = ImprovedExtractiveRNNSummarizer(vocab_size=preprocessor.vocab_size)

    # Work on a private copy: random.shuffle reorders its argument in place,
    # which would otherwise silently permute the caller's list.
    train_data = list(train_data)

    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []

    print(f"Starting training for {n_epochs} epochs with learning rate {learning_rate}...")

    for epoch in range(n_epochs):
        epoch_start = time.time()
        random.shuffle(train_data)

        total_train_loss = 0.0
        num_train_samples = 0

        print(f"\nEpoch {epoch + 1}/{n_epochs}")
        print("Training...")

        # Training loop
        for i, (article, summary) in enumerate(train_data):
            sentences, labels = create_extractive_labels(article, summary, preprocessor)
            if not sentences or len(labels) == 0:
                continue

            # Compute loss and perform backpropagation
            loss = compute_loss_and_gradients(model, sentences, labels, learning_rate)
            # A loss of 0.0 marks a skipped sample, so it is left out of the average.
            if loss > 0:
                total_train_loss += loss
                num_train_samples += 1

            if (i + 1) % 50 == 0:  # More frequent progress updates
                current_avg_loss = total_train_loss / max(1, num_train_samples)
                print(f"  Processed {i + 1}/{len(train_data)} samples, Avg Loss: {current_avg_loss:.4f}")

        if num_train_samples > 0:
            avg_train_loss = total_train_loss / num_train_samples
        else:
            avg_train_loss = float('inf')

        # Validation
        print("Validating...")
        val_loss, val_acc, val_prec, val_recall, val_f1, r1, r2, rl, bleu = evaluate_model(
            model, val_data, preprocessor, max_samples=100
        )

        epoch_time = time.time() - epoch_start

        # Print epoch results
        print(f"Epoch {epoch + 1} Results:")
        print(f"  Train Loss: {avg_train_loss:.4f}")
        print(f"  Val Loss: {val_loss:.4f}")
        print(f"  Val Accuracy: {val_acc:.3f}")
        print(f"  Val Precision: {val_prec:.3f}")
        print(f"  Val Recall: {val_recall:.3f}")
        print(f"  Val F1-Score: {val_f1:.3f}")
        print(f"  Val ROUGE-1: {r1:.3f}, ROUGE-2: {r2:.3f}, ROUGE-L: {rl:.3f}, BLEU: {bleu:.3f}")
        print(f"  Time: {epoch_time:.1f}s")

        train_losses.append(avg_train_loss)
        val_losses.append(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            print("  New best validation loss!")
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    return model, train_losses, val_losses

def save_model(model, preprocessor, filename="improved_rnn_model.pkl"):
    """Pickle the model's parameters, its size settings and the preprocessor.

    The file holds one dict with three top-level keys: 'preprocessor',
    'model_params' (one sub-dict per sub-module: word encoder, sentence
    encoder, classifier) and 'config' (vocab_size, embed_dim, hidden_dim).

    Args:
        model: Object exposing ``word_encoder``, ``sentence_encoder`` and
            ``classifier`` sub-modules plus ``vocab_size``, ``embed_dim``
            and ``hidden_dim`` attributes.
        preprocessor: Stored in the file unchanged.
        filename: Destination path of the pickle file.

    Returns:
        None. An IOError or pickle.PickleError while writing is printed
        rather than raised.
    """
    def _snapshot(component, attr_names):
        # Collect the named attributes of one object into a plain dict.
        return {attr: getattr(component, attr) for attr in attr_names}

    # Attribute reads happen before the try block, so a model missing one of
    # these attributes raises AttributeError to the caller.
    word_params = _snapshot(
        model.word_encoder, ('embedding', 'W_ih', 'W_hh', 'b_h')
    )
    sentence_params = _snapshot(
        model.sentence_encoder, ('W_ih_sent', 'W_hh_sent', 'b_h_sent')
    )
    classifier_params = _snapshot(model.classifier, ('W_class', 'b_class'))
    hyperparams = _snapshot(model, ('vocab_size', 'embed_dim', 'hidden_dim'))

    payload = {
        'preprocessor': preprocessor,
        'model_params': {
            'word_encoder': word_params,
            'sentence_encoder': sentence_params,
            'classifier': classifier_params,
        },
        'config': hyperparams,
    }

    try:
        with open(filename, "wb") as out:
            pickle.dump(payload, out)
        print(f"Model saved to '{filename}'")
    except (IOError, pickle.PickleError) as e:
        print(f"Error saving model: {e}")

def load_model(filename="improved_rnn_model.pkl"):
    """Load a model and preprocessor from a file written by ``save_model``.

    The file is expected to hold a dict with the keys 'config',
    'model_params' and 'preprocessor', laid out as ``save_model`` writes them.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        ``(model, preprocessor)`` on success. ``(None, None)`` when the file
        cannot be opened, cannot be unpickled, or does not contain the
        expected keys; in those cases a message is printed instead of an
        exception being raised.
    """
    # Parameter names stored for each sub-module of the model.
    layout = (
        ('word_encoder', ('embedding', 'W_ih', 'W_hh', 'b_h')),
        ('sentence_encoder', ('W_ih_sent', 'W_hh_sent', 'b_h_sent')),
        ('classifier', ('W_class', 'b_class')),
    )

    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load model files that come from a trusted source.
    try:
        with open(filename, "rb") as f:
            model_data = pickle.load(f)
    except (IOError, EOFError, AttributeError, ImportError, IndexError,
            pickle.PickleError) as e:
        # An empty or truncated file raises EOFError, and a pickle that refers
        # to a class missing from this session raises AttributeError or
        # ImportError. None of these derive from pickle.PickleError, so they
        # are listed explicitly.
        print(f"Error loading model: {e}")
        return None, None

    # Pull out every value that is needed before building the model, so a
    # malformed file is rejected up front rather than half-way through.
    try:
        config = model_data['config']
        hyperparams = {
            key: config[key]
            for key in ('vocab_size', 'embed_dim', 'hidden_dim')
        }
        params = model_data['model_params']
        weights = {
            component: {name: params[component][name] for name in names}
            for component, names in layout
        }
        preprocessor = model_data['preprocessor']
    except (KeyError, TypeError) as e:
        # KeyError: an expected key is absent. TypeError: the payload (or one
        # of its sections) is not subscriptable by string key.
        print(f"Error loading model: malformed model file '{filename}' ({e!r})")
        return None, None

    model = ImprovedExtractiveRNNSummarizer(**hyperparams)

    # Replace the freshly constructed parameters with the stored ones.
    for component, names in layout:
        target = getattr(model, component)
        for name in names:
            setattr(target, name, weights[component][name])

    print(f"Model loaded from '{filename}'")
    return model, preprocessor

# Main execution: build the vocabulary, train the summarizer, evaluate it on
# the test split, print a few sample summaries, then pickle the metrics and
# the trained model.
# NOTE(review): train_src/train_tgt, val_src/val_tgt and test_src/test_tgt are
# not defined in this block -- presumably loaded by an earlier cell; confirm.
if __name__ == "__main__":
    print("Setting up improved extractive summarizer...")
    
    # Prepare data: pair each source text with its target, so every sample is
    # a (source, target) tuple.
    train_data = list(zip(train_src, train_tgt))
    val_data = list(zip(val_src, val_tgt))
    test_data = list(zip(test_src, test_tgt))
    
    print(f"Train samples: {len(train_data)}")
    print(f"Val samples: {len(val_data)}")
    print(f"Test samples: {len(test_data)}")
    
    # Build vocabulary from the train and validation sources and targets
    # (the test split is not included).
    preprocessor = TextPreprocessor(vocab_size=5000)  # Smaller vocab for demo
    all_texts = train_src + train_tgt + val_src + val_tgt
    preprocessor.build_vocabulary(all_texts)
    
    # Example preprocessing: show the first training source before and after
    # a round trip through text_to_indices / indices_to_text.
    print("\nExample preprocessing:")
    if len(train_src) > 0:
        sample_text = train_src[0]
        print(f"Original: {sample_text[:100]}...")
        tokens = preprocessor.text_to_indices(sample_text)
        print(f"Tokenized: {tokens[:20]}...")
        reconstructed = preprocessor.indices_to_text(tokens[:20])
        print(f"Reconstructed: {reconstructed}")
    
    print(f"\n=== Training Improved Extractive Summarizer ===")
    
    # Use smaller subsets for demonstration: at most 1000 training pairs and
    # 200 validation pairs. Both branches of each conditional yield the same
    # list as a plain train_data[:1000] / val_data[:200] slice would.
    train_subset = train_data[:len(train_data)] if len(train_data) <= 1000 else train_data[:1000]
    val_subset = val_data[:len(val_data)] if len(val_data) <= 200 else val_data[:200]
    
    # Train the model
    model, train_losses, val_losses = train_extractive_summarizer(
        train_subset, val_subset, preprocessor,
        n_epochs=5, learning_rate=0.01, patience=3
    )
    
    print(f"\n=== Testing ===")
    # At most 200 test pairs, all of which are passed to evaluate_model.
    test_subset = test_data[:len(test_data)] if len(test_data) <= 200 else test_data[:200]
    
    # r1, r2, rl and bleu hold the test-set metrics from here on; the sample
    # loop below uses separate *_score names and does not overwrite them.
    test_loss, test_acc, test_prec, test_recall, test_f1, r1, r2, rl, bleu = evaluate_model(
        model, test_subset, preprocessor, max_samples=len(test_subset)
    )
    
    print(f"Final Test Results:")
    print(f"  Test Loss: {test_loss:.4f}")
    print(f"  Test Accuracy: {test_acc:.3f}")
    print(f"  Test Precision: {test_prec:.3f}")
    print(f"  Test Recall: {test_recall:.3f}")
    print(f"  Test F1-Score: {test_f1:.3f}")
    print(f"  Test ROUGE-1: {r1:.3f}, ROUGE-2: {r2:.3f}, ROUGE-L: {rl:.3f}, BLEU: {bleu:.3f}")
    
    # Show up to three test examples with their generated summaries.
    print(f"\n=== Sample Summaries ===")
    for i in range(min(3, len(test_data))):
        article = test_data[i][0]
        reference = test_data[i][1]
        
        print(f"\nExample {i + 1}:")
        print(f"Article: {article[:200]}...")
        print(f"Reference: {reference}")
        
        # generate_summary returns a sequence of sentences, joined with
        # single spaces to form the summary text.
        generated_sentences = generate_summary(model, article, preprocessor, max_sentences=2)
        generated_summary = ' '.join(generated_sentences)
        print(f"Generated: {generated_summary}")
        
        # Compute individual ROUGE scores for this example. The metrics are
        # computed on the outputs of text_to_indices, not on raw words.
        hyp_tokens = preprocessor.text_to_indices(generated_summary)
        ref_tokens = preprocessor.text_to_indices(reference)
        
        r1_score = rouge_n(hyp_tokens, ref_tokens, n=1)
        r2_score = rouge_n(hyp_tokens, ref_tokens, n=2)
        rl_score = rouge_l(hyp_tokens, ref_tokens)
        bleu_score_val = bleu_score(hyp_tokens, ref_tokens)
        
        print(f"Scores - R1: {r1_score:.3f}, R2: {r2_score:.3f}, RL: {rl_score:.3f}, BLEU: {bleu_score_val:.3f}")
    
    # Save results: per-epoch loss histories, the test-set metrics and the
    # model size settings.
    results = {
        'train_losses': train_losses,
        'val_losses': val_losses,
        'test_metrics': {
            'loss': test_loss,
            'accuracy': test_acc,
            'precision': test_prec,
            'recall': test_recall,
            'f1': test_f1,
            'rouge1': r1,
            'rouge2': r2,
            'rougel': rl,
            'bleu': bleu
        },
        'vocab_size': preprocessor.vocab_size,
        'model_config': {
            'embed_dim': model.embed_dim,
            'hidden_dim': model.hidden_dim
        }
    }
    
    # A failure to write the results file is printed and does not stop the
    # script.
    try:
        with open("improved_rnn_results.pkl", "wb") as f:
            pickle.dump(results, f)
        print(f"\nResults saved to 'improved_rnn_results.pkl'")
    except (IOError, pickle.PickleError) as e:
        print(f"Error saving results: {e}")
    
    # Save the trained model
    save_model(model, preprocessor, "improved_rnn_model.pkl")
    
    # The summary below is printed whether or not the two saves above
    # succeeded.
    print(f"\n=== Training Summary ===")
    print(f"Best validation loss: {min(val_losses):.4f}")
    print(f"Final test F1-score: {test_f1:.3f}")
    print(f"Final test ROUGE-1: {r1:.3f}")
    print("Training completed successfully!")

Loaded 20000 training samples
Loaded 2500 validation samples
Loaded 2500 test samples
Setting up improved extractive summarizer...
Train samples: 20000
Val samples: 2500
Test samples: 2500
Building vocabulary...
