# FLAN-T5 Opinion Summarization
---

Summarizing multiple opinions/comments using the FLAN-T5-large model.

**Loading the Model**

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint name handed to both from_pretrained() calls below
model_name = "google/flan-t5-large"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are requested as float32 (intended for CPU usage)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, dtype=torch.float32)

**Defining the Summarization Function**

In [3]:
# Summarization function
def summarize_opinions(comments, topic=None, max_input_tokens=400, max_length=128, min_length=20, temperature=0.3, num_beams=4):
    """
    Summarize a list of comments and provide a general analysis.

    Uses the module-level ``tokenizer`` and ``model``. If all comments fit in
    one prompt they are summarized in a single pass. Otherwise the comments
    are packed into chunks, each chunk is summarized, and the chunk summaries
    are combined into a final summary. If the chunk summaries themselves do
    not fit in one prompt they are condensed again instead of being silently
    truncated.

    Generation uses sampling (``do_sample=True``), so repeated calls may
    return different text.

    Args:
        comments (list): List of comment strings. A single string is treated
            as one comment; ``None`` and blank entries are ignored.
        topic (str, optional): The topic being discussed
        max_input_tokens (int): Maximum tokens of comment text per chunk
            (default 400, leaving room for prompt). It is capped so that the
            instruction at the end of the prompt is never truncated away.
        max_length (int): Maximum tokens in generated summary (default 128)
        min_length (int): Minimum tokens in generated summary (default 20)
        temperature (float): Controls randomness (0.1-1.0, default 0.3)
        num_beams (int): Number of beams for beam search (default 4)

    Returns:
        str: General summary of public opinion, or an empty string when there
        are no non-blank comments to summarize.
    """
    # Prompts longer than this are truncated by the tokenizer call below.
    model_token_limit = 512

    # Normalize the input before touching the model.
    if comments is None:
        comments = []
    elif isinstance(comments, str):
        # A bare string would otherwise be iterated character by character.
        comments = [comments]
    # Drop missing/blank entries so they do not become empty "- " bullets.
    comments = [str(c).strip() for c in comments if c is not None and str(c).strip()]
    if not comments:
        return ""

    def create_prompt(text, topic):
        """Create the prompt for summarization"""
        if topic:
            return (
                f"Review these comments about {topic}:\n"
                f"{text}\n\n"
                f"Instruction: Analyze the comments and provide a general summary of the public opinion on {topic}."
            )
        return (
            f"Analyze these comments:\n"
            f"{text}\n\n"
            f"Instruction: Analyze the comments and provide a general summary of the overall sentiment and key themes."
        )

    def create_combine_prompt(summaries):
        """Create the prompt that merges several partial summaries"""
        joined = "\n".join(f"- {s}" for s in summaries)
        return (
            f"Combine these summaries about {topic if topic else 'the topic'}:\n"
            f"{joined}\n\n"
            f"Instruction: Analyze these summaries and provide a comprehensive general summary of the overall opinion."
        )

    def count_tokens(text):
        """Number of token ids the tokenizer produces for text"""
        return len(tokenizer.encode(text))

    def generate_summary(prompt):
        """Tokenize a prompt, run generation, and decode the first sequence"""
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=model_token_limit,
            truncation=True
        )
        # Keep the inputs on the same device as the model (a no-op on CPU).
        inputs = inputs.to(model.device)

        summary_ids = model.generate(
            **inputs,
            max_length=max_length,
            min_length=min_length,
            temperature=temperature,
            num_beams=num_beams,
            do_sample=True,
            early_stopping=True
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    def pack_chunks(items, budget):
        """Greedily group items into bullet-list chunks of at most budget tokens"""
        chunks = []
        current_chunk = []
        current_length = 0
        for item in items:
            line = f"- {item}"
            line_tokens = count_tokens(line)
            if line_tokens > budget:
                # The instruction sits at the end of the prompt, so an
                # oversized item would get the instruction truncated away.
                # Clip the item itself instead.
                ids = tokenizer.encode(line, add_special_tokens=False)
                line = tokenizer.decode(ids[:max(1, budget - 1)], skip_special_tokens=True)
                line_tokens = budget
            # If adding this item exceeds the limit, save current chunk and start new one
            if current_chunk and current_length + line_tokens > budget:
                chunks.append("\n".join(current_chunk))
                current_chunk = []
                current_length = 0
            current_chunk.append(line)
            current_length += line_tokens
        if current_chunk:
            chunks.append("\n".join(current_chunk))
        return chunks

    combined_text = "\n".join(f"- {c}" for c in comments)
    full_prompt = create_prompt(combined_text, topic)
    total_tokens = count_tokens(full_prompt)

    # If it fits, process normally
    if total_tokens <= model_token_limit:
        return generate_summary(full_prompt)
    print(f"Input too long ({total_tokens} tokens). Processing in chunks...")

    # Reserve room for the prompt wrapper so the instruction always survives truncation.
    prompt_overhead = count_tokens(create_prompt("", topic))
    chunk_budget = max(1, min(max_input_tokens, model_token_limit - prompt_overhead))

    chunks = pack_chunks(comments, chunk_budget)
    print(f"Split into {len(chunks)} chunks")

    # Summarize each chunk
    chunk_summaries = []
    for i, chunk in enumerate(chunks, start=1):
        print(f"Processing chunk {i}/{len(chunks)}...")
        chunk_summaries.append(generate_summary(create_prompt(chunk, topic)))

    # If only one chunk, return it
    if len(chunk_summaries) == 1:
        return chunk_summaries[0]

    # Otherwise, combine the chunk summaries into a final summary
    print("Combining chunk summaries...")
    final_prompt = create_combine_prompt(chunk_summaries)
    while count_tokens(final_prompt) > model_token_limit:
        groups = pack_chunks(chunk_summaries, chunk_budget)
        if len(groups) >= len(chunk_summaries):
            # Regrouping no longer shrinks the list; fall back to the
            # tokenizer's truncation rather than looping forever.
            break
        print(f"Summaries still too long; condensing into {len(groups)} groups...")
        chunk_summaries = [generate_summary(create_prompt(group, topic)) for group in groups]
        if len(chunk_summaries) == 1:
            return chunk_summaries[0]
        final_prompt = create_combine_prompt(chunk_summaries)

    return generate_summary(final_prompt)


### Usage

In [6]:
# ==== CONFIGURATION =====

# Bounds on the generated summary, in tokens
max_length = 130
min_length = 20

# Creativity level
temperature = 0.30

# Beams for beam search (affects quality and speed)
num_beams = 4

# ========================

In [10]:
import pandas as pd

df = pd.read_csv("sustainability_social_media_posts.csv")

# head (mixed)
# sample_comments = df["post_text"].head(10).tolist()

# filtered (same sentiment and topic); rows without post_text are dropped so
# missing values never reach the prompt as the text "nan"
is_positive = df["post_sentiment"] == "Positive"
is_climate_policy = df["climate_topic"] == "Climate Policy"
sample_comments = df.loc[is_positive & is_climate_policy, "post_text"].dropna().tolist()

# Only the first few comments are printed and summarized; slice once so the
# printed list, the reported count, and the summarizer input always agree.
max_comments = 10
selected_comments = sample_comments[:max_comments]

print(f"Total matching comments: {len(sample_comments)}")
print(f"Comments to summarize: {len(selected_comments)}\n")

print("="*60)
print("RAW COMMENTS:")
print("="*60)
for comment in selected_comments:
    print(f"{comment}")

print("\n" + "="*60 + "\n")

result = summarize_opinions(
    selected_comments,
    topic=None,
    max_length=max_length,
    min_length=min_length,
    temperature=temperature,
    num_beams=num_beams
)
print(f"\nSUMMARY:\n{result}")

Total comments to summarize: 25

RAW COMMENTS:
Renewable energy requires change through activism. Global warming should be embraced through education. Climate mitigation prevents catastrophic warming scenarios.
Environmental protection is everyone's responsibility at every level. Reforestation is urgent through activism. Water conservation reduces pollution starting today.
Water conservation can save our planet starting today. Green innovation drives economic growth while protecting environment. Climate activism promotes cleaner air to empower communities.
Green infrastructure projects provide multiple environmental benefits. Sustainable transportation reduces urban pollution significantly. Circular economy builds a greener future by reducing waste.
LED lighting saves energy consumption dramatically. Renewable energy is everyone's responsibility by reducing waste. Green buildings improve energy efficiency markedly.
Green technology is essential through innovation. Climate justice shoul

## Fine Tuning
---

## Model Evaluation
---