# Install dependencies

In [1]:
# RAG Meeting Summarizer — Colab-ready Python script
# Run in Google Colab (or any Python env).
# Requirements: sentence-transformers, faiss-cpu, requests

# ----------------------
# 1) Install dependencies (run this cell in Colab)
# ----------------------
!pip install -q sentence-transformers faiss-cpu requests


# Imports

In [2]:


import os
import math
import requests
from typing import List, Tuple
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss


# LLM

In [3]:


# Base URL of the chat-completions API used by the LLM client below.
API_BASE_URL = "https://openrouter.ai/api/v1"

class LLM:
    """Minimal client for a chat-completions HTTP endpoint.

    Every request is POSTed to ``{API_BASE_URL}/chat/completions`` and is
    authenticated with a bearer token.
    """

    def __init__(self, model: str, api_key: str):
        """Store the model identifier and the API key used for every request."""
        self.api_key = api_key
        self.base_url = API_BASE_URL
        self.model = model

    def generate_response(
        self,
        prompt: str,
        system: str,
        temperature: float = 0.2,
        max_tokens: int = 512,
        timeout: float = 60.0,
    ) -> str:
        """Send one system + user message pair and return the reply text.

        Args:
            prompt: Content of the "user" message.
            system: Content of the "system" message.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Upper bound on generated tokens forwarded to the API.
            timeout: Seconds to wait for the server before giving up. Without a
                timeout ``requests`` waits indefinitely, which would freeze the
                notebook on a stalled connection.

        Returns:
            The content of the first choice's message, stripped of surrounding
            whitespace.

        Raises:
            requests.HTTPError: If the server answers with an error status
                (raised by ``raise_for_status``).
            requests.Timeout: If no response arrives within ``timeout`` seconds.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt}
            ],
            "temperature": temperature,
            "max_tokens": max_tokens
        }

        response = requests.post(
            f"{self.base_url}/chat/completions",
            headers=headers,
            json=data,
            timeout=timeout,
        )
        response.raise_for_status()
        # Named `payload` (not `json`) so the local does not shadow the module
        # name or get confused with the `json=` keyword argument above.
        payload = response.json()

        # The reply text lives at choices[0].message.content in the response body.
        return payload["choices"][0]["message"]["content"].strip()


# Preprocessing

In [4]:

def preprocess_text(text: str) -> str:
    """Tidy a raw transcript: trim every line and drop the empty ones.

    Carriage returns are first turned into newlines; each resulting line is
    stripped of surrounding whitespace and lines that end up empty are removed.
    The surviving lines are joined back together with a single newline.
    """
    # Basic cleaning — feel free to extend (remove timestamps, filler words etc.)
    normalized = text.replace('\r', '\n')

    kept_lines = []
    for raw_line in normalized.splitlines():
        trimmed = raw_line.strip()
        if trimmed:
            kept_lines.append(trimmed)

    return "\n".join(kept_lines)


def chunk_text(text: str, max_chars: int = 1000) -> List[str]:
    """Split text into newline-joined chunks of at most ``max_chars`` characters.

    Lines are never split: consecutive lines are packed into the current chunk
    until adding the next one (plus its joining newline) would exceed
    ``max_chars``. A single line that is itself longer than ``max_chars`` is
    emitted as its own, oversized chunk.

    Args:
        text: Text to split; lines are separated by ``'\\n'``.
        max_chars: Maximum length of each chunk, measured on the joined string.

    Returns:
        The chunks in original order; an empty list for empty ``text``.
    """
    if not text:
        return []

    chunks: List[str] = []
    cur: List[str] = []
    cur_len = 0  # always the exact length of "\n".join(cur)

    for ln in text.split('\n'):
        # The joining newline only costs a character when the chunk already has
        # content; counting it for the very first line would make the first
        # chunk's limit one character smaller than every later chunk's.
        added = len(ln) + (1 if cur else 0)
        if cur and cur_len + added > max_chars:
            chunks.append("\n".join(cur))
            cur = [ln]
            cur_len = len(ln)
        else:
            cur.append(ln)
            cur_len += added
    if cur:
        chunks.append("\n".join(cur))
    return chunks


# Build embeddings

In [5]:


def build_embeddings_index(chunks: List[str], embed_model_name: str = 'all-MiniLM-L6-v2') -> Tuple[SentenceTransformer, np.ndarray, faiss.IndexFlatL2]:
    """Embed the chunks and load them into a flat L2 FAISS index.

    Args:
        chunks: Text chunks to embed; must contain at least one item.
        embed_model_name: Name of the sentence-transformers model to load.

    Returns:
        A tuple ``(model, embeddings, index)``: the loaded embedding model, the
        embedding matrix (one row per chunk) and the FAISS index holding those
        rows in the same order as ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    # Fail early with a clear message; with no chunks there is no embedding
    # dimension to build the index from, and the model load would be wasted.
    if not chunks:
        raise ValueError("Cannot build an embeddings index from an empty list of chunks.")

    model = SentenceTransformer(embed_model_name)
    embeddings = model.encode(chunks, convert_to_numpy=True, show_progress_bar=True)
    # FAISS works on C-contiguous float32 matrices; coerce defensively so a
    # model returning another dtype/layout does not break index.add().
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return model, embeddings, index



# Retrieval

In [6]:


def retrieve(query: str, embed_model: SentenceTransformer, index: faiss.IndexFlatL2, chunks: List[str], k: int = 4) -> List[Tuple[int, float, str]]:
    """Look up the ``k`` chunks nearest to ``query`` in the index.

    The query is embedded with ``embed_model`` and searched against ``index``.
    Each hit is returned as ``(chunk_index, score, chunk_text)`` where ``score``
    is the value reported by ``index.search`` for that hit. Hits with a negative
    index are skipped, so fewer than ``k`` results may be returned.
    """
    query_vec = embed_model.encode([query], convert_to_numpy=True)
    distances, positions = index.search(query_vec, k)

    # Only one query was submitted, so row 0 holds all of its hits.
    return [
        (int(pos), float(dist), chunks[pos])
        for dist, pos in zip(distances[0], positions[0])
        if pos >= 0
    ]


# Prompt

In [7]:


# System message describing the assistant's role for transcript summarization.
# (A stray closing double quote at the end of the text has been removed.)
system_prompt = """
Act as a professional summarization assistant for meeting transcripts. Provide concise, accurate summaries of key points,
 decisions, and action items from meetings. Tailor the summaries to be clear and helpful for meeting participants,
ensuring they are easy to understand and relevant for follow-up.
 If needed, include tools or suggestions to enhance meeting productivity and collaboration.
"""

def make_summary_prompt(retrieved_chunks: List[str]) -> str:
    """
    Generate a prompt for summarizing a meeting transcript based on provided excerpts.

    Args:
        retrieved_chunks: Transcript excerpts to include as context. Blank or
            whitespace-only entries are ignored.

    Returns:
        The summarization prompt with the excerpts separated by ``---`` lines,
        or the string "Error: No transcript excerpts provided." when there is
        no non-blank excerpt.
    """
    # Filter before the emptiness check so that a list holding only blank
    # excerpts is reported as an error instead of yielding an empty context.
    excerpts = [chunk.strip() for chunk in (retrieved_chunks or []) if chunk and chunk.strip()]
    if not excerpts:
        return "Error: No transcript excerpts provided."

    context = "\n\n---\n\n".join(excerpts)
    prompt = f"""
You are a professional summarization assistant tasked with analyzing excerpts from a meeting transcript. Your goal is to create a clear and concise summary for meeting participants. Provide the following:

1. A 3-sentence summary capturing the main discussion points and purpose of the meeting.
2. A bulleted list of key decisions made during the meeting, if any.
3. A bulleted list of action items, including assignees and deadlines, if specified in the transcript.

If no decisions or action items are present, explicitly state this in the respective sections. Use clear, professional language and avoid speculation beyond the provided context.

**Transcript Excerpts:**
{context}
"""
    return prompt

def make_question_prompt(question: str, retrieved_chunks: List[str]) -> str:
    """
    Generate a prompt for answering a question based on meeting transcript excerpts.

    Args:
        question: The question to answer from the excerpts.
        retrieved_chunks: Transcript excerpts to include as context. Blank or
            whitespace-only entries are ignored.

    Returns:
        The question-answering prompt, or one of the strings
        "Error: No question provided." / "Error: No transcript excerpts provided."
        when the corresponding input is missing or blank.
    """
    # `not question` also covers None, which would otherwise crash on .strip().
    if not question or not question.strip():
        return "Error: No question provided."

    # Filter before the emptiness check so that a list holding only blank
    # excerpts is reported as an error instead of yielding an empty context.
    excerpts = [chunk.strip() for chunk in (retrieved_chunks or []) if chunk and chunk.strip()]
    if not excerpts:
        return "Error: No transcript excerpts provided."

    context = "\n\n---\n\n".join(excerpts)
    prompt = f"""
 You are a professional assistant tasked with answering a question based solely on the provided meeting transcript excerpts.
 Provide a concise and accurate answer in english If the answer cannot be found in the context,
 respond with "answer cannot be found in text" AVOID speculation or adding information beyond the transcript.

**Transcript Excerpts:**
{context}

**Question:**
{question}

"""
    return prompt


# Demo — use the provided Finance Meeting Transcript

In [8]:


# Sample finance-review meeting transcript used as input for the demo run.
# It starts with a date/time/attendee header, followed by one speaker turn per
# line in the form "Name (Role): utterance".
TRANSCRIPT = '''
Date: October 17, 2025
Time: 10:00 AM - 10:10 AM
Attendees:

Sarah Thompson (CEO, Female)
Michael Chen (CFO, Male)
David Patel (VP of Operations, Male)
James Rodriguez (Head of Investments, Male)

Sarah (CEO): Good morning, everyone. Let’s dive into our quarterly finance review. Michael, can you start with the current financial overview?
Michael (CFO): Absolutely, Sarah. Our Q3 revenue is up 8% from last quarter, hitting $12.5 million. However, operating expenses rose by 10% due to increased marketing spend. Net profit margin is holding at 15%, but we need to address cost efficiencies.
Sarah (CEO): Thanks, Michael. That growth is solid, but the expense increase concerns me. David, what’s driving the operational costs?
David (VP of Operations): It’s mainly the new supply chain software implementation. It’s a one-time hit of $500,000, but it’s already streamlining logistics. We expect a 20% reduction in delivery costs by Q1 next year.
Sarah (CEO): Good to know. Let’s ensure we track those savings. James, what’s the status on our investment portfolio?
James (Head of Investments): We’ve seen strong returns from our tech stock holdings, up 12% this quarter. I propose reallocating 10% of our cash reserves—about $2 million—into green energy ETFs. They’re showing consistent growth and align with market trends.
Michael (CFO): I’m cautious about pulling from cash reserves. Liquidity is key with the economic uncertainty. Could we scale that to $1 million and keep the rest in bonds?
James (Head of Investments): Fair point, Michael. A $1 million investment still diversifies us without overextending. I’ll run projections for bonds versus ETFs by tomorrow.
Sarah (CEO): I like the balance here. James, prioritize low-risk ETFs for now. David, can operations absorb a 5% budget cut to offset the software costs?
David (VP of Operations): It’s tight, but we could trim non-essential maintenance and delay some equipment upgrades. I’ll need to review with my team to confirm.
Sarah (CEO): Please do, and report back by Monday. Michael, what about our debt repayment plan?
Michael (CFO): We’re on track to pay off $3 million of our long-term debt by year-end. Interest rates are stable, so I recommend maintaining the current schedule rather than accelerating payments.
James (Head of Investments): Agreed. Freeing up cash for investments might give us better returns than early debt repayment right now.
Sarah (CEO): Makes sense. Let’s stick with the plan but keep an eye on interest rates. If they spike, we may need to revisit. Anything else critical?
David (VP of Operations): Just a heads-up—our supplier contracts are up for renewal next quarter. We might negotiate better terms to cut costs further.
Sarah (CEO): Great, David, lead on that. Okay, we’re at time. Michael, finalize the budget adjustments. James, get me those ETF projections. David, confirm the budget cut feasibility. Let’s reconvene Monday. Thanks, everyone.
Meeting Adjourned: 10:10 AM
'''


# Generate a summary of a meeting transcript

In [9]:
from typing import List, Optional
import logging

def generate_summary(
    transcript: str,
    llm,  # Now expects an initialized LLM instance
    api_key: str,
    model_name: str,
    system_prompt: str = "You are a professional summarization assistant.",
    max_chars: int = 900,
    k: int = 4,
    temperature: float = 0.1,
    max_tokens: int = 400
) -> Optional[str]:
    """
    Generate a summary of a meeting transcript, including decisions and action items, using an LLM.

    The transcript is cleaned and chunked, the chunks are embedded and indexed,
    the ``k`` chunks closest to a fixed summarization query are retrieved, and
    those chunks are sent to ``llm`` inside the summary prompt.

    Args:
        transcript (str): The raw meeting transcript to summarize.
        llm: An initialized client exposing
            ``generate_response(prompt, temperature=..., max_tokens=..., system=...)``.
        api_key (str): Unused by this function; kept so existing callers keep working.
        model_name (str): Unused by this function; kept so existing callers keep working.
        system_prompt (str, optional): System prompt for the LLM. Defaults to "You are a professional summarization assistant."
        max_chars (int, optional): Maximum characters per chunk for text splitting. Defaults to 900.
        k (int, optional): Number of chunks to retrieve for summarization. Defaults to 4.
        temperature (float, optional): Sampling temperature for LLM generation. Defaults to 0.1.
        max_tokens (int, optional): Maximum tokens for LLM output. Defaults to 400.

    Returns:
        Optional[str]: The generated summary; the fixed message
        "No relevant information found in the transcript." when retrieval
        returns nothing; or None if embedding, retrieval or the LLM call fails.

    Raises:
        ValueError: If the transcript is empty/blank or yields no chunks.
    """
    # Set up logging (basicConfig is a no-op when the root logger is already configured)
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Validate inputs
    if not transcript or not transcript.strip():
        logger.error("Empty or invalid transcript provided.")
        raise ValueError("Transcript cannot be empty.")

    # Preprocessing and its validation run outside the try block below so the
    # ValueError documented above actually reaches the caller instead of being
    # swallowed by the broad handler and turned into None.
    logger.info("Preprocessing transcript...")
    text = preprocess_text(transcript)
    chunks = chunk_text(text, max_chars=max_chars)
    logger.info("Created %d chunks.", len(chunks))

    if not chunks:
        logger.error("No chunks created from transcript.")
        raise ValueError("Failed to create chunks from transcript.")

    try:
        # Build embeddings and FAISS index
        logger.info("Building embeddings and FAISS index...")
        embed_model, _embeddings, index = build_embeddings_index(chunks)
        logger.info("Index ready.")

        # Retrieve relevant chunks; each result is (chunk_index, score, chunk_text)
        query = "Summarize the meeting and list decisions and action items."
        results = retrieve(query, embed_model, index, chunks, k=k)
        retrieved_texts = [chunk for _idx, _score, chunk in results]

        if not retrieved_texts:
            logger.warning("No relevant chunks retrieved for summarization.")
            return "No relevant information found in the transcript."

        # Generate prompt and get summary
        prompt = make_summary_prompt(retrieved_texts)
        logger.info("Sending prompt to LLM for summary...")
        summary = llm.generate_response(
            prompt,
            temperature=temperature,
            max_tokens=max_tokens,
            system=system_prompt
        )

        logger.info("Summary generated successfully.")
        print('\n=== SUMMARY OUTPUT ===\n')
        return summary

    except Exception as e:
        # Best-effort contract: runtime failures (model download, network, API
        # errors) yield None, but the traceback is kept in the log for debugging.
        logger.exception("Error generating summary: %s", e)
        return None

# Run the summarizer on the demo transcript

In [10]:
# Take the API key from the OPENROUTER_API_KEY environment variable, or prompt
# for it interactively, so that no credential is stored in the notebook itself.
from getpass import getpass

api_key = os.environ.get("OPENROUTER_API_KEY") or getpass("OpenRouter API key: ")
model_name = "gpt-4o-mini"

# Build the client once and summarize the demo transcript with it.
llm = LLM(model=model_name, api_key=api_key)
summary = generate_summary(TRANSCRIPT, llm, api_key, model_name, system_prompt=system_prompt)
print(summary)







The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


=== SUMMARY OUTPUT ===

### Summary
The meeting focused on the quarterly finance review, highlighting a revenue increase of 8% but also a concerning rise in operating expenses. Discussions included strategies for managing costs, investment opportunities, and the status of supplier contracts. The team agreed on several action items to enhance financial efficiency and investment strategies moving forward.

### Key Decisions Made
- Maintain the current debt repayment schedule rather than accelerating payments.
- Prioritize low-risk ETFs for investment, scaling the proposed $2 million allocation down to $1 million.
- David to lead negotiations on supplier contract renewals next quarter.

### Action Items
- **Michael (CFO)**: Finalize budget adjustments by Monday.
- **James (Head of Investments)**: Provide ETF projections by tomorrow.
- **David (VP of Operations)**: Confirm feasibility of a 5% budget cut and report back by Monday.
