In [None]:
import os
import requests
import pandas as pd
import json
import streamlit as st
import docx2txt
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from concurrent.futures import ThreadPoolExecutor, as_completed

# ====== Load .env ======
load_dotenv()
# os.getenv() returns None when the variable is absent, and os.environ only
# accepts str values (assigning None raises TypeError). Read the key once and
# re-export it only when it is actually set, so the module still imports
# without a configured key.
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
if GROQ_API_KEY is not None:
    os.environ['GROQ_API_KEY'] = GROQ_API_KEY

# ====== CONFIG ======
# Workbook path used by save_to_excel(): read if it already exists, then
# rewritten with the new summary row appended.
EXCEL_FILE = "Meeting_summary_template.xlsx"
# Thread-pool size used by summarize_chunks_parallel().
MAX_WORKERS = 5  # parallel requests to Groq

# ====== JSON SCHEMA PROMPT ======
# Instruction text that call_groq_api() places in front of the transcript in
# the user message. The keys listed here are the ones flatten_summary() reads
# back out of the model's JSON reply.
PROMPT_TEMPLATE = """
You are a meeting summarizer. 
Return the result ONLY in strict JSON following this schema, without extra text:

{
  "MeetingDetails": {
    "Date & Time": "",
    "Location": "",
    "Participants": []
  },
  "Objective": "",
  "AgendaItems": [],
  "KeyDiscussions": "",
  "DecisionsMade": "",
  "ActionItems": [
    {"Task": "", "Owner": "", "DueDate": ""}
  ],
  "NextSteps": "",
  "AdditionalNotes": ""
}
"""

# ====== Helpers ======
def read_docx(file_path):
    """Return the text that docx2txt extracts from the .docx at ``file_path``."""
    extracted_text = docx2txt.process(file_path)
    return extracted_text

def call_groq_api(text, force_schema=True, timeout=60):
    """Send text to the Groq chat-completions endpoint and return the reply.

    The user message is PROMPT_TEMPLATE followed by ``text``.

    Args:
        text: Transcript (or concatenated partial summaries) to summarize.
        force_schema: When True, ask the API for JSON-object output and
            return the parsed JSON. When False, return the reply as a string.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The parsed JSON value when ``force_schema`` is True, otherwise the
        message content string of the first choice.

    Raises:
        RuntimeError: If GROQ_API_KEY is not configured, or the response
            body does not contain ``choices[0].message.content``.
        requests.HTTPError: If the API answers with a non-success status.
        requests.RequestException: On connection errors or timeouts.
        json.JSONDecodeError: If ``force_schema`` is True and the reply is
            not valid JSON.
    """
    if not GROQ_API_KEY:
        # Fail early with a clear message instead of sending "Bearer None".
        raise RuntimeError("GROQ_API_KEY is not set; cannot call the Groq API.")

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}

    payload = {
        "model": "llama-3.1-8b-instant",
        "messages": [
            {"role": "system", "content": "You are a meeting summarizer."},
            {"role": "user", "content": PROMPT_TEMPLATE + "\n\nTranscript:\n" + text}
        ],
        "temperature": 0.2
    }
    if force_schema:
        payload["response_format"] = {"type": "json_object"}

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    if not response.ok:
        # Include the response body: it carries the API's own error details,
        # which a bare status code would hide.
        raise requests.HTTPError(
            f"Groq API request failed with status {response.status_code}: {response.text}",
            response=response,
        )

    result = response.json()
    try:
        content = result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise RuntimeError(f"Unexpected Groq API response: {result}") from exc

    return json.loads(content) if force_schema else content

In [6]:
def chunk_text(text, chunk_size=4000, chunk_overlap=200):
    """Break ``text`` into pieces using RecursiveCharacterTextSplitter.

    ``chunk_size`` and ``chunk_overlap`` are forwarded unchanged to the
    splitter; the list returned by its ``split_text`` is handed back as is.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

def summarize_chunks_parallel(chunks):
    """Summarize every chunk on a thread pool, reporting progress in Streamlit.

    Each chunk is sent through call_groq_api(force_schema=False). As each
    request finishes, the progress bar is advanced and a preview of at most
    500 characters is written to the page. The returned list holds the
    summaries in the same order as ``chunks``, regardless of completion order.
    """
    total = len(chunks)
    summaries = [None] * total
    progress_bar = st.progress(0, text="Starting summarization...")
    stream_box = st.container()

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        # Map each future back to the position of the chunk it is processing.
        position_of = {
            pool.submit(call_groq_api, chunk, force_schema=False): position
            for position, chunk in enumerate(chunks)
        }
        for completed, finished in enumerate(as_completed(position_of), start=1):
            position = position_of[finished]
            text = finished.result()
            summaries[position] = text
            progress_bar.progress(completed / total, text=f"Processed {completed}/{total} chunks")
            with stream_box:
                st.markdown(f"✅ **Chunk {position+1} summary (partial):**")
                if len(text) > 500:
                    preview = text[:500] + "..."
                else:
                    preview = text
                st.write(preview)

    return summaries

def merge_summaries(partial_summaries):
    """Combine partial summaries into one structured summary.

    The partial summaries are stringified, joined with newlines, and passed
    to call_groq_api with force_schema=True; its result is returned unchanged.
    """
    stitched = "\n".join(map(str, partial_summaries))
    return call_groq_api(stitched, force_schema=True)

def _join_items(value, separator):
    """Render a list-like field as one string, tolerating str/None/mixed items."""
    if value is None:
        return ""
    if isinstance(value, str):
        # Joining a bare string would interleave the separator between its
        # characters, so pass it through untouched.
        return value
    if isinstance(value, (list, tuple)):
        return separator.join(str(item) for item in value if item is not None)
    return str(value)


def flatten_summary(summary):
    """Flatten the JSON summary into a single row dict for Excel.

    The summary comes from a language model, so the schema is treated as a
    hint rather than a guarantee: a missing or non-dict "MeetingDetails",
    list fields that arrive as a plain string or null, and action items that
    are not objects are all accepted instead of raising.

    Args:
        summary: Dict following the schema in PROMPT_TEMPLATE.

    Returns:
        A dict with the fixed meeting columns plus, for the i-th action item
        (1-based), the keys ActionItem_{i}_Task, ActionItem_{i}_Owner and
        ActionItem_{i}_DueDate.
    """
    details = summary.get("MeetingDetails")
    if not isinstance(details, dict):
        details = {}

    row = {
        "Meeting_DateTime": details.get("Date & Time", ""),
        "Meeting_Location": details.get("Location", ""),
        "Meeting_Participants": _join_items(details.get("Participants", []), ", "),
        "Objective": summary.get("Objective", ""),
        "AgendaItems": _join_items(summary.get("AgendaItems", []), "; "),
        "KeyDiscussions": summary.get("KeyDiscussions", ""),
        "DecisionsMade": summary.get("DecisionsMade", ""),
        "NextSteps": summary.get("NextSteps", ""),
        "AdditionalNotes": summary.get("AdditionalNotes", "")
    }

    # Handle multiple Action Items
    actions = summary.get("ActionItems") or []
    if not isinstance(actions, (list, tuple)):
        # A single object or string instead of a list: treat it as one item.
        actions = [actions]
    for i, action in enumerate(actions, start=1):
        if not isinstance(action, dict):
            # Free-text item: keep it as the task and leave owner/date blank.
            action = {"Task": str(action)}
        row[f"ActionItem_{i}_Task"] = action.get("Task", "")
        row[f"ActionItem_{i}_Owner"] = action.get("Owner", "")
        row[f"ActionItem_{i}_DueDate"] = action.get("DueDate", "")
    return row

def save_to_excel(row):
    """Add ``row`` to the workbook at EXCEL_FILE and return the full table.

    If the workbook already exists, its contents are loaded and the new row
    is concatenated below them (index reset). Otherwise the table consists of
    just the new row. The result is written back without the index column.
    """
    if not os.path.exists(EXCEL_FILE):
        table = pd.DataFrame([row])
    else:
        existing = pd.read_excel(EXCEL_FILE)
        appended = pd.DataFrame([row])
        table = pd.concat([existing, appended], ignore_index=True)

    table.to_excel(EXCEL_FILE, index=False)
    return table

# # ====== Streamlit UI ======
# st.set_page_config(page_title="Meeting Summarizer", layout="wide")
# st.title("📋 Meeting Transcript Summarizer (Parallel + Streaming)")

# uploaded_file = st.file_uploader("Upload a transcript (.docx)", type=["docx"])

# if uploaded_file:
#     with open("temp.docx", "wb") as f:
#         f.write(uploaded_file.read())

#     transcript = read_docx("temp.docx")
#     st.success(f"✅ Transcript uploaded successfully! Extracted {len(transcript)//1024} KB of text")

#     with st.spinner("Summarizing meeting..."):
#         chunks = chunk_text(transcript)
#         st.info(f"Transcript split into {len(chunks)} chunks")
#         partial_summaries = summarize_chunks_parallel(chunks)
#         final_summary = merge_summaries(partial_summaries)
#         row = flatten_summary(final_summary)
#         df = save_to_excel(row)

#     st.success("✅ Final structured summary added to Excel!")

#     # Show last 3 rows with highlight
#     st.subheader("📊 Recent Meeting Summaries")
#     recent_df = df.tail(3)

#     def highlight_last_row(x):
#         df_styled = pd.DataFrame('', index=x.index, columns=x.columns)
#         df_styled.iloc[-1, :] = 'background-color: lightgreen; font-weight: bold;'
#         return df_styled

#     styled_df = recent_df.style.apply(highlight_last_row, axis=None)
#     st.dataframe(styled_df, use_container_width=True, height=300)
