<a href="https://colab.research.google.com/github/Rinas1817/Ai-Meeting-Summarizer/blob/main/AI_Summarizer_Project1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================================================================
# MASTER SETUP CELL (V9 - Final Fix): Run this
# ================================================================
from google.colab import drive
import os

# 1. MOUNT GOOGLE DRIVE
print("▶️ Mounting Google Drive...")
drive.mount('/content/drive', force_remount=True)

# 2. DEFINE PROJECT PATHS
DRIVE_ROOT = "/content/drive/MyDrive/AI_Summarizer_Project"
MODEL_PATH = os.path.join(DRIVE_ROOT, "models")
PIP_CACHE_DIR = os.path.join(DRIVE_ROOT, "pip_cache")
HF_CACHE_DIR = os.path.join(DRIVE_ROOT, "hf_cache")
VOICEPRINT_PATH = os.path.join(DRIVE_ROOT, "voiceprints")
os.makedirs(MODEL_PATH, exist_ok=True)
os.makedirs(PIP_CACHE_DIR, exist_ok=True)
os.makedirs(HF_CACHE_DIR, exist_ok=True)
os.makedirs(VOICEPRINT_PATH, exist_ok=True)
print("✅ Drive mounted and paths defined.")

# 3. INSTALL DEPENDENCIES
print("\n▶️ Installing dependencies...")
!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu121
!pip install --cache-dir={PIP_CACHE_DIR} -q --upgrade pip
!pip install --cache-dir={PIP_CACHE_DIR} -q git+https://github.com/openai/whisper.git
!pip install --cache-dir={PIP_CACHE_DIR} -q streamlit pyngrok pyannote.audio==3.1.1 google-generativeai python-dotenv scipy pydub
!pip install numpy==1.26.4
print("✅ Dependencies installed.")

# 4. LOAD MODELS
print("\n▶️ Loading AI models...")
os.environ['HUGGING_FACE_HUB_CACHE'] = HF_CACHE_DIR
from google.colab import userdata

import whisper
import torch
whisper_model = whisper.load_model("base", download_root=MODEL_PATH)
print("✅ Whisper model loaded.")

from pyannote.audio import Pipeline, Model # <--- Import Model
HF_TOKEN = userdata.get('HF_TOKEN')
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN)

# THE FINAL FIX: Load this as a Model, not a Pipeline
embedding_model = Model.from_pretrained("pyannote/embedding", use_auth_token=HF_TOKEN)

print("✅ Pyannote models loaded.")

# 5. CONFIGURE GEMINI API
print("\n▶️ Configuring Gemini API...")
import google.generativeai as genai
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
genai.configure(api_key=GEMINI_API_KEY)
print("✅ Gemini client initialized.")

print("\n\n🎉 SETUP COMPLETE! You are ready to go. 🎉")

In [None]:
%%writefile app.py
# ================================================================
# THE COMPLETE AND FINAL STREAMLIT APPLICATION
# ================================================================
import streamlit as st
import os
import torch
import pickle
import numpy as np
from scipy.spatial.distance import cdist
import uuid
from pydub import AudioSegment
import io
import json
import whisper
from pyannote.audio import Pipeline, Model, Inference
import google.generativeai as genai

# --- CONFIGURATION AND PATHS ---
# Page configuration (kept as the first Streamlit call in the script).
st.set_page_config(layout="wide", page_title="AI Meeting Summarizer")

# Every persistent artefact lives under a single Google Drive project folder.
DRIVE_ROOT = "/content/drive/MyDrive/AI_Summarizer_Project"
VOICEPRINT_PATH, MODEL_PATH, HF_CACHE_DIR = (
    os.path.join(DRIVE_ROOT, subdir)
    for subdir in ("voiceprints", "models", "hf_cache")
)

# NOTE(review): this assignment runs after the library imports at the top of
# the file, and 'HUGGING_FACE_HUB_CACHE' may not be a variable name that
# huggingface_hub / pyannote actually read -- confirm the model cache really
# ends up in HF_CACHE_DIR.
os.environ.update({'HUGGING_FACE_HUB_CACHE': HF_CACHE_DIR})

# --- LOAD MODELS AND API CLIENTS (CACHED) ---
@st.cache_resource
def load_models_and_clients():
    """Load every heavy model/client once and cache them for the app's lifetime.

    API keys are read from the ``HF_TOKEN`` / ``GEMINI_API_KEY`` environment
    variables first; Colab's ``userdata`` store is consulted only as a
    fallback for keys that are still missing.

    Returns:
        tuple: ``(whisper_model, diarization_pipeline, embedding_inference,
        gemini_model)`` in that order.

    Raises:
        ValueError: if either API key cannot be found.
        RuntimeError: if a pyannote model could not be loaded.
    """
    hf_token = os.getenv('HF_TOKEN')
    gemini_api_key = os.getenv('GEMINI_API_KEY')

    # Import google.colab lazily and only when needed: the Streamlit process
    # may run where that package (or its secret store) is unavailable, and an
    # unconditional import would then hide the real "missing key" problem.
    secrets_error = None
    if not (hf_token and gemini_api_key):
        try:
            from google.colab import userdata
            hf_token = hf_token or userdata.get('HF_TOKEN')
            gemini_api_key = gemini_api_key or userdata.get('GEMINI_API_KEY')
        except Exception as err:  # not swallowed: reported via the ValueError below
            secrets_error = err

    if not hf_token or not gemini_api_key:
        raise ValueError(
            "API keys for Hugging Face and/or Gemini are not set in the environment."
        ) from secrets_error

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    st.info(f"Using device: {device}")

    whisper_model = whisper.load_model("base", device=device, download_root=MODEL_PATH)

    # pyannote's from_pretrained can return None (instead of raising) when a
    # gated model cannot be downloaded; fail with an actionable message rather
    # than an AttributeError on `.to(device)`.
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=hf_token)
    if diarization_pipeline is None:
        raise RuntimeError(
            "Could not load 'pyannote/speaker-diarization-3.1'. Check that the Hugging Face "
            "token is valid and that the model's user conditions have been accepted."
        )
    diarization_pipeline = diarization_pipeline.to(device)

    embedding_model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token)
    if embedding_model is None:
        raise RuntimeError(
            "Could not load 'pyannote/embedding'. Check that the Hugging Face "
            "token is valid and that the model's user conditions have been accepted."
        )
    embedding_model = embedding_model.to(device)
    # window="whole" -> one embedding per audio file instead of a sliding window.
    embedding_inference = Inference(embedding_model, window="whole", device=device)

    genai.configure(api_key=gemini_api_key)
    generation_config = {"temperature": 0.7, "top_p": 1, "top_k": 1, "max_output_tokens": 8192}
    gemini_model = genai.GenerativeModel(model_name="gemini-1.5-flash", generation_config=generation_config)

    return whisper_model, diarization_pipeline, embedding_inference, gemini_model

# Load (or fetch from Streamlit's cache) all models; on any failure show the
# error in the page and leave `models_loaded` False so the UI can stop early.
models_loaded = False
try:
    (
        whisper_model,
        diarization_pipeline,
        embedding_inference,
        gemini_model,
    ) = load_models_and_clients()
except Exception as e:
    st.error(f"Failed to load AI models. Please check your API keys and file paths. Error: {e}")
else:
    models_loaded = True

# --- GEMINI PROMPT ---
# Prompt template for the Gemini summarisation request. The literal marker
# "[INSERT TRANSCRIPT HERE]" is a placeholder that is replaced with the
# speaker-labelled transcript before the prompt is sent. The model is asked
# to answer with one JSON object containing the keys "title", "summary",
# "action_items", "key_decisions" and "topics_discussed".
GEMINI_JSON_PROMPT = """
You are an expert meeting analysis AI. Your task is to analyze the provided meeting transcript and extract key information. Your response MUST be a single, valid JSON object. Do not include any text or formatting before or after the JSON object. The JSON object must have the following keys:
- "title": A short, catchy title for the meeting (e.g., "Q3 Marketing Strategy Session").
- "summary": A concise, one-paragraph summary of the meeting's key discussions and outcomes.
- "action_items": An array of objects. Each object represents a specific task and must have the keys: "task", "assigned_to", "deadline". If info is missing, use "Not specified".
- "key_decisions": An array of strings, where each string is a key decision made during the meeting.
- "topics_discussed": An array of strings, where each string is a main topic that was discussed.
Analyze the following transcript carefully:
TRANSCRIPT:
'''
[INSERT TRANSCRIPT HERE]
'''
"""

# --- CORE LOGIC FUNCTIONS ---
def get_embedding(file_path):
    """Return the speaker embedding computed for the audio file at *file_path*.

    Thin wrapper that delegates to the module-level ``embedding_inference``
    callable.
    """
    embedding = embedding_inference(file_path)
    return embedding

def enroll_speaker(speaker_name, audio_file):
    """Create and persist a voiceprint for *speaker_name* from an audio sample.

    The sample is converted to a temporary WAV file, embedded with
    ``get_embedding`` and pickled into ``VOICEPRINT_PATH``. The in-session
    list of enrolled speakers is refreshed afterwards.

    Args:
        speaker_name: Display name entered by the user.
        audio_file: File-like object exposing ``read()`` (e.g. a Streamlit
            upload) containing audio in any format pydub can decode.

    Returns:
        str: A message starting with "✅" on success or "❌" on failure.
        Errors are reported through the message; nothing is raised.
    """
    temp_file_path = f"temp_enroll_{uuid.uuid4()}.wav"
    try:
        speaker_name = speaker_name.strip()
        if not speaker_name:
            raise ValueError("speaker name must not be empty")

        # Rewind first: an upload that was already read once would otherwise
        # yield empty bytes.
        if hasattr(audio_file, "seek"):
            audio_file.seek(0)
        audio = AudioSegment.from_file(io.BytesIO(audio_file.read()))
        audio.export(temp_file_path, format="wav")
        voiceprint = get_embedding(temp_file_path)
        speaker_data = {"name": speaker_name, "voiceprint": voiceprint}

        # The name is user input: keep only filename-safe characters so it
        # cannot introduce path separators or escape VOICEPRINT_PATH. The
        # original name is still stored inside the record for display.
        safe_name = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in speaker_name
        )
        os.makedirs(VOICEPRINT_PATH, exist_ok=True)
        file_path = os.path.join(VOICEPRINT_PATH, f"{safe_name}_{uuid.uuid4()}.pkl")
        with open(file_path, "wb") as f:
            pickle.dump(speaker_data, f)
        st.session_state.enrolled_speakers = load_enrolled_speakers()
        return f"✅ Successfully enrolled **{speaker_name}**."
    except Exception as e:
        return f"❌ Error during enrollment: {e}"
    finally:
        # Always clean up the intermediate WAV, whether or not enrollment worked.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

def load_enrolled_speakers():
    """Load every stored voiceprint record from ``VOICEPRINT_PATH``.

    Only ``*.pkl`` files are considered, in sorted filename order so the
    result is deterministic. Files that cannot be unpickled, or that do not
    hold a dict with "name" and "voiceprint" keys, are skipped with a
    warning instead of aborting the whole app.

    Returns:
        list[dict]: The loaded speaker records; empty if the directory does
        not exist or holds no usable files.
    """
    enrolled_speakers = []
    try:
        filenames = sorted(os.listdir(VOICEPRINT_PATH))
    except FileNotFoundError:
        return enrolled_speakers

    for filename in filenames:
        if not filename.endswith(".pkl"):
            continue
        file_path = os.path.join(VOICEPRINT_PATH, filename)
        try:
            # SECURITY: pickle.load can execute arbitrary code. This is only
            # acceptable while VOICEPRINT_PATH contains files written by this
            # app itself; never point it at untrusted data.
            with open(file_path, "rb") as f:
                speaker = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError) as err:
            st.warning(f"Skipping unreadable voiceprint file '{filename}': {err}")
            continue

        # Downstream code indexes records by these two keys.
        if not (isinstance(speaker, dict) and "name" in speaker and "voiceprint" in speaker):
            st.warning(f"Skipping voiceprint file '{filename}': unexpected contents.")
            continue
        enrolled_speakers.append(speaker)
    return enrolled_speakers

def _identify_speakers(audio, diarization, enrolled_speakers, distance_threshold=0.5):
    """Map each diarization label to an enrolled name or a generic "Speaker N"."""
    # Pick each label's longest turn as its voice sample: the first turn can
    # be a fraction of a second, which gives a poor (or failing) embedding.
    # Dict insertion order keeps labels in order of first appearance.
    longest_turns = {}
    for turn, _, label in diarization.itertracks(yield_label=True):
        best = longest_turns.get(label)
        if best is None or (turn.end - turn.start) > (best.end - best.start):
            longest_turns[label] = turn

    if not enrolled_speakers:
        st.info("No speakers enrolled. Using generic labels (Speaker 1, Speaker 2, etc.).")
        return {label: f"Speaker {i + 1}" for i, label in enumerate(sorted(longest_turns))}

    st.info("Enrolled speakers found. Attempting to identify them...")
    # One row per enrolled speaker; reshape tolerates (D,) and (1, D) voiceprints.
    enrolled_matrix = np.vstack(
        [np.asarray(sp['voiceprint']).reshape(1, -1) for sp in enrolled_speakers]
    )

    speaker_mapping = {}
    generic_speaker_count = 1
    for label, turn in longest_turns.items():
        temp_segment_path = f"temp_segment_{uuid.uuid4()}.wav"
        try:
            # pydub slices in milliseconds; diarization turns are in seconds.
            segment = audio[turn.start * 1000 : turn.end * 1000]
            segment.export(temp_segment_path, format="wav")
            current_embedding = np.asarray(get_embedding(temp_segment_path)).reshape(1, -1)
        finally:
            # Remove the clip even when export/embedding fails.
            if os.path.exists(temp_segment_path):
                os.remove(temp_segment_path)

        distances = cdist(current_embedding, enrolled_matrix, 'cosine')[0]
        best_match_index = int(np.argmin(distances))
        if distances[best_match_index] < distance_threshold:
            speaker_mapping[label] = enrolled_speakers[best_match_index]['name']
        else:
            # Nobody enrolled is close enough: fall back to a generic label.
            speaker_mapping[label] = f"Speaker {generic_speaker_count}"
            generic_speaker_count += 1
    return speaker_mapping


def _build_transcript(diarization, segments, speaker_mapping):
    """Join Whisper words into one "**Speaker**: text" paragraph per turn."""
    # Flatten once instead of re-walking the segment structure for every turn.
    words = [word for segment in segments for word in segment.get('words', [])]

    transcript_parts = []
    for turn, _, speaker_label in diarization.itertracks(yield_label=True):
        # A word belongs to the turn in which it starts.
        turn_text = "".join(
            word['word'] for word in words if turn.start <= word['start'] < turn.end
        ).strip()
        if turn_text:
            speaker_name = speaker_mapping.get(speaker_label, "Unknown")
            transcript_parts.append(f"**{speaker_name}**: {turn_text}")
    return "\n\n".join(transcript_parts)


def process_meeting(meeting_audio_file):
    """Diarize, transcribe and summarise an uploaded meeting recording.

    Steps: convert the upload to a temporary WAV, run speaker diarization,
    name the speakers (enrolled voiceprints if any, generic labels
    otherwise), transcribe with Whisper word timestamps, assemble a
    speaker-labelled transcript and ask Gemini for a JSON summary.

    Args:
        meeting_audio_file: File-like object exposing ``read()`` with audio
            in any format pydub can decode.

    Returns:
        dict | None: The parsed Gemini JSON with an extra "full_transcript"
        key, or ``None`` if any step failed (the error is shown with
        ``st.error``).
    """
    diarize_temp_path = f"temp_diarize_{uuid.uuid4()}.wav"

    try:
        # Rewind first: an upload that was already read once would otherwise
        # yield empty bytes.
        if hasattr(meeting_audio_file, "seek"):
            meeting_audio_file.seek(0)
        audio = AudioSegment.from_file(io.BytesIO(meeting_audio_file.read()))
        audio.export(diarize_temp_path, format="wav")

        enrolled_speakers = st.session_state.get('enrolled_speakers', [])
        diarization = diarization_pipeline(diarize_temp_path)
        speaker_mapping = _identify_speakers(audio, diarization, enrolled_speakers)

        # Half precision only makes sense on GPU; on CPU Whisper would warn
        # and fall back to FP32 anyway.
        result = whisper_model.transcribe(
            diarize_temp_path, fp16=torch.cuda.is_available(), word_timestamps=True
        )
        final_transcript = _build_transcript(diarization, result['segments'], speaker_mapping)
        if not final_transcript:
            # Do not ask the LLM to summarise nothing.
            raise ValueError("No speech could be transcribed from the uploaded audio.")

        prompt = GEMINI_JSON_PROMPT.replace("[INSERT TRANSCRIPT HERE]", final_transcript)
        response = gemini_model.generate_content(prompt)

        # The model sometimes wraps its JSON in a Markdown code fence.
        cleaned_response = response.text.replace("```json", "").replace("```", "").strip()
        summary_data = json.loads(cleaned_response)
        if not isinstance(summary_data, dict):
            raise ValueError("Gemini did not return a JSON object.")
        summary_data['full_transcript'] = final_transcript
        return summary_data
    except Exception as e:
        st.error(f"An error occurred during processing: {e}")
        return None
    finally:
        if os.path.exists(diarize_temp_path):
            os.remove(diarize_temp_path)

# --- STREAMLIT UI ---
st.title("🧠 AI Meeting Summarizer")
st.write("This tool transcribes, identifies speakers, and summarizes meetings. Built using Colab and Google Drive.")

# Load stored voiceprints once per browser session.
if 'enrolled_speakers' not in st.session_state:
    st.session_state.enrolled_speakers = load_enrolled_speakers()

# Without models nothing below can work; the load error was already shown.
if not models_loaded:
    st.stop()

app_mode = st.sidebar.radio("Choose an action", ["Process Meeting", "Enroll New Speaker"])

if app_mode == "Enroll New Speaker":
    st.header("Enroll a New Speaker")
    st.info("Upload a short, clear audio sample (15-30 seconds) of a single person speaking.")

    with st.form("enroll_form"):
        speaker_name = st.text_input("Enter Speaker's Name:")
        audio_sample = st.file_uploader("Upload Audio Sample", type=["wav", "mp3", "m4a"])
        submitted = st.form_submit_button("Enroll Speaker")

    if submitted:
        speaker_name = speaker_name.strip()
        if not speaker_name or not audio_sample:
            # Give feedback instead of silently ignoring the click.
            st.warning("Please provide both a speaker name and an audio sample.")
        else:
            with st.spinner(f"Enrolling {speaker_name}..."):
                message = enroll_speaker(speaker_name, audio_sample)
            # enroll_speaker reports failures through its message, so pick the
            # banner style from the outcome rather than always showing success.
            if message.startswith("✅"):
                st.success(message)
            else:
                st.error(message)

elif app_mode == "Process Meeting":
    st.header("Process a Meeting")
    st.info("Upload the full meeting audio recording to get a summary and transcript.")

    meeting_audio = st.file_uploader("Upload Meeting Audio", type=["wav", "mp3", "m4a"])

    analyze_clicked = st.button("Analyze Meeting")
    if analyze_clicked and not meeting_audio:
        st.warning("Please upload a meeting audio file first.")
    elif analyze_clicked:
        st.write("---")
        with st.spinner("Analyzing meeting... This may take several minutes on CPU, much faster on GPU."):
            results = process_meeting(meeting_audio)

        if results:
            st.header("Meeting Results")
            st.subheader("📌 Title")
            st.write(f"**{results.get('title', 'N/A')}**")

            st.subheader("📄 Summary")
            st.write(f"{results.get('summary', 'N/A')}")

            st.subheader("✅ Action Items")
            # The items come from an LLM: tolerate missing keys and odd shapes
            # instead of crashing the page with a KeyError/TypeError.
            for item in results.get('action_items') or []:
                if isinstance(item, dict):
                    task = item.get('task', 'Not specified')
                    assigned_to = item.get('assigned_to', 'Not specified')
                    deadline = item.get('deadline', 'Not specified')
                    st.markdown(f"- **Task:** {task} | **Assigned to:** {assigned_to} | **Deadline:** {deadline}")
                else:
                    st.markdown(f"- {item}")

            st.subheader("🔑 Key Decisions")
            for decision in results.get('key_decisions') or []:
                st.markdown(f"- {decision}")

            # The prompt asks for this key as well, so surface it when present.
            topics = results.get('topics_discussed') or []
            if topics:
                st.subheader("🗂️ Topics Discussed")
                for topic in topics:
                    st.markdown(f"- {topic}")

            with st.expander("Show Full Transcript"):
                st.markdown(results.get('full_transcript', ''))

# Display enrolled speakers in the sidebar
st.sidebar.write("---")
st.sidebar.subheader("Enrolled Speakers")
if st.session_state.enrolled_speakers:
    for speaker in st.session_state.enrolled_speakers:
        st.sidebar.markdown(f"- {speaker['name']}")
else:
    st.sidebar.info("No speakers enrolled yet.")

In [None]:
from google.colab import userdata
from pyngrok import ngrok
import os
import time

# --- Step 1: Kill any old ngrok tunnels to prevent errors ---
ngrok.kill()
print("✅ Old ngrok tunnels killed.")

# --- Step 2: Load your secrets from Colab ---
HF_TOKEN = userdata.get('HF_TOKEN')
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')
print("✅ Secrets loaded.")

# --- Step 3: Launch the Streamlit app in the background ---
# We pass the secrets as environment variables to the command
!HF_TOKEN={HF_TOKEN} GEMINI_API_KEY={GEMINI_API_KEY} streamlit run app.py &>streamlit.log &
print("🚀 Streamlit app launching in the background...")

# --- Step 4: Wait for 5 seconds for the app to start ---
time.sleep(5)
print("⏳ Waiting for app to initialize...")

# --- Step 5: Connect ngrok to the now-running app ---
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
public_url = ngrok.connect(8501)
print(f"\n🎉 Your Streamlit app is now live at: {public_url}")