In [1]:
!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai streamlit pyaudio-fork gradio

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m908.3/908.3 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.3/7.3 MB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m37.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m90.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import json
import requests
from datetime import datetime
from openai import OpenAI
from google.colab import userdata
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import gradio as gr
import re

In [4]:
# OpenAI model name passed to the audio transcription endpoint.
AUDIO_MODEL = "whisper-1"
# Hugging Face model id used to load both the tokenizer and the causal LM.
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

class MeetingMinutesGenerator:
    """Transcribe meeting audio with OpenAI and draft minutes with a local Llama model.

    Failures in transcription and generation are reported as strings that start
    with ``ERROR_PREFIX`` instead of being raised, so callers can separate an
    error from a real result with ``result.startswith("❌")``.
    """

    # Sentinel that prefixes every error string returned in place of a result.
    ERROR_PREFIX = "❌"
    # Number of transcript characters embedded in the LLM prompt.
    MAX_TRANSCRIPT_CHARS = 3000
    # Upper bound on matches kept per category by extract_structured_data.
    MAX_MATCHES_PER_CATEGORY = 5

    def __init__(self):
        """Create the generator and immediately try to set up all clients/models."""
        self.openai_client = None
        self.tokenizer = None
        self.model = None
        self.setup_models()

    def setup_models(self):
        """Initialize the OpenAI client, the tokenizer and the 4-bit quantized model.

        Credentials are read from Colab secrets (``OPENAI_API_KEY``, ``HF_TOKEN``).
        Any exception is caught and printed; in that case all three attributes
        are reset to ``None`` so callers can detect the failed setup.
        """
        try:
            print("Setting up models...")

            # Setup OpenAI
            openai_api_key = userdata.get('OPENAI_API_KEY')
            if not openai_api_key:
                raise ValueError("OPENAI_API_KEY not found in Colab secrets")

            self.openai_client = OpenAI(api_key=openai_api_key)
            print("OpenAI client initialized")

            # Setup HuggingFace
            hf_token = userdata.get('HF_TOKEN')
            if not hf_token:
                raise ValueError("HF_TOKEN not found in Colab secrets")

            login(hf_token, add_to_git_credential=True)
            print("HuggingFace login successful")

            # Load tokenizer first
            print(f"Loading tokenizer: {LLAMA}")
            self.tokenizer = AutoTokenizer.from_pretrained(LLAMA)
            # Reuse EOS as the padding token for generation.
            self.tokenizer.pad_token = self.tokenizer.eos_token
            print(" Tokenizer loaded")

            # Load Llama model with quantization
            print(f"Loading model: {LLAMA}")
            quant_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_quant_type="nf4"
            )

            self.model = AutoModelForCausalLM.from_pretrained(
                LLAMA,
                device_map="auto",
                quantization_config=quant_config,
                torch_dtype=torch.bfloat16
            )
            print("Model loaded successfully")

        except Exception as e:
            print(f" Setup error: {e}")
            print("Make sure you have set OPENAI_API_KEY and HF_TOKEN in Colab secrets")
            # Reset everything so a half-initialized object is never used.
            self.tokenizer = None
            self.model = None
            self.openai_client = None

    def transcribe_audio(self, audio_file_path):
        """Transcribe an audio file with the OpenAI transcription API.

        Args:
            audio_file_path: Path of the audio file to upload.

        Returns:
            The transcription returned by the API (requested as plain text), or
            a message starting with ``ERROR_PREFIX`` when the client is missing
            or the request fails.
        """
        if not self.openai_client:
            return f"{self.ERROR_PREFIX} OpenAI client not initialized. Please check your API key."

        try:
            print(f"🎙️ Transcribing audio: {audio_file_path}")
            with open(audio_file_path, "rb") as audio_file:
                transcription = self.openai_client.audio.transcriptions.create(
                    model=AUDIO_MODEL,
                    file=audio_file,
                    response_format="text"
                )
            print(" Transcription complete")
            return transcription

        except Exception as e:
            # The prefix is what callers test for; without it a failure message
            # would be treated as a real transcript.
            error_msg = f"{self.ERROR_PREFIX} Transcription error: {str(e)}"
            print(error_msg)
            return error_msg

    def extract_structured_data(self, text):
        """Pull emails, dates, times and action phrases out of a transcript.

        Args:
            text: Transcript text. Empty input or an error message (starting
                with ``ERROR_PREFIX``) yields an empty dict.

        Returns:
            Dict with keys ``emails``, ``dates``, ``times`` and ``action_words``;
            each value lists up to ``MAX_MATCHES_PER_CATEGORY`` unique matches
            in order of first appearance.
        """
        if not text or (isinstance(text, str) and text.startswith(self.ERROR_PREFIX)):
            return {}

        patterns = {
            # TLD class is [A-Za-z]; a '|' inside the brackets would be a literal pipe.
            'emails': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
            'dates': r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}\b',
            'times': r'\b\d{1,2}:\d{2}(?:\s?[AaPp][Mm])?\b',
            'action_words': r'\b(?:will|should|must|need to|action|todo|follow up|assigned|responsible)\b[^.]*',
        }

        extracted = {}
        for key, pattern in patterns.items():
            try:
                matches = re.findall(pattern, text, re.IGNORECASE)
            except (re.error, TypeError):
                # Best effort: a bad pattern or non-string input gives no matches.
                extracted[key] = []
                continue
            # dict.fromkeys de-duplicates while keeping first-seen order, so the
            # result is deterministic (a set would not be).
            unique_matches = list(dict.fromkeys(matches))
            extracted[key] = unique_matches[:self.MAX_MATCHES_PER_CATEGORY]

        return extracted

    def _build_messages(self, transcription, meeting_type):
        """Build the system/user chat messages that ask the model for minutes."""
        today = datetime.now().strftime("%B %d, %Y")

        # Mark the excerpt with an ellipsis only when it was actually cut.
        excerpt = transcription[:self.MAX_TRANSCRIPT_CHARS]
        if len(transcription) > self.MAX_TRANSCRIPT_CHARS:
            excerpt += "..."

        system_message = """You are an expert meeting secretary. Create comprehensive meeting minutes that are:
            - Professionally formatted in markdown
            - Structured with clear sections
            - Action-oriented with specific assignments
            - Include metrics where mentioned (dates, numbers, percentages)

            Always follow the exact structure requested."""

        user_prompt = f"""
            Create detailed meeting minutes following this EXACT structure:

            # Meeting Minutes - {today}

            ## Meeting Overview
            - **Date**: {today}
            - **Type**: {meeting_type.title()} Meeting
            - **Duration**: [Estimate from transcript]

            ##  Key Decisions Made
            [List 3-5 major decisions from the discussion]

            ##  Discussion Highlights
            [Summarize main discussion points]

            ## Action Items
            | Task | Owner | Deadline | Priority |
            |------|-------|----------|----------|
            [Extract specific tasks with owners]

            ##  Key Metrics & Data Points
            [Any numbers, percentages, or measurements mentioned]

            ##  Follow-up Required
            [Items needing additional discussion]

            **TRANSCRIPT TO ANALYZE:**
            {excerpt}
            """

        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt}
        ]

    def generate_enhanced_minutes(self, transcription, meeting_type="general"):
        """Generate structured markdown minutes from a transcript.

        Args:
            transcription: Transcript text produced by ``transcribe_audio``.
            meeting_type: Label inserted into the minutes header.

        Returns:
            ``(minutes, structured_data)``. On failure ``minutes`` is a message
            starting with ``ERROR_PREFIX`` and ``structured_data`` is ``{}``.
        """
        if not self.tokenizer or not self.model:
            return f"{self.ERROR_PREFIX} Models not properly initialized. Please restart and check your setup.", {}

        # Reject empty input and error messages handed over from transcription.
        # (Testing startswith("") would be true for every string.)
        if not transcription or transcription.startswith(self.ERROR_PREFIX):
            return f"{self.ERROR_PREFIX} Invalid transcription provided", {}

        try:
            print(" Generating enhanced minutes...")

            structured_data = self.extract_structured_data(transcription)
            messages = self._build_messages(transcription, meeting_type)

            # add_generation_prompt appends the assistant header so the model
            # answers instead of continuing the user turn. Inputs go to the
            # device the model was placed on rather than a hard-coded "cuda".
            input_ids = self.tokenizer.apply_chat_template(
                messages,
                add_generation_prompt=True,
                return_tensors="pt"
            ).to(self.model.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids,
                    # Single unpadded sequence: every position is a real token.
                    attention_mask=torch.ones_like(input_ids),
                    max_new_tokens=1500,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id
                )

            # generate() returns prompt + completion. Decode only the new tokens;
            # otherwise the "# Meeting Minutes" search below would match the
            # template echoed from the prompt instead of the model's answer.
            generated_ids = outputs[0][input_ids.shape[-1]:]
            response = self.tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

            # Drop any preamble the model wrote before the requested heading.
            heading_at = response.find("# Meeting Minutes")
            minutes = response[heading_at:] if heading_at != -1 else response

            print(" Minutes generated successfully")
            return minutes, structured_data

        except Exception as e:
            error_msg = f"{self.ERROR_PREFIX} Generation error: {str(e)}"
            print(error_msg)
            return error_msg, {}

def create_gradio_interface():
    """Assemble the Gradio Blocks UI for the meeting-minutes workflow.

    One MeetingMinutesGenerator is built up front and shared by every request
    through the nested ``process_meeting`` callback. The Blocks object is
    returned without being launched.
    """

    # Build the (expensive) generator a single time for the whole app.
    print("🚀 Initializing Meeting Minutes Generator...")
    generator = MeetingMinutesGenerator()

    def process_meeting(audio_file, meeting_type, progress=gr.Progress()):
        """Button callback: transcribe the upload, then draft the minutes.

        Returns a ``(markdown, json_text)`` pair; on any failure the markdown
        slot carries a message and the JSON slot is ``"{}"``.
        """
        no_data = "{}"

        if audio_file is None:
            return "❌ Please upload an audio file", no_data

        try:
            progress(0.1, desc="Starting processing...")

            # Refuse to start when setup did not complete.
            if not generator.openai_client:
                return "❌ OpenAI client not initialized. Please check your API key in Colab secrets.", no_data

            if not (generator.tokenizer and generator.model):
                return "❌ Language models not initialized. Please check your HuggingFace token.", no_data

            # Step 1: speech to text.
            progress(0.3, desc="Transcribing audio...")
            transcript = generator.transcribe_audio(audio_file)
            if transcript.startswith("❌"):
                return transcript, no_data

            # Step 2: text to structured minutes.
            progress(0.7, desc="Generating structured minutes...")
            minutes_md, extracted = generator.generate_enhanced_minutes(transcript, meeting_type)

            progress(1.0, desc="Complete!")
            return minutes_md, json.dumps(extracted, indent=2)

        except Exception as exc:
            failure = "❌ Processing error: " + str(exc)
            print(failure)
            return failure, no_data

    # Stylesheet: centers the layout and styles the two highlighted boxes.
    layout_css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: 0 auto !important;
        padding: 20px !important;
    }

    .main-header {
        text-align: center !important;
        margin-bottom: 30px !important;
    }

    .upload-section {
        max-width: 500px !important;
        margin: 0 auto 20px auto !important;
    }

    .control-section {
        max-width: 400px !important;
        margin: 0 auto 30px auto !important;
    }

    .output-section {
        max-width: 1000px !important;
        margin: 0 auto !important;
    }

    .generate-btn {
        width: 100% !important;
        max-width: 300px !important;
        margin: 20px auto !important;
        display: block !important;
    }

    .requirements-box {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        padding: 20px !important;
        border-radius: 10px !important;
        margin: 20px auto !important;
        max-width: 800px !important;
        color: white !important;
    }

    .features-box {
        background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
        padding: 20px !important;
        border-radius: 10px !important;
        margin: 20px auto !important;
        max-width: 800px !important;
        color: white !important;
    }
    """

    # Components are declared in display order, top to bottom.
    with gr.Blocks(
        css=layout_css,
        title="AI Meeting Minutes Generator",
        theme=gr.themes.Soft()
    ) as demo:

        # Title banner
        with gr.Column(elem_classes="main-header"):
            gr.Markdown("""
            # 🎙️ AI Meeting Minutes Generator
            ### Transform your meeting recordings into professional, structured minutes
            """)

        # Prerequisites
        with gr.Column(elem_classes="requirements-box"):
            gr.Markdown("""
            ## Setup Requirements
            1. **Add your `OPENAI_API_KEY` to Colab secrets**
            2. **Add your `HF_TOKEN` to Colab secrets**
            3. **Ensure you have GPU runtime enabled**
            """)

        # Audio upload
        with gr.Column(elem_classes="upload-section"):
            audio_upload = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="📁 Upload Meeting Audio",
                elem_classes="audio-upload"
            )

        # Options and the trigger button
        with gr.Column(elem_classes="control-section"):
            meeting_type_dropdown = gr.Dropdown(
                label=" Meeting Type",
                choices=["general", "project", "executive"],
                value="general",
                info="Choose the type for optimized formatting"
            )

            generate_button = gr.Button(
                "🚀 Generate Professional Minutes",
                elem_classes="generate-btn",
                size="lg",
                variant="primary"
            )

        # Rendered minutes
        with gr.Column(elem_classes="output-section"):
            gr.Markdown("## 📋 Generated Minutes")
            minutes_view = gr.Markdown(
                elem_classes="output-display",
                value=" **Ready to process your meeting!**\n\nUpload an audio file and click 'Generate Minutes' to see professionally formatted results here..."
            )

        # Raw regex-extracted data, collapsed by default
        with gr.Accordion("📊 Extracted Structured Data", open=False):
            with gr.Column(elem_classes="output-section"):
                structured_view = gr.Code(
                    value="{}",
                    language="json",
                    label="Raw Data (JSON Format)",
                    lines=10
                )

        # Connect the button to the callback.
        generate_button.click(
            process_meeting,
            [audio_upload, meeting_type_dropdown],
            [minutes_view, structured_view]
        )

        # Feature list
        with gr.Column(elem_classes="features-box"):
            gr.Markdown("""
            ## Key Features
            - **🎙️ Smart Transcription** - OpenAI Whisper for 95%+ accuracy
            - **🧠 AI Analysis** - Llama 3.1 8B for intelligent content structuring
            - **📊 Data Extraction** - Automatic identification of emails, dates, action items
            - **🎯 Multiple Formats** - Optimized for General, Project, or Executive meetings
            - **⚡ Memory Optimized** - 4-bit quantization for efficient processing
            - **📝 Professional Output** - Clean markdown formatting ready for sharing
            """)

        # Help text, collapsed by default
        with gr.Accordion("🛠️ Troubleshooting Guide", open=False):
            gr.Markdown("""
            ### Common Issues & Solutions:

            **🔴 "Models not initialized"**
            - Check your API keys in Colab secrets (🔑 left sidebar)
            - Restart runtime and run all cells again

            **🔴 CUDA/GPU errors**
            - Runtime → Change Runtime Type → GPU (T4)
            - Check: Runtime → View Resources (should show GPU)

            **🔴 Long processing time**
            - Large files (30+ min) take 2-3 minutes to process
            - This is normal for high-quality transcription

            **🔴 Upload fails**
            - Try smaller audio files (<100MB)
            - Supported formats: MP3, WAV, M4A, MP4

            **🔴 Poor transcription quality**
            - Ensure clear audio with minimal background noise
            - Speaker should be close to microphone
            """)

    return demo

# Build and launch the interface.
# With debug=True in Colab, launch() blocks this cell until the server is
# stopped, so anything the user must read is printed BEFORE the call.
print(" Launching Gradio Interface...")
interface = create_gradio_interface()
print("Use the public link printed below to access your Meeting Minutes Generator")
interface.launch(
    share=True,
    debug=True,
    show_error=True,
    server_name="0.0.0.0",
    server_port=7860
)

# Reached only after the blocking launch() call returns, i.e. once the server
# has been shut down; reporting a successful launch here would be misleading.
print("Gradio server stopped")

 Launching Gradio Interface...
🚀 Initializing Meeting Minutes Generator...
Setting up models...
OpenAI client initialized
HuggingFace login successful
Loading tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

 Tokenizer loaded
Loading model: meta-llama/Meta-Llama-3.1-8B-Instruct
 Setup error: No package metadata was found for bitsandbytes
Make sure you have set OPENAI_API_KEY and HF_TOKEN in Colab secrets
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://eb2e613db893293879.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 0.0.0.0:7860 <> https://eb2e613db893293879.gradio.live
Interface launched successfully!
Use the link above to access your Meeting Minutes Generator
