In [1]:
!pip install -q --upgrade bitsandbytes accelerate gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.8/375.8 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:

import os
import torch
from google.colab import drive, userdata
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, pipeline
from openai import OpenAI
import gradio as gr
from IPython.display import Markdown, display


In [10]:
# Model identifiers used by the cells below.
AUDIO_MODEL = "gpt-4o-mini-transcribe"  # passed to the OpenAI transcription endpoint
LLAMA = "meta-llama/Llama-3.1-8B-Instruct"  # repo id given to the tokenizer/model loaders

In [4]:
# Location of the meeting recording inside the mounted Drive.
audio_file_path = "/content/drive/MyDrive/denver_extract.mp3"

# Mount Google Drive so the path above is readable from this runtime.
drive.mount("/content/drive")

Mounted at /content/drive


In [6]:
# Hugging Face: read the token from the Colab secret store and log in.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

# OpenAI: read the API key from the Colab secret store and build the client
# that process_audio uses for transcription.
openai_api_key = userdata.get("OPENAI_API_KEY")
openai = OpenAI(api_key=openai_api_key)


In [7]:
# bitsandbytes settings handed to the model loader: 4-bit NF4 weights with
# double quantization, computing in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [11]:
# Tokenizer for the chat model; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(LLAMA)
tokenizer.pad_token = tokenizer.eos_token

# Quantized causal LM; device_map="auto" leaves device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    LLAMA,
    quantization_config=quant_config,
    device_map="auto",
)


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
# Local Whisper speech-recognition pipeline (fp16 on the GPU) used by
# process_audio for the open-source transcription.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium.en",
    device="cuda",
    dtype=torch.float16,
    return_timestamps=True,
)

In [None]:
def process_audio(audio):
    """
    Transcribe a meeting recording two ways and draft minutes from it.

    The file is transcribed with the local Whisper pipeline (`pipe`) and with
    the OpenAI transcription endpoint (`AUDIO_MODEL`). The OpenAI transcript
    is then given to the Llama chat model, which writes the meeting minutes.

    Args:
        audio: path to the audio file (an uploaded file or the hardcoded
            audio_file_path).

    Returns:
        A 3-tuple `(whisper_text, gpt_text, llama_output)`. If any step
        raises, the error is reported in the first slot as
        `("Error: <message>", "", "")` so the UI shows it instead of crashing.
    """
    try:
        # Open-source transcription with the local Whisper pipeline.
        whisper_result = pipe(audio)
        whisper_text = whisper_result["text"]

        # Hosted transcription; response_format="text" is requested so the
        # result can be used directly as the transcript string.
        with open(audio, "rb") as f:
            gpt_text = openai.audio.transcriptions.create(
                model=AUDIO_MODEL,
                file=f,
                response_format="text"
            )

        system_message = """
        You produce minutes of meetings from transcripts, with summary, key discussion points,
        takeaways and action items with owners, in markdown format without code blocks.
        """
        user_prompt = f"""
        Below is a meeting transcript. Write minutes in markdown without code blocks, including:
        - a summary with attendees, location and date
        - discussion points
        - takeaways
        - action items with owners

        Transcription:
        {gpt_text}
        """

        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt}
        ]

        # add_generation_prompt=True ends the prompt with the assistant header
        # so the model answers instead of continuing the user turn.
        # return_dict=True also yields an attention mask, which generate()
        # needs because the pad token is the same as EOS.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(model.device)  # follow the model's device rather than assuming "cuda"

        streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        outputs = model.generate(**inputs, max_new_tokens=2000, streamer=streamer)

        # generate() returns prompt + completion; keep only the new tokens so
        # the minutes do not repeat the instructions and the transcript.
        prompt_length = inputs["input_ids"].shape[-1]
        llama_output = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        return whisper_text, gpt_text, llama_output

    except Exception as e:
        # Best-effort UI behaviour: surface the failure in the first output box.
        return f"Error: {str(e)}", "", ""



In [None]:
# Gradio UI with no input components: the callback therefore takes no
# arguments and always processes the recording at audio_file_path.
demo = gr.Interface(
    fn=lambda: process_audio(audio_file_path),
    inputs=[],
    outputs=[
        gr.Textbox(label="Whisper Transcription (Open Source)"),
        gr.Textbox(label="GPT-4o-mini Transcription"),
        gr.Markdown(label="Generated Meeting Minutes (Llama 3.1 8B)")
    ],
    title="Meeting Minutes Generator",
    description="Uses a meeting audio file. The app transcribes it using Whisper and GPT-4o-mini, then generates structured minutes using Llama 3.1."
)

# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)