# Create meeting minutes from an Audio file

I downloaded some Denver City Council meeting minutes and selected a portion of the meeting for us to transcribe. You can download it here:  
https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing

If you'd rather work with the original data, the HuggingFace dataset is [here](https://huggingface.co/datasets/huuuyeah/meetingbank) and the audio can be downloaded [here](https://huggingface.co/datasets/huuuyeah/MeetingBank_Audio/tree/main).

The goal of this product is to use the audio to generate meeting minutes, including action items.




In [None]:
!pip install -q --upgrade bitsandbytes accelerate

In [None]:
# imports

import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch

In [None]:
# Constants

# Hugging Face model id; loaded further down as both the tokenizer and the
# causal language model that writes the meeting minutes.
LLAMA = "meta-llama/Llama-3.2-3B-Instruct"

Run the next cell only if you want to use a local file from Google Drive instead of uploading the audio through Gradio.

In [None]:
# Mount Google Drive into the Colab filesystem so the audio file can be reached by path.
drive.mount("/content/drive")
# Path of the meeting recording on the mounted Drive; edit this to point at your own file.
audio_filename = "/content/drive/MyDrive/Copy of denver_extract.mp3"

# Download denver_extract.mp3

You can either use the same file as me, the extract from the Denver City Council meeting, or you can try your own.

If you want to use the same as me, then please download my extract here, and put this on your Google Drive:  
https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing


In [None]:
# Sign in to HuggingFace Hub

# Read the token stored under the Colab secret name HF_TOKEN and log in with it.
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

# Open the file

# Binary handle on the audio file. In the visible code it is only referenced by
# the commented-out OpenAI transcription (Option 2), and it is never closed.
# NOTE(review): audio_filename is assigned in the optional Drive cell, so that
# cell has to run before this line.
audio_file = open(audio_filename, "rb")

# STEP 1: Transcribe Audio

## Option 1: Use Open Source for Transcription - Hugging Face Pipelines

In [None]:
from transformers import pipeline

# Speech-recognition pipeline used by transcribe_audio(): the
# "openai/whisper-medium.en" checkpoint, float16 weights, placed on the GPU.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium.en",
    dtype=torch.float16,
    device='cuda',  # hard-coded: this cell needs a CUDA runtime
    # NOTE(review): presumably enabled so recordings longer than Whisper's
    # 30-second window can be transcribed — confirm against the transformers docs.
    return_timestamps=True
)


In [None]:
def transcribe_audio(audio_file):
    """Run the speech-recognition pipeline on ``audio_file`` and return its text.

    Args:
        audio_file: Audio input, forwarded unchanged to the module-level ``pipe``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    return pipe(audio_file)["text"]

## Option 2: Use OpenAI for Transcription

In [None]:
# Sign in to OpenAI using Secrets in Colab

# Alternative to the Whisper pipeline, left disabled. To use it, uncomment the
# lines below: they read OPENAI_API_KEY from Colab Secrets, send the already
# opened `audio_file` handle to OpenAI, and print the returned text.

#AUDIO_MODEL = "gpt-4o-mini-transcribe"

#openai_api_key = userdata.get('OPENAI_API_KEY')
#openai = OpenAI(api_key=openai_api_key)
#transcription = openai.audio.transcriptions.create(model=AUDIO_MODEL, file=audio_file, response_format="text")
#print(transcription)

# STEP 2: Analyze & Report

In [None]:
# Quantization settings handed to the Llama model load in the next cell.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load the weights in 4-bit
    bnb_4bit_use_double_quant=True,         # enable double quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # run computation in bfloat16
    bnb_4bit_quant_type="nf4"               # use the "nf4" 4-bit quantization type
)

In [None]:
# Load the tokenizer and the quantized causal LM for the LLAMA model id.
tokenizer = AutoTokenizer.from_pretrained(LLAMA)
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): presumably because this tokenizer ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    LLAMA,
    device_map="auto",  # let the library choose device placement
    quantization_config=quant_config
)

In [None]:
def generate_minutes(transcription):
    """Generate markdown meeting minutes from a meeting transcript.

    Wraps ``transcription`` in a system/user chat prompt, runs it through the
    module-level ``model`` (tokenized with the module-level ``tokenizer``) and
    returns only the text the model generated.

    Args:
        transcription: Plain-text transcript of the meeting.

    Returns:
        The generated minutes as a string. The prompt (system message,
        instructions and transcript) is not included in the result.
    """
    system_message = """
    You produce minutes of meetings from transcripts, with summary, key discussion points,
    takeaways and action items with owners, in markdown format without code blocks.
    """
    user_prompt = f"""
    Below is an extract transcript of a Denver council meeting.
    Please write minutes in markdown without code blocks, including:
    - a summary with attendees, location and date
    - discussion points
    - takeaways
    - action items with owners

    Transcription:
    {transcription}
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]

    # Prepare inputs.
    # - add_generation_prompt=True appends the assistant header so the model
    #   answers as the assistant instead of continuing the user turn.
    # - return_dict=True also yields the attention mask, which generate() needs
    #   because the pad token is the same as the EOS token.
    # - The tensors follow the model's device rather than a hard-coded "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    # Generate output
    output_ids = model.generate(
        **inputs,
        max_new_tokens=2000,
        do_sample=True,          # Optional: makes output more natural
        temperature=0.7          # Optional: adjust creativity
    )

    # generate() returns prompt + completion; drop the prompt tokens so the
    # minutes do not echo the instructions and the whole transcript.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = output_ids[0][prompt_length:]

    # Decode to text
    minutes_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return minutes_text


In [None]:
def full_pipeline(audio_file):
    """Transcribe an audio file and turn the transcript into meeting minutes.

    Args:
        audio_file: Path of the audio file to process. ``None`` or a blank
            string means no audio was supplied (the Gradio audio input is
            expected to pass ``None`` when nothing was uploaded).

    Returns:
        A ``(transcription, minutes)`` tuple of strings. When no audio was
        supplied, the transcription is empty and the minutes slot carries a
        short message asking for a file, instead of failing inside the
        speech-recognition pipeline.
    """
    no_audio = audio_file is None or (
        isinstance(audio_file, str) and not audio_file.strip()
    )
    if no_audio:
        return "", "No audio provided. Please upload or record an audio file first."

    transcription = transcribe_audio(audio_file)
    minutes = generate_minutes(transcription)
    return transcription, minutes

In [None]:
import gradio as gr

# Web UI around full_pipeline: one audio input (handed to the function as a
# file path) and two read-only text boxes, one for the transcription and one
# for the generated minutes.
iface = gr.Interface(
    fn=full_pipeline,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription", lines=10, placeholder="Transcribed text will appear here...", interactive=False),
        gr.Textbox(label="Meeting Minutes", lines=20, placeholder="Generated meeting minutes will appear here...", interactive=False)
    ],
    title="Meeting Minutes Generator",
    description="Upload an audio file, get the transcription and meeting minutes in markdown format.",
    theme="default"
)

In [None]:
# Start the Gradio app defined above.
iface.launch()