## Create meeting minutes from an audio file

In [None]:
import os
import requests
from IPython.display import Markdown, display, update_display
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
from dotenv import load_dotenv
from faster_whisper import WhisperModel

In [None]:
# Model identifiers and the recording this notebook works on.
# NOTE(review): AUDIO_MODEL is not read by transcribe_audio below and is
# reassigned in the alternative implementation further down.
AUDIO_MODEL = "whisper-1"
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"
audio_filename = "./denver_extract.mp3"

In [None]:
# Load variables from a .env file (letting them override values already in
# the environment), then log in to the Hugging Face Hub with HF_TOKEN.
load_dotenv(override=True)
hf_token = os.environ.get("HF_TOKEN")
login(token=hf_token)

In [None]:
def transcribe_audio(path, model_size="small", compute_type=None):
    """Transcribe an audio file to plain text with faster-whisper.

    Args:
        path: Audio file to transcribe; passed unchanged to
            ``WhisperModel.transcribe``.
        model_size: Model size/identifier handed to ``WhisperModel``.
        compute_type: Inference precision passed to ``WhisperModel``. When
            ``None`` (the default) it is chosen from the device:
            ``"float16"`` on CUDA, ``"int8"`` on CPU.

    Returns:
        The text of every transcribed segment, joined by single spaces.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if compute_type is None:
        # float16 is only usable on the GPU backend; requesting it on CPU
        # makes model construction fail, so fall back to int8 there.
        compute_type = "float16" if device == "cuda" else "int8"

    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    # The second return value (transcription info) is not needed here.
    segments, _info = model.transcribe(path)

    # Strip each piece so that joining with a space cannot produce doubled
    # whitespace when a segment's text carries its own leading/trailing space.
    return " ".join(segment.text.strip() for segment in segments)

In [None]:
# Transcribe the configured recording and echo the text for a quick sanity check.
transcripts = transcribe_audio(path=audio_filename)
print(transcripts)

In [None]:
# System prompt: fixes the assistant's role and the expected markdown layout.
# NOTE(review): the requirements are spelled out twice in this string —
# consider trimming the repetition.
system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners, in markdown."

# User prompt: the task description followed by the transcript on a new line.
user_prompt = f"Below is an extract transcript of a denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\n{transcripts}"

# Chat-style message list: one system turn followed by one user turn.
messages = [
    {"role": speaker, "content": text}
    for speaker, text in (("system", system_message), ("user", user_prompt))
]

In [None]:
# Quantization settings used when the language model is loaded:
# 4-bit weights of type "nf4" with double quantization, computing in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Load the tokenizer and reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(LLAMA)
tokenizer.pad_token = tokenizer.eos_token

# Render the chat into token ids. return_dict=True makes the tokenizer return
# its own attention mask alongside the ids. Deriving the mask by comparing
# against pad_token_id would be wrong here: pad and EOS share an id, so any
# genuine end-of-sequence token inside the prompt would be masked out.
encoded = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
)
inputs = encoded["input_ids"].to("cuda")
attention_mask = encoded["attention_mask"].to("cuda")

# Stream generated text to stdout as it is produced, without echoing the prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True)

# Load the quantized language model onto the GPU.
model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map="cuda", quantization_config=quant_config)

# Generate up to 2000 new tokens; `outputs` holds the resulting token ids.
outputs = model.generate(
    inputs,
    attention_mask=attention_mask,
    max_new_tokens=2000,
    streamer=streamer,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
# generate() returns the prompt tokens followed by the newly generated ones.
# Keep only the new tokens and drop special tokens, so the rendered minutes
# do not contain the system/user prompt (including the whole transcript) or
# control markers.
response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)

In [None]:
# Render the generated minutes as formatted markdown in the notebook output.
display(Markdown(data=response))

In [None]:
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Alternative implementation: transcribe with the Hugging Face transformers
# speech-recognition pipeline.
AUDIO_MODEL = "openai/whisper-small"

# The processor supplies both the tokenizer and the feature extractor.
processor = AutoProcessor.from_pretrained(AUDIO_MODEL)

# Load the speech model in half precision and move it to the GPU.
speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    AUDIO_MODEL,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
speech_model.to('cuda')

# Assemble the pipeline from the pieces loaded above.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=speech_model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    device='cuda',
    torch_dtype=torch.float16,
)

In [None]:
# Whisper works on 30-second windows; longer recordings need long-form
# decoding, which relies on timestamp prediction. Without return_timestamps
# the pipeline either rejects or truncates such audio, depending on the
# transformers version. The result still exposes the full transcript under
# "text" (plus timestamped "chunks").
result = pipe(audio_filename, return_timestamps=True)

In [None]:
# Take the transcript stored under the "text" key of the pipeline result and print it.
transcription = result["text"]
print(transcription)