In [None]:
# Meeting Minutes Audio Source
This notebook uses an audio file downloaded from [this Google Drive link](https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing).
For processing, the file must be stored in Google Drive, which this notebook mounts at `/content/drive`.

**Instructions:**
1. Download the audio file from the link above.
2. Upload the file to your own Google Drive.
3. Update the `audio_filename` variable in the notebook so that it points to your uploaded file (e.g., `/content/drive/MyDrive/your_folder/your_audio_file.mp3`).

In [None]:
# Install required packages for PyTorch (CUDA), Transformers, and OpenAI API.
!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai

In [None]:
# Import libraries for file operations, API access, display, authentication, and model inference.
import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch

In [None]:
# Model identifiers used by the rest of the notebook.

# Name of the OpenAI model passed to the audio transcription request.
AUDIO_MODEL = "whisper-1"

# HuggingFace Hub id of the model used to load the tokenizer and the causal LM.
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

In [None]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
drive.mount("/content/drive")

# Location of the uploaded meeting recording; change this to wherever you placed the file.
audio_filename = "/content/drive/MyDrive/llms/denver_extract.mp3"

# Fail right here with an actionable message if the path is wrong, instead of
# surfacing the problem later when the file is opened for transcription.
if not os.path.isfile(audio_filename):
    raise FileNotFoundError(
        f"Audio file not found: {audio_filename}. "
        "Upload it to Google Drive or update audio_filename."
    )

In [None]:
# Log in to the HuggingFace Hub using the token stored under 'HF_TOKEN'
# (read through google.colab.userdata, so it never appears in the notebook).
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Create the OpenAI client from the API key stored under 'OPENAI_API_KEY'
# (read through google.colab.userdata, so it never appears in the notebook).
openai_api_key = userdata.get("OPENAI_API_KEY")
openai = OpenAI(
    api_key=openai_api_key,
)

In [None]:
# Transcribe the uploaded audio file to text with the OpenAI model named by AUDIO_MODEL.
# The file is opened in a `with` block so its handle is closed as soon as the
# request completes, even if the API call raises.
with open(audio_filename, "rb") as audio_file:
    transcription = openai.audio.transcriptions.create(
        model=AUDIO_MODEL,
        file=audio_file,
        response_format="text",
    )
print(transcription)

In [None]:
# Build the chat sent to the LLM: a system message that fixes the assistant's role,
# and a user message that carries the request followed by the transcript.
system_message = (
    "You are an assistant that produces minutes of meetings from transcripts, "
    "with summary, key discussion points, takeaways and action items with owners, in markdown."
)
user_prompt = (
    "Below is an extract transcript of a Denver council meeting. "
    "Please write minutes in markdown, including a summary with attendees, location and date; "
    "discussion points; takeaways; and action items with owners.\n"
    f"{transcription}"
)
messages = [
    dict(role="system", content=system_message),
    dict(role="user", content=user_prompt),
]

In [None]:
# Quantization settings handed to the model loader (bitsandbytes backend).
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load the weights in 4-bit precision
    bnb_4bit_quant_type="nf4",              # 4-bit quantization type
    bnb_4bit_use_double_quant=True,         # enable double quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
)

In [None]:
# Tokenize the chat, load the quantized LLM, and generate the minutes while streaming
# the text to the cell output.

# Load the tokenizer and reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(LLAMA)
tokenizer.pad_token = tokenizer.eos_token

# Render the chat with the model's template. `add_generation_prompt=True` makes the
# prompt end with the assistant header, so the model starts writing its reply.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Prints text to the cell output as it is produced.
streamer = TextStreamer(tokenizer)

# Load the model with the 4-bit quantization config; device placement is automatic.
model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map="auto", quantization_config=quant_config)

# The input is a single, unpadded sequence, so every position is a real token.
# Passing the all-ones mask explicitly avoids any ambiguity caused by the padding
# token being the same as the end-of-sequence token.
outputs = model.generate(
    inputs,
    attention_mask=torch.ones_like(inputs),
    max_new_tokens=2000,
    streamer=streamer,
)

In [None]:
# Decode only the newly generated tokens into readable text.
# `outputs[0]` begins with the prompt tokens held in `inputs` (system message, request
# and full transcript), so they are sliced off; special tokens are dropped so that
# chat-template markers do not end up in the rendered minutes.
prompt_length = inputs.shape[-1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

In [None]:
# Render the decoded model output as formatted markdown in the cell output.
minutes_markdown = Markdown(response)
display(minutes_markdown)