In [22]:
import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch
from dotenv import load_dotenv
import gradio as gr
import gc

In [2]:
load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
hf_token = os.getenv('HUGGING_FACE_TOKEN')


if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
    MODEL = "gpt-4o-mini"
    openai = OpenAI()
else:
    print("OpenAI API Key not set")

if hf_token:
    print(f"HuggingFace API token exists and begins {hf_token[:3]}")
    login(hf_token, add_to_git_credential=True)
else:
    print("HuggingFace API token not set")

OpenAI API Key exists and begins sk-proj-
HuggingFace API token exists and begins hf_


In [34]:
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"
PHI3 = "microsoft/Phi-3-mini-4k-instruct"
GEMMA2 = "google/gemma-2-2b-it"
QWEN2 = "Qwen/Qwen2-7B-Instruct"
AUDIO_MODEL = "whisper-1"

audio_file="D:/Projects/llm_engineering/denver_extract.mp3"

In [4]:
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

In [28]:
system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown."
user_prompt = "Below is an extract transcript of a Denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners."

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_prompt}
  ]

In [38]:
def audio_transcript(audio_model, audio_filename):
    with open(audio_filename, "rb") as audio_file:
        transcription = openai.audio.transcriptions.create(
            file=audio_file,
            model=audio_model,  # or whatever model you're using
            response_format="text"
        )
    return transcription

In [31]:
def generate_minutes_of_meeting(model, transcript):
    system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown."
    user_prompt = f"Below is an extract transcript of a meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\n{transcript}"

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]
    tokenizer = AutoTokenizer.from_pretrained(model)
    tokenizer.pad_token = tokenizer.eos_token
    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True,  padding=True, truncation=True).to("cuda")
    streamer = TextStreamer(tokenizer)
    model = AutoModelForCausalLM.from_pretrained(LLAMA, torch_dtype="auto", device_map="auto", quantization_config=quant_config)
    outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)
    response = tokenizer.decode(outputs[0])
    del model, inputs, tokenizer, outputs, streamer
    gc.collect()
    torch.cuda.empty_cache()
    return response

In [39]:
display(Markdown(generate_minutes_of_meeting(LLAMA, audio_transcript(AUDIO_MODEL, audio_file))))

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.<|eot_id|><|start_header_id|>user<|end_header_id|>

Below is an extract transcript of a meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.
and kind of the confluence of this whole idea of the confluence week, the merging of two rivers and as we've kind of seen recently in politics and in the world, there's a lot of situations where water is very important right now and it's a very big issue. So that is the reason that the back of the logo is considered water. So let me see the creation of the logo here. So that basically kind of sums up the reason behind the logo and all the meanings behind the symbo

KeyboardInterrupt: 