In [1]:
from transformers import pipeline
import torchaudio
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
from huggingface_hub import login
from dotenv import load_dotenv

In [None]:
# Load variables via python-dotenv so the token does not have to live in
# the notebook itself.
load_dotenv()

# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")
login(token=HF_TOKEN, add_to_git_credential=True)

In [None]:
# Use the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
# Speech-to-text pipeline backed by Whisper (base checkpoint), configured to
# return timestamps alongside the text.
asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
    device=device,
    return_timestamps=True,
)

In [None]:
# Path of the recording to transcribe, relative to the working directory.
audio_path = (
    "./denver_extract.mp3 - Google Drive.mp3"
)

In [None]:
# Decode the recording into a tensor plus its native sample rate.
waveform, sample_rate = torchaudio.load(audio_path)

# Anything that is not already 16 kHz is resampled and written to a new WAV
# file; `audio_path` is then repointed at that file so the transcription
# cell picks up the resampled copy.
if sample_rate != 16000:
    print(f"Resampling from {sample_rate} Hz to 16000 Hz...")
    resampler = torchaudio.transforms.Resample(
        orig_freq=sample_rate,
        new_freq=16000,
    )
    waveform = resampler(waveform)
    audio_path = "resampled_audio.wav"
    torchaudio.save(audio_path, waveform, 16000)

In [None]:
# Run the ASR pipeline on the audio file; the transcript is read from
# `result["text"]` further down.
print("Transcribing...")
result = asr(
    audio_path
)

In [None]:
# Show the heading followed by the full transcript (same output as two
# consecutive print calls).
print("\n Transcribed Text:\n", result["text"], sep="\n")

In [None]:
# Hugging Face Hub id of the instruction-tuned LLM used to write the minutes.
model_name = (
    "mistralai/Mistral-7B-Instruct-v0.1"
)

In [None]:
# bitsandbytes quantization settings: 4-bit NF4 weights with double
# quantization, using float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

In [None]:
# Tokenizer matching the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with the 4-bit bitsandbytes config. device_map="auto"
# already places the quantized weights on the available device(s), so the
# model must NOT be moved again with `.to(...)`: transformers rejects `.to`
# on bitsandbytes-quantized models in many versions, and the move is
# redundant in the rest.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

In [None]:
# Streams generated text as it is produced, omitting the prompt and any
# special tokens.
streamer = TextStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

In [None]:
# Keep just the transcript string from the ASR result for prompt building.
transcript_key = "text"
transcribed_text = result[transcript_key]

In [None]:
# Instruction describing the assistant's job and the required output format.
system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown."
# The request itself, with the transcript appended on a new line.
user_prompt = f"Below is an extract transcript of a Denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\n{transcribed_text}"

# Chat-format conversation: one system turn followed by one user turn.
# NOTE(review): some Mistral chat templates reject a "system" role and
# require strictly alternating user/assistant turns - confirm this
# checkpoint's template accepts it.
messages = [
    {"role": role, "content": content}
    for role, content in (
        ("system", system_message),
        ("user", user_prompt),
    )
]

In [None]:
# Render the conversation with the tokenizer's chat template and tokenize it
# into a PyTorch tensor. The tensor is moved to whatever device the model was
# placed on instead of a hard-coded "cuda", so the cell also works when no
# GPU is available.
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

In [None]:
# Generation settings: cap the reply at 500 new tokens, use EOS as the pad
# token id, and stream text through `streamer` while generating.
generation_kwargs = {
    "max_new_tokens": 500,
    "pad_token_id": tokenizer.eos_token_id,
    "streamer": streamer,
}
output = model.generate(inputs, **generation_kwargs)

# Decode the first returned sequence with special tokens stripped.
# NOTE(review): presumably this sequence still includes the prompt tokens -
# confirm before using `output_text` as the minutes alone.
first_sequence = output[0]
output_text = tokenizer.decode(first_sequence, skip_special_tokens=True)