In [None]:
!pip install transformers torch torchaudio bitsandbytes trl accelerate peft torchcodec datasets[audio] evaluate jiwer tensorboard pinecone pinecone_text langchain-pinecone langchain_huggingface langchain langchain[community]

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from inside the notebook so no token is written into a cell.
notebook_login()


In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    WhisperForConditionalGeneration,
    WhisperProcessor,
    WhisperFeatureExtractor,
    WhisperTokenizer,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from evaluate import load
from IPython.display import Audio


from pinecone import Pinecone, ServerlessSpec
from langchain_community.retrievers import PineconeHybridSearchRetriever
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from pinecone_text.sparse import BM25Encoder
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_pinecone.vectorstores import PineconeVectorStore
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document

In [None]:
# bitsandbytes quantization settings: 4-bit NF4 weights, bfloat16 compute dtype,
# double quantization switched off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

In [None]:
# Name of the Pinecone index that holds the meeting-transcript chunks.
index_name = "meeting-assistant"

# Never hardcode the API key in the notebook: read it from the environment instead.
# NOTE(review): langchain-pinecone is also expected to pick up PINECONE_API_KEY when it
# is given only an index name — confirm against the installed version.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it (or add it to your notebook secrets) "
        "before running this cell."
    )
pc = Pinecone(api_key=pinecone_api_key)

# Create the index only when it does not exist yet, so the cell can be re-run safely.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the output size of the dense embedding model
        metric="dotproduct",  # Pinecone requires dotproduct for sparse-dense (hybrid) queries
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [None]:
class AudioProcessor:
  """Speech-to-text wrapper: a quantized Whisper base model plus a PEFT adapter."""

  def __init__(self, asr_model_name: str, adapter_name: str, bnb_config: BitsAndBytesConfig):
    """Load the base model, attach the adapter and build the ASR pipeline.

    Args:
      asr_model_name: Hub id (or path) of the Whisper checkpoint; also used to load
        the matching processor.
      adapter_name: Hub id (or path) of the PEFT adapter applied on top of the base model.
      bnb_config: Quantization config passed to the base model loader.
    """
    self.base_model = WhisperForConditionalGeneration.from_pretrained(
      asr_model_name,
      quantization_config=bnb_config,
      device_map="auto",
    )

    # Inference only: the adapter weights are loaded frozen.
    self.asr_model = PeftModel.from_pretrained(
        self.base_model,
        adapter_name,
        is_trainable=False,
    )

    self.processor = WhisperProcessor.from_pretrained(asr_model_name)
    self.processor.tokenizer.language = "english"
    self.processor.tokenizer.task = "transcribe"

    # The tokenizer attributes above do not steer generation; language and task must
    # be handed to generate() explicitly, otherwise a multilingual Whisper falls back
    # to automatic language detection.
    self.generate_kwargs = {"language": "english", "task": "transcribe"}

    self.asr_pipe = pipeline(
        task="automatic-speech-recognition",
        model=self.asr_model,
        tokenizer=self.processor.tokenizer,
        feature_extractor=self.processor.feature_extractor,
    )

  def transcribe(self, input):
    """Transcribe `input` with the ASR pipeline and return the recognized text.

    Args:
      input: Audio in any form the ASR pipeline accepts (the notebook passes a
        path to a .wav file).

    Returns:
      The 'text' field of the pipeline result.
    """
    result = self.asr_pipe(
        input,
        return_timestamps=True,
        generate_kwargs=self.generate_kwargs,
    )
    return result['text']



In [None]:
# Build the speech-to-text helper from the Whisper checkpoint and its adapter.
ap = AudioProcessor(
    asr_model_name="openai/whisper-small",
    adapter_name="ibraheemaloran/whisper_qlora",
    bnb_config=bnb_config,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/967M [00:00<?, ?B/s]

generation_config.json: 0.00B [00:00, ?B/s]

adapter_config.json:   0%|          | 0.00/976 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/28.3M [00:00<?, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

normalizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


In [None]:
# Transcribe the local recording and show the resulting text.
transcript = ap.transcribe(input="first_5_minutes.wav")
print(transcript)

Using custom `forced_decoder_ids` from the (generation) config. This is deprecated in favor of the `task` and `language` flags/config options.
Transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English. This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`. See https://github.com/huggingface/transformers/pull/28687 for more details.


 15 we're going to start our special meeting closed session roll call Council members madrasi here. Oh dear mayor Spencer here three present council members. They saw gonna as you crash Thank you Public comment on closed session items. We have no speaker. Thank you We will now adjourn to closed session to consider 3a conference of legal counsel workers compensation claim Claimant Robert Hunt agency claimed against city of Alameda Thank you everyone I wanted to share we have been in closed session so now we're going to resume our special meeting that started at 615 agenda item for announcement of action taken in closed session if any direction was given to staff now I will adjourn the special that special meeting I will now Now open the second special city council meeting. It is 7.02 PM. Please rise for the Pledge of Allegiance. I pledge allegiance to the flag of the United States of America to the Republic for which it stands, one nation under God, indivisible, with liberty and justice

In [None]:
transcript = "15 we're going to start our special meeting closed session roll call Council members madrasi here. Oh dear mayor Spencer here three present council members. They saw gonna as you crash Thank you Public comment on closed session items. We have no speaker. Thank you We will now adjourn to closed session to consider 3a conference of legal counsel workers compensation claim Claimant Robert Hunt agency claimed against city of Alameda Thank you everyone I wanted to share we have been in closed session so now we're going to resume our special meeting that started at 615 agenda item for announcement of action taken in closed session if any direction was given to staff now I will adjourn the special that special meeting I will now Now open the second special city council meeting. It is 7.02 PM. Please rise for the Pledge of Allegiance. I pledge allegiance to the flag of the United States of America to the Republic for which it stands, one nation under God, indivisible, with liberty and justice for all. All right, we have a consent calendar, which is routine. However, I wanted to request that we move up. It's my understanding some people wanted to pull some items from consent. There are some items being pulled. Okay, so for some reason on the agenda here, it doesn't have the part. You can pull the matter of consent calendar. So first of all, what items did we want to pull from consent? It was E, F, G, and K are the ones I knew of. Also, 2C, please. And did you just say 2E? E, F, G, and K. Did anyone have any other items to pull from consent? Yeah, J, you said J? No, no, J. Okay, so J, it's my understanding we're pulling 2C, to E, to F, to G, and to K. Is that correct? All right. That being said, I would like to request that we move up in the calendar items 3A to confirm the nomination of the Shrub Social Service Human Relations Board, which is going to take a short amount of time. 
3A, 4A, which is member Day Sox, referral, and 5A, which is the historical advisory board nomination. I would be agreeable to having the motion to accept the balance of the consent calendar, but not to entertain all of those items at this point, because for our charter, we need to be ready to, to adjourn or recess this meeting by 7th. Well, we wanna also give him time to be able to speak and I don't wanna run out of time. So this is what I was gonna propose, Madam Mayor, that we go ahead and approve those few items on the consent calendar that weren't pulled. But then we do an agenda change where we actually move item 1B from the 7th. an o'clock meeting to right after the balance of the consent calendar so that we can give our colleague his due. Right, but it's my understanding that we cannot move from one item to the other. I ran it by this city clerk. That also spoke to the clerk. Yeah. No, what Marilyn's proposing is to recess right after you vote on the consent calendar and convene the regular meeting and take that one meeting at that point. Okay. That's what she said. To recess earlier. Yeah. to give member Daysock the opportunity to have his referral at least start with that and watch the clock because this is something that you weren't here at the last council meeting but we didn't have the votes to go past 11 o'clock and he does have a referral that's been sitting so we could possibly give him a few minutes at least to be able to speak to his referral yeah well that's not what I'm proposing but if you want to take a vote you could take a right so my proposal was actually to hear to first of all could we do them could we do the motion to accept the balance of the consent thank you second all those in favor hi thank you hi so that motion carries unanimous"

In [None]:
class EmbeddingGenerator:
  """Chunks a transcript, indexes the chunks in Pinecone and fits a BM25 sparse encoder."""

  def __init__(self, index_name: str, embedding_model: HuggingFaceEmbeddings):
    """Remember the target index name and the dense embedding model.

    Args:
      index_name: Name of the Pinecone index to write to.
      embedding_model: Dense embedding model used to embed each chunk.
    """
    self.index = index_name
    self.embeddings = embedding_model

  def create_sparse_encoder(self, doc_chunks):
    """Fit a BM25 encoder on the chunk texts, dump it to 'bm25_values.json' and return it.

    Args:
      doc_chunks: Documents whose text is stored under metadata['context'].

    Raises:
      ValueError: If `doc_chunks` is empty (there is nothing to fit on).
    """
    corpus = [doc.metadata['context'] for doc in doc_chunks]
    if not corpus:
      raise ValueError("Cannot fit the BM25 encoder: the input produced no text chunks.")
    # `default` is called on the class directly; no throwaway instance is needed.
    bm25 = BM25Encoder.default()
    bm25.fit(corpus)
    bm25.dump('bm25_values.json')
    return bm25

  def data_ingest(self, input):
    """Split `input` into overlapping chunks and wrap each one in a Document.

    The chunk text is placed both in `page_content` (so it is what gets embedded)
    and in metadata['context'] (the key the sparse-encoder step reads).

    Args:
      input: Full transcript text.

    Returns:
      A list of Documents, one per chunk.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    chunks = text_splitter.split_text(input)
    return [
        Document(page_content=chunk, metadata={"context": chunk})
        for chunk in chunks
    ]

  def create_vector_store(self, input):
    """Index the transcript chunks in Pinecone and return the fitted BM25 encoder.

    Args:
      input: Full transcript text.

    Returns:
      The BM25 encoder fitted on the chunks.
    """
    docs = self.data_ingest(input)
    # Fit first so that empty input fails before anything is written to the index.
    sparser = self.create_sparse_encoder(docs)
    # Embed the real chunk text and store it under the "context" metadata key.
    # NOTE(review): only dense vectors are upserted here; if sparse values are also
    # wanted in the index, the hybrid retriever's own add_texts may be the right
    # entry point — confirm.
    PineconeVectorStore.from_documents(
        documents=docs,
        embedding=self.embeddings,
        index_name=self.index,
        text_key="context",
    )
    return sparser

  def create_sparser(self, input):
    """Fit and return the BM25 encoder for `input` without writing to the index."""
    docs = self.data_ingest(input)
    return self.create_sparse_encoder(docs)





In [None]:
# Dense embedder, followed by chunking + indexing of the transcript; the returned
# BM25 encoder is kept for the retriever.
embedder = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")
eg = EmbeddingGenerator(index_name=index_name, embedding_model=embedder)
sparser = eg.create_vector_store(input=transcript)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
class RAGAgent:
  """Meeting assistant: summarizes transcripts and answers questions from retrieved context."""

  def __init__(self, llm_model_name: str, index_name: str, sparser: BM25Encoder, embeddings: HuggingFaceEmbeddings):
    """Set up the LLM, both prompt templates and the hybrid retriever.

    Args:
      llm_model_name: Model id handed to `HuggingFacePipeline.from_model_id`.
      index_name: Name of the Pinecone index to query.
      sparser: Fitted BM25 encoder used as the retriever's sparse encoder.
      embeddings: Dense embedding model used by the retriever.
    """
    generation_settings = {
        "dtype": torch.bfloat16,
        "max_new_tokens": 1024,
        "return_full_text": False,
        "device": "cuda",
    }
    self.llm = HuggingFacePipeline.from_model_id(
        model_id=llm_model_name,
        task="text-generation",
        pipeline_kwargs=generation_settings,
    )

    summary_template = """You are an expert summarization assistant. Provide a detailed summary of the following transcript in the following format:
        Detailed Summary:
        Action Items (in bullet point format):

        Transcript: {transcript}"""
    self.summary_prompt = PromptTemplate(
        template=summary_template,
        input_variables=['transcript'],
    )

    qa_template = """You are an expert meeting assistant. Answer the question based only on the provided context. Use only the information in the context.
        If the answer or information is not in the context, say I don't know. Do not make up answers

        Context:
        {context}

        Questions:
        {question}

        Answer:"""
    self.qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=['context', 'question'],
    )

    # The index handle comes from the notebook-level Pinecone client `pc`.
    self.retriever = PineconeHybridSearchRetriever(
        embeddings=embeddings,
        sparse_encoder=sparser,
        index=pc.Index(index_name),
        top_k=4,
    )

  def summarize(self, trans):
    """Fill the summary prompt with the transcript text `trans` and return the LLM output."""
    filled_prompt = self.summary_prompt.format(transcript=trans)
    return self.llm.invoke(filled_prompt)

  def retrieve_context(self, query):
    """Retrieve documents for `query` and join their contents with blank lines."""
    hits = self.retriever.invoke(query)
    return "\n\n".join(hit.page_content for hit in hits)

  def get_response(self, query):
    """Answer `query` with the LLM, using the retrieved context in the QA prompt."""
    filled_prompt = self.qa_prompt.format(
        context=self.retrieve_context(query),
        question=query,
    )
    return self.llm.invoke(filled_prompt)


In [None]:
# Assemble the assistant from the LLM id, the index name, the BM25 encoder and the embedder.
ma = RAGAgent(
    llm_model_name="google/gemma-3-4b-it",
    index_name=index_name,
    sparser=sparser,
    embeddings=embedder,
)

tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/90.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.64G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Summarize the transcript text held in `transcript`.
ma.summarize(trans=transcript)

In [None]:
# Ask a question that is answered from the retrieved transcript chunks.
ma.get_response(query="what did the council vote on")

In [None]:
# `summarize` expects transcript text, not a file path: transcribe the recording first.
ma.summarize(ap.transcribe("first_5_minutes.wav"))

"Hereâ€™s a summary of the transcript, following your requested format:\n\n**Concise Summary:**\n\nThe City Council convened a special meeting and then transitioned into a second special meeting. The primary focus was on addressing items from the consent calendar, specifically requesting a shift in the agenda to prioritize certain nominations (specifically items 3A, 4A, and 5A) due to time constraints and a desire to allow Member Daysock to speak about his referral. A proposal was made to recess the meeting briefly after approving the consent calendar to allow for this immediate action, and then reconvene as a regular meeting. The consent calendar itself was approved with the exception of items that were pulled.\n\n**Action Items:**\n\n*   **Move items 3A, 4A, and 5A to the front of the agenda:** Council members requested a shift in the consent calendar to prioritize these nominations.\n*   **Recess the meeting:**  Mayor Spencer proposed a recess after approving the remaining items on 