<a href="https://colab.research.google.com/github/Satyadeep-Dey/AI-experiments/blob/main/7__Model_Comparison_MoM_from_transcript.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai

In [None]:
# imports

import os
import time
from google.colab import drive
from google.colab import userdata
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch



# Function to write text into a file

In [None]:
def write_text_to_file(folder_path, file_name, write_text,
                       mydrive_path='/content/drive/MyDrive', mount_timeout=60):
    """Write text to a file below the Google Drive root, creating folders as needed.

    Args:
        folder_path: Folder relative to ``mydrive_path`` (e.g. ``"Files/Text"``).
            It is created if it does not exist.
        file_name: Name of the file to create or overwrite inside that folder.
        write_text: String content to write.
        mydrive_path: Root directory the folder is resolved against. Defaults to
            the Colab Drive location.
        mount_timeout: Maximum number of seconds to wait for ``mydrive_path`` to
            appear after mounting.

    Raises:
        TimeoutError: If ``mydrive_path`` is still missing after ``mount_timeout``
            seconds.
    """
    # Mount only when the root is not visible yet: forcing a remount on every
    # call is slow and unnecessary once Drive is already available.
    if not os.path.isdir(mydrive_path):
        drive.mount('/content/drive', force_remount=True)
        print("Drive mounted.")

        # Wait until the root is available, but never spin forever.
        deadline = time.monotonic() + mount_timeout
        while not os.path.exists(mydrive_path):
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"{mydrive_path} did not become available within {mount_timeout} seconds"
                )
            print("Waiting for Drive to be ready...")
            time.sleep(1)

    # Create folder path if it doesn't exist
    target_dir = os.path.join(mydrive_path, folder_path)
    os.makedirs(target_dir, exist_ok=True)

    # Define file path
    file_path = os.path.join(target_dir, file_name)

    # Explicit UTF-8 so non-ASCII characters in the text are written the same
    # way regardless of the runtime's default locale encoding.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(write_text)

    print("File written successfully to:", file_path)


# Let's try this out

In [None]:
# Smoke test: store a short greeting under Files/Text on Google Drive
write_text_to_file(
    "Files/Text",
    "example.txt",
    "Hello, PLANET .. ! Howz life ? .... !!",
)

# Function to read from a file

In [None]:
def read_text_from_file(folder_path, file_name,
                        mydrive_path='/content/drive/MyDrive', mount_timeout=60):
    """Read and return the text of a file stored below the Google Drive root.

    Args:
        folder_path: Folder relative to ``mydrive_path`` (e.g. ``"Files/Text"``).
        file_name: Name of the file inside that folder.
        mydrive_path: Root directory the folder is resolved against. Defaults to
            the Colab Drive location.
        mount_timeout: Maximum number of seconds to wait for ``mydrive_path`` to
            appear after mounting.

    Returns:
        The file contents as a string, or the literal string ``"File not found!"``
        when the file does not exist. Callers that need to tell a missing file
        from real content must compare against that sentinel.

    Raises:
        TimeoutError: If ``mydrive_path`` is still missing after ``mount_timeout``
            seconds.
    """
    # Mount only when the root is not visible yet: forcing a remount on every
    # call is slow and unnecessary once Drive is already available.
    if not os.path.isdir(mydrive_path):
        drive.mount('/content/drive', force_remount=True)
        print("Drive mounted.")

        # Wait until the root is available, but never spin forever.
        deadline = time.monotonic() + mount_timeout
        while not os.path.exists(mydrive_path):
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"{mydrive_path} did not become available within {mount_timeout} seconds"
                )
            print("Waiting for Drive to be ready...")
            time.sleep(1)

    # Path to the file
    file_path = os.path.join(mydrive_path, folder_path, file_name)

    # Missing file: keep the sentinel return value existing callers rely on.
    if not os.path.exists(file_path):
        return "File not found!"

    # Explicit UTF-8 so the text decodes the same way regardless of the
    # runtime's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


# Let's try this out

In [None]:
# Read back the example file from Files/Text and show what it holds
contents = read_text_from_file("Files/Text", "example.txt")

print("File contents:", contents, sep="\n")

# Now let's start comparing models

In [None]:
# Model identifiers used throughout the comparison

# Open-weights model loaded through transformers
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Models called through the OpenAI client
GPT_4o_mini = "gpt-4o-mini"
GPT_4o = "gpt-4o"



In [None]:
# Authenticate with the HuggingFace Hub using the HF_TOKEN secret stored in Colab

hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Build the OpenAI client from the OPENAI_API_KEY secret stored in Colab

openai_api_key = userdata.get("OPENAI_API_KEY")
openai = OpenAI(
    api_key=openai_api_key,
)

# Compare 3 models for creating MoM
We have already used "whisper-1" from OpenAI to create a transcript from the audio recording. Now we'll create minutes of the meeting from this transcript using **gpt-4o-mini**, **gpt-4o** and **meta-llama/Meta-Llama-3.1-8B-Instruct**, and compare the results.

In [None]:
# Let's read the transcript first

contents = read_text_from_file(
    folder_path="Files/Text",
    file_name="audio_file_name_202504181127.txt"
)

# read_text_from_file reports a missing file by returning the literal string
# "File not found!". Stop here instead of sending that sentinel (or an empty
# transcript) to the models as if it were the meeting transcript.
if contents == "File not found!":
    raise FileNotFoundError(
        "Transcript 'Files/Text/audio_file_name_202504181127.txt' was not found on Google Drive"
    )
if not contents.strip():
    raise ValueError("Transcript file is empty; nothing to summarise")

#print("File contents:")
#display(contents) # using this instead of print so that we can see it in multi line

**Let's create the system and user prompt which will be used for all 3 LLMs**

In [None]:
# System prompt shared by all three models.
# Adjacent string literals are concatenated by Python, so the text stays on
# readable lines without backslash continuations inside the string.
system_message = (
    "You are an assistant that produces minutes of meetings from transcripts,with summary, "
    "key discussion points, takeaways and action items with owners, in markdown."
)

# User prompt: instructions followed by the transcript read earlier (`contents`).
# The original ended with "\ {contents}", where "\ " is an invalid escape
# sequence: it triggers a warning on newer Python versions and put a stray
# backslash in front of the transcript. The backslash is removed here; the rest
# of the prompt text is unchanged.
user_prompt = (
    "Below is an extract transcript of a Denver council meeting. "
    "Please write minutes in markdown, including a summary with date "
    ", location and attendees;discussion points; list of key government web-sites and initiatives; "
    "takeaways; and action items with summary "
    ", owners and details.Ensure all action items are captured. "
    "It's ok if you have extra action items . "
    "Use bullet points when required and ensure that there is no line space between a point "
    "and it's children or sub bullet points. "
    f"Here is the transscript : {contents}"
)

# Chat-format message list reused for the OpenAI calls and the Llama chat template
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_prompt}
  ]

#display(messages)

# Call OpenAI to generate Minutes of the Meeting


In [None]:
# First model: gpt-4o-mini
completion = openai.chat.completions.create(model=GPT_4o_mini, messages=messages)

# The generated minutes are the text of the first returned choice
meeting_minutes = completion.choices[0].message.content
display(Markdown(meeting_minutes))

# Keep a copy of the minutes on Google Drive
write_text_to_file("Files/Text", "gpt-4o-mini MoM.md", meeting_minutes)

In [None]:
# Second model: gpt-4o, called with the same messages

completion = openai.chat.completions.create(model=GPT_4o, messages=messages)

# The generated minutes are the text of the first returned choice
meeting_minutes = completion.choices[0].message.content
display(Markdown(meeting_minutes))

# Keep a copy of the minutes on Google Drive
write_text_to_file("Files/Text", "gpt-4o MoM.md", meeting_minutes)

# Call Llama to generate Minutes of the Meeting

In [None]:
# Quantization settings handed to from_pretrained when the Llama model is loaded,
# so that the model takes less memory.

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # 4-bit loading
    bnb_4bit_quant_type="nf4",              # 4-bit quantization type
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
    bnb_4bit_use_double_quant=True,         # enable double quantization
)

In [None]:
# Tokenizer

tokenizer = AutoTokenizer.from_pretrained(LLAMA)

# Make sure a padding token exists. Reuse the EOS token rather than adding a
# brand-new '[PAD]' entry: a newly added token gets an id the loaded model has
# no embedding for unless the model's embeddings are resized as well.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Render the chat messages with the model's chat template and tokenize them.
# - add_generation_prompt=True appends the assistant header, so the model starts
#   writing the reply instead of having to produce the header itself.
# - return_tensors="pt" returns PyTorch tensors rather than Python lists.
# - return_dict=True also returns the attention mask computed by the tokenizer,
#   so it does not have to be reconstructed by comparing ids with the pad id.
encoded = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
    padding=True,
).to("cuda")  # cuda -> use GPU

# `inputs` stays a plain tensor of token ids because the generate call uses it that way
inputs = encoded["input_ids"]
input_ids = inputs
attention_mask = encoded["attention_mask"]


In [None]:
# Load the Llama model with the quantization settings defined earlier;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    LLAMA,
    device_map="auto",
    quantization_config=quant_config,
)

# Use the tokenizer's pad id, falling back to the EOS id when the pad id is unset (or falsy)
padding_id = tokenizer.pad_token_id or tokenizer.eos_token_id

# Generate up to 2000 new tokens from the tokenized chat prompt
outputs = model.generate(
    inputs,
    attention_mask=attention_mask,
    pad_token_id=padding_id,
    max_new_tokens=2000,
)


In [None]:
# Decode the whole generated sequence (prompt + reply). Special tokens are kept
# so the assistant header can be used to find where the reply starts.
response = tokenizer.decode(outputs[0])

# we don't need to see prompt sent to LLM . Only response. Hence we split ..
assistant_header = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
split_response = response.split(assistant_header)
if len(split_response) < 2:
    # Without this check a missing header surfaces as a bare IndexError
    raise ValueError(
        "Assistant header not found in the decoded output; cannot separate the reply from the prompt"
    )
meeting_minutes = split_response[1] # and take 2nd part = part after prompt

# Remove end-of-sequence markers and surrounding whitespace so they do not show
# up in the rendered minutes or in the saved file.
for special_token in ("<|eot_id|>", "<|end_of_text|>"):
    meeting_minutes = meeting_minutes.replace(special_token, "")
meeting_minutes = meeting_minutes.strip()

display(Markdown(meeting_minutes))

# Writes to Google Drive
write_text_to_file(
    folder_path="Files/Text",
    file_name="Llama MoM.md",
    write_text=meeting_minutes
)