In [1]:
!pip install nbformat

import nbformat

# Strip the top-level "widgets" entry from the notebook's metadata and save the
# result under a new name, leaving the original notebook file untouched.

# Notebook files are UTF-8 encoded JSON; pass the encoding explicitly so the
# read does not depend on the platform's default text encoding.
with open('/content/MONA.ipynb', 'r', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=4)

# Remove widget metadata if it exists
if 'widgets' in nb.metadata:
    del nb.metadata['widgets']
    print("Removed 'widgets' metadata")

# Write the fixed version (again as UTF-8)
with open('/content/MONA_fixed.ipynb', 'w', encoding='utf-8') as f:
    nbformat.write(nb, f)

print("Fixed notebook saved as MONA_fixed.ipynb")

Removed 'widgets' metadata
Fixed notebook saved as MONA_fixed.ipynb


In [None]:
from google.colab import files
# Run Colab's upload helper and keep its return value in `uploaded`.
# `uploaded` is not read by any later cell shown here; the conversion cell
# names its input file directly.
# NOTE(review): presumably a mapping of uploaded file name -> file contents —
# confirm against the Colab documentation.
uploaded = files.upload()


In [None]:
# List the non-hidden entries of the working directory (e.g. to confirm that
# the upload landed). Written in plain Python instead of a bare `ls`, which
# depends on IPython's automagic being enabled and is not valid Python when
# the notebook is exported as a script.
import os

sorted(entry for entry in os.listdir(".") if not entry.startswith("."))

In [None]:
from transformers import pipeline,AutoModelForCausalLM, AutoTokenizer
import torch
from IPython.display import  clear_output


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
import subprocess

def convert_mp4_to_mp3(input_file, output_file, bitrate="192k", overwrite=True):
    """Convert an MP4 file to MP3 using ffmpeg.

    Args:
        input_file: Path of the video file to read.
        output_file: Path of the MP3 file to write.
        bitrate: Audio bitrate passed to ffmpeg's ``-b:a`` option.
        overwrite: When True (default) pass ``-y`` so an existing output file
            is replaced; this keeps the cell re-runnable. When False pass
            ``-n`` so ffmpeg refuses to replace an existing file.

    Returns:
        True if ffmpeg completed successfully, False otherwise. Failures are
        reported with ``print`` rather than raised.
    """
    command = [
        "ffmpeg",
        "-y" if overwrite else "-n",  # Never block on the interactive overwrite prompt
        "-i", input_file,             # Input file
        "-vn",                        # Disable video
        "-acodec", "libmp3lame",      # Use MP3 codec
        "-b:a", bitrate,              # MP3 bitrate
        output_file,                  # Output file
    ]
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # Raised by subprocess when the ffmpeg executable itself cannot be found.
        print("Error during conversion: ffmpeg executable not found on PATH")
        return False
    except subprocess.CalledProcessError as e:
        print("Error during conversion:", e)
        return False

    print(f"Conversion successful: {output_file}")
    return True

# Extract the audio track of the uploaded meeting recording into an MP3 file.
source_video = "Weekly Meeting Example .mp4"
audio_target = "meeting.mp3"
convert_mp4_to_mp3(source_video, audio_target)


In [None]:
# Speech-recognition pipeline backed by the "openai/whisper-small" checkpoint.
# Generation options are kept in their own dict so they are easy to spot.
whisper_generate_kwargs = {"language": 'english', "task": "transcribe"}

pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
    stride_length_s=5,
    return_timestamps=True,
    device=device,
    generate_kwargs=whisper_generate_kwargs,
)

In [None]:
# Transcribe the extracted audio, then render every returned chunk as
# "<timestamp>--><text>", one chunk per line, for easier reading.
transcription = pipe("meeting.mp3")

formatted_lyrics = "".join(
    f"{chunk['timestamp']}-->{chunk['text']}\n"
    for chunk in transcription['chunks']
)

print(formatted_lyrics.strip())

In [None]:
# Persist the timestamped transcript (surrounding whitespace trimmed) as a
# UTF-8 text file so it can be reused later.
transcript_text = formatted_lyrics.strip()
with open("transcription.txt", mode="w", encoding="utf-8") as out_file:
    out_file.write(transcript_text)

print("Transcription saved to transcription.txt")

In [None]:
# Checkpoint used for the summarisation step.
DEFAULT_MODEL = "Qwen/Qwen2.5-3B-Instruct"

# Tokenizer for the checkpoint; its padding id is set to the end-of-sequence id.
tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Load the weights in bfloat16 and place them on the selected device.
model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": device,
}
model = AutoModelForCausalLM.from_pretrained(DEFAULT_MODEL, **model_load_options)


In [None]:
# Sections the summary is asked to contain; each becomes one "- ..." bullet
# line in the system prompt below.
summary_sections = [
    "Main topics discussed",
    "Key decisions made",
    "Action items (with who will do what)",
    "Important concerns or issues raised",
    "Any follow-ups required",
]

# System instructions: role description, the required sections, and the rule
# to stay strictly within the transcript's content.
system_prompt = (
    "You are an expert meeting summarizer. Your job is to read the user's full meeting "
    "transcript and produce a clear, structured, accurate summary.\n\n"
    "The summary should include:\n"
    + "".join(f"- {section}\n" for section in summary_sections)
    + "\nDO NOT add any extra topics. ONLY use information from the transcript."
)

# Two-message chat: the instructions, then the timestamped transcript as the
# user's message.
conversation = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": formatted_lyrics.strip()},
]


# Render the conversation with the model's chat template.
# add_generation_prompt=True appends the tokens that open the assistant's
# turn, so generation produces the assistant's reply instead of continuing
# from the end of the user's message.
prompt = tokenizer.apply_chat_template(
    conversation,
    tokenize=False,
    add_generation_prompt=True,
)

# The rendered template already carries the special tokens it needs, so do not
# let the tokenizer add another set when encoding the text.
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(device)
prompt_length = inputs.input_ids.shape[1]

# Inference only: no gradients are needed.
with torch.no_grad():
    output = model.generate(
        **inputs,
        do_sample=True,
        max_new_tokens=1000,
    )

# generate() returns prompt + continuation; keep only the newly generated tokens.
processed_text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print(processed_text)

In [None]:
# Print the full timestamped transcript for reference (printed as-is here,
# whereas the earlier print used the stripped text).
print(formatted_lyrics)
