### Convert video to audio (if needed)

In [None]:
# Converts video to audio directly using `ffmpeg` command with the help of subprocess module

import subprocess
import os
import sys

# Input video and the audio format to extract from it.
VIDEO_FILENAME = "video.mp4"
OUTPUT_EXT = "mp3"
AUDIO_FILENAME = f"audio.{OUTPUT_EXT}"

# Run ffmpeg; "-y" overwrites an existing output file so the cell can be re-run.
# stderr is captured (instead of being discarded) so that a failure can be
# reported rather than silently followed by a success message.
completed = subprocess.run(
    ["ffmpeg", "-y", "-i", VIDEO_FILENAME, AUDIO_FILENAME],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.PIPE,
    text=True,
    errors="replace",
)

if completed.returncode != 0:
    # Only show the last few lines of the ffmpeg log to keep the message readable.
    error_tail = "\n".join(completed.stderr.strip().splitlines()[-5:])
    raise RuntimeError(
        f"ffmpeg failed (exit code {completed.returncode}) while converting "
        f"{VIDEO_FILENAME}:\n{error_tail}"
    )

print("Audio extracted into " + AUDIO_FILENAME)

### Convert .m4a into .mp3

In [None]:
import subprocess

# Source recording and the MP3 file to produce from it.
input_file = "data/record.m4a"
output_file = "data/record.mp3"

# "-y" overwrites an existing output file (same as the video-conversion cell),
# so re-running the cell does not stall or abort on ffmpeg's overwrite prompt.
# check=True raises CalledProcessError when ffmpeg exits with a non-zero status
# instead of letting the failure go unnoticed.
subprocess.run(["ffmpeg", "-y", "-i", input_file, output_file], check=True)

print(f"Audio converted into {output_file}")

### Transcribe audio with Whisper (locally)

In [3]:
# Load the Whisper model (downloaded first if not available yet),
# then transcribe the audio file and save the text next to it.

import os

import whisper

# Audio file to transcribe. If FILENAME is empty, fall back to the audio.mp3
# file produced by the video-conversion cell.
FILENAME = "data/record.mp3"
audio_path = FILENAME or "audio.mp3"

# splitext strips only the final extension, so paths that contain other dots
# (e.g. "./data/record.mp3") still map to the right .txt file.
transcript_path = os.path.splitext(audio_path)[0] + ".txt"

# Run the model to transcribe; the recording language is fixed to French.
model = whisper.load_model("large-v3")
result = model.transcribe(audio_path, language='fr')

# Write one transcription segment per line.
with open(transcript_path, "w") as f:
    f.write("\n".join(s['text'] for s in result['segments']))

print(f'Transcription saved in {transcript_path}')



Transcription saved in records/attention_group/#3/record.txt


### Use llama.cpp to generate a summary of meetings (locally)

In [None]:
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Callbacks support token-wise streaming
#callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Create the LlamaCpp object used by the summarization cells below.
llm = LlamaCpp(
    model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    temperature=0.75,
    top_p=1,
    top_k=50,
    max_tokens=4096,
    n_ctx=4096,
    # The keyword is `n_gpu_layers` (plural); the misspelled `n_gpu_layer`
    # is not a LlamaCpp field, so GPU offloading was never configured.
    # NOTE(review): 1024 presumably exceeds the model's layer count and
    # therefore offloads every layer — confirm.
    n_gpu_layers=1024,
    #callback_manager=callback_manager, 
    #verbose=True, # Verbose is required to pass to the callback manager
    #repeat_penalty=1.1,
    #frequency_penalty=0,
    #presence_penalty=0,
)

In [None]:
import numpy as np

# Function that uses the LLM to summarize text
def summarize(text):
    """Ask the LLM for a short English summary of a transcript excerpt.

    Args:
        text: The excerpt to summarize, either as a single string or as an
            iterable of lines (which are joined with newlines).

    Returns:
        The value returned by `llm` for the assembled prompt.
    """
    # Accept a list of lines as well as a string: interpolating a list
    # directly would put its Python repr ("['...', '...']") in the prompt.
    if not isinstance(text, str):
        text = "\n".join(text)

    # Each sentence ends with a newline so that adjacent string literals do
    # not run together (previously "...create reports.Provide a short...").
    instruction = (
        "You are an expert in summarising meetings and create reports.\n"
        "Provide a short summary of the previous text.\n"
        "Provide your answer in english.\n"
        "Please consider that the previous text may contains transcription errors."
    )
    # The text comes first and the instruction second, inside [INST] tags;
    # the line after [/INST] pre-fills the start of the model's answer.
    template = (
        f"[INST]{text}\n\n{instruction} [/INST]\n"
        "Sure, here's a short summary of the previous text:\n\n"
    )
    return llm(template)

import os

# Define filenames: the summary is written next to the transcription file.
# splitext strips only the final extension, so dots elsewhere in the path
# do not truncate the output name.
FILENAME = "data/record.txt"
OUTPUT = os.path.splitext(FILENAME)[0] + "-summary.txt"

# Open transcription file and extract text
with open(FILENAME, 'r') as f:
    text = f.read()

# Split text into chunks of 80 lines and summarize each chunk
summaries = []
lines = text.split('\n')
chunk_size = 80
for start in range(0, len(lines), chunk_size):
    # Join the lines back into one string: passing the list itself would put
    # its Python repr ("['...', '...']") into the prompt.
    chunk = "\n".join(lines[start:start + chunk_size])
    if not chunk.strip():
        # Nothing to summarize (e.g. empty transcription file).
        continue
    summary = summarize(chunk)
    summaries.append(summary)
    print(summary)

# Save summaries into a file
with open(OUTPUT, "w") as f:
    f.write("\n".join(summaries))

In [None]:
# Function that uses LLM to make a meeting report of the summary
def make_meeting_report(summary):
    """Ask the LLM to turn a meeting summary into a formal meeting report.

    Args:
        summary: Text of the meeting summary, inserted at the start of the
            prompt.

    Returns:
        The value returned by `llm` for the assembled prompt.
    """
    # Each sentence ends with a newline so that adjacent string literals do
    # not run together (previously "...to follow.Provide a meeting report...").
    # The summary precedes the instruction in the template, so the wording
    # refers to the "preceding" text throughout.
    instruction = (
       "You are an expert in making proper meeting report. The preceding text is a summary of a meeting, followed by some instructions to follow.\n"
       "Provide a meeting report based on the previous text.\n"
       "Please consider all agenda items that were discussed in the whole text and list them in the report.\n"
       "I want the report to look like a formal report."
       #"Please consider that the previous text may contains transcription errors, fix them to make the report clearer."    
    )
    # The line after [/INST] pre-fills the start of the model's answer.
    template = (
        f"[INST]{summary}\n\n{instruction} [/INST]\n"
        "Sure, here's a meeting report based on the previous summary:\n\n"
    )
    return llm(template)

import os

# Define file names: the report is written in the same directory as the
# summary file. Deriving the directory with dirname avoids a slice length
# that only matched the 18-character name "record-summary.txt".
FILENAME = "data/record-summary.txt"
OUTPUT = os.path.join(os.path.dirname(FILENAME), "meeting-report.txt")

# Open summary file and extract text
with open(FILENAME, 'r') as f:
    summary = f.read()

# Make meeting report
report = make_meeting_report(summary)
print(report)

# Save report into a file
with open(OUTPUT, "w") as f:
    f.write(report)
