# Multilingual Audio Transcription and Summarization using Whisper and Llama

In [1]:
!pip install whisper
!pip install --upgrade openai-whisper

Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: whisper
  Building wheel for whisper (setup.py) ... [?25l[?25hdone
  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41120 sha256=77dddfb1fa548482c419c7c82815ef4e5e3fffaa68214c904afb8cf5a4092eb3
  Stored in directory: /root/.cache/pip/wheels/aa/7c/1d/015619716e2facae6631312503baf3c3220e6a9a3508cb14b6
Successfully built whisper
Installing collected packages: whisper
Successfully installed whisper-1.1.10
Collecting openai-whisper
  Downloading openai-whisper-20231117.tar.gz (798 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.6/798.6 kB[0m [31m8.2

In [8]:
import os
import whisper

# Loaded Whisper models keyed by checkpoint name, so repeated calls reuse the
# weights instead of reloading them for every audio file.
_WHISPER_MODELS = {}


def _get_whisper_model(model_name):
    """Return the Whisper model for ``model_name``, loading it on first use."""
    if model_name not in _WHISPER_MODELS:
        _WHISPER_MODELS[model_name] = whisper.load_model(model_name)
    return _WHISPER_MODELS[model_name]


def audioToSpeech(audioLocation, model_name="medium"):
    """Transcribe the audio file at ``audioLocation`` with Whisper.

    The whole recording is transcribed through ``model.transcribe`` instead of
    a single ``whisper.decode`` call on a ``pad_or_trim``-ed clip, which only
    ever covers the first 30 seconds of audio.

    Args:
        audioLocation: Path to an audio file readable by ``whisper.load_audio``.
        model_name: Whisper checkpoint name passed to ``whisper.load_model``.
            Defaults to ``"medium"``.

    Returns:
        The transcript with surrounding whitespace stripped, or ``None`` when
        the path is empty/missing or processing fails. An error message is
        printed in both failure cases.
    """
    # Guard against None / "" before touching the filesystem; os.path.exists
    # raises TypeError for None instead of returning False.
    if not audioLocation or not os.path.exists(audioLocation):
        print(f"Error: Audio file '{audioLocation}' not found.")
        return None

    print(f"Loading audio from: {audioLocation}")

    try:
        model = _get_whisper_model(model_name)
        audio = whisper.load_audio(audioLocation)
        # transcribe() detects the language itself when none is given and
        # reports it in the result alongside the full transcript.
        result = model.transcribe(audio)
        print(f"Detected language: {result['language']}")
        actual_content = result["text"].strip()
        print(actual_content)  # Print decoded text
        return actual_content  # Return the decoded text
    except Exception as e:
        # Best-effort by design: report the failure and let the caller see None.
        print(f"Error during audio processing: {e}")
        return None

# Transcribe the three sample recordings in a fixed order (Hindi, German,
# French). Each name receives the transcript text, or None if transcription
# failed for that file.
decoded_text_hindi, decoded_text_german, decoded_text_french = [
    audioToSpeech(audio_path)
    for audio_path in (
        "/content/Hindi Audio.mp3",
        "/content/German Audio.mp3",
        "/content/French Audio.mp3",
    )
]

Loading audio from: /content/Hindi Audio.mp3
Detected language: hi
पल एक पल में ही थंसा गया तू राथ में राथ जो दे गया चलू मैं जहां जाए तू दाए में तेरे बाए तू हुरुत में हवाए तू साथिया
Loading audio from: /content/German Audio.mp3
Detected language: de
Auf der Eide blüht ein kleines Blügel ein, und das heißt Eger.
Loading audio from: /content/French Audio.mp3
Detected language: fr
... ... Retombe-toi et souris, le Québécois. Bonjour. ... Ça va ? Le fessier allemand est désolé. Je pense que je le connais.


In [9]:
# GPU llama-cpp-python
# Force a from-source reinstall of llama-cpp-python 0.1.78 with the cuBLAS CMake
# flag set (CMAKE_ARGS / FORCE_CMAKE), pinning numpy to 1.23.4 in the same command.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78 numpy==1.23.4 --force-reinstall --upgrade --no-cache-dir --verbose
# Provides hf_hub_download, which is used further down to fetch the model file.
!pip install huggingface_hub
# NOTE(review): the next two commands repeat the pins from the first command
# without the CUDA build flags. Presumably they are no-ops once that build has
# succeeded — confirm whether they are meant as a fallback or can be dropped.
!pip install llama-cpp-python==0.1.78
!pip install numpy==1.23.4

Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
Collecting llama-cpp-python==0.1.78
  Downloading llama_cpp_python-0.1.78.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Running command pip subprocess to install build dependencies
  Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
  Collecting setuptools>=42
    Using cached setuptools-69.5.1-py3-none-any.whl (894 kB)
  Collecting scikit-build>=0.13
    Downloading scikit_build-0.17.6-py3-none-any.whl (84 kB)
       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 84.3/84.3 kB 2.5 MB/s eta 0:00:00
  Collecting cmake>=3.18
    Downloading cmake-3.29.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)
       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.7/26.7 MB 21.6 MB/s eta 0:00:00
  Collecting ninja
    Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_

In [10]:
# Repository id and weight-file name; a later cell passes them to
# hf_hub_download() as `repo_id` and `filename` respectively.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin" # the model is in bin format

In [11]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

In [12]:
# Fetch the weight file from the Hub repository; the returned value is handed
# to Llama(model_path=...) in a later cell.
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


llama-2-13b-chat.ggmlv3.q5_1.bin:   0%|          | 0.00/9.76G [00:00<?, ?B/s]

In [13]:
# Load the downloaded weights with llama.cpp, offloading layers to the GPU.
# NOTE(review): the None assignment presumably drops any previously loaded
# instance before a new one is created on re-run — confirm.
lcpp_llm = None
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores
    n_batch=512,      # should be between 1 and n_ctx; consider available GPU VRAM
    n_gpu_layers=32,  # tune to the model and the GPU's VRAM pool
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 | 


In [14]:
# Display the n_gpu_layers value held in the loaded model's params, to check
# it matches the value requested when the model was constructed.
lcpp_llm.params.n_gpu_layers

32

## For Hindi

In [15]:
# Prompt for the Hindi transcript: a SYSTEM instruction, the transcript as the
# USER turn, and an open ASSISTANT turn for the model to complete.
PROMPT = "output is \n " + decoded_text_hindi
PROMPT_TEMPLATE = (
    "\n"
    "SYSTEM: Summarise this content in small bullet point in it actual language and then give me transaltion in english\n"
    "\n"
    f"USER: {PROMPT}\n"
    "\n"
    "ASSISTANT:\n"
)

In [16]:
# Run the model on the Hindi prompt built in the previous cell.
response = lcpp_llm(
    prompt=PROMPT_TEMPLATE,
    max_tokens=256,
    temperature=0.5,
    top_p=0.95,
    top_k=150,
    repeat_penalty=1.2,
    echo=True,  # the returned text starts with the prompt itself
)

In [17]:
# Print the text of the first returned choice. The request used echo=True, so
# the printed text begins with the prompt followed by the model's answer.
print(response["choices"][0]["text"])


SYSTEM: Summarise this content in small bullet point in it actual language and then give me transaltion in english

USER: output is 
 पल एक पल में ही थंसा गया तू राथ में राथ जो दे गया चलू मैं जहां जाए तू दाए में तेरे बाए तू हुरुत में हवाए तू साथिया

ASSISTANT:
•	पल (pal) - a drop, a small amount
•	एक पल (ek pal) - one drop
•	में (mein) - in, within
•	ही (hi) - only, just
•	थंसा (thansa) - to become wet or moist
•	राथ (rath) - chariot, vehicle
•	जो (jo) - which, that
•	दे (de) - give
•	गया (gaya) - went
•	चलू (chalu) - go
•	मैं (main) - I
•	जहाए (jahaae) - where
•	तू (tu) - you
•	दाए (dae) - give
•	बाए (bae) - take, have
•	हुरुत (hurut) - quickly
•	में (mein) - in, within
•	हवाए (havae) - blow
•	साथि


## For French

In [18]:
# Prompt for the French transcript: a SYSTEM instruction, the transcript as the
# USER turn, and an open ASSISTANT turn for the model to complete.
PROMPT = "output is \n " + decoded_text_french
PROMPT_TEMPLATE = (
    "\n"
    "SYSTEM: Summarise this content in small bullet point in it actual language and then give me transaltion in english\n"
    "\n"
    f"USER: {PROMPT}\n"
    "\n"
    "ASSISTANT:\n"
)

In [19]:
# Run the model on the French prompt built in the previous cell.
response = lcpp_llm(
    prompt=PROMPT_TEMPLATE,
    max_tokens=256,
    temperature=0.5,
    top_p=0.95,
    top_k=150,
    repeat_penalty=1.2,
    echo=True,  # the returned text starts with the prompt itself
)

Llama.generate: prefix-match hit


In [20]:
# Print the text of the first returned choice. The request used echo=True, so
# the printed text begins with the prompt followed by the model's answer.
print(response["choices"][0]["text"])


SYSTEM: Summarise this content in small bullet point in it actual language and then give me transaltion in english

USER: output is 
 ... ... Retombe-toi et souris, le Québécois. Bonjour. ... Ça va ? Le fessier allemand est désolé. Je pense que je le connais.

ASSISTANT:
•	Retombe-toi : come back/return (to a previous state or position)
•	souris : smile
•	Bonjour : hello
•	ça va ? : how are you?
•	le fessier allemand : the German ass (a derogatory term for a German person)
•	je le connais : I know him/it (used to express familiarity or recognition)

ENGLISH TRANSLATION:
•	Come back and smile, my Québécois friend. Hello! How are you? Oh, it's the German butt (a derogatory term for a German person). I know him/it (used to express familiarity or recognition)


## For German

In [21]:
# Prompt for the German transcript: a SYSTEM instruction, the transcript as the
# USER turn, and an open ASSISTANT turn for the model to complete.
PROMPT = "output is \n " + decoded_text_german
PROMPT_TEMPLATE = (
    "\n"
    "SYSTEM: Summarise this content in small bullet point in it actual language and then give me transaltion in english\n"
    "\n"
    f"USER: {PROMPT}\n"
    "\n"
    "ASSISTANT:\n"
)

In [22]:
# Run the model on the German prompt built in the previous cell.
response = lcpp_llm(
    prompt=PROMPT_TEMPLATE,
    max_tokens=256,
    temperature=0.5,
    top_p=0.95,
    top_k=150,
    repeat_penalty=1.2,
    echo=True,  # the returned text starts with the prompt itself
)

Llama.generate: prefix-match hit


In [23]:
# Print the text of the first returned choice. The request used echo=True, so
# the printed text begins with the prompt followed by the model's answer.
print(response["choices"][0]["text"])


SYSTEM: Summarise this content in small bullet point in it actual language and then give me transaltion in english

USER: output is 
 Auf der Eide blüht ein kleines Blügel ein, und das heißt Eger.

ASSISTANT:
•	Auf der Eide blüht ein kleines Blugel ein - This means that a small flower called "Blugel" grows on the meadow.
•	und das heißt Eger - And this is called "Eger".

ENGLISH TRANSLATION:
• A small flower named "Blugel" blooms in the meadow, and it's called "Eger".
