In [13]:
import locale
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

In [14]:
! pip install flash-attn --no-build-isolation
! pip install langdetect nltk



In [15]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
import torch
import torchaudio
import os
import numpy as np
from langdetect import detect
import nltk

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def setPipeLine(size:str = 'large-v3'):
  model= "openai/whisper-" + size
  return pipeline(
    "automatic-speech-recognition",
    model=model,
    chunk_length_s=30,
    device=DEVICE,
    generate_kwargs={"task": "translate"}
  )

tts_pipe = setPipeLine() # tiny, base, small, medium, large, large-v2, large-v3 (default)

nltk.download('punkt')
ar_en_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-ar-en")
en_ar_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ar")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [28]:
def speech2txt(audio_path):
    # Load the audio using torchaudio
    waveform, sample_rate = torchaudio.load(audio_path)

    # Convert stereo to mono
    if waveform.shape[0] > 1:
        waveform = waveform[0].unsqueeze(0)  # Keep the first channel and add a new dimension
        # waveform = torch.mean(waveform, dim=0, keepdim=True)  # Another method: Average across channels

    # Resample waveform if necessary
    if sample_rate != 16000:
        waveform = torchaudio.transforms.Resample(sample_rate, 16000)(waveform)
    # drop extra dimensions
    waveform = waveform.squeeze().numpy()
    # Generate token ids
    transcription = tts_pipe(waveform, batch_size=8)
    return transcription['text']

def split_text(text, max_segment_length = 480):
    segments = []
    sentences = nltk.sent_tokenize(text)  # Sentence segmentation
    current_segment = ''
    for sentence in sentences:
        if len(current_segment) + len(sentence) < max_segment_length:
            current_segment += sentence + ' '
        else:
            segments.append(current_segment.strip())
            current_segment = sentence + ' '
    if current_segment:
        segments.append(current_segment.strip())
    return segments

# Function to translate segments
def translate_segments(segments, pipe):
    translated_segments = []
    for segment in segments:
        translation = pipe(segment)[0]['translation_text']
        translated_segments.append(translation)
    return ''.join(translated_segments)

def en2ar(text:str):
  return translate_segments(split_text(text), en_ar_pipe)

def ar2en(text:str):
  return translate_segments(split_text(text), ar_en_pipe)


In [4]:
! curl 'https://archive.org/download/06-arabicSpeechByAsh-sheikhR/06-arabicSpeechByAsh-sheikhRameezazhariAtConvocationCeremony2014OfAl-azhariyyahArabicCollegeMadigeGalagedara-Www.tamilbayans.com.mp3?tunnel=1'   -H 'accept: */*'   -H 'accept-language: ar-EG,ar;q=0.9,en-US;q=0.8,en;q=0.7'   -H 'cache-control: no-cache'   -H 'cookie: donation-identifier=b594d465f2f3aa927c06e9619e87a22a; abtest-identifier=619d9b93ea958a3d7fc567b88f81f352; PHPSESSID=2g0nr4updves43r0b2ogckpmub'   -H 'pragma: no-cache'   -H 'range: bytes=0-'   -H 'referer: https://archive.org/details/06-arabicSpeechByAsh-sheikhR?webamp=default'   -H 'sec-ch-ua: "Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"'   -H 'sec-ch-ua-mobile: ?0'   -H 'sec-ch-ua-platform: "Windows"'   -H 'sec-fetch-dest: audio'   -H 'sec-fetch-mode: cors'   -H 'sec-fetch-site: same-origin'   -H 'user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36' -o ./test.mp3

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1337k  100 1337k    0     0  4574k      0 --:--:-- --:--:-- --:--:-- 4580k


In [17]:
x = speech2txt('./test.mp3')
x

' Praise be to Allah. Praise be to Allah who taught with a pen. He taught man what he did not know. And who created everything and estimated it with appreciation. And peace and blessings be upon you. I seek refuge with Allah from the virtue of knowledge and His people. Praise be to Allah and praise be to Allah. Indeed, our religion is the religion of Islam, the religion of knowledge and knowledge. Where Islam came and the nations were lost in the Quran So this verse was the first light in this dark night. Then the believing verses came to light that this religion is the religion of the knowledge of the blind. So he invites his people to the knowledge and dispels them from the darkness. So the Ummah turned into the Ummah of knowledge and light. Allah Almighty said worship the chosen face except with knowledge. The Ummah cannot be straight on the correct path except with knowledge. My beloved in Allah, there are many Quranic verses and many prophetic hadiths, all of them are doubtless th

In [29]:
ar = en2ar(x)

['الحمد لله. الحمد لله الذي علّم بقلم. علّم الانسان ما لم يعرفه. وخلّق كل شيء وقيّمه بتقدير. والسلام والبركات عليكم. الجأ الى الله من فضل العلم وشعبه. الحمد لله والتسبيح لله. ان ديننا هو دين الاسلام ودين العلم والمعرفة.', 'حيث جاء الإسلام وخسرت الأمم في القرآن، كانت هذه الآية أول نور في هذه الليلة المظلمة، ثم أظهرت آيات الإيمان أن هذا الدين دين معرفة العمي، فدعا شعبه إلى العلم وصرفهم عن الظلمة، فتحولت الأمة إلى أمة العلم والنور، وقال الله تعالى تعالى اعبد الوجه المختار إلا بعلم.', 'لا يمكن للأمة أن تكون مستقيمة على الطريق الصحيح إلا بالعلم. يا أعزائي في الله هناك العديد من الآيات القرآنية والكثير من التعاليم النبوية كلها شك في أن الذين أعطوا العلم هم الأفضل في هذه الأمة إلى الأبد. كما يقول الله يُرِق الله المؤمنين منكم والمعطين العلم بالدرجات. يقول المترجمون في هذا الآية إن الله يُرِق المؤمن الذي هو عالم للمؤمن الذي ليس بعالم.', 'إن أصحاب العلم هم الذين يتقون الله على وجه هذا القرآن الكريم، سندرك أن سر الشريعة قد افتخروا بالعلوم العالمية في العلوم الدينية وهم فخورون بها، وإن كان فخر وم

In [30]:
en = ar2en(ar)

["Praise be to God. Praise be to God, who taught by pen. He taught man what he didn't know. He created and assessed all things with appreciation. Peace and blessings be upon you. He came to God out of the grace of science and his people.", 'Our religion is Islam and the religion of science and knowledge. Where Islam came and nations were lost in the Koran, this was the first light on this dark night, and the signs of faith showed that this religion is a religion of knowledge of the blind. He called on his people to learn and to keep them from the darkness, and the nation came to the nation of science and light.', 'Dear God, there are many Koranic verses and many prophetic teachings all doubt that those who have given knowledge are the best in this nation for ever. As God says, God loves those who believe in you and those who give knowledge of degrees.', 'It is the people of science who fear God in the face of this Holy Koran that we will realize that the secret of the Shariah has been 

In [32]:
en

'Praise be to God. Praise be to God, who taught by pen. He taught man what he didn\'t know. He created and assessed all things with appreciation. Peace and blessings be upon you. He came to God out of the grace of science and his people.Our religion is Islam and the religion of science and knowledge. Where Islam came and nations were lost in the Koran, this was the first light on this dark night, and the signs of faith showed that this religion is a religion of knowledge of the blind. He called on his people to learn and to keep them from the darkness, and the nation came to the nation of science and light.Dear God, there are many Koranic verses and many prophetic teachings all doubt that those who have given knowledge are the best in this nation for ever. As God says, God loves those who believe in you and those who give knowledge of degrees.It is the people of science who fear God in the face of this Holy Koran that we will realize that the secret of the Shariah has been proud of and