# **Libraries**

In [None]:
# Install necessary libraries
!pip install transformers datasets evaluate jiwer librosa langdetect
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForSequenceClassification
from IPython.display import Audio, display
import librosa  # Library to process audio files
from langdetect import detect  # Language detection library



In [None]:
# Load an audio file from a given path, keeping its original sampling rate
def preprocess_audio(audio_path):
    """Read the audio file at ``audio_path`` with librosa.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The ``(audio_sample_array, sampling_rate)`` pair produced by
        ``librosa.load``.
    """
    # sr=None asks librosa to keep the file's original sampling rate.
    return librosa.load(audio_path, sr=None)

# **Speech to Text English**

In [None]:
# Function 1: Speech to Text (English) using openai/whisper-medium
def speech_to_text_english(audio_sample_array, sampling_rate):
    """Transcribe a waveform with the openai/whisper-medium ASR pipeline.

    No language is passed to the pipeline here; the audio is handed over
    as-is together with its sampling rate.

    Args:
        audio_sample_array: Audio samples to transcribe.
        sampling_rate: Sampling rate of ``audio_sample_array``.

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    # A fresh pipeline is created on every call and pinned to the CPU.
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-medium",
        device="cpu",
    )
    audio_input = {"array": audio_sample_array, "sampling_rate": sampling_rate}
    # max_new_tokens bounds how many tokens are generated for the transcript.
    return transcriber(audio_input, max_new_tokens=256)["text"]

In [None]:
# Function 2: Translate Text (English to Arabic) using facebook/nllb-200-distilled-600M
def translate_text_to_arabic(text, src_lang="eng_Latn", tgt_lang="arb_Arab"):
    """Translate ``text`` with the facebook/nllb-200-distilled-600M checkpoint.

    Args:
        text: Text to translate.
        src_lang: Source language code handed to the translation pipeline
            (defaults to ``"eng_Latn"``).
        tgt_lang: Target language code handed to the translation pipeline
            (defaults to ``"arb_Arab"``).

    Returns:
        The ``'translation_text'`` of the first pipeline result, or an empty
        string when ``text`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to translate: skip the model load and avoid generating
    # output from an input that carries no content.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    checkpoint = "facebook/nllb-200-distilled-600M"
    translator = pipeline(
        "translation",
        model=AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,  # length cap handed to the pipeline
    )
    return translator(text)[0]["translation_text"]

In [None]:
# Function 3: Sentiment Analysis (Arabic) using CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment
def sentiment_analysis_arabic(text):
    """Classify the sentiment of Arabic ``text``.

    Args:
        text: Arabic text to classify.

    Returns:
        The raw output of the sentiment-analysis pipeline (this notebook's
        recorded run shows a list with one ``{'label', 'score'}`` dict).
    """
    checkpoint = "CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment"
    arabic_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    arabic_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    arabic_sentiment_pipe = pipeline(
        "sentiment-analysis",
        model=arabic_model,
        tokenizer=arabic_tokenizer,
        device="cpu",
    )

    # The pipeline does not truncate on its own, so an input longer than the
    # model's position limit would fail inside the model. Clip it instead.
    tokenizer_kwargs = {"truncation": True}
    position_limit = getattr(arabic_model.config, "max_position_embeddings", None)
    if position_limit:
        tokenizer_kwargs["max_length"] = position_limit

    return arabic_sentiment_pipe(text, **tokenizer_kwargs)

# **Speech to Text Arabic**

In [None]:
# Function 4: Speech to Text (Arabic) using openai/whisper-medium
def speech_to_text_arabic(audio_sample_array, sampling_rate):
    """Transcribe a waveform as Arabic with openai/whisper-medium.

    The language and task are passed explicitly through ``generate_kwargs``
    so the model decodes Arabic text instead of choosing a language itself.

    Args:
        audio_sample_array: Audio samples to transcribe.
        sampling_rate: Sampling rate of ``audio_sample_array``.

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    pipe_arabic = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-medium",
        device="cpu",
    )
    result = pipe_arabic(
        {"array": audio_sample_array, "sampling_rate": sampling_rate},
        max_new_tokens=256,
        # Force Arabic transcription (not translation into English).
        generate_kwargs={"language": "arabic", "task": "transcribe"},
    )
    return result["text"]

In [None]:
# Function 5: Translate Text (Arabic to English) using facebook/nllb-200-distilled-600M
def translate_text_to_english(text, src_lang="arb_Arab", tgt_lang="eng_Latn"):
    """Translate ``text`` with the facebook/nllb-200-distilled-600M checkpoint.

    Args:
        text: Text to translate.
        src_lang: Source language code handed to the translation pipeline
            (defaults to ``"arb_Arab"``).
        tgt_lang: Target language code handed to the translation pipeline
            (defaults to ``"eng_Latn"``).

    Returns:
        The ``'translation_text'`` of the first pipeline result, or an empty
        string when ``text`` is ``None``, empty, or whitespace-only.
    """
    # An input with no content has nothing to translate; return early so the
    # model is neither loaded nor asked to generate from nothing.
    has_no_content = text is None or (isinstance(text, str) and not text.strip())
    if has_no_content:
        return ""

    checkpoint = "facebook/nllb-200-distilled-600M"
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    translation_pipeline = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,  # length cap handed to the pipeline
    )

    first_result = translation_pipeline(text)[0]
    return first_result["translation_text"]

In [None]:
# Function 6: Sentiment Analysis (English) using nlptown/bert-base-multilingual-uncased-sentiment
def sentiment_analysis_english(text):
    """Classify the sentiment of English ``text``.

    Args:
        text: English text to classify.

    Returns:
        The raw output of the sentiment-analysis pipeline.
    """
    # NOTE(review): this checkpoint may use a different label set than the
    # Arabic sentiment model; confirm before comparing the two results.
    checkpoint = "nlptown/bert-base-multilingual-uncased-sentiment"
    english_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    english_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    english_sentiment_pipe = pipeline(
        "sentiment-analysis",
        model=english_model,
        tokenizer=english_tokenizer,
        device="cpu",
    )

    # Without truncation an input longer than the model's position limit
    # fails inside the model, so clip the token sequence to that limit.
    tokenizer_kwargs = {"truncation": True}
    position_limit = getattr(english_model.config, "max_position_embeddings", None)
    if position_limit:
        tokenizer_kwargs["max_length"] = position_limit

    return english_sentiment_pipe(text, **tokenizer_kwargs)

# **Detect the Language and Run the Matching Pipeline**

In [None]:
# Function to detect language
def detect_language(text):
    """Return the language code langdetect assigns to ``text``.

    Args:
        text: Text whose language should be identified.

    Returns:
        The code returned by ``langdetect.detect`` (e.g. ``'en'``), or
        ``"unknown"`` when ``text`` is empty/whitespace-only or langdetect
        cannot detect a language in it.
    """
    # An empty transcript (e.g. from a silent clip) has nothing to detect.
    if not text or not text.strip():
        return "unknown"

    # Imported here so this function does not need extra names from the
    # notebook's import cell.
    from langdetect import DetectorFactory
    from langdetect.lang_detect_exception import LangDetectException

    # langdetect is non-deterministic unless seeded; fix the seed so
    # re-running the notebook gives the same answer for the same text.
    DetectorFactory.seed = 0
    try:
        return detect(text)
    except LangDetectException:
        # Raised when the text has no usable features (digits, symbols only).
        return "unknown"


# --- Input: location of the recording to analyse ---
audio_path = "/content/output_audio.wav"

# --- Load the waveform together with its sampling rate ---
audio_sample_array, sampling_rate = preprocess_audio(audio_path)

# Render an inline player so the clip can be checked by ear
display(Audio(data=audio_sample_array, rate=sampling_rate))

# --- First transcription pass; language detection runs on this text ---
text = speech_to_text_english(audio_sample_array, sampling_rate)
print("Speech to Text Output:", text)

# --- Identify the language of the transcript ---
detected_language = detect_language(text)
print("Detected Language:", detected_language)

# --- Dispatch on the detected language code ---
if detected_language == "ar":
    # Arabic audio: transcribe -> translate to English -> English sentiment
    print("Processing Arabic pipeline...")

    text_arabic = speech_to_text_arabic(audio_sample_array, sampling_rate)
    print("Speech to Text (Arabic):", text_arabic)

    translated_text_english = translate_text_to_english(text_arabic)
    print("Translated Text (Arabic to English):", translated_text_english)

    sentiment_english = sentiment_analysis_english(translated_text_english)
    print("Sentiment Analysis (English):", sentiment_english)

elif detected_language == "en":
    # English audio: translate the transcript to Arabic -> Arabic sentiment
    print("Processing English pipeline...")

    translated_text_arabic = translate_text_to_arabic(text)
    print("Translated Text (English to Arabic):", translated_text_arabic)

    sentiment_arabic = sentiment_analysis_arabic(translated_text_arabic)
    print("Sentiment Analysis (Arabic):", sentiment_arabic)

else:
    # Any other language code has no pipeline in this notebook
    print("Unsupported language detected.")

Speech to Text Output:  everyone and welcome to EnglishPod my name is Marco and I'm Erica how are you Erica? Marco I'm doing really well how about you? I'm doing great I'm really excited because today
Detected Language: en
Processing English pipeline...
Translated Text (English to Arabic): مرحباً بكم في الإنجليزية، اسمي ماركو وأنا إيريكا كيف حالك؟ ماركو أنا بخير حقاً ماذا عنك؟ أنا بخير أنا متحمس جداً لأن اليوم
Sentiment Analysis (Arabic): [{'label': 'neutral', 'score': 0.8497380614280701}]
