<a href="https://colab.research.google.com/github/Kandil7/english_to_arabic_convo/blob/main/english_to_arabic_conversation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
!pip install -q torch torchaudio transformers gradio pydub
!pip install git+https://github.com/openai/whisper.git

In [None]:
# Download a sample English recording and convert it for the pipeline.
# wget: -q silences progress output, -O sets the output file name.
!wget -q https://www.sound-effects.in/audio/samples/10610/english-conversation.mp3 -O sample_english.mp3
# ffmpeg: -y overwrites an existing output file, -ac 1 downmixes to mono and
# -ar 16000 resamples to 16 kHz. The resulting WAV is the file the test cell
# and the Gradio example read.
!ffmpeg -y -i sample_english.mp3 -ac 1 -ar 16000 sample_english.wav

In [None]:
# Load models
import torch
from transformers import MarianMTModel, MarianTokenizer, AutoModelForCausalLM, AutoTokenizer
import whisper

# Checkpoint identifiers, named once so each tokenizer/model pair is guaranteed
# to come from the same checkpoint.
WHISPER_CHECKPOINT = "base"
TRANSLATION_CHECKPOINT = "Helsinki-NLP/opus-mt-en-ar"
CONVO_CHECKPOINT = "aubmindlab/aragpt2"

# Speech-to-text model.
whisper_model = whisper.load_model(WHISPER_CHECKPOINT)

# English -> Arabic translation: MarianMT tokenizer and model.
translation_tokenizer = MarianTokenizer.from_pretrained(TRANSLATION_CHECKPOINT)
translation_model = MarianMTModel.from_pretrained(TRANSLATION_CHECKPOINT)

# Arabic causal language model used to continue the translated text.
convo_tokenizer = AutoTokenizer.from_pretrained(CONVO_CHECKPOINT)
convo_model = AutoModelForCausalLM.from_pretrained(CONVO_CHECKPOINT)


In [None]:
#Define functions
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio_path: Path of the audio file handed to ``whisper_model.transcribe``.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    return whisper_model.transcribe(audio_path)["text"]

def translate_to_arabic(text, batch_size=8):
    """Translate English text to Arabic with the module-level MarianMT model.

    The text is split into sentences and translated in batches rather than as
    one long sequence, so a multi-sentence transcript is not fed to the model
    as a single unbounded input. Each sentence is additionally truncated to
    the tokenizer's maximum length.

    Args:
        text: English text to translate. ``None``, empty or whitespace-only
            input yields an empty string without calling the model.
        batch_size: Number of sentences translated per ``generate`` call.

    Returns:
        str: The translated sentences joined with single spaces.
    """
    import re  # local import: only this function needs it

    if not text or not text.strip():
        return ""

    # Split after sentence-ending punctuation followed by whitespace.
    sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]

    translated = []
    for start in range(0, len(sentences), batch_size):
        batch = sentences[start:start + batch_size]
        inputs = translation_tokenizer(
            batch,
            return_tensors="pt",
            padding=True,      # pad to the longest sentence in the batch
            truncation=True,   # never exceed the tokenizer's maximum length
        )
        outputs = translation_model.generate(**inputs)
        translated.extend(
            translation_tokenizer.batch_decode(outputs, skip_special_tokens=True)
        )
    return " ".join(translated)

def generate_conversation(prompt, max_length=50):
    """Continue an Arabic prompt with the module-level causal language model.

    Args:
        prompt: Text to continue. ``None``, empty or whitespace-only input
            yields an empty string without calling the model.
        max_length: Maximum number of tokens to generate *after* the prompt.
            It is passed as ``max_new_tokens`` so that a prompt longer than
            this value still leaves room for generation.

    Returns:
        str: The decoded output sequence (prompt followed by the generated
        continuation), with special tokens removed.
    """
    if not prompt or not prompt.strip():
        return ""

    inputs = convo_tokenizer(prompt, return_tensors="pt")

    # GPT-2 style tokenizers may not define a pad token; fall back to EOS so
    # generate() has an explicit padding id.
    pad_token_id = convo_tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = convo_tokenizer.eos_token_id

    outputs = convo_model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_length,
        # top_k / top_p / temperature only take effect when sampling is on.
        do_sample=True,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=pad_token_id,
    )
    return convo_tokenizer.decode(outputs[0], skip_special_tokens=True)


In [None]:
# Smoke-test the pipeline end to end on the sample recording.
# Each stage prints its result before the next stage starts.
# Stage 1: speech -> English text.
english_text = transcribe_audio("sample_english.wav")
print("English Transcript:", english_text)

# Stage 2: English text -> Arabic translation.
arabic_text = translate_to_arabic(english_text)
print("Translated Arabic:", arabic_text)

# Stage 3: the Arabic translation is used as the prompt for text generation.
conversational_arabic = generate_conversation(arabic_text)
print("Conversational Arabic:", conversational_arabic)


In [None]:
#Gradio interface
import gradio as gr

def process_audio(audio_file):
    """Run the transcribe -> translate -> generate pipeline for the Gradio UI.

    Args:
        audio_file: Path of the uploaded audio file, as supplied by
            ``gr.Audio(type="filepath")``. ``None`` or empty when the form is
            submitted without audio.

    Returns:
        tuple[str, str, str]: ``(english_transcript, arabic_translation,
        conversational_arabic)``, in the same order as the interface's three
        output textboxes.
    """
    # Nothing was uploaded: leave all three textboxes empty instead of
    # failing inside the transcription step.
    if not audio_file:
        return "", "", ""

    english_text = transcribe_audio(audio_file)

    # A blank transcript gives the later stages nothing to work with.
    if not english_text or not english_text.strip():
        return english_text or "", "", ""

    arabic_text = translate_to_arabic(english_text)
    conversational_arabic = generate_conversation(arabic_text)

    # Gradio maps multiple outputs positionally, so return one value per
    # output component rather than a dict keyed by label.
    return english_text, arabic_text, conversational_arabic

# Input widget: the uploaded audio reaches the callback as a file path.
audio_input = gr.Audio(type="filepath", label="Upload English Audio")

# One textbox per pipeline stage, in the order the callback produces them.
output_labels = ("English Transcript", "Translated Arabic", "Conversational Arabic")
text_outputs = [gr.Textbox(label=label) for label in output_labels]

iface = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=text_outputs,
    examples=[["sample_english.wav"]],
    title="English to Arabic Conversational AI",
)

# Start the app with Gradio's debug mode enabled.
iface.launch(debug=True)

# Key Notes

## GPU Recommendation
Use a GPU runtime for faster inference (especially for Whisper and AraGPT2).
## Sample Audio
The provided sample (`sample_english.wav`) is for testing only. Replace it with your own audio.
## Model Adjustments
- For better accuracy, use `whisper.load_model("large-v2")`.
- For dialectal Arabic, fine-tune `aubmindlab/aragpt2` on dialect-specific data.