In [43]:
import os
import re
import spacy
import whisper
import torch
import subprocess

 
device = "cuda" if torch.cuda.is_available() else "cpu"
 
model = whisper.load_model("base", device=device)

def download_audio(youtube_url, output_path="audio.mp3"):
    """Download the audio from a YouTube video."""
    try:
        command = [
            "yt-dlp",
            "--format", "bestaudio",
            "--extract-audio",
            "--audio-format", "mp3",
            "--output", output_path,
            youtube_url,
        ]
        subprocess.run(command, check=True)
        print(f"Audio downloaded to {output_path}")
        return output_path
    except Exception as e:
        print(f"Error downloading audio: {e}")
        return None

def transcribe_audio(audio_path):
    """Transcribe the audio using Whisper."""
    try:
        print("Extracting transcript")
        result = model.transcribe(audio_path)
        return result  # Contains text and timestamps
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return None

def save_transcript_with_timestamps(transcript, output_file="transcript.txt"):
    """Save the transcript with timestamps to a file."""
    try:
        with open(output_file, "w") as file:
            for segment in transcript['segments']:
                start_time = segment['start']
                end_time = segment['end']
                text = segment['text']
                file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
        print(f"Transcript saved to {output_file}")
    except Exception as e:
        print(f"Error saving transcript: {e}")

def detect_terms_to_exclude(text:str):
    """Detect terms that are likely to be technical or domain-specific."""
  
    nlp = spacy.load("en_core_web_trf")

    patterns = [
        {
            "label": "TECH",
            "pattern": [
                {"lower": "machine"},
                {"lower": "learning"}
            ]
        },
        {
            "label": "TECH",
            "pattern": [
                {"lower": "artificial"},
                {"lower": "intelligence"}
            ]
        },
        {
            "label": "TECH",
            "pattern": [
                {"lower": "data"},
                {"lower": "science"}
            ]
        },
        {
            "label": "TECH",
            "pattern": [
                {"TEXT": "ML"},
            ]
        },
          {
            "label": "PERSON",
            "pattern": [
                {"TEXT": "Rover"},
            ]
        },
        {
            "label": "TECH",
            "pattern": [{"TEXT": {"REGEX": "(?i)^ai$"}}]
        },
        {
                "label": "LINK",
                "pattern": [
                    {"TEXT": {"REGEX": r"https?://[^\s]+"}}
                ]
        }
    ]

    ruler = nlp.add_pipe("entity_ruler", before="ner") 
    ruler.add_patterns(patterns)

    doc = nlp(text)     
 
    exclude_terms = []
 
    for ent in doc.ents:
        exclude_terms.append(ent.text)
    
 
    for token in doc:
        if token.pos_ == "PROPN" or (token.is_upper and len(token) > 1):  
            exclude_terms.append(token.text)

    exclude_terms = [term for term in exclude_terms if not re.search(r"(\n|%|\d{1,2}%|^\d{1,2}$)", term)]
    exclude_terms = list(set(exclude_terms))
    
    return exclude_terms

 
 
def generate_system_prompt(exclude_terms:list|None=None, custom_terms:dict|None=None):
 
    if exclude_terms is None:
        exclude_terms = []
    if custom_terms is None:
        custom_terms = {}
 
    exclusion_text = ", ".join([f"{term}" for term in exclude_terms])
    exclusion_text.rstrip(",")
    exclusion_text += "."
    custom_terms_text = "\n".join([f"'{eng_term}' => '{myanmar_term}'" for eng_term, myanmar_term in custom_terms.items()])

 
    prompt = f"""
    Translate the following English text to Burmese (Myanmar) while keeping the original English terms for special terms.
    Please exclude the following terms from translation: 
    {exclusion_text}
    """
    
    if custom_terms:
        prompt += f"""Also, translate the following custom terms as specified:
    {custom_terms_text}"""
    return prompt



def save_file(filepath: str, content: str):
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
        
def read_file(filepath:str):
    with open(filepath , "r") as f:
        return f.read()


  checkpoint = torch.load(fp, map_location=device)


In [None]:
youtube_url = "https://www.youtube.com/watch?v=DmgGGUYn2c8"
output_filename = "trnascript.txt"
audio_file = download_audio(youtube_url)

if audio_file:

    transcript = transcribe_audio(audio_file)
    
    if transcript:

        save_transcript_with_timestamps(transcript, output_filename)

    os.remove(audio_file)

In [44]:
 
transcript_filepath = "transcripts/transcript.txt"

english_text = read_file(transcript_filepath)


my_exclude_terms = ["Abilities", "character", "icon", "players", "player", "play", "damage", "normal attack", "action", "actions"]

exclude_terms = detect_terms_to_exclude(english_text)
exclude_terms = exclude_terms + my_exclude_terms

exclude_terms = list(set(exclude_terms))
print("Terms to Exclude:", exclude_terms)




  model.load_state_dict(torch.load(filelike, map_location=device))


Terms to Exclude: ['Concerto Energy / Outro /', 'an Outro Skill', 'play', 'action', 'Skill', 'Ult', 'Echo', 'character', 'players', 'damage', 'Concerto Energy', 'Forte Circuit', 'Forte', 'Concerto', 'player', 'actions', 'Intro', 'Energy', 'Outro', 'normal attack', 'icon', 'Abilities', 'Gadget', 'Circuit']


In [45]:
custom_terms = {
        "for each character": "character တစ်ကောင်ချင်းစီအတွက်",
    }

transcript_filepath = "transcripts/transcript.txt"

original_timestamp_text = read_file(transcript_filepath)


system_prompt = generate_system_prompt(exclude_terms=exclude_terms, custom_terms=None)

In [46]:
print(system_prompt)
 


    Translate the following English text to Burmese (Myanmar) while keeping the original English terms for special terms.
    Please exclude the following terms from translation: 
    Concerto Energy / Outro /, an Outro Skill, play, action, Skill, Ult, Echo, character, players, damage, Concerto Energy, Forte Circuit, Forte, Concerto, player, actions, Intro, Energy, Outro, normal attack, icon, Abilities, Gadget, Circuit.
    


In [47]:
import requests
 
import openai

response = requests.get("http://localhost:11434")
if response.status_code != 200:
    raise Exception("Ollama Server is not running!")

print(f"Ollama status: {response.status_code}")

client = openai.Client(
    base_url="http://localhost:11434/v1",
    api_key="ollama"  
)

user_message = f"""
Translate this:
{english_text}
"""

response = client.chat.completions.create(
    model="deepseek-r1:7b",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ],
    stream=False,
    
)

response = response.choices[0].message.content




Ollama status: 200


In [50]:
# Extract the <think> section
think_match = re.search(r'<think>(.*?)</think>', response, re.DOTALL)
think_section = think_match.group(1).strip() if think_match else None

# Extract the translated result
translated_result = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL).strip()

# print("Think Section:")
# print(think_section)
print("\nTranslated Result:")
print(translated_result)


Translated Result:
Here’s an attempt to translate your text into Burmese (Myanmar) while keeping a few of the terms you listed in English:

---

I think it's time to teach new players how to play the game, solet's get into it!
Aberations

Let's start with the character's aberations: the Ult, Echo, skill, gadget, Forte Circuit, and Concerto Energy. These are the primary aberations for each character. If you're wondering about the small circular icon, that's the lock-on feature.
Concerto Energy / Outro / Intro

Concerto Energy is a resource that needs to be filled to use certain abilities. You gain this energy by performing attacks or other actions in battle. Once the gauge is full, you can cast an ability called an Outro Skill by switching to another character.

---

I hope this helps!


In [10]:
import requests

# Ollama server URL
OLLAMA_URL = "http://localhost:11434/api/generate"

# Define the payload for the API request
payload = {
    "model": "deepseek-r1:7b",  # Replace with your model name
    "prompt": "Hello, how are you?",  # Your input prompt
    "stream": False  # Set to True if you want streaming responses
}

# Send a POST request to the Ollama API
response = requests.post(OLLAMA_URL, json=payload)

# Check if the request was successful
if response.status_code == 200:
    # Print the response from the model
    print("Response from Deepseek:")
    print(response.json().get("response"))
else:
    print(f"Error: {response.status_code} - {response.text}")

Response from Deepseek:
<think>

</think>

Hello! I'm just a virtual assistant, so I don't have feelings, but thanks for asking! How are *you* doing today? 😊


In [79]:
pattern = r"\[\d+\.\d{2} - \d+\.\d{2}\]"
 
 
text = read_file("transcripts/transcript.txt") 
timestamps = re.findall(pattern, text)

time_stamps ="" 
time_stamps += "\n".join(timestamps)

save_file("timestamps.txt", time_stamps)


In [4]:
import torch
import numpy as np
import re
import uuid
from transformers import pipeline
import soundfile as sf

if torch.cuda.is_available():
    device = "cuda"

myanmar_model_name = "facebook/mms-tts-mya"
english_model_name = "facebook/mms-tts-eng"
myanmar_model = pipeline("text-to-speech", model=myanmar_model_name)
english_model = pipeline("text-to-speech", model=english_model_name)

sample_rate = 16000
pause_duration = 0.5  

myanmar_text = """
ကဲ ဒါဆိုရင်တော့, character ရဲ့ Abilities တွေကို စတင်ကြည့်ရှုရအောင်
Ult, Echo, Skill, Gadget, Forte Circuit, နဲ့ Concerto Energy.
ဒါတွေက character တစ်ကောင်ချင်းစီအတွက်
အဓိက Abilities တွေဖြစ်ပါတယ်။
ပုံကဝိုင်း icon လေးဟာ  lock-on feature ဖြစ်ပါတယ်။
"""

def generate_audio(text, model):
    speech = model(text)
    audio_data = speech["audio"]
    if audio_data.ndim == 2 and audio_data.shape[0] == 1:
        audio_data = audio_data.squeeze(0)
    return audio_data


# Extract English words and Myanmar text segments
segments = re.split(r'(\b[A-Za-z]+\b)', myanmar_text)
audio_segments = []

for segment in segments:
    segment = segment.strip()
    if not segment:
        continue
    if re.match(r'^[A-Za-z]+$', segment):
     
        print(f"segment (English): {segment}")
        audio_segments.append(generate_audio(segment, english_model))
    else:
        if segment.strip() in [",", ".", "။", "၊", ":", ";","-"]:
            continue
      
        print(f"transcript (Myanmar): {segment}")
        audio_segments.append(generate_audio(segment, myanmar_model))
    
    # Add pause for newline characters
    if '\n' in segment:
        pause = np.zeros(int(pause_duration * sample_rate)) 
        audio_segments.append(pause)

# Combine all audio segments
combined_audio = np.concatenate(audio_segments)
 
random_uuid = uuid.uuid4()
output_file = f"samples/combined_audio_{random_uuid}.wav"
sf.write(output_file, combined_audio, sample_rate)
print(f"Audio saved as {output_file}")

Device set to use cpu
Device set to use cpu


transcript (Myanmar): ကဲ ဒါဆိုရင်တော့,
segment (English): character
transcript (Myanmar): ရဲ့
segment (English): Abilities
transcript (Myanmar): တွေကို စတင်ကြည့်ရှုရအောင်
segment (English): Ult
segment (English): Echo
segment (English): Skill
segment (English): Gadget
segment (English): Forte
segment (English): Circuit
transcript (Myanmar): , နဲ့
segment (English): Concerto
segment (English): Energy
transcript (Myanmar): .
ဒါတွေက
segment (English): character
transcript (Myanmar): တစ်ကောင်ချင်းစီအတွက်
အဓိက
segment (English): Abilities
transcript (Myanmar): တွေဖြစ်ပါတယ်။
ပုံကဝိုင်း
segment (English): icon
transcript (Myanmar): လေးဟာ
segment (English): lock
segment (English): on
segment (English): feature
transcript (Myanmar): ဖြစ်ပါတယ်။
Audio saved as samples/combined_audio_3538434f-5cf9-4cdf-ae9f-bdecb647cf74.wav


In [None]:
# Eng to Myanmar Translation using MBart


from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

 
myanmar_text = text

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")

# Translate Eng to Myanmar
tokenizer.src_lang = "en_XX"
encoded_hi = tokenizer(myanmar_text, return_tensors="pt")
generated_tokens = model.generate(
    **encoded_hi,
    forced_bos_token_id=tokenizer.lang_code_to_id["my_MM"]
)
translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
print("Translation:", translation)

 

Translation: ['ကျွန်တော် က ၎င်း သည် ကစား ပွဲ ကို ဘယ်လို ကစား ရ မ လဲ ဆိုတာ ကစား သမား အသစ် များ ကို သင်ကြား ရန် အချိန် ဖြစ် တယ် လို့ ထင် ပါ တယ် ၊ ဒါကြောင့် ကျွန်တော် တို့ ၎င်း ထဲ သို့ သွား ပါ စို့ ! စွမ်းရည် များ ကစား သမား များ ၏ စွမ်းရည် များ နှင့်အတူ စတင် ပါ စို့ : အယ်လ်တ် ၊ အီချို ၊ စွမ်းရည် ၊ ဂတ်ဂျက် ၊ ဖဲတာ ဆားကစ် ၊ နှင့် ကွန်ရက်တို စွမ်းအင် ။ ထို အရာ များ သည် ဇာတ်ကောင် တစ် ယောက် စီ အတွက် မူလ စွမ်းရည် များ ဖြစ် ကြ သည် ။ အကယ်၍ သင် သည် သေးငယ် သော ပတ်ပတ်လည် အိုင်ကွန် နှင့် ပတ်သက် ၍ စဉ်းစား နေ လျှင် ၊ ထို အရာ သည် လော့-オン ပုံစံ ဖြစ် ']
