In [3]:
# Basic Tibetan-English Translator Test
#
# This notebook demonstrates our simple dictionary-based translator

import sys
import os
import pandas as pd

# Add parent directory to path so we can import our modules
sys.path.append('..')

# Import our modules
from src.data_loader import load_parallel_data
from src.dictionary import TibetanEnglishDictionary
from src.translator import BasicTranslator
from src.build_dictionary import build_dictionary_from_parallel_data

# First, let's build our dictionary from sample data.
#
# The data files are located relative to the project root instead of a
# machine-specific absolute path, so the notebook runs on any checkout.
# The project root is taken to be the parent of the working directory --
# the same assumption the sys.path.append('..') import setup above relies on.
project_root = os.path.abspath('..')
raw_data_dir = os.path.join(project_root, 'src', 'data', 'raw')
input_file = os.path.join(raw_data_dir, 'tibetan_english_sample.txt')
output_file = os.path.join(raw_data_dir, 'simple_dictionary.txt')

# Fail early with a readable message if the notebook was started from an
# unexpected working directory and the sample data cannot be found.
if not os.path.isfile(input_file):
    raise FileNotFoundError(
        f"Sample data not found at {input_file}; "
        "run this notebook from a directory directly under the project root."
    )

# Create directories if they don't exist.
# The dictionary is written next to the raw data, so that is the directory
# that must exist before building it.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
# Kept from the original cell; nothing below writes here, but other
# notebooks/cells may expect it to exist -- TODO confirm and drop if unused.
os.makedirs('../data/processed', exist_ok=True)

# Build the dictionary (reads input_file, writes output_file)
build_dictionary_from_parallel_data(input_file, output_file)

# Now load the parallel data for testing; the loop below reads its
# 'tibetan' and 'english' columns.
df = load_parallel_data(input_file)

# Create a translator backed by the dictionary file we just wrote
translator = BasicTranslator(dictionary_path=output_file)

# Run every sentence pair through the translator in both directions and
# collect one record per pair. Within each record the Tibetan->English call
# is evaluated before the English->Tibetan call (dict entries are evaluated
# in the order written).
results = [
    {
        'original_tibetan': pair['tibetan'],
        'actual_english': pair['english'],
        'translated_english': translator.translate_tibetan_to_english(pair['tibetan']),
        'translated_tibetan': translator.translate_english_to_tibetan(pair['english']),
    }
    for _, pair in df.iterrows()
]

# Show the results: reference text and both translations side by side
pd.DataFrame(results)

Built dictionary with 21 entries and saved to C:/Users/91770/tibetan-english-translator/src/data/raw/simple_dictionary.txt
Loaded 21 entries from C:/Users/91770/tibetan-english-translator/src/data/raw/simple_dictionary.txt


Unnamed: 0,original_tibetan,actual_english,translated_english,translated_tibetan
0,ཁྱེད་རང་ག་འདྲས་ཡོད།,How are you?,what is you? [འདྲས] [ཡོད],[how][are]ག
1,ང་བདེ་པོ་ཡིན།,I am well.,i am well. [ཡིན],ངབདེཔོ
2,ཐུགས་རྗེ་ཆེ།,Thank you.,thank you. [ཆེ],ཐུགསརྗེ
3,དགོངས་དག,Sorry/Excuse me.,sorry/excuse me.,དགོངསདག
4,བཀྲ་ཤིས་བདེ་ལེགས།,Hello/Greetings.,hello/greetings. [ཤིས] am [ལེགས],བཀྲ
5,ཞོགས་པ་བདེ་ལེགས།,Good morning.,good morning. am [ལེགས],མཚནཔ
6,མཚན་མོ་བདེ་ལེགས།,Good night.,good night. am [ལེགས],མཚནམོ
7,ཁྱེད་རང་གི་མིང་ལ་ག་རེ་ཞུ་གི་ཡོད།,What is your name?,what is your name is you? [རེ] [ཞུ] your [ཡོད],ཁྱེདལགི[name?]
8,ངའི་མིང་ལ་_ཟེར་གྱི་ཡོད།,My name is _.,my name is _. [གྱི] [ཡོད],ངའིམིངལ_ཟེར
9,དགའ་བསུ་ཞུ།,Welcome.,welcome. [བསུ] [ཞུ],དགའ
