In [5]:
import pandas as pd
data = pd.read_csv("/content/indo_sunda_dictionary.csv")
data

Unnamed: 0,Indonesian,Sundanese
0,Abu,Kekebul
1,Abu Rokok,Calacah
2,Ada,Aya
3,Adalah,Nyaeta
4,Adik,Rai
...,...,...
755,Untuk,Kanggo
756,Untuk,Jang
757,Urakan/Norak,Kampungan
758,Usia,Yuswa


In [10]:
import csv

# --- FUNGSI LOADING LEXICON ---
def load_indonesian_sunda_lexicon():
    lexicon = {}
    try:
        with open('/content/indo_sunda_dictionary.csv', 'r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            for row in reader:
                sunda_word = row['Sundanese'].strip()
                indonesian_word = row['Indonesian'].strip()

                # Handle multiple sunda words separated by '/'
                if '/' in sunda_word:
                    first_sunda = sunda_word.split('/')[0].strip().lower()
                else:
                    first_sunda = sunda_word.lower()

                # Handle multiple Indonesian words separated by ','
                if ',' in indonesian_word:
                    for variant in indonesian_word.split(','):
                        indonesian_variant = variant.strip().lower()
                        if indonesian_variant:
                            lexicon[indonesian_variant] = first_sunda
                else:
                    lexicon[indonesian_word.lower()] = first_sunda

    except FileNotFoundError:
        print("CSV file not found. Using empty lexicon.")

    return lexicon


# --- FUNGSI TERJEMAHAN (RBMT) ---
def rbmt(sentence, lexicon):
    tokens = sentence.lower().split()
    translated = [lexicon.get(token, token) for token in tokens]
    return ' '.join(translated)


# --- MEMBACA FILE CSV DAN MENULIS HASIL TERJEMAHAN ---
# Load lexicon
indonesian_sunda_lexicon = load_indonesian_sunda_lexicon()

# Path ke file CSV input dan output
input_csv_path = '/content/sentiments_data.csv'  # Sesuaikan sesuai lokasi file Anda
output_csv_path = '/content/translated_output.csv'  # Nama file hasil terjemahan

try:
    with open(input_csv_path, mode='r', encoding='utf-8') as csvfile:
        # Gunakan delimiter='\t' karena file CSV menggunakan tab sebagai pemisah
        reader = csv.DictReader(csvfile, delimiter=',')

        # Buka file output untuk menulis hasil terjemahan
        with open(output_csv_path, mode='w', encoding='utf-8', newline='') as outfile:
            fieldnames = ['id', 'original_text', 'translated_text']
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)

            # Tulis header CSV output
            writer.writeheader()

            print("=== TERJEMAHAN DIMULAI ===\n")
            for row in reader:
                original_text = row['text']
                translated_text = rbmt(original_text, indonesian_sunda_lexicon)

                # Simpan ke dalam file CSV
                writer.writerow({
                    'id': row['id'],
                    'original_text': original_text,
                    'translated_text': translated_text
                })

                # Opsional: tampilkan hasil terjemahan di konsol
                print(f"ID {row['id']}:")
                print(f"Original: {original_text}")
                print(f"Translated: {translated_text}")
                print("-" * 80)

    print(f"✅ Hasil terjemahan telah disimpan di: {output_csv_path}")

except Exception as e:
    print(f"❌ Error: {e}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Original: truk . they hanya started operating there , bangunan depan hotel acapella . kedai asal dekat sungai buloh . memang dari dulu makanan barat dia tk pernah mengecewakan !
Translated: truk . they hanya started operating there , bangunan hareup hotel acapella . kedai asal deukeut susukan buloh . emang ti heubeul makanan barat manehna tk pernah mengecewakan !
--------------------------------------------------------------------------------
ID 11512:
Original: anda ke bandung ? jika tidak coba masakan queen restorant ? sangat-sangat sayang . queen restoran sudah sejak lama sekali . mungkin di atas 40 tahun lalu sudah ada
Translated: anda ke bandung ? jika teu cobi masakan queen restorant ? sangat-sangat tresna . queen restoran enggeus sejak lila sekali . meureun di luhur 40 tahun lalu enggeus aya
--------------------------------------------------------------------------------
ID 11513:
Original: minta dalil agama soal h