In [None]:
!pip install nltk

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Imported in this cell as well: on a fresh kernel this cell runs before the
# notebook's main import cell, so `nltk` would otherwise be undefined here.
import nltk

# Fetch the Punkt tokenizer data (nltk.word_tokenize is used further down).
nltk.download("punkt")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
!pip install zemberek-python

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting zemberek-python
  Downloading zemberek_python-0.2.3-py3-none-any.whl (95.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.1/95.1 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting antlr4-python3-runtime==4.8 (from zemberek-python)
  Downloading antlr4-python3-runtime-4.8.tar.gz (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.4/112.4 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25l[?25hdone
  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.8-py3-none-any.whl size=141210 sha256=e432f48c6eb84388472383fedf05df0fc6b17ced039f3f10ec2a22b87d1e9ea4
  Stored in directory: /root/.cache/pip/wheels/a7/20/bd/e1477d664f

In [None]:
import nltk
from nltk.metrics.distance import edit_distance
from zemberek.morphology import TurkishMorphology
from zemberek.normalization import TurkishSentenceNormalizer
from nltk.translate.bleu_score import sentence_bleu

In [None]:
# Create TurkishMorphology and TurkishSentenceNormalizer instances.
# Built once here; preprocess_corpus reuses `sentence_normalizer` for Turkish.
turkish_morphology = TurkishMorphology.create_with_defaults()
sentence_normalizer = TurkishSentenceNormalizer(turkish_morphology)

# Folder holding the three sentence files. Change this one constant to point
# the notebook at a different copy of the data.
DATA_DIR = '/content/drive/MyDrive/colab_notebooks'


def _read_sentences(path):
    """Return the lines of the UTF-8 text file at ``path`` as a list of strings.

    Line terminators are stripped (``str.splitlines``).
    """
    with open(path, 'r', encoding='utf-8') as file:
        return file.read().splitlines()


# NOTE(review): presumably line i of each file is the same sentence in a
# different language -- translate_example_based pairs the corpora by index.

# Read English sentences from txt file
english_file_path = DATA_DIR + '/english_sentences.txt'
english_sentences = _read_sentences(english_file_path)

# Read Azerbaijani sentences from txt file
azerbaijani_file_path = DATA_DIR + '/azerbaijani_sentences.txt'
azerbaijani_sentences = _read_sentences(azerbaijani_file_path)

# Read Turkish sentences from txt file
turkish_file_path = DATA_DIR + '/turkish_sentences.txt'
turkish_sentences = _read_sentences(turkish_file_path)

# Preprocess the parallel corpus
def preprocess_corpus(corpus, language):
    """Lower-case and tokenize every sentence of ``corpus``.

    Args:
        corpus: Iterable of sentence strings.
        language: ``'english'``, ``'azerbaijani'`` or ``'turkish'``. Turkish
            sentences are first passed through the module-level
            ``sentence_normalizer`` and then lower-cased and tokenized like
            the other languages.

    Returns:
        A list with one token list (from ``nltk.word_tokenize``) per sentence,
        in the same order as ``corpus``.

    Raises:
        ValueError: If ``language`` is not one of the supported names.
    """
    supported_languages = ('english', 'azerbaijani', 'turkish')
    if language not in supported_languages:
        # Reject unknown names up front: returning an empty corpus here would
        # only fail later (during the nearest-sentence search), far from the cause.
        raise ValueError(
            "Unsupported language {!r}; expected one of {}".format(
                language, ', '.join(supported_languages)
            )
        )

    needs_normalization = language == 'turkish'
    preprocessed_corpus = []
    for sentence in corpus:
        if needs_normalization:
            # The normalizer is looked up only when a Turkish sentence is
            # actually processed.
            sentence = sentence_normalizer.normalize(sentence)
        preprocessed_corpus.append(nltk.word_tokenize(sentence.lower()))
    return preprocessed_corpus

# Tokenize the three sentence lists, one preprocess_corpus call per language,
# in the order English, Azerbaijani, Turkish.
english_corpus, azerbaijani_corpus, turkish_corpus = [
    preprocess_corpus(sentences, language)
    for sentences, language in (
        (english_sentences, 'english'),
        (azerbaijani_sentences, 'azerbaijani'),
        (turkish_sentences, 'turkish'),
    )
]

# Translate a sentence using example-based machine translation
def translate_example_based(input_sentence, source_corpus, target_corpus):
    """Translate ``input_sentence`` by nearest-example lookup.

    The input is lower-cased and tokenized, compared with every tokenized
    sentence of ``source_corpus`` using token-level edit distance, and the
    entry of ``target_corpus`` at the position of the closest source sentence
    is returned.

    Args:
        input_sentence: Sentence to translate (string).
        source_corpus: Tokenized source-language sentences (lists of tokens).
        target_corpus: Tokenized target-language sentences; entry ``i`` is
            used as the translation of ``source_corpus[i]``.

    Returns:
        The tokens of the selected target sentence joined by single spaces.

    Raises:
        ValueError: If ``source_corpus`` is empty.
        IndexError: If ``target_corpus`` has no entry at the selected index.
    """
    # Fail fast with a descriptive message instead of the generic
    # "min() arg is an empty sequence" raised further down.
    if not source_corpus:
        raise ValueError(
            "source_corpus is empty: there are no example sentences to match against"
        )

    input_tokens = nltk.word_tokenize(input_sentence.lower())

    # Single pass over the corpus. min() keeps the first minimum, so ties
    # resolve to the earliest sentence.
    most_similar_index, _ = min(
        enumerate(source_corpus),
        key=lambda indexed_sentence: edit_distance(input_tokens, indexed_sentence[1]),
        default=(None, None),
    )
    if most_similar_index is None:
        # Reached only for an empty iterable that is still truthy (e.g. an
        # exhausted generator).
        raise ValueError(
            "source_corpus is empty: there are no example sentences to match against"
        )

    # Retrieve the translation stored at the same position in the target corpus.
    translated_sentence = target_corpus[most_similar_index]

    return ' '.join(translated_sentence)

# Calculate BLEU score
def calculate_bleu_score(reference_sentence, translated_sentence):
    """Sentence-level BLEU of ``translated_sentence`` against one reference.

    Both strings are lower-cased and tokenized with ``nltk.word_tokenize``
    before scoring. BLEU measures n-gram overlap, so ``reference_sentence``
    must be written in the same language as ``translated_sentence``; scoring
    a translation against its source-language input is not meaningful.

    Args:
        reference_sentence: Reference (gold) translation, as a string.
        translated_sentence: Candidate translation to score, as a string.

    Returns:
        The value returned by ``nltk.translate.bleu_score.sentence_bleu``
        with smoothing method 1 applied.
    """
    # Imported locally so this function does not depend on a change to the
    # notebook's import cell.
    from nltk.translate.bleu_score import SmoothingFunction

    reference_tokenized = nltk.word_tokenize(reference_sentence.lower())
    translated_tokenized = nltk.word_tokenize(translated_sentence.lower())

    # Without smoothing, a single n-gram order with zero matches collapses the
    # score to a value like 1e-231 for short sentences. Smoothing keeps
    # partially matching translations comparable.
    bleu_score = sentence_bleu(
        [reference_tokenized],
        translated_tokenized,
        smoothing_function=SmoothingFunction().method1,
    )

    return bleu_score

# User interface
# Menu choice -> (source language name shown in the prompt, source-side corpus).
# Both directions translate into Turkish.
translation_options = {
    '1': ('English', english_corpus),
    '2': ('Azerbaijani', azerbaijani_corpus),
}

while True:
    print("Select an option:")
    print("1. English to Turkish")
    print("2. Azerbaijani to Turkish")
    print("3. Exit")
    # Strip so that stray whitespace around the number is not an invalid choice.
    choice = input("Enter your choice: ").strip()

    if choice == '3':
        break

    if choice not in translation_options:
        print("Invalid choice! Please try again.")
        print()
        continue

    language_name, source_corpus = translation_options[choice]
    source_input = input(f"Enter the {language_name} sentence: ")
    translated_sentence = translate_example_based(source_input, source_corpus, turkish_corpus)
    print("Translated Sentence: ", translated_sentence)

    # BLEU compares the output with a reference in the *target* language.
    # Scoring against the source-language input (as was done before) always
    # gives ~0, so ask for a Turkish reference and skip the score when the
    # user has none.
    reference_input = input(
        "Enter a reference Turkish translation (leave blank to skip BLEU): "
    ).strip()
    if reference_input:
        bleu_score = calculate_bleu_score(reference_input, translated_sentence)
        print("BLEU score:", bleu_score)

    print()

INFO:zemberek.morphology.turkish_morphology:TurkishMorphology instance initialized in 20.967856884002686


2023-06-01 12:05:34,363 - zemberek.morphology.turkish_morphology - INFO
Msg: TurkishMorphology instance initialized in 20.967856884002686

Select an option:
1. English to Turkish
2. Azerbaijani to Turkish
3. Exit
Enter your choice: 1
Enter the English sentence: The sun slowly set behind the mountains, casting a warm golden glow.
Translated Sentence:  hafif esen rüzgar yaprakların arasından hışırdıyor ve rahatlatıcı bir melodi yaratıyordu .
BLEU score: 8.285726588482745e-232

Select an option:
1. English to Turkish
2. Azerbaijani to Turkish
3. Exit
Enter your choice: 1
Enter the English sentence: She walked through the crowded streets, her heart pounding with anticipation
Translated Sentence:  faturayı alabilir miyim lütfen ?
BLEU score: 0

Select an option:
1. English to Turkish
2. Azerbaijani to Turkish
3. Exit
Enter your choice: 1
Enter the English sentence: The aroma of freshly baked bread filled the air, enticing everyone nearby
Translated Sentence:  fırından yeni çıkmış ekmek koku

KeyboardInterrupt: ignored