In [1]:
import requests
from Levenshtein import ratio
from unidecode import unidecode
from deep_translator import GoogleTranslator

# --- SETTINGS ---
# Language codes are passed both to the word-list download and to the translator.
source_lang = "de"  # German: language of the frequency list and translation source
target_lang = "it"  # Italian: language the words are translated into
fallback_lang = "en"  # Word list to download if the source-language list cannot be fetched
word_limit = 20     # How many top similar words to display
buffer_count = 100  # How many words to fetch before filtering

# --- STEP 1: Fetch Top Words ---
def fetch_top_words(language, fallback, buffer_count=100):
    """Download the most frequent words of a language from the FrequencyWords repo.

    The word list for ``language`` is tried first. If that request fails
    (network error or HTTP error status), the list for ``fallback`` is
    downloaded instead and a notice is printed, because the returned words are
    then in the fallback language rather than the requested one.

    Args:
        language: Language code used to build the word-list URL (e.g. "de").
        fallback: Language code to use when the primary download fails.
        buffer_count: Number of leading lines of the list to read.

    Returns:
        A list with the first whitespace-separated token of each of the first
        ``buffer_count`` lines; blank lines are skipped.

    Raises:
        requests.RequestException: If the fallback download fails as well.
    """
    base_url = "https://raw.githubusercontent.com/hermitdave/FrequencyWords/master/content/2016"

    def _download(lang_code):
        # One-line purpose: fetch a single word list and parse its leading lines.
        url = f"{base_url}/{lang_code}/{lang_code}_50k.txt"
        # A timeout keeps the notebook from hanging on a stalled connection.
        response = requests.get(url, timeout=10)
        # Without this check an error page would be parsed as if it were words.
        response.raise_for_status()
        lines = response.text.splitlines()[:buffer_count]
        # Each line is "<word> <count>"; skip blank lines so split()[0] cannot fail.
        return [line.split()[0] for line in lines if line.strip()]

    try:
        return _download(language)
    except requests.RequestException as exc:
        # Only request failures trigger the fallback; Ctrl-C and programming
        # errors propagate instead of being silently swallowed.
        print(f"Could not fetch word list for '{language}' ({exc}); falling back to '{fallback}'.")
        return _download(fallback)

# --- STEP 2: Translate Words ---
def get_translations(words, src_lang, dest_lang):
    """Translate each word from ``src_lang`` to ``dest_lang`` on a best-effort basis.

    Args:
        words: Iterable of words to translate.
        src_lang: Source language code passed to ``GoogleTranslator``.
        dest_lang: Target language code passed to ``GoogleTranslator``.

    Returns:
        A dict mapping every input word to its translation, or to ``None`` when
        the translation of that word failed.
    """
    try:
        # The translator does not depend on the word, so build it once.
        translator = GoogleTranslator(source=src_lang, target=dest_lang)
    except Exception:
        # Keep the best-effort contract: an unusable translator means no word
        # could be translated.
        return {word: None for word in words}

    translations = {}
    for word in words:
        try:
            translations[word] = translator.translate(word)
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C can still stop the loop.
            translations[word] = None
    return translations

# --- STEP 3: Calculate Similarity ---
def _normalize_for_comparison(text):
    """Transliterate ``text`` with ``unidecode`` and lowercase the result."""
    # Lowercase AFTER transliteration: unidecode may produce capital letters
    # for scripts that have no case, which a prior .lower() cannot remove.
    return unidecode(text).lower()


def get_similarity_scores(translations):
    """Score how similar each word is to its translation.

    Both the word and its translation are normalized the same way
    (transliterated, then lowercased) before being compared with ``ratio``.

    Args:
        translations: Dict mapping a word to its translation, or to ``None``
            when no translation is available.

    Returns:
        A dict mapping each word that has a translation to its similarity
        score. Words whose translation is ``None`` are omitted.
    """
    return {
        word: ratio(
            _normalize_for_comparison(word),
            _normalize_for_comparison(translated),
        )
        for word, translated in translations.items()
        if translated is not None
    }

# --- STEP 4: Get Native Meaning ---
def get_meanings(words, src_lang, dest_lang):
    """Translate each word, substituting a placeholder when translation fails.

    Args:
        words: Iterable of words to translate.
        src_lang: Source language code passed to ``GoogleTranslator``.
        dest_lang: Target language code passed to ``GoogleTranslator``.

    Returns:
        A dict mapping every input word to its translation, or to the string
        ``"[No translation]"`` when the translation of that word failed.
    """
    placeholder = "[No translation]"
    try:
        # The translator does not depend on the word, so build it once.
        translator = GoogleTranslator(source=src_lang, target=dest_lang)
    except Exception:
        # An unusable translator means every word gets the placeholder.
        return {word: placeholder for word in words}

    meanings = {}
    for word in words:
        try:
            meanings[word] = translator.translate(word)
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C can still stop the loop.
            meanings[word] = placeholder
    return meanings

# --- RUN THE ALGORITHM ---
# Pipeline: download frequent words -> translate them -> score each pair ->
# print the highest-scoring pairs.
print("Fetching top words...")
top_words = fetch_top_words(source_lang, fallback_lang, buffer_count=buffer_count)

print("Translating...")
translations = get_translations(top_words, source_lang, target_lang)

print("Calculating similarity...")
similarities = get_similarity_scores(translations)
# Rank by score, highest first, and keep only the requested number of pairs.
sorted_matches = sorted(similarities.items(), key=lambda pair: pair[1], reverse=True)
top_matches = sorted_matches[:word_limit]

# Build the header from the settings so it stays correct when the languages change.
print(f"\nTop Matches ({source_lang} → {target_lang}):\n")
if not top_matches:
    # Happens when no word received a translation (all were scored out as None).
    print("No translated words to compare.")
for word, score in top_matches:
    print(f"{word} → {translations[word]} | Similarity: {score:.2f}")


Fetching top words...
Translating...
Calculating similarity...

Top Matches (German → Italian):

in → In | Similarity: 1.00
dem → Dem | Similarity: 1.00
s → S | Similarity: 1.00
oh → OH | Similarity: 1.00
hat → ha | Similarity: 0.80
es → Esso | Similarity: 0.67
an → A | Similarity: 0.67
sich → si | Similarity: 0.67
auch → Anche | Similarity: 0.67
mein → Mio | Similarity: 0.57
sind → Sono | Similarity: 0.50
da → Là | Similarity: 0.50
meine → mio | Similarity: 0.50
nichts → Niente | Similarity: 0.50
habe → Avere | Similarity: 0.44
alles → qualunque cosa | Similarity: 0.42
ich → IO | Similarity: 0.40
die → IL | Similarity: 0.40
ein → UN | Similarity: 0.40
mir → Me | Similarity: 0.40


In [2]:
from unidecode import unidecode
from Levenshtein import ratio

# Arabic word and its Turkish counterpart
arabic_word = "موز"
turkish_word = "muz"
# Transliterate the Arabic word so both words can be compared in one alphabet.
transliterated_arabic = unidecode(arabic_word)

# First comparison: raw Arabic script against the Turkish word.
similarity_score = ratio(arabic_word.lower(), turkish_word.lower())

print(f"Original Arabic: {arabic_word}")
print(f"Original Turkish: {turkish_word}")
print(f"Levenshtein Similarity Score: {similarity_score:.2f}")

# Second comparison: transliterated Arabic against the Turkish word.
similarity_score = ratio(transliterated_arabic.lower(), turkish_word.lower())

# Label the value as what it is: the transliteration, not the original word.
print(f"Transliterated Arabic: {transliterated_arabic}")
print(f"Original Turkish: {turkish_word}")
print(f"Levenshtein Similarity Score: {similarity_score:.2f}")


Original Arabic: موز
Original Turkish: muz
Levenshtein Similarity Score: 0.00
Original Arabic: mwz
Original Turkish: muz
Levenshtein Similarity Score: 0.67
