In [2]:
import sys
import os
sys.path.append(os.path.abspath(".."))  # üst klasörü ekle

from services.preprocessing import temizle


In [4]:
from transformers import pipeline

# MODEL 1
ner_bert = pipeline(
    task="ner",
    model="savasy/bert-base-turkish-ner-cased",
    tokenizer="savasy/bert-base-turkish-ner-cased",
    grouped_entities=True
)

# MODEL 2
ner_mdeberta = pipeline(
    task="ner",
    model="akdeniz27/mDeBERTa-v3-base-turkish-ner",
    tokenizer="akdeniz27/mDeBERTa-v3-base-turkish-ner",
    grouped_entities=True
)

OSError: [WinError 126] Belirtilen modül bulunamadı. Error loading "c:\Users\FEYZA ALEYNA ERKUL\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.

In [3]:
import re

def lower_turkish(text: str) -> str:
    return text.lower()

def expand_abbreviations(text: str) -> str:
    abbreviations = {
        r"\bmah\.": "mahallesi",
        r"\bsk\.": "sokak",
        r"\bcad\.": "cadde",
        r"\bno\.": "numara"
    }
    for pattern, replacement in abbreviations.items():
        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
    return text

def temizle(adres):
    return expand_abbreviations(lower_turkish(adres))

In [4]:
adresler = [
    "İNÖNÜ mah. çiçek sk. no.5 Bakırköy İstanbul",
    "Mustafa Kemal mah. cad. 14/2 Ankara",
    "valikonağı cad. no.25 Şişli",
    "Adnan Menderes bulvarı, Atatürk mah., Bornova İzmir"
]

for adres in adresler:
    print("🔸 Orijinal:", adres)
    temiz_adres = temizle(adres)
    print("🔹 Temiz:", temiz_adres)

    print("🧠 Model 1 (BERT):")
    for ent in ner_bert(temiz_adres):
        print(f"  - {ent['word']} → {ent['entity_group']}")

    print("🧠 Model 2 (mDeBERTa):")
    for ent in ner_mdeberta(temiz_adres):
        print(f"  - {ent['word']} → {ent['entity_group']}")

    print("-" * 60)

🔸 Orijinal: İNÖNÜ mah. çiçek sk. no.5 Bakırköy İstanbul
🔹 Temiz: i̇nönü mahallesi çiçek sokak numara5 bakırköy i̇stanbul
🧠 Model 1 (BERT):


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  - i → LOC
  - ##nönü → LOC
  - bakırköy → LOC
  - ##stanbul → LOC
🧠 Model 2 (mDeBERTa):
  -  → LOC
  - i̇ → LOC
  - n → LOC
  - önü → LOC
  -  → LOC
  - çiç → LOC
  - bakı → LOC
  - r → LOC
  - köy → LOC
  - i̇ → LOC
  - stanbul → LOC
------------------------------------------------------------
🔸 Orijinal: Mustafa Kemal mah. cad. 14/2 Ankara
🔹 Temiz: mustafa kemal mahallesi cadde 14/2 ankara
🧠 Model 1 (BERT):
  - mustafa kemal → LOC
  - ankara → LOC
🧠 Model 2 (mDeBERTa):
  - musta → LOC
  - fa → LOC
  - ankara → LOC
------------------------------------------------------------
🔸 Orijinal: valikonağı cad. no.25 Şişli
🔹 Temiz: valikonağı cadde numara25 şişli
🧠 Model 1 (BERT):
  - valikonağı → LOC
  - şişli → LOC
🧠 Model 2 (mDeBERTa):
  - valik → LOC
  - şiş → LOC
------------------------------------------------------------
🔸 Orijinal: Adnan Menderes bulvarı, Atatürk mah., Bornova İzmir
🔹 Temiz: adnan menderes bulvarı, atatürk mahallesi, bornova i̇zmir
🧠 Model 1 (BERT):
  - adnan mendere