In [1]:
pip install sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-5.1.0-py3-none-any.whl.metadata (16 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Using cached transformers-4.56.0-py3-none-any.whl.metadata (40 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.8.0-cp312-cp312-win_amd64.whl.metadata (30 kB)
Collecting sympy>=1.13.3 (from torch>=1.11.0->sentence-transformers)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Using cached huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers<5.0.0,>=4.41.0->sentence-transformers)
  Using cached tokenizers-0.22.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers<5.0.0,>=4.41.0->sentence-transformers)
  Using cached safetensors-0.6.2-cp38-abi3-win_amd64.whl.metadata (4.1 kB)
Using cached sentence_trans

In [5]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util
import torch
import ipywidgets as widgets
from IPython.display import display


file = "hukuki_chatbot_dataset.csv"


df = pd.read_csv(file)
df.head()


Unnamed: 0,intent,text,response
0,selamlama,Merhaba,"Merhaba, ben Hukuki Yardım Botu. Size nasıl ya..."
1,selamlama,"Selam, kolay gelsin","Merhaba, ben Hukuki Yardım Botu. Size nasıl ya..."
2,depozito_iadesi,"Ev sahibi depozitomu geri vermiyor, ne yapabil...",Türk Borçlar Kanunu’na göre kiracı konuttan çı...
3,depozito_iadesi,Kira depozitosu iade süresi ne kadar?,Türk Borçlar Kanunu’na göre kiracı konuttan çı...
4,isten_cikarma,"Patronum beni işten çıkardı, tazminat alabilir...","4857 sayılı İş Kanunu’na göre, 1 yılı geçen ça..."


In [9]:
def clean_text(text):
    if pd.isna(text):
        text = ""
    text = str(text).lower()
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


df['clean_question'] = df['text'].apply(clean_text)
df['clean_response'] = df['response'].apply(clean_text)

df[['text', 'clean_question', 'response', 'clean_response']].head()


Unnamed: 0,text,clean_question,response,clean_response
0,Merhaba,merhaba,"Merhaba, ben Hukuki Yardım Botu. Size nasıl ya...",merhaba ben hukuki yardım botu size nasıl yard...
1,"Selam, kolay gelsin",selam kolay gelsin,"Merhaba, ben Hukuki Yardım Botu. Size nasıl ya...",merhaba ben hukuki yardım botu size nasıl yard...
2,"Ev sahibi depozitomu geri vermiyor, ne yapabil...",ev sahibi depozitomu geri vermiyor ne yapabilirim,Türk Borçlar Kanunu’na göre kiracı konuttan çı...,türk borçlar kanunu’na göre kiracı konuttan çı...
3,Kira depozitosu iade süresi ne kadar?,kira depozitosu iade süresi ne kadar,Türk Borçlar Kanunu’na göre kiracı konuttan çı...,türk borçlar kanunu’na göre kiracı konuttan çı...
4,"Patronum beni işten çıkardı, tazminat alabilir...",patronum beni işten çıkardı tazminat alabilir ...,"4857 sayılı İş Kanunu’na göre, 1 yılı geçen ça...",4857 sayılı i̇ş kanunu’na göre 1 yılı geçen ça...


In [11]:

vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df['clean_question'])
print("TF-IDF matris boyutu:", tfidf_matrix.shape)


model = SentenceTransformer('all-MiniLM-L6-v2')
question_embeddings = model.encode(df['text'].tolist(), convert_to_tensor=True)


TF-IDF matris boyutu: (305, 1570)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [13]:
def get_response_combined(user_input, tfidf_weight=0.5, bert_weight=0.5, threshold=0.5):
    cleaned_input = clean_text(user_input)
    
    
    user_vec = vectorizer.transform([cleaned_input])
    tfidf_sim = cosine_similarity(user_vec, tfidf_matrix)[0]
    tfidf_sim_norm = tfidf_sim / (tfidf_sim.max() + 1e-8)  # normalize 0-1
    
 
    user_embedding = model.encode(user_input, convert_to_tensor=True)
    bert_sim = util.cos_sim(user_embedding, question_embeddings)[0].cpu().numpy()
    bert_sim_norm = bert_sim / (bert_sim.max() + 1e-8)  # normalize 0-1
    
    
    combined_score = tfidf_weight * tfidf_sim_norm + bert_weight * bert_sim_norm
    best_idx = np.argmax(combined_score)
    
    if combined_score[best_idx] < threshold:
        return "Bu konuda bilgim yok, başka bir soru sorun."
    
    return df.iloc[best_idx]['response']


In [15]:
input_box = widgets.Text(
    value='',
    placeholder='Nasıl yardımcı olabilirim...',
    description='Siz:',
    disabled=False
)
send_btn = widgets.Button(description="Gönder")
output_box = widgets.Output()

def send_message(_=None):
    user_question = input_box.value.strip()
    if not user_question:
        with output_box:
            output_box.clear_output()
            print("Lütfen bir soru yazın.")
        return

    try:
        response = get_response_combined(user_question)
    except Exception as e:
        response = f"Hata: {e}"

    with output_box:
        output_box.clear_output()
        print("Chatbot:", response)

    input_box.value = ""

send_btn.on_click(send_message)
try:
    input_box.on_submit(lambda _: send_message())
except Exception:
    pass

display(widgets.HBox([input_box, send_btn]), output_box)


  input_box.on_submit(lambda _: send_message())


HBox(children=(Text(value='', description='Siz:', placeholder='Nasıl yardımcı olabilirim...'), Button(descript…

Output()