# 📊 TF-IDF ile Metin Sınıflandırma (Bölüm 3)
Bu notebook, Türkçe metinlerle TF-IDF hesaplama, Naive Bayes eğitimi ve model değerlendirmesini içermektedir.

In [12]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split, cross_val_score   # Import cross_val_score
from sklearn.metrics import accuracy_score, classification_report

In [2]:
data = [
    ("Kendimi geliştirmeye çalışıyorum", "kişisel gelişim"),
    ("Bugün yeni bir kitap okudum", "kişisel gelişim"),
    ("Çok çalışmalı ve disiplinli olmalıyım", "kişisel gelişim"),
    ("Hava bugün çok sıcak", "günlük yaşam"),
    ("Markete gitmem gerekiyor", "günlük yaşam"),
    ("Yürüyüş yaparken müzik dinledim", "günlük yaşam"),
]

texts = [x[0] for x in data]
labels = [x[1] for x in data]

In [3]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

df = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names_out())
df

Unnamed: 0,bir,bugün,dinledim,disiplinli,geliştirmeye,gerekiyor,gitmem,hava,kendimi,kitap,...,okudum,olmalıyım,sıcak,ve,yaparken,yeni,yürüyüş,çalışmalı,çalışıyorum,çok
0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.57735,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0
1,0.462625,0.379359,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.462625,...,0.462625,0.0,0.0,0.0,0.0,0.462625,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.462625,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.462625,0.0,0.462625,0.0,0.0,0.0,0.462625,0.0,0.379359
3,0.0,0.448367,0.0,0.0,0.0,0.0,0.0,0.546779,0.0,0.0,...,0.0,0.0,0.546779,0.0,0.0,0.0,0.0,0.0,0.0,0.448367
4,0.0,0.0,0.0,0.0,0.0,0.57735,0.57735,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.5,0.0,0.5,0.0,0.0,0.0


In [4]:
le = LabelEncoder()
y = le.fit_transform(labels)
y

array([1, 1, 1, 0, 0, 0])

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
model = MultinomialNB()
model.fit(X_train, y_train)

In [6]:
y_pred = model.predict(X_test)

print("Doğruluk:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=le.classes_))

Doğruluk: 0.0
                 precision    recall  f1-score   support

   günlük yaşam       0.00      0.00      0.00       0.0
kişisel gelişim       0.00      0.00      0.00       2.0

       accuracy                           0.00       2.0
      macro avg       0.00      0.00      0.00       2.0
   weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
test_cümle = ["Yeni kararlar aldım, hayatımı düzene sokmalıyım"]
test_vec = vectorizer.transform(test_cümle)
tahmin = model.predict(test_vec)
print("Tahmin edilen sınıf:", le.inverse_transform(tahmin)[0])

Tahmin edilen sınıf: günlük yaşam


In [9]:
df = {
    "text": [
        "Bugün hava çok güzel",
        "Yürüyüş yaparken müzik dinledim",
        "Kahve içmek için dışarı çıktım",
        "Yeni kararlar aldım, hayatımı düzene sokmalıyım",
        "Kendimi geliştirmek için kitap okuyorum",
        "Çok çalışmalı ve disiplinli olmalıyım",
        "Sabah erken kalkmak bana iyi geliyor",
        "Alışveriş yapmam gerekiyor",
        "Markete uğrayıp süt alacağım",
        "Bugün yeni bir kitap sipariş ettim",
        "Kendimi geliştirmeye çalışıyorum",
        "Kitap okuyarak yeni şeyler öğreniyorum",
        "Mutfakta yemek yapmayı öğreniyorum",
        "Bugün temizlik yaptım",
        "Film izleyip dinlenmeyi planlıyorum",
        "Yeni projelere başlamayı düşünüyorum",
        "Yarın spora başlamayı planlıyorum",
        "Bugün markette çok sıra vardı",
        "Yaz tatili için plan yapmaya başladım",
        "Kendime hedefler koydum ve çalışıyorum"
    ],
    "label": [
        "günlük yaşam", "günlük yaşam", "günlük yaşam",
        "kişisel gelişim", "kişisel gelişim", "kişisel gelişim",
        "günlük yaşam", "günlük yaşam", "günlük yaşam",
        "kişisel gelişim", "kişisel gelişim", "kişisel gelişim",
        "günlük yaşam", "günlük yaşam", "günlük yaşam",
        "kişisel gelişim", "kişisel gelişim", "günlük yaşam",
        "günlük yaşam", "kişisel gelişim"
    ]
}

In [13]:
# 2. TF-IDF vektörleştirme
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["text"])

# 3. Etiketleri sayıya çevir
le = LabelEncoder()
y = le.fit_transform(df["label"])

# 4. Eğitim/Test böl
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

# 5. Naive Bayes eğitimi
model = MultinomialNB()
model.fit(X_train, y_train)

# 6. Test doğruluğu ve sınıflandırma raporu
y_pred = model.predict(X_test)
print("Doğruluk (test seti):", accuracy_score(y_test, y_pred))
print("\nSınıflandırma Raporu:\n", classification_report(y_test, y_pred, target_names=le.classes_))

# 7. Cross-validation ile sağlamlık testi
scores = cross_val_score(model, X, y, cv=5)
print("5-Fold CV Ortalama Doğruluk:", scores.mean())

# 8. Yeni bir cümleyi sınıflandır
test_text = ["Bugün kendime yeni hedefler koydum"]
test_vec = vectorizer.transform(test_text)
prediction = model.predict(test_vec)
print("Tahmin edilen sınıf:", le.inverse_transform(prediction)[0])

Doğruluk (test seti): 0.6

Sınıflandırma Raporu:
                  precision    recall  f1-score   support

   günlük yaşam       0.67      0.67      0.67         3
kişisel gelişim       0.50      0.50      0.50         2

       accuracy                           0.60         5
      macro avg       0.58      0.58      0.58         5
   weighted avg       0.60      0.60      0.60         5

5-Fold CV Ortalama Doğruluk: 0.8
Tahmin edilen sınıf: kişisel gelişim
