In [9]:
pip install xgboost



In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# CSV dosyasından veriyi oku
df = pd.read_csv('all_texts.csv')  # CSV dosya adı

# Özellik ve hedefi ayır
X = df['text']
y = df['author']

# Yazar adlarını sayısal etiketlere dönüştür
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Eğitim ve test veri kümesini ayır
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# TF-IDF vektörleştirme
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Kullanılacak modeller
models = {
    "Naive Bayes": MultinomialNB(),
    "SVM": LinearSVC(),
    "Random Forest": RandomForestClassifier(),
    "MLP": MLPClassifier(max_iter=300),
     "Decision Tree": DecisionTreeClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
}

# Modelleri eğit ve değerlendir
for name, model in models.items():
    print(f"\n=== {name} ===")
    model.fit(X_train_vec, y_train)
    y_pred = model.predict(X_test_vec)

    # Tahminleri tekrar yazar adına dönüştür
    y_test_labels = label_encoder.inverse_transform(y_test)
    y_pred_labels = label_encoder.inverse_transform(y_pred)

    print(classification_report(y_test_labels, y_pred_labels))
    print("Accuracy:", accuracy_score(y_test_labels, y_pred_labels))


=== Naive Bayes ===
              precision    recall  f1-score   support

      AAltan       0.80      0.36      0.50        11
AAydintasbas       0.02      0.67      0.04         3
      AHakan       0.00      0.00      0.00        11
 ATuranAlkan       0.00      0.00      0.00         9
    AYArslan       0.75      0.30      0.43        10
     BCoskun       0.00      0.00      0.00         7
     CCandar       0.00      0.00      0.00        11
    COzdemir       0.00      0.00      0.00        10
  DCundioglu       1.00      0.50      0.67         8
  DUAribogan       0.86      0.75      0.80         8
      EArdic       0.33      0.33      0.33         6
      ECakir       0.00      0.00      0.00         9
    GGokturk       0.00      0.00      0.00         8
   HBabaoglu       0.00      0.00      0.00         7
      HCemal       0.33      0.50      0.40         4
       HUluc       1.00      0.29      0.44         7
  IKucukkaya       0.00      0.00      0.00         9
    MA

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

      AAltan       0.90      0.82      0.86        11
AAydintasbas       0.40      0.67      0.50         3
      AHakan       0.85      1.00      0.92        11
 ATuranAlkan       1.00      0.89      0.94         9
    AYArslan       0.80      0.80      0.80        10
     BCoskun       0.86      0.86      0.86         7
     CCandar       0.82      0.82      0.82        11
    COzdemir       0.91      1.00      0.95        10
  DCundioglu       1.00      1.00      1.00         8
  DUAribogan       0.89      1.00      0.94         8
      EArdic       0.60      1.00      0.75         6
      ECakir       0.67      0.67      0.67         9
    GGokturk       1.00      0.50      0.67         8
   HBabaoglu       0.80      0.57      0.67         7
      HCemal       0.43      0.75      0.55         4
       HUluc       1.00      1.00      1.00         7
  IKucukkaya       0.71      0.56      0.62         9
    MABirand       0.71    

Parameters: { "use_label_encoder" } are not used.



              precision    recall  f1-score   support

      AAltan       0.82      0.82      0.82        11
AAydintasbas       0.29      0.67      0.40         3
      AHakan       0.82      0.82      0.82        11
 ATuranAlkan       0.43      0.33      0.38         9
    AYArslan       0.80      0.80      0.80        10
     BCoskun       0.42      0.71      0.53         7
     CCandar       0.88      0.64      0.74        11
    COzdemir       0.56      0.50      0.53        10
  DCundioglu       1.00      0.75      0.86         8
  DUAribogan       0.47      0.88      0.61         8
      EArdic       0.44      0.67      0.53         6
      ECakir       0.60      0.33      0.43         9
    GGokturk       0.50      0.25      0.33         8
   HBabaoglu       0.50      0.86      0.63         7
      HCemal       1.00      0.25      0.40         4
       HUluc       0.86      0.86      0.86         7
  IKucukkaya       0.50      0.33      0.40         9
    MABirand       0.67    