In [2]:
pip install xgboost



In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer # Import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from sklearn.neural_network import MLPClassifier # Import MLPClassifier
from sklearn.tree import DecisionTreeClassifier # Import DecisionTreeClassifier


# CSV dosyasından veriyi oku
df = pd.read_csv('all_texts.csv')  # CSV dosya adı

# Özellik ve hedefi ayır
X = df['text']
y = df['author']

# Yazar adlarını sayısal etiketlere dönüştür
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Eğitim ve test veri kümesini ayır
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

#  Character n-gram (2-gram ve 3-gram) vektörizasyon
vectorizer = CountVectorizer(analyzer='char', ngram_range=(2, 3))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


# Kullanılacak modeller
models = {
    "Naive Bayes": MultinomialNB(),
    "SVM": LinearSVC(),
    "Random Forest": RandomForestClassifier(),
    "MLP": MLPClassifier(max_iter=300),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
   "Decision Tree": DecisionTreeClassifier(random_state=42)
}

# Modelleri eğit ve değerlendir
for name, model in models.items():
    print(f"\n=== {name} ===")
    model.fit(X_train_vec, y_train)
    y_pred = model.predict(X_test_vec)

    # Tahminleri tekrar yazar adına dönüştür
    y_test_labels = label_encoder.inverse_transform(y_test)
    y_pred_labels = label_encoder.inverse_transform(y_pred)

    print(classification_report(y_test_labels, y_pred_labels))
    print("Accuracy:", accuracy_score(y_test_labels, y_pred_labels))


=== Naive Bayes ===
              precision    recall  f1-score   support

      AAltan       0.82      0.82      0.82        11
AAydintasbas       0.50      1.00      0.67         3
      AHakan       0.65      1.00      0.79        11
 ATuranAlkan       1.00      0.89      0.94         9
    AYArslan       0.75      0.90      0.82        10
     BCoskun       1.00      0.71      0.83         7
     CCandar       0.89      0.73      0.80        11
    COzdemir       0.91      1.00      0.95        10
  DCundioglu       0.89      1.00      0.94         8
  DUAribogan       0.73      1.00      0.84         8
      EArdic       1.00      1.00      1.00         6
      ECakir       1.00      0.44      0.62         9
    GGokturk       0.83      0.62      0.71         8
   HBabaoglu       1.00      0.71      0.83         7
      HCemal       0.75      0.75      0.75         4
       HUluc       1.00      1.00      1.00         7
  IKucukkaya       0.88      0.78      0.82         9
    MA



              precision    recall  f1-score   support

      AAltan       0.85      1.00      0.92        11
AAydintasbas       0.75      1.00      0.86         3
      AHakan       1.00      1.00      1.00        11
 ATuranAlkan       1.00      0.78      0.88         9
    AYArslan       1.00      0.70      0.82        10
     BCoskun       1.00      1.00      1.00         7
     CCandar       1.00      0.73      0.84        11
    COzdemir       1.00      1.00      1.00        10
  DCundioglu       1.00      1.00      1.00         8
  DUAribogan       0.73      1.00      0.84         8
      EArdic       0.86      1.00      0.92         6
      ECakir       0.78      0.78      0.78         9
    GGokturk       1.00      0.38      0.55         8
   HBabaoglu       0.86      0.86      0.86         7
      HCemal       1.00      0.50      0.67         4
       HUluc       1.00      1.00      1.00         7
  IKucukkaya       0.89      0.89      0.89         9
    MABirand       1.00    

Parameters: { "use_label_encoder" } are not used.



              precision    recall  f1-score   support

      AAltan       1.00      1.00      1.00        11
AAydintasbas       0.40      0.67      0.50         3
      AHakan       1.00      1.00      1.00        11
 ATuranAlkan       1.00      0.67      0.80         9
    AYArslan       1.00      0.80      0.89        10
     BCoskun       1.00      1.00      1.00         7
     CCandar       0.90      0.82      0.86        11
    COzdemir       0.89      0.80      0.84        10
  DCundioglu       1.00      1.00      1.00         8
  DUAribogan       0.89      1.00      0.94         8
      EArdic       0.86      1.00      0.92         6
      ECakir       1.00      0.89      0.94         9
    GGokturk       0.88      0.88      0.88         8
   HBabaoglu       1.00      0.86      0.92         7
      HCemal       1.00      0.50      0.67         4
       HUluc       1.00      1.00      1.00         7
  IKucukkaya       0.89      0.89      0.89         9
    MABirand       1.00    