In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import re
import string

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTETomek

from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    confusion_matrix,
    classification_report
)


all_sheets = pd.read_excel(
    "/kaggle/input/trke-nefret-sylemleri-veriseti/Turkce Nefret Soylemi Veriseti.xlsx",
    sheet_name=None, 
    header=1
)
all_sheets_copy = all_sheets.copy()
all_sheets_copy.pop('TOPLAM', None)  
df = pd.concat(all_sheets_copy.values(), ignore_index=True)

def preprocess_text(text):
    text = str(text).lower()
    text = re.sub(r'http\S+', '', text)   
    text = re.sub(r'@\w+', '', text)      
    text = re.sub(r'#\w+', '', text)      
    text = text.translate(str.maketrans('', '', string.punctuation))  
    text = re.sub(r'\d+', '', text)       
    text = text.strip()
    return text

df['cleaned_text'] = df['Tweet'].apply(preprocess_text)

X_train, X_test, y_train, y_test = train_test_split(
    df['cleaned_text'],
    df['Etiket'],
    test_size=0.2,
    random_state=42
)

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

class_names = le.classes_

print("Training set shape:", X_train.shape, y_train.shape)
print("Test set shape:", X_test.shape, y_test.shape)
print("Class names:", class_names)




Training set shape: (8179,) (8179,)
Test set shape: (2045,) (2045,)
Class names: ['hiçbiri' 'nefret' 'saldırgan']


In [3]:

vectorizers = {
    'Count Unigram': CountVectorizer(ngram_range=(1, 1)),
    'Count Bigram': CountVectorizer(ngram_range=(1, 2)),
    'TFIDF Unigram': TfidfVectorizer(ngram_range=(1, 1)),
    'TFIDF Bigram': TfidfVectorizer(ngram_range=(1, 2))
}

numeric_resampling_methods = {
    'SMOTE': SMOTE(random_state=42),
    'RandomUnderSampler': RandomUnderSampler(random_state=42),
    'SMOTETomek': SMOTETomek(random_state=42),
    'None': None 
}

preprocessed_data = {}

for vec_name, vectorizer in vectorizers.items():
    print(f"Vektörleştirici uygulanıyor: {vec_name}")
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)

    X_train_vec = X_train_vec.astype(np.float32)
    X_test_vec = X_test_vec.astype(np.float32)

    print(f"{vec_name} vektörleştirici uygulandı. Eğitim verisi boyutu: {X_train_vec.shape}")

    for resample_name, resampler in numeric_resampling_methods.items():
        print(f"Yeniden örnekleme yöntemi uygulanıyor: {resample_name}")
        if resampler is not None:
            X_res, y_res = resampler.fit_resample(X_train_vec, y_train)
            print(f"{resample_name} yöntemi uygulandı. Yeni eğitim verisi boyutu: {X_res.shape}")
        else:
            X_res, y_res = X_train_vec, y_train
            print(f"Yeniden örnekleme yapılmadı. Eğitim verisi boyutu: {X_res.shape}")

        key = (vec_name, resample_name)
        preprocessed_data[key] = {
            'X_train': X_res,
            'X_test': X_test_vec,
            'y_train': y_res,
            'y_test': y_test
        }
        print(f"Veri {key} anahtarı ile preprocessed_data'ya eklendi.\n")


Vektörleştirici uygulanıyor: Count Unigram
Count Unigram vektörleştirici uygulandı. Eğitim verisi boyutu: (8179, 46317)
Yeniden örnekleme yöntemi uygulanıyor: SMOTE
SMOTE yöntemi uygulandı. Yeni eğitim verisi boyutu: (18585, 46317)
Veri ('Count Unigram', 'SMOTE') anahtarı ile preprocessed_data'ya eklendi.

Yeniden örnekleme yöntemi uygulanıyor: RandomUnderSampler
RandomUnderSampler yöntemi uygulandı. Yeni eğitim verisi boyutu: (414, 46317)
Veri ('Count Unigram', 'RandomUnderSampler') anahtarı ile preprocessed_data'ya eklendi.

Yeniden örnekleme yöntemi uygulanıyor: SMOTETomek
SMOTETomek yöntemi uygulandı. Yeni eğitim verisi boyutu: (18585, 46317)
Veri ('Count Unigram', 'SMOTETomek') anahtarı ile preprocessed_data'ya eklendi.

Yeniden örnekleme yöntemi uygulanıyor: None
Yeniden örnekleme yapılmadı. Eğitim verisi boyutu: (8179, 46317)
Veri ('Count Unigram', 'None') anahtarı ile preprocessed_data'ya eklendi.

Vektörleştirici uygulanıyor: Count Bigram
Count Bigram vektörleştirici uygulandı

In [6]:

def print_classification_results(y_true, y_pred, conf_matrix, title="Sınıflandırma Sonuçları"):

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    
    print(title + ":")
    print(f"  Doğruluk (Accuracy): {accuracy:.4f}")
    print(f"  Precision (Ağ.): {precision:.4f}")
    print(f"  Recall (Ağ.)   : {recall:.4f}")
    print(f"  F1-Skor (Ağ.)  : {f1:.4f}")
    
    print("\nKarışıklık Matrisi:")
    for row in conf_matrix:
        print("  " + " ".join(map(str, row)))

def extended_classification_report(
    clf, 
    X_train, y_train, 
    X_test, y_test, 
    target_names, 
    vec_name="", 
    resample_name="", 
    classifier_name=""
):

    
    clf.fit(X_train, y_train)
    
    y_train_pred = clf.predict(X_train)
    y_test_pred  = clf.predict(X_test)
    
    train_cm = confusion_matrix(y_train, y_train_pred)
    test_cm  = confusion_matrix(y_test,  y_test_pred)
    
    print("="*80)
    print(f"Detaylı Rapor - Vektörleştirici: {vec_name}, Yeniden Örnekleme: {resample_name}, Model: {classifier_name}")
    print("="*80)
    
    print("\n[EĞİTİM (TRAIN) SONUÇLARI]")
    print_classification_results(
        y_train, 
        y_train_pred, 
        train_cm, 
        title="Eğitim Verisi Sonuçları"
    )
    train_report_dict = classification_report(
        y_train, 
        y_train_pred, 
        target_names=target_names, 
        output_dict=True, 
        zero_division=0
    )
    train_report_df = pd.DataFrame(train_report_dict).transpose()
    print("\nSınıf bazlı metrikler (EĞİTİM):")
    print(train_report_df)

    if hasattr(clf, 'loss_curve_'):
        print("\nMLP Eğitim Kayıp Eğrisi (Loss Curve):")
        print(clf.loss_curve_) 

    print("\n[TEST SONUÇLARI]")
    print_classification_results(
        y_test, 
        y_test_pred, 
        test_cm, 
        title="Test Verisi Sonuçları"
    )

    test_report_dict = classification_report(
        y_test, 
        y_test_pred, 
        target_names=target_names, 
        output_dict=True, 
        zero_division=0
    )
    test_report_df = pd.DataFrame(test_report_dict).transpose()
    print("\nSınıf bazlı metrikler (TEST):")
    print(test_report_df)

    print("\n" + "="*80 + "\n")

    return {
        'train_report': train_report_df,
        'test_report': test_report_df
    }


final_results = {}  


def run_detailed_experiments(clf, preprocessed_data, class_names, classifier_name):

    global final_results  

    best_score = 0.0
    best_combination = None

    for (vec_name, resample_name), data in preprocessed_data.items():
        X_resampled = data['X_train']
        X_test_vec  = data['X_test']
        y_resampled = data['y_train']
        y_test_curr = data['y_test']

        _ = extended_classification_report(
            clf=clf,
            X_train=X_resampled,
            y_train=y_resampled,
            X_test=X_test_vec,
            y_test=y_test_curr,
            target_names=class_names,
            vec_name=vec_name,
            resample_name=resample_name,
            classifier_name=classifier_name
        )

        y_pred_test = clf.predict(X_test_vec)
        acc = accuracy_score(y_test_curr, y_pred_test)
        prec_w = precision_score(y_test_curr, y_pred_test, average='weighted', zero_division=0)
        rec_w  = recall_score(y_test_curr, y_pred_test, average='weighted', zero_division=0)
        f1_w   = f1_score(y_test_curr, y_pred_test, average='weighted', zero_division=0)

        # prec_m = precision_score(y_test_curr, y_pred_test, average="macro", zero_division=0)
        # rec_m  = recall_score(y_test_curr, y_pred_test, average="macro", zero_division=0)
        # f1_m   = f1_score(y_test_curr, y_pred_test, average="macro", zero_division=0)

        if acc > best_score:
            best_score = acc
            best_combination = {
                'vectorizer': vec_name,
                'resampling': resample_name,
                'classifier': classifier_name,
                'accuracy': acc
            }

        combo_key = f"{classifier_name} + {vec_name} + {resample_name}"
        final_results[combo_key] = {
            "Accuracy": acc,
            "Precision(Weighted)": prec_w,
            "Recall(Weighted)":    rec_w,
            "F1-Score(Weighted)":  f1_w
            # "Precision(Macro)": prec_m, vb. isterseniz
        }

    if best_combination:
        print(f"=== {classifier_name} İçin En İyi Kombinasyon ===")
        print(f"Vektörleştirici : {best_combination['vectorizer']}")
        print(f"Yeniden Örnekleme : {best_combination['resampling']}")
        print(f"Model : {best_combination['classifier']}")
        print(f"Test Accuracy : {best_combination['accuracy']:.4f}")
    else:
        print(f"{classifier_name} için geçerli bir kombinasyon bulunamadı.")

    return best_combination


In [7]:

from sklearn.linear_model import LogisticRegression



# Logistic Regression
log_reg = LogisticRegression(
    penalty='l2',
    C=1.0,
    solver='lbfgs',
    max_iter=500,
    random_state=42
)



print("\n\n=== LOGISTIC REGRESSION ===")
best_combination_lr = run_detailed_experiments(
    clf=log_reg,
    preprocessed_data=preprocessed_data,
    class_names=class_names,
    classifier_name="Logistic Regression"
)





=== LOGISTIC REGRESSION ===
Detaylı Rapor - Vektörleştirici: Count Unigram, Yeniden Örnekleme: SMOTE, Model: Logistic Regression

[EĞİTİM (TRAIN) SONUÇLARI]
Eğitim Verisi Sonuçları:
  Doğruluk (Accuracy): 0.9990
  Precision (Ağ.): 0.9990
  Recall (Ağ.)   : 0.9990
  F1-Skor (Ağ.)  : 0.9990

Karışıklık Matrisi:
  6177 18 0
  1 6194 0
  0 0 6195

Sınıf bazlı metrikler (EĞİTİM):
              precision    recall  f1-score       support
hiçbiri        0.999838  0.997094  0.998464   6195.000000
nefret         0.997102  0.999839  0.998469   6195.000000
saldırgan      1.000000  1.000000  1.000000   6195.000000
accuracy       0.998978  0.998978  0.998978      0.998978
macro avg      0.998980  0.998978  0.998978  18585.000000
weighted avg   0.998980  0.998978  0.998978  18585.000000

[TEST SONUÇLARI]
Test Verisi Sonuçları:
  Doğruluk (Accuracy): 0.8298
  Precision (Ağ.): 0.8231
  Recall (Ağ.)   : 0.8298
  F1-Skor (Ağ.)  : 0.8256

Karışıklık Matrisi:
  1391 131 5
  188 297 5
  7 12 9

Sınıf baz

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest
rf = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    bootstrap=True,
    random_state=42
)
print("\n\n=== RANDOM FOREST ===")
best_combination_rf = run_detailed_experiments(
    clf=rf,
    preprocessed_data=preprocessed_data,
    class_names=class_names,
    classifier_name="Random Forest"
)





=== RANDOM FOREST ===
Detaylı Rapor - Vektörleştirici: Count Unigram, Yeniden Örnekleme: SMOTE, Model: Random Forest

[EĞİTİM (TRAIN) SONUÇLARI]
Eğitim Verisi Sonuçları:
  Doğruluk (Accuracy): 0.9999
  Precision (Ağ.): 0.9999
  Recall (Ağ.)   : 0.9999
  F1-Skor (Ağ.)  : 0.9999

Karışıklık Matrisi:
  6195 0 0
  2 6193 0
  0 0 6195

Sınıf bazlı metrikler (EĞİTİM):
              precision    recall  f1-score       support
hiçbiri        0.999677  1.000000  0.999839   6195.000000
nefret         1.000000  0.999677  0.999839   6195.000000
saldırgan      1.000000  1.000000  1.000000   6195.000000
accuracy       0.999892  0.999892  0.999892      0.999892
macro avg      0.999892  0.999892  0.999892  18585.000000
weighted avg   0.999892  0.999892  0.999892  18585.000000

[TEST SONUÇLARI]
Test Verisi Sonuçları:
  Doğruluk (Accuracy): 0.7966
  Precision (Ağ.): 0.8009
  Recall (Ağ.)   : 0.7966
  F1-Skor (Ağ.)  : 0.7448

Karışıklık Matrisi:
  1517 10 0
  377 112 1
  22 6 0

Sınıf bazlı metrikler (

In [11]:

import xgboost as xgb
# XGBoost
xgb_clf = xgb.XGBClassifier(
    learning_rate=0.03,
    n_estimators=100,
    random_state=42,
    use_label_encoder=False,   
    eval_metric='logloss'      
)

print("\n\n=== XGBOOST ===")
best_combination_xgb = run_detailed_experiments(
    clf=xgb_clf,
    preprocessed_data=preprocessed_data,
    class_names=class_names,
    classifier_name="XGBoost"
)





=== XGBOOST ===
Detaylı Rapor - Vektörleştirici: Count Unigram, Yeniden Örnekleme: SMOTE, Model: XGBoost

[EĞİTİM (TRAIN) SONUÇLARI]
Eğitim Verisi Sonuçları:
  Doğruluk (Accuracy): 0.8750
  Precision (Ağ.): 0.8931
  Recall (Ağ.)   : 0.8750
  F1-Skor (Ağ.)  : 0.8736

Karışıklık Matrisi:
  6000 193 2
  1833 4311 51
  141 103 5951

Sınıf bazlı metrikler (EĞİTİM):
              precision    recall  f1-score       support
hiçbiri        0.752445  0.968523  0.846919   6195.000000
nefret         0.935750  0.695884  0.798186   6195.000000
saldırgan      0.991173  0.960613  0.975654   6195.000000
accuracy       0.875007  0.875007  0.875007      0.875007
macro avg      0.893123  0.875007  0.873586  18585.000000
weighted avg   0.893123  0.875007  0.873586  18585.000000

[TEST SONUÇLARI]
Test Verisi Sonuçları:
  Doğruluk (Accuracy): 0.7702
  Precision (Ağ.): 0.7419
  Recall (Ağ.)   : 0.7702
  F1-Skor (Ağ.)  : 0.7181

Karışıklık Matrisi:
  1479 48 0
  398 86 6
  14 4 10

Sınıf bazlı metrikler (TE

In [12]:
import lightgbm as lgb


# LightGBM
ligtgbm = lgb.LGBMClassifier(
    learning_rate=0.03,
    n_estimators=100,
    random_state=42
)
print("\n\n=== LIGHTGBM ===")
best_combination_ligtgbm = run_detailed_experiments(
    clf=ligtgbm,
    preprocessed_data=preprocessed_data,
    class_names=class_names,
    classifier_name="LightGBM"
)





=== LIGHTGBM ===
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.157315 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 69637
[LightGBM] [Info] Number of data points in the train set: 18585, number of used features: 2956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
Detaylı Rapor - Vektörleştirici: Count Unigram, Yeniden Örnekleme: SMOTE, Model: LightGBM

[EĞİTİM (TRAIN) SONUÇLARI]
Eğitim Verisi Sonuçları:
  Doğruluk (Accuracy): 0.9205
  Precision (Ağ.): 0.9303
  Recall (Ağ.)   : 0.9205
  F1-Skor (Ağ.)  : 0.9200

Karışıklık Matrisi:
  6084 110 1
  1238 4918 39
  72 18 6105

Sınıf bazlı metrikler (EĞİTİM):
              precision    recall  f1-score       support
hiçbiri        0.822829  0.982082  0.895430   6195.000000
n

In [8]:
from sklearn.neural_network import MLPClassifier


# MLP (ANN)
ann = MLPClassifier(
    hidden_layer_sizes=(50,25), 
    max_iter=100, 
    random_state=42
)
print("\n\n=== MULTI-LAYER PERCEPTRON (ANN) ===")
best_combination_ann = run_detailed_experiments(
    clf=ann,
    preprocessed_data=preprocessed_data,
    class_names=class_names,
    classifier_name="ANN"
)



=== MULTI-LAYER PERCEPTRON (ANN) ===
Detaylı Rapor - Vektörleştirici: Count Unigram, Yeniden Örnekleme: SMOTE, Model: ANN

[EĞİTİM (TRAIN) SONUÇLARI]
Eğitim Verisi Sonuçları:
  Doğruluk (Accuracy): 0.9996
  Precision (Ağ.): 0.9996
  Recall (Ağ.)   : 0.9996
  F1-Skor (Ağ.)  : 0.9996

Karışıklık Matrisi:
  6195 0 0
  7 6188 0
  0 0 6195

Sınıf bazlı metrikler (EĞİTİM):
              precision    recall  f1-score       support
hiçbiri        0.998871  1.000000  0.999435   6195.000000
nefret         1.000000  0.998870  0.999435   6195.000000
saldırgan      1.000000  1.000000  1.000000   6195.000000
accuracy       0.999623  0.999623  0.999623      0.999623
macro avg      0.999624  0.999623  0.999623  18585.000000
weighted avg   0.999624  0.999623  0.999623  18585.000000

MLP Eğitim Kayıp Eğrisi (Loss Curve):
[0.5738755716484836, 0.06362167378082052, 0.016626115114477844, 0.0080011489838992, 0.005399770419557215, 0.0043639916183976085, 0.003677989057102669, 0.0031843007848761182, 0.0025953



Detaylı Rapor - Vektörleştirici: TFIDF Unigram, Yeniden Örnekleme: RandomUnderSampler, Model: ANN

[EĞİTİM (TRAIN) SONUÇLARI]
Eğitim Verisi Sonuçları:
  Doğruluk (Accuracy): 1.0000
  Precision (Ağ.): 1.0000
  Recall (Ağ.)   : 1.0000
  F1-Skor (Ağ.)  : 1.0000

Karışıklık Matrisi:
  138 0 0
  0 138 0
  0 0 138

Sınıf bazlı metrikler (EĞİTİM):
              precision  recall  f1-score  support
hiçbiri             1.0     1.0       1.0    138.0
nefret              1.0     1.0       1.0    138.0
saldırgan           1.0     1.0       1.0    138.0
accuracy            1.0     1.0       1.0      1.0
macro avg           1.0     1.0       1.0    414.0
weighted avg        1.0     1.0       1.0    414.0

MLP Eğitim Kayıp Eğrisi (Loss Curve):
[1.1309629415579465, 1.1165655658256604, 1.1036399010192945, 1.0886781982316485, 1.071889053660372, 1.052745123689071, 1.0308001298877354, 1.0061406243424487, 0.978868760364355, 0.9482562114372921, 0.9140788558030474, 0.876717112470134, 0.836861561877025, 0.794



Detaylı Rapor - Vektörleştirici: TFIDF Bigram, Yeniden Örnekleme: RandomUnderSampler, Model: ANN

[EĞİTİM (TRAIN) SONUÇLARI]
Eğitim Verisi Sonuçları:
  Doğruluk (Accuracy): 1.0000
  Precision (Ağ.): 1.0000
  Recall (Ağ.)   : 1.0000
  F1-Skor (Ağ.)  : 1.0000

Karışıklık Matrisi:
  138 0 0
  0 138 0
  0 0 138

Sınıf bazlı metrikler (EĞİTİM):
              precision  recall  f1-score  support
hiçbiri             1.0     1.0       1.0    138.0
nefret              1.0     1.0       1.0    138.0
saldırgan           1.0     1.0       1.0    138.0
accuracy            1.0     1.0       1.0      1.0
macro avg           1.0     1.0       1.0    414.0
weighted avg        1.0     1.0       1.0    414.0

MLP Eğitim Kayıp Eğrisi (Loss Curve):
[1.118611343927199, 1.09830546208741, 1.0774102380357502, 1.0532042917494036, 1.0251451301875898, 0.993802947729394, 0.958441111067931, 0.9193318670635641, 0.8786034438759233, 0.8365865647980556, 0.7933437218493886, 0.749918556469247, 0.7067193743379796, 0.66431

In [10]:
results_df = pd.DataFrame(final_results)
results_df_T = results_df.T

results_df_T.head()


Unnamed: 0,Accuracy,Precision(Weighted),Recall(Weighted),F1-Score(Weighted)
Logistic Regression + Count Unigram + SMOTE,0.829829,0.823114,0.829829,0.825588
Logistic Regression + Count Unigram + RandomUnderSampler,0.562836,0.72743,0.562836,0.61466
Logistic Regression + Count Unigram + SMOTETomek,0.829829,0.823114,0.829829,0.825588
Logistic Regression + Count Unigram + None,0.83912,0.826892,0.83912,0.820806
Logistic Regression + Count Bigram + SMOTE,0.827384,0.815703,0.827384,0.817889


In [11]:
model_name = "Logistic Regression"

lr_df = results_df_T.loc[
    results_df_T.index.str.startswith(model_name)
]
lr_df


Unnamed: 0,Accuracy,Precision(Weighted),Recall(Weighted),F1-Score(Weighted)
Logistic Regression + Count Unigram + SMOTE,0.829829,0.823114,0.829829,0.825588
Logistic Regression + Count Unigram + RandomUnderSampler,0.562836,0.72743,0.562836,0.61466
Logistic Regression + Count Unigram + SMOTETomek,0.829829,0.823114,0.829829,0.825588
Logistic Regression + Count Unigram + None,0.83912,0.826892,0.83912,0.820806
Logistic Regression + Count Bigram + SMOTE,0.827384,0.815703,0.827384,0.817889
Logistic Regression + Count Bigram + RandomUnderSampler,0.555501,0.719114,0.555501,0.606229
Logistic Regression + Count Bigram + SMOTETomek,0.827384,0.815703,0.827384,0.817889
Logistic Regression + Count Bigram + None,0.828362,0.821494,0.828362,0.802628
Logistic Regression + TFIDF Unigram + SMOTE,0.828851,0.823977,0.828851,0.825857
Logistic Regression + TFIDF Unigram + RandomUnderSampler,0.576528,0.73993,0.576528,0.630542


In [16]:
model_name = "Random Forest"

rf_df = results_df_T.loc[
    results_df_T.index.str.startswith(model_name)
]
rf_df


Unnamed: 0,Accuracy,Precision(Weighted),Recall(Weighted),F1-Score(Weighted)
Random Forest + Count Unigram + SMOTE,0.796577,0.80086,0.796577,0.744846
Random Forest + Count Unigram + RandomUnderSampler,0.517359,0.721103,0.517359,0.552655
Random Forest + Count Unigram + SMOTETomek,0.796577,0.80086,0.796577,0.744846
Random Forest + Count Unigram + None,0.801467,0.816743,0.801467,0.752864
Random Forest + Count Bigram + SMOTE,0.783374,0.805562,0.783374,0.718219
Random Forest + Count Bigram + RandomUnderSampler,0.586797,0.718596,0.586797,0.619743
Random Forest + Count Bigram + SMOTETomek,0.783374,0.805562,0.783374,0.718219
Random Forest + Count Bigram + None,0.791687,0.801924,0.791687,0.733897
Random Forest + TFIDF Unigram + SMOTE,0.811247,0.803693,0.811247,0.771345
Random Forest + TFIDF Unigram + RandomUnderSampler,0.498289,0.724423,0.498289,0.533864


In [17]:
model_name = "XGBoost"

xgb_df = results_df_T.loc[
    results_df_T.index.str.startswith(model_name)
]
xgb_df


Unnamed: 0,Accuracy,Precision(Weighted),Recall(Weighted),F1-Score(Weighted)
XGBoost + Count Unigram + SMOTE,0.770171,0.741892,0.770171,0.718056
XGBoost + Count Unigram + RandomUnderSampler,0.596577,0.702937,0.596577,0.634198
XGBoost + Count Unigram + SMOTETomek,0.770171,0.741892,0.770171,0.718056
XGBoost + Count Unigram + None,0.783863,0.800883,0.783863,0.721403
XGBoost + Count Bigram + SMOTE,0.764303,0.729501,0.764303,0.708268
XGBoost + Count Bigram + RandomUnderSampler,0.59511,0.701852,0.59511,0.632524
XGBoost + Count Bigram + SMOTETomek,0.764303,0.729501,0.764303,0.708268
XGBoost + Count Bigram + None,0.781907,0.792781,0.781907,0.719445
XGBoost + TFIDF Unigram + SMOTE,0.755501,0.727794,0.755501,0.73364
XGBoost + TFIDF Unigram + RandomUnderSampler,0.579951,0.700254,0.579951,0.621413


In [18]:
model_name = "LightGBM"

lgb_df = results_df_T.loc[
    results_df_T.index.str.startswith(model_name)
]
lgb_df


Unnamed: 0,Accuracy,Precision(Weighted),Recall(Weighted),F1-Score(Weighted)
LightGBM + Count Unigram + SMOTE,0.80489,0.799305,0.80489,0.769342
LightGBM + Count Unigram + RandomUnderSampler,0.453301,0.714625,0.453301,0.527475
LightGBM + Count Unigram + SMOTETomek,0.80489,0.799305,0.80489,0.769342
LightGBM + Count Unigram + None,0.804401,0.803307,0.804401,0.765372
LightGBM + Count Bigram + SMOTE,0.805379,0.807511,0.805379,0.765767
LightGBM + Count Bigram + RandomUnderSampler,0.453301,0.714625,0.453301,0.527475
LightGBM + Count Bigram + SMOTETomek,0.805379,0.807511,0.805379,0.765767
LightGBM + Count Bigram + None,0.803912,0.801731,0.803912,0.765011
LightGBM + TFIDF Unigram + SMOTE,0.801956,0.78863,0.801956,0.790207
LightGBM + TFIDF Unigram + RandomUnderSampler,0.454768,0.698966,0.454768,0.528255


In [12]:
model_name = "ANN"

ann_df = results_df_T.loc[
    results_df_T.index.str.startswith(model_name)
]
ann_df


Unnamed: 0,Accuracy,Precision(Weighted),Recall(Weighted),F1-Score(Weighted)
ANN + Count Unigram + SMOTE,0.800978,0.784159,0.800978,0.791574
ANN + Count Unigram + RandomUnderSampler,0.513447,0.733535,0.513447,0.583239
ANN + Count Unigram + SMOTETomek,0.800978,0.784159,0.800978,0.791574
ANN + Count Unigram + None,0.829829,0.810568,0.829829,0.81426
ANN + Count Bigram + SMOTE,0.811247,0.793758,0.811247,0.801415
ANN + Count Bigram + RandomUnderSampler,0.452812,0.745935,0.452812,0.496272
ANN + Count Bigram + SMOTETomek,0.811247,0.793758,0.811247,0.801415
ANN + Count Bigram + None,0.821516,0.802874,0.821516,0.809259
ANN + TFIDF Unigram + SMOTE,0.820538,0.805989,0.820538,0.81264
ANN + TFIDF Unigram + RandomUnderSampler,0.533007,0.727373,0.533007,0.600276
