In [None]:
import pandas as pd
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import BertTokenizer, TFBertForSequenceClassification

# Project root is taken as two directory levels above the current working directory.
base_path = os.path.dirname(os.path.dirname(os.getcwd()))
data_path = os.path.join(base_path, "data", "processed", "diplomacy")
model_dir = os.path.join(base_path, "models", "deceptency")

# Read the processed train and test splits (train first, then test).
train_df, test_df = (
    pd.read_parquet(os.path.join(data_path, parquet_name))
    for parquet_name in ("train_processed.parquet", "test_processed.parquet")
)

# Build X_test with every feature column, as the sklearn pipelines expect.
categorical_features = ["speaker", "receiver", "season"]
# Force the categorical columns to plain object dtype.
cat_types = dict.fromkeys(categorical_features, 'object')
test_df = test_df.astype(cat_types)

# Everything except the label column is a feature; the label becomes y_test.
X_test = test_df.drop(columns='target')
y_test = test_df['target'].values

def evaluate_model(y_test, y_pred, model_name="Model"):
    """Print a classification report and plot a confusion matrix for one model.

    Args:
        y_test: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_test``.
        model_name: Display name used in the printed header and the plot title.

    Returns:
        None. The report is printed and the heatmap is rendered with ``plt.show()``.
    """
    # Display names for the two classes, in sorted-label order.
    # NOTE(review): assumes the smaller label value means "Truth" — confirm against the dataset.
    class_names = ['Truth', 'Deception']

    print(f"\n--- {model_name} Results ---")

    # The report is only printed, so it is computed once (the previous
    # output_dict=True copy was never used).
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=class_names))

    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix - {model_name}')
    plt.show()


Model Klasörü: c:\work environment\Projects\amazon-spam-review\models\deceptency


In [None]:
# --- LOGISTIC REGRESSION ---
# Locate the serialized model inside the model directory and announce the load.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
path = os.path.join(model_dir, "logistic_regression_diplomacy.pkl")
print("Loading: " + path)
model = joblib.load(path)

# Predict on the held-out test features.
# NOTE(review): the saved output of this cell shows a feature-count ValueError;
# confirm X_test has the columns the saved pipeline was fitted on.
y_pred = model.predict(X_test)

evaluate_model(y_test, y_pred, model_name="Logistic Regression")

Yükleniyor: c:\work environment\Projects\amazon-spam-review\models\deceptency\logistic_regression_diplomacy.pkl


ValueError: X has 5000 features, but ColumnTransformer is expecting 12 features as input.

In [None]:
# --- NAIVE BAYES ---
# NOTE(review): this file name ends in "_deceptency" while the other model files
# end in "_diplomacy" — confirm the artifact name on disk.
model_path = os.path.join(model_dir, "naive_bayes_deceptency.pkl")
print(f"Yükleniyor: {model_path}")
nb_model = joblib.load(model_path)

y_pred_nb = nb_model.predict(X_test)

# Use the notebook's evaluate_model helper; plot_results is not defined anywhere.
evaluate_model(y_test, y_pred_nb, "Naive Bayes")

In [None]:
# --- SVM ---
model_path = os.path.join(model_dir, "svm_diplomacy.pkl")
print(f"Yükleniyor: {model_path}")
svm_model = joblib.load(model_path)

y_pred_svm = svm_model.predict(X_test)

# Use the notebook's evaluate_model helper; plot_results is not defined anywhere.
evaluate_model(y_test, y_pred_svm, "SVM")

In [None]:
# --- RANDOM FOREST ---
model_path = os.path.join(model_dir, "random_forest_diplomacy.pkl")
print(f"Yükleniyor: {model_path}")
rf_model = joblib.load(model_path)

y_pred_rf = rf_model.predict(X_test)

# Use the notebook's evaluate_model helper; plot_results is not defined anywhere.
evaluate_model(y_test, y_pred_rf, "Random Forest")

In [None]:
# --- LIGHTGBM ---
model_path = os.path.join(model_dir, "lightgbm_diplomacy.pkl")
print(f"Yükleniyor: {model_path}")
lgbm_model = joblib.load(model_path)

y_pred_lgbm = lgbm_model.predict(X_test)

# Use the notebook's evaluate_model helper; plot_results is not defined anywhere.
evaluate_model(y_test, y_pred_lgbm, "LightGBM")

In [None]:
# --- NEURAL NETWORK (MLP) ---
model_path = os.path.join(model_dir, "neural_network_diplomacy.pkl")
print(f"Yükleniyor: {model_path}")
nn_model = joblib.load(model_path)

y_pred_nn = nn_model.predict(X_test)

# Use the notebook's evaluate_model helper; plot_results is not defined anywhere.
evaluate_model(y_test, y_pred_nn, "Neural Network (MLP)")

In [None]:
# --- LSTM MODEL ---
model_path = os.path.join(model_dir, "lstm_diplomacy.h5")
print(f"Yükleniyor: {model_path}")

# LSTM preprocessing (must be IDENTICAL to the parameters used during training)
VOCAB_SIZE = 10000
MAX_LEN = 150
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")
# The vocabulary is rebuilt here from the training texts.
# NOTE(review): this only matches the trained model if training fitted the tokenizer
# on the same texts with the same settings; loading a saved tokenizer would be safer.
tokenizer.fit_on_texts(train_df['message_text'])

# Convert test messages to integer sequences, then pad/truncate at the end to MAX_LEN.
X_test_seq = tokenizer.texts_to_sequences(test_df['message_text'])
X_test_pad = pad_sequences(X_test_seq, maxlen=MAX_LEN, padding='post', truncating='post')

# Load the model and predict
lstm_model = load_model(model_path)
y_pred_prob_lstm = lstm_model.predict(X_test_pad)
# Threshold at 0.5 and flatten to 1-D so the labels line up with y_test.
# NOTE(review): presumes a single-unit output, i.e. shape (n, 1) — confirm.
y_pred_lstm = (y_pred_prob_lstm > 0.5).astype(int).ravel()

# Use the notebook's evaluate_model helper; plot_results is not defined anywhere.
evaluate_model(y_test, y_pred_lstm, "LSTM")

In [None]:
# --- BERT MODEL ---
model_path = os.path.join(model_dir, "bert_diplomacy")  # directory path
print(f"Yükleniyor: {model_path}")

# BERT tokenizer
# NOTE(review): loads the stock 'bert-base-uncased' vocabulary rather than a tokenizer
# saved alongside the fine-tuned model — confirm this matches training.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_LEN_BERT = 128

def encode_bert(texts):
    """Tokenize texts for BERT, padded/truncated to MAX_LEN_BERT tokens.

    Args:
        texts: Iterable of strings to encode.

    Returns:
        Tuple ``(input_ids, attention_masks)`` of NumPy arrays, one row per text.
    """
    # One batched tokenizer call replaces the per-text encode_plus loop.
    encoded = bert_tokenizer(
        list(texts), add_special_tokens=True, max_length=MAX_LEN_BERT,
        padding='max_length', truncation=True,
        return_attention_mask=True, return_tensors='tf'
    )
    return encoded['input_ids'].numpy(), encoded['attention_mask'].numpy()

X_test_ids, X_test_masks = encode_bert(test_df['message_text'])

# Load the model
bert_model = TFBertForSequenceClassification.from_pretrained(model_path)
y_pred_logits_bert = bert_model.predict([X_test_ids, X_test_masks]).logits

if y_pred_logits_bert.shape[-1] == 1:
    # Single-logit head: sigmoid gives the positive-class probability.
    y_pred_prob_bert = tf.sigmoid(y_pred_logits_bert).numpy().flatten()
    y_pred_bert = (y_pred_prob_bert > 0.5).astype(int)
else:
    # Multi-logit head: flattening would yield more predictions than samples,
    # so take the arg-max class per row instead.
    # NOTE(review): treats column 1 as the positive ("Deception") class — confirm.
    y_pred_prob_bert = tf.nn.softmax(y_pred_logits_bert, axis=-1).numpy()[:, 1]
    y_pred_bert = np.argmax(y_pred_logits_bert, axis=-1)

# Use the notebook's evaluate_model helper; plot_results is not defined anywhere.
evaluate_model(y_test, y_pred_bert, "BERT")