In [1]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

# Load the CSV dataset
df = pd.read_csv('./data_without_extra_features/lemmatized_and_misspelled_removed_SEFACED.csv', encoding='utf-8')

# Extract the corpus (lemmatized documents) and the target variable.
# Missing texts become empty strings and every entry is coerced to str, because
# word_tokenize raises on NaN / non-string values. No rows are dropped, so the
# corpus stays aligned with y.
corpus = df['lemmatized_tokens'].fillna('').astype(str)
y = df['Class_Label']

# Tokenize the text (word_tokenize needs NLTK's Punkt tokenizer data to be installed)
tokenized_corpus = [word_tokenize(text) for text in corpus]

# Train the Word2Vec model.
# workers=1: multi-threaded training introduces ordering jitter, so the embeddings
# (and every metric computed from them) would differ between runs. seed=1 is gensim's
# default, written out explicitly. Reproducibility across interpreter launches
# additionally requires a fixed PYTHONHASHSEED.
# NOTE(review): the embedding is trained on the full corpus, before the train/test
# split done later in this notebook, so test documents influence the features —
# confirm this is intended.
word2vec_model = Word2Vec(tokenized_corpus, vector_size=100, window=5, min_count=1, workers=1, seed=1)

# Build a document embedding by averaging the word vectors of its tokens
def document_embedding(tokens):
    """Return the mean Word2Vec vector of the in-vocabulary words in ``tokens``.

    Tokens that are not in ``word2vec_model.wv`` are skipped. When no token is
    in the vocabulary (including an empty ``tokens``), a zero vector of length
    ``word2vec_model.vector_size`` is returned instead.
    """
    keyed_vectors = word2vec_model.wv
    known_vectors = [keyed_vectors[token] for token in tokens if token in keyed_vectors]

    # Guard clause: nothing to average, fall back to the zero vector
    if not known_vectors:
        return np.zeros(word2vec_model.vector_size)

    return np.mean(known_vectors, axis=0)

# Build the document-embedding matrix: one averaged vector per document
X_word2vec = np.array(list(map(document_embedding, tokenized_corpus)))

# Split the dataset into training and test sets (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    X_word2vec,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the Logistic Regression model and fit it on the training split
# (fit returns the estimator itself)
lr_model = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# Predict labels for the test split
lr_predictions = lr_model.predict(X_test)

# Evaluate the model: accuracy plus support-weighted precision and recall
lr_accuracy = accuracy_score(y_test, lr_predictions)
lr_precision, lr_recall = (
    metric(y_test, lr_predictions, average='weighted')
    for metric in (precision_score, recall_score)
)

# One print call, newline-separated, so the emitted text matches line-by-line printing
print(
    "Logistic Regression Classifier:",
    f"Accuracy: {lr_accuracy}",
    f"Precision: {lr_precision}",
    f"Recall: {lr_recall}",
    "Sınıflandırma Raporu:",
    classification_report(y_test, lr_predictions),
    sep="\n",
)


Logistic Regression Classifier:
Accuracy: 0.8975
Precision: 0.8976307235505548
Recall: 0.8975
Sınıflandırma Raporu:
              precision    recall  f1-score   support

  Fraudulent       0.89      0.91      0.90       796
      Normal       0.91      0.89      0.90       804

    accuracy                           0.90      1600
   macro avg       0.90      0.90      0.90      1600
weighted avg       0.90      0.90      0.90      1600



In [3]:
# Random Forest

from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest model.
# random_state is fixed (same value as the train/test split) because the forest
# is built with randomness; without it the scores below change on every run.
rf_model = RandomForestClassifier(random_state=42)

# Train the Random Forest model
rf_model.fit(X_train, y_train)

# Make predictions on the test set
rf_predictions = rf_model.predict(X_test)

# Evaluate the model: accuracy plus support-weighted precision and recall

rf_accuracy = accuracy_score(y_test, rf_predictions)
rf_precision = precision_score(y_test, rf_predictions, average='weighted')
rf_recall = recall_score(y_test, rf_predictions, average='weighted')

print("Random Forest Classifier:")
print(f"Accuracy: {rf_accuracy}")
print(f"Precision: {rf_precision}")
print(f"Recall: {rf_recall}")
print("Sınıflandırma Raporu:")
print(classification_report(y_test, rf_predictions))


Random Forest Classifier:
Accuracy: 0.960625
Precision: 0.9608372406151
Recall: 0.960625
Classification Report:
              precision    recall  f1-score   support

  Fraudulent       0.95      0.97      0.96       796
      Normal       0.97      0.95      0.96       804

    accuracy                           0.96      1600
   macro avg       0.96      0.96      0.96      1600
weighted avg       0.96      0.96      0.96      1600



In [4]:
# Naive Bayes

from sklearn.naive_bayes import GaussianNB

# Create the Gaussian Naive Bayes model and fit it on the training split
# (fit returns the estimator itself)
nb_model = GaussianNB().fit(X_train, y_train)

# Predict labels for the test split
nb_predictions = nb_model.predict(X_test)

# Evaluate the model: accuracy plus support-weighted precision and recall
nb_accuracy = accuracy_score(y_test, nb_predictions)
nb_precision, nb_recall = (
    metric(y_test, nb_predictions, average='weighted')
    for metric in (precision_score, recall_score)
)

# One print call, newline-separated, so the emitted text matches line-by-line printing
print(
    "Naive Bayes Classifier:",
    f"Accuracy: {nb_accuracy}",
    f"Precision: {nb_precision}",
    f"Recall: {nb_recall}",
    "Sınıflandırma Raporu:",
    classification_report(y_test, nb_predictions),
    sep="\n",
)


Naive Bayes Classifier:
Accuracy: 0.82625
Precision: 0.8319707756012105
Recall: 0.82625
Classification Report:
              precision    recall  f1-score   support

  Fraudulent       0.88      0.76      0.81       796
      Normal       0.79      0.89      0.84       804

    accuracy                           0.83      1600
   macro avg       0.83      0.83      0.83      1600
weighted avg       0.83      0.83      0.83      1600



In [2]:
# Support Vector Machine

from sklearn.svm import SVC

# Create the SVM model with default settings and fit it on the training split
# (fit returns the estimator itself)
svm_model = SVC().fit(X_train, y_train)

# Predict labels for the test split
svm_predictions = svm_model.predict(X_test)

# Evaluate the model: accuracy plus support-weighted precision and recall
svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_precision, svm_recall = (
    metric(y_test, svm_predictions, average='weighted')
    for metric in (precision_score, recall_score)
)

# One print call, newline-separated, so the emitted text matches line-by-line printing
print(
    "Support Vector Machine Classifier:",
    f"Accuracy: {svm_accuracy}",
    f"Precision: {svm_precision}",
    f"Recall: {svm_recall}",
    "Classification Report:",
    classification_report(y_test, svm_predictions),
    sep="\n",
)




Support Vector Machine Classifier:
Accuracy: 0.888125
Precision: 0.8882060774965583
Recall: 0.888125
Classification Report:
              precision    recall  f1-score   support

  Fraudulent       0.88      0.89      0.89       796
      Normal       0.89      0.88      0.89       804

    accuracy                           0.89      1600
   macro avg       0.89      0.89      0.89      1600
weighted avg       0.89      0.89      0.89      1600

