In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from scipy.stats import mode
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel

  from tqdm.autonotebook import tqdm, trange


In [4]:
data = pd.read_csv('/content/drive/MyDrive/Yüksek Lisans 2. Dönem/Kolektif Öğrenme/Proje2/Türkçe Duygu Analizi Proje/magaza_yorumlari.csv',encoding = "utf-16")
data.head()

Unnamed: 0,Görüş,Durum
0,Ürün gayet güzel boyutları dört kişilik aile ...,Olumlu
1,Ürünü kullandım gayet memnunum,Olumlu
2,Hızlı kargo,Olumlu
3,8 yıl önce lg 42 lm serisi almıştım ve tv den ...,Olumsuz
4,ürün fena değil ancak gövdenin plastik olması ...,Olumlu


In [5]:
data.shape

(8491, 2)

In [6]:
data.isnull().sum()

Unnamed: 0,0
Görüş,2
Durum,0


In [7]:
data = data.dropna(axis=0)
data.isnull().sum()

Unnamed: 0,0
Görüş,0
Durum,0


In [8]:
data["Durum"].value_counts()

Unnamed: 0_level_0,count
Durum,Unnamed: 1_level_1
Olumlu,4252
Olumsuz,4237


In [9]:
# SentenceTransformers modelleri
sentence_transformers_models = {
    "all-MiniLM-L12-v2": SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2"),
    "multilingual-e5-large-instruct": SentenceTransformer("intfloat/multilingual-e5-large-instruct"),
    "gte-large": SentenceTransformer("thenlper/gte-large"),
    "bert-base-turkish-uncased": SentenceTransformer("dbmdz/bert-base-turkish-uncased"),
    "jina-embeddings-v3": SentenceTransformer("jinaai/jina-embeddings-v3", trust_remote_code=True)
}

# Makine öğrenimi modelleri
ml_models = {
    "SVM": SVC(kernel="linear", probability=True, random_state=42),
    "RF": RandomForestClassifier(n_estimators=100, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)
}

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/128 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/140k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/67.9k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/263k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/378 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/464 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/734k [00:00<?, ?B/s]

custom_st.py:   0%|          | 0.00/8.78k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/jina-embeddings-v3:
- custom_st.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

configuration_xlm_roberta.py:   0%|          | 0.00/6.54k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- configuration_xlm_roberta.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_lora.py:   0%|          | 0.00/15.4k [00:00<?, ?B/s]

modeling_xlm_roberta.py:   0%|          | 0.00/49.9k [00:00<?, ?B/s]

mha.py:   0%|          | 0.00/34.4k [00:00<?, ?B/s]

rotary.py:   0%|          | 0.00/24.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- rotary.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- mha.py
- rotary.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


xlm_padding.py:   0%|          | 0.00/10.0k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- xlm_padding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


block.py:   0%|          | 0.00/17.8k [00:00<?, ?B/s]

stochastic_depth.py:   0%|          | 0.00/3.76k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- stochastic_depth.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


mlp.py:   0%|          | 0.00/7.62k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- mlp.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- block.py
- stochastic_depth.py
- mlp.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


embedding.py:   0%|          | 0.00/3.88k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- embedding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- modeling_xlm_roberta.py
- mha.py
- xlm_padding.py
- block.py
- embedding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- modeling_lora.py
- modeling_xlm_roberta.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/1.14G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/192 [00:00<?, ?B/s]

In [10]:
X_train, X_test, y_train, y_test = train_test_split(data["Görüş"], data["Durum"], test_size=0.2,  random_state=42)

In [11]:
y_train.value_counts()

Unnamed: 0_level_0,count
Durum,Unnamed: 1_level_1
Olumlu,3397
Olumsuz,3394


In [12]:
y_test.value_counts()

Unnamed: 0_level_0,count
Durum,Unnamed: 1_level_1
Olumlu,855
Olumsuz,843


In [13]:
# Hedef değişkenleri encode etme
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [14]:
# 3. Temsil Yöntemi ile Embedding Oluşturma
def create_embeddings(texts, model_name, model_type):
    if model_type == "sentence-transformers":
        return model_name.encode(texts, batch_size=32, show_progress_bar=True)

In [None]:
# Embedding'leri oluştur ve kaydet
for representation_name, representation_model in sentence_transformers_models.items():
    print(f"\nTemsil yöntemi: {representation_name}")

    # Embedding oluşturma
    X_train_embeddings = create_embeddings(X_train.tolist(), representation_model, "sentence-transformers")
    X_test_embeddings = create_embeddings(X_test.tolist(), representation_model, "sentence-transformers")

    # Numpy formatında kaydetme
    np.save(f"/content/drive/MyDrive/Yüksek Lisans 2. Dönem/Kolektif Öğrenme/Proje2/Türkçe Duygu Analizi Proje/duygu_analizi_tranformers/{representation_name}_train.npy", X_train_embeddings)
    np.save(f"/content/drive/MyDrive/Yüksek Lisans 2. Dönem/Kolektif Öğrenme/Proje2/Türkçe Duygu Analizi Proje/duygu_analizi_tranformers/{representation_name}_test.npy", X_test_embeddings)

    print(f"Embedding'ler {representation_name} için kaydedildi!")


Temsil yöntemi: all-MiniLM-L12-v2


Batches:   0%|          | 0/213 [00:00<?, ?it/s]

Batches:   0%|          | 0/54 [00:00<?, ?it/s]

Embedding'ler all-MiniLM-L12-v2 için kaydedildi!

Temsil yöntemi: multilingual-e5-large-instruct


Batches:   0%|          | 0/213 [00:00<?, ?it/s]

Batches:   0%|          | 0/54 [00:00<?, ?it/s]

Embedding'ler multilingual-e5-large-instruct için kaydedildi!

Temsil yöntemi: gte-large


Batches:   0%|          | 0/213 [00:00<?, ?it/s]

Batches:   0%|          | 0/54 [00:00<?, ?it/s]

Embedding'ler gte-large için kaydedildi!

Temsil yöntemi: bert-base-turkish-uncased


Batches:   0%|          | 0/213 [00:00<?, ?it/s]

Batches:   0%|          | 0/54 [00:00<?, ?it/s]

Embedding'ler bert-base-turkish-uncased için kaydedildi!

Temsil yöntemi: jina-embeddings-v3


Batches:   0%|          | 0/213 [00:00<?, ?it/s]

Batches:   0%|          | 0/54 [00:00<?, ?it/s]

Embedding'ler jina-embeddings-v3 için kaydedildi!


In [17]:
param_grids = {
    "SVM": {"C": [0.1, 1], "kernel": ["linear", "rbf"]},
    "RF": {"n_estimators": [50, 100], "max_depth": [10, None]},
    "MLP": {"hidden_layer_sizes": [(50,), (100,)], "max_iter": [300, 500]}
}

In [18]:
# Sonuçları saklamak için sözlükler
results = {}
model_ensemble_results = {}
final_ensemble_results = {}

# Her temsil yöntemi için ensemble oluşturma
for representation_name in sentence_transformers_models:
    print(f"\nTemsil yöntemi: {representation_name}")


    # Embedding'leri yükleme
    X_train_embeddings = np.load(f"/content/drive/MyDrive/Yüksek Lisans 2. Dönem/Kolektif Öğrenme/Proje2/Türkçe Duygu Analizi Proje/duygu_analizi_tranformers/{representation_name}_train.npy")
    X_test_embeddings = np.load(f"/content/drive/MyDrive/Yüksek Lisans 2. Dönem/Kolektif Öğrenme/Proje2/Türkçe Duygu Analizi Proje/duygu_analizi_tranformers/{representation_name}_test.npy")

    # Temsil yöntemi için sonuçları başlatma
    results[representation_name] = {"Individual": {}, "Ensemble": None}
    optimized_models = []  # Her temsil yöntemi için optimize modeller

    # Bireysel modeller için
    for ml_name, ml_model in ml_models.items():
        print(f"\nModel: {ml_name}")

        # GridSearchCV ile parametre optimizasyonu
        grid = GridSearchCV(ml_model, param_grids[ml_name], cv=3, scoring='accuracy', verbose=1)
        grid.fit(X_train_embeddings, y_train)

        # Optimize edilmiş modeli saklama
        optimized_models.append((ml_name, grid.best_estimator_))
        print(grid.best_estimator_)

        # Test setinde doğruluk hesaplama
        y_pred = grid.best_estimator_.predict(X_test_embeddings)
        accuracy = accuracy_score(y_test, y_pred)
        conf_matrix = confusion_matrix(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        results[representation_name]["Individual"][ml_name] = accuracy
        print(f"{ml_name} Accuracy: {accuracy}")
        print(f"{ml_name} Confusion Matrix:\n{conf_matrix}")
        print(f"{ml_name} Precision:\n{precision}")
        print(f"{ml_name} Recall:\n{recall}")
        print(f"{ml_name} F1 Score:\n{f1}")


    # Temsil yöntemi için ensemble modeli
    representation_ensemble = VotingClassifier(estimators=optimized_models, voting='hard')
    representation_ensemble.fit(X_train_embeddings, y_train)
    y_rep_ensemble_pred = representation_ensemble.predict(X_test_embeddings)
    representation_ensemble_accuracy = accuracy_score(y_test, y_rep_ensemble_pred)
    representation_ensemble_conf_matrix = confusion_matrix(y_test, y_rep_ensemble_pred)
    representation_ensemble_precision = precision_score(y_test, y_rep_ensemble_pred, average='weighted')
    representation_ensemble_recall = recall_score(y_test, y_rep_ensemble_pred, average='weighted')
    representation_ensemble_f1 = f1_score(y_test, y_rep_ensemble_pred, average='weighted')
    results[representation_name]["Ensemble"] = representation_ensemble_accuracy
    print(f"Representation Ensemble Accuracy: {representation_ensemble_accuracy}")
    print(f"Representation Ensemble Confusion Matrix:\n{representation_ensemble_conf_matrix}")
    print(f"Representation Ensemble Precision: {representation_ensemble_precision}")
    print(f"Representation Ensemble Recall: {representation_ensemble_recall}")
    print(f"Representation Ensemble F1 Score: {representation_ensemble_f1}")


Temsil yöntemi: all-MiniLM-L12-v2

Model: SVM
Fitting 3 folds for each of 4 candidates, totalling 12 fits
SVC(C=1, probability=True, random_state=42)
SVM Accuracy: 0.8457008244994111
SVM Confusion Matrix:
[[690 165]
 [ 97 746]]
SVM Precision:
0.848018222441114
SVM Recall:
0.8457008244994111
SVM F1 Score:
0.8454968115176668

Model: RF
Fitting 3 folds for each of 4 candidates, totalling 12 fits
RandomForestClassifier(max_depth=10, random_state=42)
RF Accuracy: 0.7626619552414605
RF Confusion Matrix:
[[598 257]
 [146 697]]
RF Precision:
0.767444203568171
RF Recall:
0.7626619552414605
RF F1 Score:
0.7617542831279939

Model: MLP
Fitting 3 folds for each of 4 candidates, totalling 12 fits




MLPClassifier(max_iter=300, random_state=42)
MLP Accuracy: 0.8297997644287397
MLP Confusion Matrix:
[[693 162]
 [127 716]]
MLP Precision:
0.8304105577518341
MLP Recall:
0.8297997644287397
MLP F1 Score:
0.8297522352450354




Representation Ensemble Accuracy: 0.8492343934040048
Representation Ensemble Confusion Matrix:
[[687 168]
 [ 88 755]]
Representation Ensemble Precision: 0.8524601545740348
Representation Ensemble Recall: 0.8492343934040048
Representation Ensemble F1 Score: 0.8489494734435096

Temsil yöntemi: multilingual-e5-large-instruct

Model: SVM
Fitting 3 folds for each of 4 candidates, totalling 12 fits
SVC(C=1, probability=True, random_state=42)
SVM Accuracy: 0.9458186101295641
SVM Confusion Matrix:
[[801  54]
 [ 38 805]]
SVM Precision:
0.9459841863915934
SVM Recall:
0.9458186101295641
SVM F1 Score:
0.9458174074299998

Model: RF
Fitting 3 folds for each of 4 candidates, totalling 12 fits
RandomForestClassifier(random_state=42)
RF Accuracy: 0.9358068315665489
RF Confusion Matrix:
[[791  64]
 [ 45 798]]
RF Precision:
0.9360353036402245
RF Recall:
0.9358068315665489
RF F1 Score:
0.9358038703343565

Model: MLP
Fitting 3 folds for each of 4 candidates, totalling 12 fits




MLPClassifier(hidden_layer_sizes=(50,), max_iter=300, random_state=42)
MLP Accuracy: 0.9411071849234394
MLP Confusion Matrix:
[[791  64]
 [ 36 807]]
MLP Precision:
0.94160102586557
MLP Recall:
0.9411071849234394
MLP F1 Score:
0.9410980331866913




Representation Ensemble Accuracy: 0.9469964664310954
Representation Ensemble Confusion Matrix:
[[801  54]
 [ 36 807]]
Representation Ensemble Precision: 0.9472053747421908
Representation Ensemble Recall: 0.9469964664310954
Representation Ensemble F1 Score: 0.94699448098258

Temsil yöntemi: gte-large

Model: SVM
Fitting 3 folds for each of 4 candidates, totalling 12 fits
SVC(C=1, probability=True, random_state=42)
SVM Accuracy: 0.8333333333333334
SVM Confusion Matrix:
[[686 169]
 [114 729]]
SVM Precision:
0.8348134743875278
SVM Recall:
0.8333333333333334
SVM F1 Score:
0.833196534269467

Model: RF
Fitting 3 folds for each of 4 candidates, totalling 12 fits
RandomForestClassifier(random_state=42)
RF Accuracy: 0.790341578327444
RF Confusion Matrix:
[[631 224]
 [132 711]]
RF Precision:
0.7939484812335447
RF Recall:
0.790341578327444
RF F1 Score:
0.789805189340391

Model: MLP
Fitting 3 folds for each of 4 candidates, totalling 12 fits




MLPClassifier(max_iter=300, random_state=42)
MLP Accuracy: 0.8321554770318021
MLP Confusion Matrix:
[[731 124]
 [161 682]]
MLP Precision:
0.8327361268720662
MLP Recall:
0.8321554770318021
MLP F1 Score:
0.8320498460253982




Representation Ensemble Accuracy: 0.8362779740871613
Representation Ensemble Confusion Matrix:
[[690 165]
 [113 730]]
Representation Ensemble Precision: 0.8376142715109514
Representation Ensemble Recall: 0.8362779740871613
Representation Ensemble F1 Score: 0.8361597962202861

Temsil yöntemi: bert-base-turkish-uncased

Model: SVM
Fitting 3 folds for each of 4 candidates, totalling 12 fits
SVC(C=1, probability=True, random_state=42)
SVM Accuracy: 0.9228504122497055
SVM Confusion Matrix:
[[769  86]
 [ 45 798]]
SVM Precision:
0.9238646071944762
SVM Recall:
0.9228504122497055
SVM F1 Score:
0.9228185873685802

Model: RF
Fitting 3 folds for each of 4 candidates, totalling 12 fits
RandomForestClassifier(random_state=42)
RF Accuracy: 0.8987043580683156
RF Confusion Matrix:
[[749 106]
 [ 66 777]]
RF Precision:
0.8996245866907239
RF Recall:
0.8987043580683156
RF F1 Score:
0.8986649983917371

Model: MLP
Fitting 3 folds for each of 4 candidates, totalling 12 fits
MLPClassifier(max_iter=300, random_



MLPClassifier(hidden_layer_sizes=(50,), max_iter=300, random_state=42)
MLP Accuracy: 0.9352179034157833
MLP Confusion Matrix:
[[801  54]
 [ 56 787]]
MLP Precision:
0.9352192402230439
MLP Recall:
0.9352179034157833
MLP F1 Score:
0.9352172742469761




Representation Ensemble Accuracy: 0.9411071849234394
Representation Ensemble Confusion Matrix:
[[798  57]
 [ 43 800]]
Representation Ensemble Precision: 0.9412340051154253
Representation Ensemble Recall: 0.9411071849234394
Representation Ensemble F1 Score: 0.9411066129898855


In [19]:
# Aynı model türü için ensemble oluşturma (SVM, RF, MLP)
for ml_name in ml_models.keys():
    print(f"\nModel Türü Ensemble: {ml_name}")

    # Tüm temsil yöntemlerinden alınan bireysel modelleri birleştirme
    model_estimators = [
        (representation_name, results[representation_name]["Individual"][ml_name])
        for representation_name in sentence_transformers_models
    ]

    # Ensemble modeli
    model_ensemble = VotingClassifier(
        estimators=[
            (representation_name, grid.best_estimator_)  # Doğrudan optimize edilmiş modeller
            for representation_name in sentence_transformers_models
        ],
        voting='hard'
    )
    model_ensemble.fit(X_train_embeddings, y_train)
    y_model_pred = model_ensemble.predict(X_test_embeddings)
    model_ensemble_accuracy = accuracy_score(y_test, y_model_pred)
    model_ensemble_conf_matrix = confusion_matrix(y_test, y_model_pred)
    model_ensemble_precision = precision_score(y_test, y_model_pred, average='weighted')
    model_ensemble_recall = recall_score(y_test, y_model_pred, average='weighted')
    model_ensemble_f1 = f1_score(y_test, y_model_pred, average='weighted')
    model_ensemble_results[ml_name] = model_ensemble_accuracy
    print(f"Model Ensemble ({ml_name}) Accuracy: {model_ensemble_accuracy}")
    print(f"Representation Ensemble Confusion Matrix:\n{model_ensemble_conf_matrix}")
    print(f"Representation Ensemble Confusion Matrix:\n{model_ensemble_precision}")
    print(f"Representation Ensemble Confusion Matrix:\n{model_ensemble_recall}")
    print(f"Representation Ensemble Confusion Matrix:\n{model_ensemble_f1}")


Model Türü Ensemble: SVM




Model Ensemble (SVM) Accuracy: 0.9352179034157833
Representation Ensemble Confusion Matrix:
[[801  54]
 [ 56 787]]
Representation Ensemble Confusion Matrix:
0.9352192402230439
Representation Ensemble Confusion Matrix:
0.9352179034157833
Representation Ensemble Confusion Matrix:
0.9352172742469761

Model Türü Ensemble: RF




Model Ensemble (RF) Accuracy: 0.9352179034157833
Representation Ensemble Confusion Matrix:
[[801  54]
 [ 56 787]]
Representation Ensemble Confusion Matrix:
0.9352192402230439
Representation Ensemble Confusion Matrix:
0.9352179034157833
Representation Ensemble Confusion Matrix:
0.9352172742469761

Model Türü Ensemble: MLP




Model Ensemble (MLP) Accuracy: 0.9352179034157833
Representation Ensemble Confusion Matrix:
[[801  54]
 [ 56 787]]
Representation Ensemble Confusion Matrix:
0.9352192402230439
Representation Ensemble Confusion Matrix:
0.9352179034157833
Representation Ensemble Confusion Matrix:
0.9352172742469761


In [20]:
# Genel ensemble (Tüm temsil yöntemleri ve modeller)
print("\nGenel Ensemble")

overall_estimators = []

# Temsil yöntemi ensemble modellerini ekleme
for representation_name in sentence_transformers_models:
    representation_ensemble = VotingClassifier(
        estimators=[
            (ml_name, grid.best_estimator_)  # Optimize edilmiş modeller
            for ml_name in ml_models.keys()
        ],
        voting='hard'
    )
    representation_ensemble.fit(X_train_embeddings, y_train)
    overall_estimators.append((representation_name, representation_ensemble))

# Genel ensemble modeli
overall_ensemble = VotingClassifier(estimators=overall_estimators, voting='hard')
overall_ensemble.fit(X_train_embeddings, y_train)

# Genel ensemble modeliyle tahmin yapma
y_overall_pred = overall_ensemble.predict(X_test_embeddings)
overall_accuracy = accuracy_score(y_test, y_overall_pred)
overall_conf_matrix = confusion_matrix(y_test, y_overall_pred)
overall_precision = precision_score(y_test, y_overall_pred, average='weighted')
overall_recall = recall_score(y_test, y_overall_pred, average='weighted')
overall_f1 = f1_score(y_test, y_overall_pred, average='weighted')
final_ensemble_results["Overall"] = overall_accuracy
print(f"Overall Ensemble Accuracy: {overall_accuracy}")
print(f"Overall Ensemble Accuracy: {overall_conf_matrix}")
print(f"Overall Ensemble Accuracy: {overall_precision}")
print(f"Overall Ensemble Accuracy: {overall_recall}")
print(f"Overall Ensemble Accuracy: {overall_f1}")


# Sonuçları yazdırma
print("\nSonuçlar:")
for representation_name, metrics in results.items():
    print(f"\nTemsil Yöntemi: {representation_name}")
    for ml_name, acc in metrics["Individual"].items():
        print(f"  {ml_name} Accuracy: {acc}")
    print(f"  Representation Ensemble Accuracy: {metrics['Ensemble']}")

print("\nModel Ensemble Sonuçları:")
for ml_name, acc in model_ensemble_results.items():
    print(f"  {ml_name} Ensemble Accuracy: {acc}")

print("\nGenel Ensemble Sonucu:")
print(f"  Overall Ensemble Accuracy: {final_ensemble_results['Overall']}")


Genel Ensemble




Overall Ensemble Accuracy: 0.9352179034157833
Overall Ensemble Accuracy: [[801  54]
 [ 56 787]]
Overall Ensemble Accuracy: 0.9352192402230439
Overall Ensemble Accuracy: 0.9352179034157833
Overall Ensemble Accuracy: 0.9352172742469761

Sonuçlar:

Temsil Yöntemi: all-MiniLM-L12-v2
  SVM Accuracy: 0.8457008244994111
  RF Accuracy: 0.7626619552414605
  MLP Accuracy: 0.8297997644287397
  Representation Ensemble Accuracy: 0.8492343934040048

Temsil Yöntemi: multilingual-e5-large-instruct
  SVM Accuracy: 0.9458186101295641
  RF Accuracy: 0.9358068315665489
  MLP Accuracy: 0.9411071849234394
  Representation Ensemble Accuracy: 0.9469964664310954

Temsil Yöntemi: gte-large
  SVM Accuracy: 0.8333333333333334
  RF Accuracy: 0.790341578327444
  MLP Accuracy: 0.8321554770318021
  Representation Ensemble Accuracy: 0.8362779740871613

Temsil Yöntemi: bert-base-turkish-uncased
  SVM Accuracy: 0.9228504122497055
  RF Accuracy: 0.8987043580683156
  MLP Accuracy: 0.9199057714958775
  Representation Ensem