In [None]:
!pip install transformers sentence-transformers scikit-learn seaborn

      Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75
  Attempting uninstall: nvidia-cusolver-cu12
    Found existing installation: nvidia-cusolver-cu12 11.6.3.83
    Uninstalling nvidia-cusolver-cu12-11.6.3.83:
      Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83
Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127


In [None]:
# Colab-only step: opens the browser file picker and stores the chosen file(s)
# in the runtime (the recorded run saved `sc.csv`, which a later cell reads).
from google.colab import files
uploaded = files.upload()


Saving sc.csv to sc.csv


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
import os
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer
from google.colab import files


In [None]:
def average_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Mean-pool token vectors along dim 1, counting only unmasked positions.

    Positions where ``attention_mask`` is zero are zeroed out before summing,
    and each row's sum is divided by that row's number of unmasked positions.

    Args:
        last_hidden_states: Per-token vectors; dim 1 is the one reduced.
        attention_mask: Mask whose non-zero entries mark positions to keep.

    Returns:
        One pooled vector per row of the input.
    """
    # Boolean mask of positions to discard, with a trailing axis so it
    # broadcasts across the feature dimension.
    is_padding = attention_mask.unsqueeze(-1).bool().logical_not()
    kept_states = last_hidden_states.masked_fill(is_padding, 0.0)

    summed = kept_states.sum(dim=1)
    token_counts = attention_mask.sum(dim=1).unsqueeze(-1)
    return summed / token_counts

# Single-slot cache: remembers only the most recently loaded checkpoint so that
# consecutive calls for the same model skip the expensive load, while two large
# checkpoints are never kept in memory at the same time.
_LAST_LOADED = {"name": None, "handle": None}


def _load_cached(model_name, loader):
    """Return the handle for ``model_name``, calling ``loader()`` only on a cache miss."""
    if _LAST_LOADED["name"] != model_name:
        # Drop the previous model before building the next one.
        _LAST_LOADED["name"] = None
        _LAST_LOADED["handle"] = None
        _LAST_LOADED["handle"] = loader()
        _LAST_LOADED["name"] = model_name
    return _LAST_LOADED["handle"]


def get_embeddings(model_name, texts, batch_size=32):
    """Embed ``texts`` with one of the supported embedding models.

    Args:
        model_name: Hugging Face model id. Supported values are
            ``"jinaai/jina-embeddings-v3"``,
            ``"intfloat/multilingual-e5-large-instruct"`` and
            ``"ytu-ce-cosmos/turkish-e5-large"``.
        texts: List of strings to embed.
        batch_size: Number of texts encoded per forward pass in the
            e5-instruct and sentence-transformers branches. The jina branch
            relies on the batching of its own ``encode`` method.

    Returns:
        The embeddings, one row per input text. The e5-instruct and
        sentence-transformers branches return L2-normalised vectors; the jina
        branch returns whatever its ``encode`` method produces.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``model_name`` is
            not one of the supported ids.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    if model_name == "jinaai/jina-embeddings-v3":
        # trust_remote_code executes modelling code downloaded from the Hub.
        model = _load_cached(
            model_name,
            lambda: AutoModel.from_pretrained(model_name, trust_remote_code=True),
        )
        return model.encode(texts, task="text-matching")

    elif model_name == "intfloat/multilingual-e5-large-instruct":
        tokenizer, model = _load_cached(
            model_name,
            lambda: (
                AutoTokenizer.from_pretrained(model_name),
                AutoModel.from_pretrained(model_name),
            ),
        )
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        model.eval()

        # The instruction prefix is applied to every input text.
        # NOTE(review): the model card describes the instruction for the query
        # side of retrieval - confirm it is intended for answer texts as well.
        task_description = "Represent this sentence for retrieval:"
        inputs = [f"Instruct: {task_description}\nQuery: {t}" for t in texts]
        if not inputs:
            return np.empty((0, model.config.hidden_size), dtype=np.float32)

        # Encode in mini-batches: a single padded batch over the whole corpus
        # makes peak memory grow with the corpus size instead of batch_size.
        pooled_batches = []
        for start in range(0, len(inputs), batch_size):
            encoded = tokenizer(
                inputs[start:start + batch_size],
                max_length=512,
                padding=True,
                truncation=True,
                return_tensors="pt",
            ).to(device)
            with torch.no_grad():
                output = model(**encoded)
            pooled = average_pool(output.last_hidden_state, encoded["attention_mask"])
            pooled_batches.append(torch.nn.functional.normalize(pooled, p=2, dim=1).cpu())
        return torch.cat(pooled_batches, dim=0).numpy()

    elif model_name == "ytu-ce-cosmos/turkish-e5-large":
        model = _load_cached(model_name, lambda: SentenceTransformer(model_name))
        return model.encode(
            texts,
            batch_size=batch_size,
            convert_to_tensor=False,
            normalize_embeddings=True,
        )

    else:
        raise ValueError(f"Desteklenmeyen model: {model_name}")

def evaluate_top_k(soru_embeds, answer_embeds, k=5):
    """Score retrieval of each question's own answer by cosine similarity.

    Row ``i`` of ``answer_embeds`` is treated as the correct answer for row
    ``i`` of ``soru_embeds``.

    Args:
        soru_embeds: Question embeddings, one row per question.
        answer_embeds: Answer embeddings, aligned row-by-row with the questions.
        k: Size of the candidate list used for the second metric.

    Returns:
        A ``(top1_acc, topk_acc)`` tuple: the fraction of questions whose
        correct answer ranks first, and the fraction whose correct answer is
        among the ``k`` most similar answers.
    """
    similarity = cosine_similarity(soru_embeds, answer_embeds)

    # Negate so that ascending argsort yields most-similar-first rankings.
    ranking = np.argsort(-similarity, axis=1)
    best_k = ranking[:, :k]
    expected = np.arange(len(soru_embeds))

    hit_at_1 = best_k[:, 0] == expected
    # Compare every candidate column against the expected index of its row.
    hit_at_k = (best_k == expected[:, None]).any(axis=1)
    return np.mean(hit_at_1), np.mean(hit_at_k)


In [None]:
# Short tag -> Hugging Face model id for each embedding model under comparison.
# The tag is what later cells iterate over alongside the model id.
models = {
    "e5": "intfloat/multilingual-e5-large-instruct",
    "cosmosE5": "ytu-ce-cosmos/turkish-e5-large",
    "jina": "jinaai/jina-embeddings-v3"
}

# Load the dataset from the runtime's working directory.
df = pd.read_csv("sc.csv")
# Accumulator for result records; starts empty.
all_results = []

In [None]:
# Replace the verbose spreadsheet headers with short, identifier-friendly names.
df = df.rename(columns={
    "Soru": "soru",
    "gpt4o cevabı": "gpt4o",
    "deepseek cevabı": "deepseek",
    "Hangisi iyi? (1: gpt4o daha iyi, 2: deepseek daha iyi, 3: ikisi de yeterince iyi, 4: ikisi de kötü)": "hangisi_iyi"
})

# Evaluate on a reproducible random subset of the data.
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42

# DataFrame.sample raises ValueError when n exceeds the number of rows
# (sampling without replacement), so cap the request at len(df).
df_sampled = (
    df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)


In [None]:
# Preview the first rows of the sampled frame.
df_sampled.head()

Unnamed: 0,soru,gpt4o,deepseek,hangisi_iyi
0,Güneşte çok kalmanın ne gibi zararları olabilir ?,Güneşte çok kalmanın çeşitli zararları vardır....,"Güneşte uzun süre kalmak, **UV (ultraviyole) r...",2
1,Özgürlük güvenlikten daha mı önemlidir?,"Özgürlük ve güvenlik, toplumların dengede tutm...",Özgürlük ile güvenlik arasındaki denge toplumd...,1
2,"Kaşık, çatal, tabak ile aynı kategoride olan b...","**Kaşık, çatal ve tabak**; **mutfak gereçleri*...","Kaşık, çatal ve tabak, genellikle **mutfak ve ...",3
3,Neden her ülkede sadece tek bir dil konuşuluyor?,"Aslında, her ülkede sadece tek bir dil konuşul...",Her ülkede tek bir dil konuşulduğu yaygın bir ...,3
4,Demirin yoğunluğunun 100 kg/m^3 olduğu bir ort...,"Yoğunluk, maddenin kendine özgü bir özelliğidi...",Demirin yoğunluğunun 100 kg/m³ olduğu bir orta...,2


In [None]:
# Start from an empty list so that re-running this cell does not append a second
# copy of every model's row to results left over from a previous execution.
all_results = []

for tag, model_name in models.items():
    print(f"\n🚀 {tag.upper()} modeli işleniyor: {model_name}")

    # Embed the questions and both answer sets with the same model.
    soru_embeds = get_embeddings(model_name, df_sampled["soru"].tolist())
    gpt_embeds = get_embeddings(model_name, df_sampled["gpt4o"].tolist())
    deep_embeds = get_embeddings(model_name, df_sampled["deepseek"].tolist())

    # Retrieval accuracy: does each question rank its own answer first / in the top 5?
    gpt_top1, gpt_top5 = evaluate_top_k(soru_embeds, gpt_embeds)
    deep_top1, deep_top5 = evaluate_top_k(soru_embeds, deep_embeds)

    # Diagonal of the similarity matrix = similarity of each question to its own answer.
    gpt_diag = np.diag(cosine_similarity(soru_embeds, gpt_embeds))
    deep_diag = np.diag(cosine_similarity(soru_embeds, deep_embeds))
    # These two columns are overwritten on every iteration, so after the loop
    # they hold the scores of the last model only.
    df_sampled["gpt_score"] = gpt_diag
    df_sampled["deep_score"] = deep_diag

    # NOTE(review): per the original CSV header, hangisi_iyi is a 1-4 category
    # code; .corr() treats it as a numeric scale - confirm this is intended.
    corr = df_sampled[["gpt_score", "deep_score", "hangisi_iyi"]].corr()

    out_dir = f"model_output_{tag}"
    os.makedirs(out_dir, exist_ok=True)
    save_path = f"{out_dir}/korelasyon.png"

    # Explicit figure/axes instead of the implicit pyplot state machine; the
    # figure is closed after saving so figures do not pile up across iterations.
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    ax.set_title(f"{tag.upper()} - Korelasyon Matrisi")
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

    # Automatically download the figure
    files.download(save_path)

    all_results.append({
        "Model": tag,
        "GPT4o Top-1": gpt_top1,
        "GPT4o Top-5": gpt_top5,
        "DeepSeek Top-1": deep_top1,
        "DeepSeek Top-5": deep_top5
    })


🚀 E5 modeli işleniyor: intfloat/multilingual-e5-large-instruct


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


🚀 COSMOSE5 modeli işleniyor: ytu-ce-cosmos/turkish-e5-large


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/205 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.40k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


🚀 JINA modeli işleniyor: jinaai/jina-embeddings-v3


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

configuration_xlm_roberta.py:   0%|          | 0.00/6.54k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- configuration_xlm_roberta.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_lora.py:   0%|          | 0.00/15.4k [00:00<?, ?B/s]

modeling_xlm_roberta.py:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

embedding.py:   0%|          | 0.00/3.88k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- embedding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


mha.py:   0%|          | 0.00/34.4k [00:00<?, ?B/s]

rotary.py:   0%|          | 0.00/24.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- rotary.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- mha.py
- rotary.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


block.py:   0%|          | 0.00/17.8k [00:00<?, ?B/s]

stochastic_depth.py:   0%|          | 0.00/3.76k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- stochastic_depth.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


mlp.py:   0%|          | 0.00/7.62k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- mlp.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- block.py
- stochastic_depth.py
- mlp.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


xlm_padding.py:   0%|          | 0.00/10.0k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- xlm_padding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- modeling_xlm_roberta.py
- embedding.py
- mha.py
- block.py
- xlm_padding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- modeling_lora.py
- modeling_xlm_roberta.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/1.14G [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Turn the collected per-model records into a table and write it to CSV
# (index=False keeps the row index out of the file).
results_df = pd.DataFrame(all_results)
results_df.to_csv("tum_model_sonuclari.csv", index=False)

# Automatically download the results CSV
files.download("tum_model_sonuclari.csv")

print("\n✅ Tüm modeller işlendi ve başarılar 'tum_model_sonuclari.csv' olarak kaydedildi.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Tüm modeller işlendi ve başarılar 'tum_model_sonuclari.csv' olarak kaydedildi.
