In [1]:
!pip install FlagEmbedding

Collecting FlagEmbedding
  Downloading FlagEmbedding-1.3.3.tar.gz (161 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.8/161.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers==4.44.2 (from FlagEmbedding)
  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets==2.19.0 (from FlagEmbedding)
  Downloading datasets-2.19.0-py3-none-any.whl.metadata (19 kB)
Collecting ir-datasets (from FlagEmbedding)
  Downloading ir_datasets-0.5.9-py3-none-any.whl.metadata (12 kB)
Collecting pyarrow-hotfix (from datasets==2.19.0->FlagEmbedding)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets==2.19.0->FlagEmbedding)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xx

In [56]:
import pandas as pd
import tensorflow as tf
import torch
import numpy as np
import time
import datetime
import random
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import mode

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

from transformers import AutoModel
from sentence_transformers import SentenceTransformer
#from FlagEmbedding import BGEM3FlagModel


df = pd.read_csv("hf://datasets/merve/turkish_instructions/instructions.csv")

# Yeni "soru" sütununu oluştur
df["soru"] = df.apply(
    lambda row: row["talimat"] if pd.isna(row[" giriş"]) else row["talimat"] + " " + row[" giriş"], axis=1
)

# Çıkış sütununu cevap olarak kullan
df["cevap"] = df[" çıktı"]

new_df = df[["soru", "cevap"]].copy()

# 2000 rastgele satır seç
sample_df = new_df.sample(n=2000, random_state=42).reset_index(drop=True)
questions = sample_df['soru'].tolist()
answers = sample_df['cevap'].tolist()

In [15]:
questions[0]

' Zümrüt yeşili rengini açıklayınız.'

In [14]:
answers[0]

'Zümrüt yeşili, uçuk yeşilden koyu yeşime kadar değişen canlı ve parlak bir renk tonudur. Taşın rengini yansıtan mücevher benzeri bir kaliteye sahiptir. Zümrüt yeşili, yaşam ve enerji dolu, gür, şiirsel olarak derin bir gölge olarak tanımlanabilir. Resimde, ağaçların, çimenlerin ve yaprakların dış mekan sahnelerini yakalamak için mükemmel renk tonudur.'

In [None]:
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    device = torch.device("cuda")
    print('GPU:', torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")

models = []
embeddings_dict_questions = {}
embeddings_dict_answers = {}
#Initializing the embedding models and creating embeddings for data
model1 = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
embeddings1 = model1.encode(questions)
embeddings1_a = model1.encode(answers)
embeddings_dict_questions['model1'] = embeddings1
embeddings_dict_answers['model1'] = embeddings1_a

model2 = SentenceTransformer("jinaai/jina-embeddings-v3", trust_remote_code=True)
task = "classification"
embeddings2 = model2.encode(questions, task=task, prompt_name=task)
embeddings2_a = model2.encode(answers)
embeddings_dict_questions['model2'] = embeddings2
embeddings_dict_answers['model2'] = embeddings2_a

model5 = SentenceTransformer('thenlper/gte-large')
embeddings5 = model5.encode(questions)
embeddings5_a = model5.encode(answers)
embeddings_dict_questions['model5'] = embeddings5
embeddings_dict_answers['model5'] = embeddings5_a

model4 = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
embeddings4 = model4.encode(questions)
embeddings4_a = model4.encode(answers)
embeddings_dict_questions['model4'] = embeddings4
embeddings_dict_answers['model4'] = embeddings4_a


model3 = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
embeddings3 = model3.encode(questions)
embeddings3_a = model3.encode(answers)
embeddings_dict_questions['model3'] = embeddings3
embeddings_dict_answers['model3'] = embeddings3_a

In [4]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

top1_results = {}
top5_results = {}

for model_name in embeddings_dict_questions.keys():
    # Get embeddings for questions and answers
    question_embeddings = embeddings_dict_questions[model_name]
    answer_embeddings = embeddings_dict_answers[model_name]

    # Compute cosine similarity between questions and answers
    similarities = cosine_similarity(question_embeddings, answer_embeddings)

    # Retrieve Top-1 and Top-5 indices
    top1_results[model_name] = [np.argmax(row) for row in similarities]
    top5_results[model_name] = [np.argsort(row)[-5:][::-1] for row in similarities]  # Reverse for descending order

**Ensemble**

*Majority*

In [52]:
from collections import Counter

ensemble_top1_majority = []

for i in range(len(top1_results['model1'])):
    votes = [top1_results[model][i] for model in top1_results]
    majority_vote = Counter(votes).most_common(1)[0][0]
    # Store the majority vote in the dictionary for the respective model
    ensemble_top1_majority.append(majority_vote)


ensemble_top5_majority = []
for i in range(len(top5_results['model1'])):
    combined_top5 = [pred for model in top5_results for pred in top5_results[model][i]]
    most_common_top5 = [item[0] for item in Counter(combined_top5).most_common(5)]
    ensemble_top5_majority.append(most_common_top5)

*Average*

In [7]:
ensemble_top1_average = []

for i in range(len(top1_results['model1'])):
    rankings = [np.argsort(similarities[i]) for similarities in top1_results.values()]
    avg_rank = np.mean(rankings, axis=0).astype(int)
    ensemble_top1_average.append(avg_rank[0])


ensemble_top5_average = []

for i in range(len(top5_results['model1'])):
    all_ranks = [np.argsort(similarities[i]) for similarities in top5_results.values()]
    avg_rank = np.mean(all_ranks, axis=0)
    avg_rank_indices = np.argsort(avg_rank)[:5]
    ensemble_top5_average.append(avg_rank_indices)

*Weighted*

In [54]:
ensemble_top1_weighted = []
weights = [0.2, 0.5, 0.3, 0.2, 0.4]
for i in range(len(top1_results['model1'])):
    weighted_votes = {}
    for model, weight in zip(top1_results.keys(), weights):
        top1_index = top1_results[model][i]
        weighted_votes[top1_index] = weighted_votes.get(top1_index, 0) + weight
    ensemble_top1_weighted.append(max(weighted_votes, key=weighted_votes.get))


ensemble_top5_weighted = []
weights = [0.2, 0.2, 0.2, 0.2, 0.2]
for i in range(len(top5_results['model1'])):
    weighted_scores = {}
    for model, weight in zip(top5_results.keys(), weights):
        for rank, index in enumerate(top5_results[model][i]):
            weighted_scores[index] = weighted_scores.get(index, 0) + weight / (rank + 1)

    top5_indices = sorted(weighted_scores, key=weighted_scores.get, reverse=True)[:5]
    ensemble_top5_weighted.append(top5_indices)


In [16]:
ground_truth = sample_df['cevap'].tolist()

Results

In [55]:
index_to_cevap = {i: cevap for i, cevap in enumerate(sample_df['cevap'])}

def calculate_top1_accuracy(predictions, ground_truth):
    correct = sum([1 for pred, true in zip(predictions, ground_truth) if pred == true])
    return correct / len(ground_truth)

converted_top1_results = {
    model: [index_to_cevap[idx] for idx in indices]
    for model, indices in top1_results.items()
}

# Calculate Top-1 accuracy using the converted results
model_accuracies_top1 = {
    model: calculate_top1_accuracy(converted_top1_results[model], sample_df['cevap'].tolist())
    for model in top1_results.keys()
}


def calculate_top5_accuracy(predictions, ground_truth):
    correct = sum([1 for preds, true in zip(predictions, ground_truth) if true in preds])
    return correct / len(ground_truth)

converted_top5_results = {
    model: [[index_to_cevap[idx] for idx in indices] for indices in top5_results[model]]
    for model, top5_results[model] in top5_results.items()
}

model_accuracies_top5 = {
    model: calculate_top5_accuracy(converted_top5_results[model], sample_df['cevap'].tolist())
    for model in top5_results.keys()
}


# Accuracy for ensemble methods Top 1
ensemble_accuracies_top1 = {
    "Majority Voting Top 1": calculate_top1_accuracy(ensemble_top1_majority, index_to_cevap),
    "Average Ranking Top 1": calculate_top1_accuracy(ensemble_top1_average, index_to_cevap),
    "Weighted Voting Top 1": calculate_top1_accuracy(ensemble_top1_weighted, index_to_cevap)
}

# Accuracy for ensemble methods Top 5
ensemble_accuracies_top5 = {
    "Majority Voting Top 5": calculate_top5_accuracy(ensemble_top5_majority, index_to_cevap),
    "Average Ranking Top 5": calculate_top5_accuracy(ensemble_top5_average, index_to_cevap),
    "Weighted Voting Top 5": calculate_top5_accuracy(ensemble_top5_weighted, index_to_cevap)
}

print("Top-1 Accuracy:")
print("Models:", model_accuracies_top1)
print("Ensembles:", ensemble_accuracies_top1)

print("\nTop-5 Accuracy:")
print("Models:", model_accuracies_top5)
print("Ensembles:", ensemble_accuracies_top5)


Top-1 Accuracy:
Models: {'model1': 0.314, 'model2': 0.7815, 'model5': 0.413, 'model4': 0.467, 'model3': 0.661}
Ensembles: {'Majority Voting Top 1': 0.6725, 'Average Ranking Top 1': 0.0005, 'Weighted Voting Top 1': 0.7725}

Top-5 Accuracy:
Models: {'model1': 0.429, 'model2': 0.9015, 'model5': 0.545, 'model4': 0.5905, 'model3': 0.8105}
Ensembles: {'Majority Voting Top 5': 0.846, 'Average Ranking Top 5': 0.0025, 'Weighted Voting Top 5': 0.8815}


In [19]:
for i in range(5):  # Check first 5 questions
    print(f"Question: {sample_df['soru'][i]}")
    print(f"Ground Truth: {ground_truth[i]}")
    print(f"Top-1 Prediction: {ensemble_top1_majority[i]}")
    print(f"Top-5 Predictions: {ensemble_top5_majority[i]}")
    print("-----------")

Question:  Zümrüt yeşili rengini açıklayınız.
Ground Truth: Zümrüt yeşili, uçuk yeşilden koyu yeşime kadar değişen canlı ve parlak bir renk tonudur. Taşın rengini yansıtan mücevher benzeri bir kaliteye sahiptir. Zümrüt yeşili, yaşam ve enerji dolu, gür, şiirsel olarak derin bir gölge olarak tanımlanabilir. Resimde, ağaçların, çimenlerin ve yaprakların dış mekan sahnelerini yakalamak için mükemmel renk tonudur.
Top-1 Prediction: 0
Top-5 Predictions: [0, 1764, 615, 1800, 938]
-----------
Question:  Birim dönüştürme problemi için bir kopya kağıdı hazırlayın.  375 milimetreyi inç'e dönüştürün.
Ground Truth:  1 milimetre = 0,0393701 inç 375 milimetre = 14,724409448818898 inç
Top-1 Prediction: 1
Top-5 Predictions: [859, 351, 1, 640, 1415]
-----------
Question:  Sınavda başarısız olan birine cesaret verici bir şey söyleyin.
Ground Truth: Bazen başarısız olmak iyidir. Başarısızlık, öğrenme sürecinin bir parçasıdır. Bu deneyimi daha fazlasını öğrenmek ve bir dahaki sefere daha iyisini yapmak iç