In [38]:
from openai import OpenAI
import configparser
import pandas as pd
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [39]:
# Load the fastText word vectors from the text-format .vec file (hence binary=False).
model = KeyedVectors.load_word2vec_format(
    '../../Modell_Fast_text/wiki-news-300d-1M-subword.vec',
    binary=False,
)

In [41]:
def text_to_vector(text, model):
    """Turn a text into a single vector by averaging its word vectors.

    The text is split on whitespace; tokens that are not contained in
    ``model`` are skipped.

    Args:
        text: The input string.
        model: Mapping-like embedding lookup supporting ``token in model``,
            ``model[token]`` and a ``vector_size`` attribute.

    Returns:
        The element-wise mean of the vectors of all known tokens, or a zero
        vector of length ``model.vector_size`` if no token is known.
    """
    known_vectors = []
    for token in text.split():
        if token in model:
            known_vectors.append(model[token])

    # No known token at all: fall back to the zero vector.
    if len(known_vectors) == 0:
        return np.zeros(model.vector_size)

    return np.mean(known_vectors, axis=0)

def cosine_similarity_texts(text1, text2, model):
    """Return the cosine similarity between the averaged vectors of two texts.

    Both texts are converted with ``text_to_vector`` using the same ``model``;
    the result is the single entry of the 1x1 matrix returned by
    ``cosine_similarity``.
    """
    first, second = (text_to_vector(text, model) for text in (text1, text2))
    similarity_matrix = cosine_similarity([first], [second])
    return similarity_matrix[0][0]

In [21]:
# Read the OpenAI API key from a local INI file instead of hardcoding it in the notebook.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty result therefore means the config file
# was not found; fail here with a clear message instead of a later KeyError.
if not config.read('../../Edu_Llama/config.ini'):
    raise FileNotFoundError("Could not read config file '../../Edu_Llama/config.ini'")
api_key = config['openai']['api_key']

In [22]:
# Create the OpenAI API client, authenticated with the key loaded into `api_key`.
client = OpenAI(api_key=api_key)

In [23]:
# The two models whose answers are compared: llm1 = gpt-3.5-turbo, llm2 = gpt-4-1106-preview.
llm1, llm2 = "gpt-3.5-turbo", "gpt-4-1106-preview"

In [24]:
# Load the question catalogue; the file is semicolon-separated.
questions = pd.read_csv(
    "../../Fragen/Fragen DB.CSV",
    sep=";",
)

In [25]:
# Preview the first rows to check that the file was parsed into the expected columns.
questions.head()


Unnamed: 0,Question,Answer
0,Question: What are the basic features of Relat...,Answer: The basic features of RDMS include the...
1,Question: What are the key components of a dat...,Answer: Key components of a database system en...
2,Question: What is a relation (relation state) ...,Answer: A relation (relation state) in relatio...
3,Question: What is the definition and significa...,Answer: A binary relation in relational databa...
4,Question: What are entity types and sets in th...,Answer: An entity type is a definition of a co...


In [26]:
# Pick the row labelled 0: its question text and the corresponding sample answer.
question = questions.loc[0, "Question"]
answer = questions.loc[0, "Answer"]

In [28]:
# Send the question as a single user message to the first model (llm1)
# and keep the text of the first returned choice.
response1 = client.chat.completions.create(
    messages=[
        {"role": "user", "content": f"{question}"},
    ],
    model=llm1,
)
answer1 = response1.choices[0].message.content

In [29]:
# Send the same question as a single user message to the second model (llm2)
# and keep the text of the first returned choice.
response2 = client.chat.completions.create(
    messages=[
        {"role": "user", "content": f"{question}"},
    ],
    model=llm2,
)
answer2 = response2.choices[0].message.content

In [30]:
# Let llm2 act as judge: it receives the question, the sample answer and both
# candidate answers, and is instructed to reply with only "1" or "2".
# The result is bound to `auswertung` only. The previous chained assignment
# (`auswertung = response2 = ...`) also overwrote `response2`, which holds the
# llm2 response to the question itself.
auswertung = client.chat.completions.create(
    model=llm2,
    messages=[
        {"role": "system", "content": "I'll now give you a question, the sample solution to this question and 2 other answers. Decide based on the context of the model solution which of the two answers is better and only output “1” or “2” as the result!"},
        {"role": "user", "content": f"{question} \n sample {answer} \n 1. Answer: {answer1} \n 2. Answer: {answer2}"},
    ],
)

In [35]:
# Text of the judge's first choice; the system prompt asks for only "1" or "2".
better_answer = auswertung.choices[0].message.content

In [32]:
# Display the user message that is sent to the judge, for visual inspection.
f"{question} \n sample {answer} \n 1. Answer: {answer1} \n 2. Answer: {answer2}"

'Question: What are the basic features of Relational Database Management Systems (RDMS)? \n sample Answer: The basic features of RDMS include the ability to create a database, write and execute SQL queries, evaluate storage structures, read execution plans and optimize queries, and understand transaction isolation layers. \n 1. Answer: Relational Database Management Systems (RDBMS) have several basic features that distinguish them from other types of database management systems. Some of the key features of RDBMS include:\n\n1. Tabular Structure: RDBMS store data in the form of tables, where each table consists of rows and columns. Tables are used to represent entities or relations in the database.\n\n2. Data Integrity: RDBMS enforce data integrity through the use of constraints such as unique values, primary keys, foreign keys, and referential integrity. These constraints ensure that data remains consistent and accurate.\n\n3. Relationships between Tables: RDBMS allow for establishing 

### Kosinusähnlichkeit

In [42]:
# Berechnen der Ähnlichkeit für jede Zeile
cos_1 = cosine_similarity_texts(answer, answer1, model)
cos_2 = cosine_similarity_texts(answer, answer2, model)

In [45]:
# Winner by cosine similarity: 1 only if answer1 is strictly closer to the
# sample answer, otherwise 2 (ties go to 2).
# NOTE(review): this reassigns `better_answer`, which an earlier cell set to
# the LLM judge's verdict.
better_answer = 1 if cos_1 > cos_2 else 2

### Final Data Frame

In [46]:

# Assemble the one-row result table.
# Both verdicts are derived here from their sources rather than from the shared
# `better_answer` variable: that name is assigned twice earlier in the notebook
# (first the LLM judge's verdict, then the similarity winner), so using it for
# both columns stored the similarity winner in "Result" as well.
llm_verdict = auswertung.choices[0].message.content
similarity_winner = 1 if cos_1 > cos_2 else 2

# Row values, in the same order as the column names below.
neue_zeile = [question, answer, answer1, answer2, llm_verdict, cos_1, cos_2, similarity_winner]

# Build the frame in one step instead of enlarging an empty frame via .loc.
final_table = pd.DataFrame(
    [neue_zeile],
    columns=["Question", "Answer", "Answer1", "Answer2", "Result", "similarity_1", "similarity_2", "similarity_winner"],
)

In [47]:
# Display the assembled result table.
final_table

Unnamed: 0,Question,Answer,Answer1,Answer2,Result,similarity_1,similarity_2,similarity_winner
0,Question: What are the basic features of Relat...,Answer: The basic features of RDMS include the...,Relational Database Management Systems (RDBMS)...,Relational Database Management Systems (RDBMS)...,2,0.946672,0.955012,2
