# Install

In [1]:
# Install the model stack. transformers, peft and accelerate are installed straight
# from their GitHub repositories, so their versions are not pinned.
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git 
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
# NOTE(review): this pin downgrades the preinstalled numpy 1.24.3 (see the cell
# output) — confirm the older version is really required.
!pip install numpy==1.22.0
!pip install gensim
!pip install pydantic==1.8.1
!pip install openai
!pip install tqdm

Collecting numpy==1.22.0
  Downloading numpy-1.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.24.3
    Uninstalling numpy-1.24.3:
      Successfully uninstalled numpy-1.24.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.7 which is incompatible.
apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you hav

## Imports + Data

In [2]:
import transformers
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import numpy as np
import pandas as pd
from openai import OpenAI
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity
import random
from tqdm import tqdm



In [3]:
# Print the full path of every file available under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

/kaggle/input/llama-2/pytorch/7b-chat-hf/1/model.safetensors.index.json
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/config.json
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/model-00001-of-00002.safetensors
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/model-00002-of-00002.safetensors
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/pytorch_model-00002-of-00002.bin
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/README.md
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/USE_POLICY.md
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/tokenizer.json
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/tokenizer_config.json
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/pytorch_model.bin.index.json
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/LICENSE.txt
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/pytorch_model-00001-of-00002.bin
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/special_tokens_map.json
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/.gitattributes
/kaggle/input/llama-2/pytorch/7b-chat-hf/1/tokenizer.model
/kaggle/input/llama-2/pyt

In [4]:
# Load the FastText word vectors from their text (.vec) format. They are used
# later to compute the embedding-based similarity between answers.
model_word = KeyedVectors.load_word2vec_format('/kaggle/input/fast-text/wiki-news-300d-1M-subword.vec', binary=False)

In [63]:
# Empty results table; the evaluation loop appends one row per question.
result_columns = [
    "Question",
    "Answer",
    "Answer1",
    "Answer2",
    "Result",
    "similarity_1",
    "similarity_2",
    "similarity_winner",
    "explenation",
]
final_table = pd.DataFrame(columns=result_columns)

In [64]:
# Display the freshly created table (no rows yet) to check its column layout.
final_table

Unnamed: 0,Question,Answer,Answer1,Answer2,Result,similarity_1,similarity_2,similarity_winner,explenation


In [7]:
# Turn a text into the mean of its word vectors
def text_to_vector(text, model):
    """Embed ``text`` as the average of the vectors of its known words.

    The text is split on whitespace; tokens that are not contained in
    ``model`` are ignored.

    Args:
        text: input string.
        model: word-vector lookup supporting ``in``, ``[]`` and ``vector_size``.

    Returns:
        The element-wise mean of the found word vectors, or a zero vector of
        length ``model.vector_size`` when no token is known to the model.
    """
    known_vectors = []
    for token in text.split():
        if token in model:
            known_vectors.append(model[token])
    if len(known_vectors) == 0:
        # No usable token: fall back to a zero vector of the model's dimension.
        return np.zeros(model.vector_size)
    return np.mean(known_vectors, axis=0)

# Cosine similarity between the averaged word vectors of two texts
def cosine_similarity_texts(text1, text2, model):
    """Return the cosine similarity of two texts.

    Both texts are embedded with ``text_to_vector`` and the resulting vectors
    are compared with ``cosine_similarity``.

    Args:
        text1: first text.
        text2: second text.
        model: word-vector model passed on to ``text_to_vector``.

    Returns:
        The single similarity value for the pair (text1, text2).
    """
    first_vec, second_vec = (text_to_vector(t, model) for t in (text1, text2))
    # cosine_similarity works on 2-D inputs and returns a matrix; wrap each vector
    # in a list and read the only entry of the 1x1 result.
    score_matrix = cosine_similarity([first_vec], [second_vec])
    return score_matrix[0][0]

In [None]:
# Read the OpenAI API key from Kaggle's secret store (UserSecretsClient) so the key
# never appears in the notebook, then create the OpenAI client that is used as
# judge in the evaluation loop.
# The key must be stored as a Kaggle secret under the label "api_key".
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
api_key = user_secrets.get_secret("api_key")
client = OpenAI(api_key=api_key)

In [66]:
# Load the questions together with their sample (reference) answers.
# The CSV is semicolon-separated and decoded as ISO-8859-1.
questions = pd.read_csv("/kaggle/input/ds-data/Fragen DS.csv",encoding='ISO-8859-1', sep=";") 

In [67]:
# Preview the first rows to check that the CSV was parsed into Question / Answer columns.
questions.head()

Unnamed: 0,Question,Answer
0,What are the three pillars of Data Science as ...,The three pillars of Data Science are Data Sci...
1,What is the central goal of Data Science as de...,The central goal of Data Science is knowledge ...
2,What are the key stages and participants in th...,The Data Science process model includes six ph...
3,What are some examples of methodologies and te...,"In the Data Science process, methodologies inc..."
4,What are the key components of Data Science fr...,"From an IT perspective, Data Science is an int..."


In [68]:
# Number of question / sample-answer pairs
len(questions)

20

### Tokenizer

In [69]:
# Local path of the base model checkpoint (its tokenizer is loaded from here)
model_id = "/kaggle/input/llama-2/pytorch/7b-chat-hf/1"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Token count of every sample answer, in row order
len_list = [
    len(tokenizer.tokenize(sample_answer))
    for sample_answer in questions["Answer"]
]

In [70]:
# Token count of the longest sample answer; the evaluation loop passes this value
# to both pipelines as the length limit for generation.
max_tokens_for_model = max(len_list)

In [71]:
# Show the resulting token limit.
max_tokens_for_model

176

## Load Llama 2 7B Chat (baseline)

In [15]:
# Quantization settings shared by both models: 4-bit NF4 weights with double
# quantization, bfloat16 as compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)


# Load the baseline model from model_id with the quantization config above;
# device_map={"":0} maps the whole model to device 0.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})

# Text-generation pipeline around the already loaded baseline model.
# NOTE(review): torch_dtype=float16 differs from the bfloat16 compute dtype set in
# bnb_config, and torch_dtype / device_map are passed although `model` is already
# instantiated — confirm these arguments have the intended effect.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
    tokenizer=tokenizer
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



## Load Fine-tuned Llama 2

In [16]:
# Local path of the fine-tuned checkpoint
model_id_2 = "/kaggle/input/finetuned-llama/model_7_4Entries"

# Load the fine-tuned model with the same quantization config as the baseline;
# device_map={"":0} maps the whole model to device 0.
model_2 = AutoModelForCausalLM.from_pretrained(model_id_2, quantization_config=bnb_config, device_map={"":0})

# Text-generation pipeline for the fine-tuned model. It reuses the tokenizer that
# was loaded from the base model path.
# NOTE(review): torch_dtype / device_map are passed although `model_2` is already
# instantiated — confirm these arguments have the intended effect.
pipeline2 = transformers.pipeline(
    "text-generation",
    model=model_2,
    torch_dtype=torch.float16,
    device_map="auto",
    tokenizer=tokenizer
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



# Autoeval

In [72]:
def _generate_answer(pipe, question, max_new_tokens):
    """Sample one answer to ``question`` from a text-generation pipeline.

    Args:
        pipe: text-generation pipeline to query (``pipeline`` or ``pipeline2``).
        question: question text; it is wrapped in ``[INST]...[/INST]`` tags.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        The generated text that follows the ``[/INST]`` tag, without leading
        whitespace.
    """
    sequences = pipe(
        f'[INST]{question}[/INST]',
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # max_length would also count the prompt tokens and thereby shorten the answer
        # by the length of the question. The limit was derived from the sample
        # answers alone, so it has to bound only the newly generated tokens.
        max_new_tokens=max_new_tokens,
    )
    generated = "".join(seq['generated_text'] for seq in sequences)
    # The generated text contains the prompt followed by the completion; keep only the
    # completion. Splitting on the bare tag (instead of the tag plus exactly two
    # spaces) also works when the model emits different whitespace after the prompt.
    return generated.split("[/INST]", 1)[-1].lstrip()


def _parse_verdict(reply):
    """Extract the judge's choice from its reply.

    The first digit found in ``reply`` is taken as the verdict, so replies such
    as ``1\\n\\n...``, ``“2” because ...`` or ``Answer 2 is better`` are all
    understood.

    Returns:
        1 or 2, or None when the reply is empty or its first digit is neither.
    """
    for char in reply or "":
        if char in "0123456789":
            return int(char) if char in "12" else None
    return None


for line in tqdm(range(questions.shape[0]), desc="Verarbeite Fragen"):
    question = questions["Question"][line]
    answer = questions["Answer"][line]

    # Answer 1: Llama 2 baseline, answer 2: fine-tuned Llama 2
    answer1 = _generate_answer(pipeline, question, max_tokens_for_model)
    answer2 = _generate_answer(pipeline2, question, max_tokens_for_model)

    # Present the two answers to the judge in random order so that it cannot
    # systematically favour one model through its position.
    answers = [answer1, answer2]
    zufallszahl = random.choice([0, 1])
    zufallszahl2 = 1 - zufallszahl

    auswertung = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[{"role": "system", "content": "I'll now give you a question, the sample solution to this question and 2 other answers. Decide based on the context of the model solution which of the two answers is better and only output “1” or “2” and after that a short explanation why!"},
                  {"role": "user", "content": f"{question} \n sample {answer} \n 1. Answer: {answers[zufallszahl]} \n 2. Answer: {answers[zufallszahl2]}"}]
    )
    # The explanation refers to the answers in the (possibly swapped) order in which
    # they were presented to the judge.
    explain = auswertung.choices[0].message.content or ""
    # None when the judge did not give a usable verdict; the row is still recorded
    # instead of aborting the whole run.
    better_answer_llm = _parse_verdict(explain)

    # Embedding-based comparison of each generated answer with the sample answer
    cos_1 = cosine_similarity_texts(answer, answer1, model_word)
    cos_2 = cosine_similarity_texts(answer, answer2, model_word)
    better_answer = 1 if cos_1 > cos_2 else 2

    # Undo the random order: if the answers were swapped, the judge's "1" refers to
    # answer2 and its "2" to answer1.
    if zufallszahl == 1 and better_answer_llm is not None:
        better_answer_llm = 3 - better_answer_llm

    neue_zeile = [question, answer, answer1, answer2, better_answer_llm, cos_1, cos_2, better_answer, explain]

    final_table.loc[len(final_table)] = neue_zeile

Verarbeite Fragen: 100%|██████████| 20/20 [14:59<00:00, 44.99s/it]


In [73]:
# Show the final results table
final_table

Unnamed: 0,Question,Answer,Answer1,Answer2,Result,similarity_1,similarity_2,similarity_winner,explenation
0,What are the three pillars of Data Science as ...,The three pillars of Data Science are Data Sci...,"In the Data Science lecture, the three pillars...","The three pillars of Data Science, as outlined...",1,0.921012,0.918245,1,1\n\nThe first answer aligns more closely with...
1,What is the central goal of Data Science as de...,The central goal of Data Science is knowledge ...,"The central goal of Data Science, as described...","The central goal of Data Science, as described...",2,0.959813,0.937084,1,1\n\nThe first answer is better because it cap...
2,What are the key stages and participants in th...,The Data Science process model includes six ph...,The Data Science process model is a standardiz...,The Data Science Process Model is a common mod...,2,0.818927,0.610425,1,1\n\nThe first answer provides a more structur...
3,What are some examples of methodologies and te...,"In the Data Science process, methodologies inc...",Data Science is a multidisciplinary field that...,Here are some examples of methodologies and te...,2,0.841836,0.889097,2,1\n\nThe first answer is better because it pro...
4,What are the key components of Data Science fr...,"From an IT perspective, Data Science is an int...","From an IT perspective, the key components of ...","From an IT perspective, Data Science projects ...",2,0.961563,0.642676,1,1\n\nThe first answer is better in the given c...
5,How does Data Science approach knowledge disco...,"From a conceptual-algorithmic perspective, Dat...",Data Science is an interdisciplinary field tha...,Data Science approaches knowledge discovery an...,2,0.941162,0.860103,1,1\n\nThe first answer provides a more structur...
6,What are the three classes of defects in data ...,The three classes of defects in data preproces...,"In data preprocessing, there are three main cl...",Classification of Defects in Data Transformati...,2,0.887036,0.646392,1,1\n\nThe first answer goes more in-depth into ...
7,What are the key subprocesses in data transfor...,Key subprocesses in data transformation includ...,Data transformation is a crucial step in the d...,The key subprocesses in data transformation fo...,1,0.897222,0.406597,1,2\n\nThe second answer aligns more closely wit...
8,What are the methodologies and types of learni...,Data Mining methodologies include Machine & De...,"Data mining, as an interdisciplinary field tha...",Data mining involves statistical models and al...,2,0.763194,0.710218,1,1\n\nThe first answer is better as it remains ...
9,How is Machine Learning (ML) characterized in ...,Machine Learning (ML) is the science and metho...,Machine Learning (ML) is a subfield of Data Sc...,Machine Learning (ML) is a subfield of Data Sc...,1,0.962102,0.96269,2,2\n\nAnswer 2 is better because it aligns clos...


In [75]:
# Save the results table as a CSV file under a relative path; index=False leaves
# the row index out of the file.
file_name = 'Final_DS_Eval.csv'
final_table.to_csv(file_name, index=False)