In [1]:
# Mit diesem Notebook wurde die Generierung aller drei Evaluationsausgaben für einen ausgewählten LoRA Adapter durchgeführt
# RESTART KERNEL BEFORE NEW USAGE
# RESULTS ARE IN /generation_results

yes_no_questions_path = "model_evaluation_questions/questions_25.json"
manual_questions_path = "model_evaluation_questions/manual_questions.json"
correctness_questions_path = "model_evaluation_questions/correctness_questions.json"

use_logging = True
use_base_model = False # Nutzung des mistral base models, wenn true wird der save_name verändert -> mistral_base
lora_to_evaluate = "../fullRun/lorasPlainDataRunBig/checkpoint-3000"
save_name = "plainData-finetuning-3000"

manual_and_correctness_prompt = "Bitte beantworte folgende juristische Frage: " # Added to start of question
yes_no_prompt = " Antworte mit 'Ja,' 'Nein,' oder 'Unklar'" # Added to end of question

In [2]:
import torch
from huggingface_hub import login, logout
from unsloth import FastLanguageModel
import os
from peft import PeftModel
import json
import re

login("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")
os.environ["WANDB_DISABLED"] = "true"

device = torch.device("cuda")

# Gute Daten vom bisher best-trainierten Model
base_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-v0.3", 
    local_files_only = True, 
    max_seq_length = 400, # Choose any! We auto support RoPE Scaling internally!
    dtype = None, # None for auto detection. Float16 "or Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = True, # Use 4bit quantization to reduce memory usage. Can be False.
)
FastLanguageModel.for_inference(base_model)

if use_base_model:
    print("USING MISTRAL BASE")
    peft_model = base_model
    save_name = "mistral_base"
else:
    print("USING LORA: " + lora_to_evaluate)
    peft_model = PeftModel.from_pretrained(base_model, lora_to_evaluate, is_trainable=False) 



# Manual Answers Generation
with open(manual_questions_path, 'r') as file:
    json_data = json.load(file)

questions_withprompt = [manual_and_correctness_prompt + question for question in json_data]

model_answers = []

for question in questions_withprompt:
    input_ids = tokenizer(question, return_tensors="pt").input_ids
    peft_model_outputs = peft_model.generate(input_ids=input_ids, repetition_penalty=1.4, early_stopping=True, max_new_tokens=400) # top_k=5, top_p=0.9, temperature=0.1,
    cut_tensor = peft_model_outputs[:, input_ids.size(1):]
    peft_model_text_output = tokenizer.decode(cut_tensor[0], skip_special_tokens=True, add_generation_prompt = False)
    model_answers.append(peft_model_text_output)

# Save the dictionary to a JSON file 
with open('generation_results/manual_' + save_name + '.json', 'w') as file:
    json.dump(model_answers, file, indent=4)

if use_logging:
    for question, answer in zip(questions_withprompt, model_answers):
        print("Frage:")
        print(question)
        print("\n Antwort:")
        print(answer)
        print("\n ###########################")
    
print("FINISHED MANUAL")



# Correctness Answers Generation
with open(correctness_questions_path, 'r') as file:
    json_data = json.load(file)

questions_withprompt = [manual_and_correctness_prompt + question for question in json_data]

model_answers = []

for question in questions_withprompt:
    input_ids = tokenizer(question, return_tensors="pt").input_ids
    peft_model_outputs = peft_model.generate(input_ids=input_ids, repetition_penalty=1.4, early_stopping=True, max_new_tokens=400) # top_k=5, top_p=0.9, temperature=0.1,
    cut_tensor = peft_model_outputs[:, input_ids.size(1):]
    peft_model_text_output = tokenizer.decode(cut_tensor[0], skip_special_tokens=True, add_generation_prompt = False)
    model_answers.append(peft_model_text_output)

# Save the dictionary to a JSON file
with open('generation_results/correctness_' + save_name + '.json', 'w') as file:
    json.dump(model_answers, file, indent=4)

if use_logging:
    for question, answer in zip(questions_withprompt, model_answers):
        print("Frage:")
        print(question)
        print("\n Antwort:")
        print(answer)
        print("\n ###########################")

print("FINISHED CORRECTNESS")



# Yes_No Answers Generation
with open(yes_no_questions_path, 'r') as file:
    json_data = json.load(file)

questions_withprompt = [question + yes_no_prompt for question in json_data]

model_answers = []

for question in questions_withprompt:
    input_ids = tokenizer(question, return_tensors="pt").input_ids
    peft_model_outputs = peft_model.generate(input_ids=input_ids, repetition_penalty=1.4, early_stopping=True, max_new_tokens=400) # top_k=5, top_p=0.9, temperature=0.1,
    cut_tensor = peft_model_outputs[:, input_ids.size(1):]
    peft_model_text_output = tokenizer.decode(cut_tensor[0], skip_special_tokens=True, add_generation_prompt = False)
    model_answers.append(peft_model_text_output)

yes_pattern = r'\b([Yy]es|[Jj]a)[.!?]?\b'
no_pattern = r'\b([Nn]o|[Nn]ein)[.!?]?\b'

yes_no_answers = []
for answer in model_answers:
    matches_yes = re.findall(yes_pattern, answer)
    matches_no = re.findall(no_pattern, answer)

    if len(matches_yes) != 0 and len(matches_no) != 0:
        yes_no_answers.append("Unklar")
    elif len(matches_yes) != 0:
        yes_no_answers.append("Ja")
    elif len(matches_no) != 0:
        yes_no_answers.append("Nein")
    else:
        yes_no_answers.append("Unklar")

answers_list = [{"long": longAnswer, "short": shortAnswer} for longAnswer, shortAnswer in zip(model_answers, yes_no_answers)]

# Save the dictionary to a JSON file
with open('generation_results/yes_no_' + save_name + '.json', 'w') as file:
    json.dump(answers_list, file, indent=4)
    
if use_logging:
    for question, answer, yes_no_answer in zip(questions_withprompt, model_answers, yes_no_answers):
        print("Frage:")
        print(question)
        print("\n Antwort:")
        print(answer)
        print("\n YES_NO:")
        print(yes_no_answer)
        print("\n ###########################")

print("FINISHED YES_NO")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/ps2024/.cache/huggingface/token
Login successful
==((====))==  Unsloth 2024.9.post4: Fast Mistral patching. Transformers = 4.45.1.
   \\   /|    GPU: Quadro RTX 6000. Max memory: 23.462 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
USING LORA: ../fullRun/lorasPlainDataRunBig/checkpoint-3000


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Frage:
Bitte beantworte folgende juristische Frage: Bedeuten „Bürgerliches Recht“, „Zivilrecht“ und „Privatrecht“ dasselbe oder bestehen Unterschiede?

 Antwort:


Die Entscheidung ist nicht anfechtbar. Die Beschwerde wird zurückgewiesen. 1) Der Antragsteller wendet sich mit seiner sofortigen Beschwerde gegen die Versagung von Prozesskostenhilfebewilligung durch den angefochtenen Beschluss des Amtsgerichts - Familiengerichte in Halle (Saale). Das Verwaltungsamt für Soziales, Jugend-und Bildungswesen hat dem Antragsgegner am XX.XX.2018 eine Unterhaltsverpflichtung auferlegt; der Antragsgegenstand betrifft das Kind A., geboren XXXX, verheiratet seit X.XXXX mit Frau Y.. Mit Schreiben vom XX.XX.2019 beantragt er bei Gericht die Bewilligung von PKH unter Beiordnung seines Bevollmächtigten zur Durchsetzung einer Zahlungenspflicht aus einem Urteil im Wege eines Vaterschaftsanerkenntnisses nach § 36 Absatz 4 SGB XII iVm Artikel 5 EGRL 7/2007 sowie aufgrund eines gerügten Mangels beim Ausgleich