# Colab Notebook für die Erzeugung der Fragebogen-Items für das nachtrainierte Modell

Für die Ausführung dieses Notebooks ist ein Hugging-Face-Account notwendig, in dem die Lizenzbedingungen von LLaMA 2 akzeptiert wurden. Der Hugging-Face-Token muss anstelle des Platzhalters "SECRET" eingesetzt werden. Zudem muss Google Drive verbunden sein und den Modell-Adapter enthalten.

In [None]:
# Replace locale.getpreferredencoding so that it always reports "UTF-8".
# Presumably a workaround for Colab sessions where shell magics fail because
# the preferred encoding is not UTF-8 — TODO confirm it is still needed.
# NOTE(review): the replacement accepts no arguments, so any caller that passes
# do_setlocale (e.g. locale.getpreferredencoding(False)) would raise TypeError.
import locale
locale.getpreferredencoding = lambda: "UTF-8"

# Verbosely list (-v) and kill (-k) every process that has an NVIDIA device
# file open; presumably meant to release GPU memory held by an earlier run.
!sudo fuser -v /dev/nvidia* -k

In [None]:
# Install the Hugging Face Hub client and log in. "SECRET" is a placeholder:
# replace it with a personal access token before running, and do not commit
# the notebook with a real token in it.
!pip install huggingface_hub
import huggingface_hub
huggingface_hub.login(token="SECRET")
# Install the pinned library versions this notebook is written against
# (-q keeps pip output quiet).
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7



In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    GenerationConfig
)
from peft import LoraConfig, PeftModel

# from google.colab import drive
# drive.mount('/content/drive')



# Load model

In [None]:
# Base checkpoint on the Hugging Face Hub and the LoRA adapter checkpoint that
# is read from the Google Drive folder.
model_name_or_path = "meta-llama/Llama-2-13b-hf"
adapter_path = "/content/drive/MyDrive/llama-2-13b_lora/checkpoint-300_ep1/"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
# Fixing some of the early LLaMA HF conversion issues.
tokenizer.bos_token_id = 1

# 4-bit NF4 quantization with double quantization; computation runs in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
)

# Load the base model (use f16 for faster inference) and place all of it on
# GPU 0. The 4-bit setting is given only through `quantization_config`: the
# config already carries load_in_4bit=True, so the extra `load_in_4bit=True`
# keyword was redundant, and later transformers releases raise a ValueError
# when both are passed at the same time.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.float16,
    device_map={"": 0},
    quantization_config=quantization_config,
)

# Attach the fine-tuned LoRA adapter and switch to inference mode.
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()





# Create generation function

In [None]:
# Second tokenizer for the same checkpoint, loaded with add_prefix_space=True.
# get_tokens_as_list uses it to turn words into token-id lists.
tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(model_name_or_path, add_prefix_space=True)

def get_tokens_as_list(word_list, word_tokenizer=None):
    """Tokenize every word on its own and return one token-id list per word.

    Args:
        word_list: Iterable of strings. Each string is tokenized separately,
            without special tokens.
        word_tokenizer: Optional tokenizer callable. It is invoked as
            ``word_tokenizer([word], add_special_tokens=False)`` and must
            return an object with an ``input_ids`` attribute. When omitted,
            the module-level ``tokenizer_with_prefix_space`` is used, which is
            the behavior existing callers rely on.

    Returns:
        A list with one entry per input word, in input order. Each entry is
        the ``input_ids[0]`` sequence the tokenizer produced for that word.
        An empty ``word_list`` yields an empty list.
    """
    if word_tokenizer is None:
        # Resolved at call time so the default keeps following the global.
        word_tokenizer = tokenizer_with_prefix_space
    return [
        word_tokenizer([word], add_special_tokens=False).input_ids[0]
        for word in word_list
    ]

# Token-id sequences that generation must never produce; the list is passed as
# `bad_words_ids` in generate_items_sample_p. It covers runs of '#', which is
# the field marker used by the prompts.
# NOTE(review): "/n" is a literal slash followed by "n", not a newline escape.
# It may be a typo for "\n"; confirm before changing, because editing this list
# alters what the model is allowed to generate.
bad_words = get_tokens_as_list(["#","##","###","/n","\n###","###\n"])
# Additionally ban the single token id 13 — presumably the newline token of the
# LLaMA vocabulary; TODO confirm.
bad_words.append([13])

def generate_items_sample_p(prompt, max_new_tokens=200, temperature=0.9, num_return_sequences=15, num_batches=1, top_p=0.90):
    """Sample continuations of `prompt` with nucleus (top-p) sampling.

    Uses the module-level `tokenizer`, `model` and `bad_words`.

    Args:
        prompt: Text the model continues. It is also removed from every
            decoded output via ``str.replace``.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature handed to the generation config.
        num_return_sequences: Number of sequences sampled per generate call.
        num_batches: How many times the generate call is repeated.
        top_p: Cumulative-probability cutoff for nucleus sampling.

    Returns:
        A list of strings: the decoded outputs with `prompt` removed.
        Duplicates are dropped within each batch (first occurrence kept), but
        the same text may still appear in more than one batch.
    """
    sampling_config = GenerationConfig(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        num_return_sequences=num_return_sequences,
        remove_invalid_values=True,
        top_p=top_p,
        bad_words_ids=bad_words,
    )

    # Tokenize once and move the tensors to the GPU; reused for every batch.
    encoded_prompt = tokenizer(prompt, return_tensors="pt")
    encoded_prompt.to('cuda')

    collected = []
    for _ in range(num_batches):
        with torch.no_grad():
            generated_ids = model.generate(**encoded_prompt, generation_config=sampling_config)
            decoded_texts = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

        # Keep only the newly generated text by removing the prompt.
        continuations = [text.replace(prompt, "") for text in decoded_texts]

        # dict.fromkeys de-duplicates while preserving first-seen order.
        collected.extend(dict.fromkeys(continuations))

        # Drop the generated tensor and release cached GPU memory before the
        # next batch.
        del generated_ids
        torch.cuda.empty_cache()

    del encoded_prompt
    torch.cuda.empty_cache()

    return collected

# Itemgenerierung für den Fragebogen
- Big five
- Need for closure
- Organizational Commitment
In [None]:
# Big Five prompts, one per trait (Extraversion, Agreeableness,
# Conscientiousness, Neuroticism, Openness). Every prompt is a list of
# "###Field: value" lines that ends with an open "###Item:" field.
# These five prompts are not referenced by the generation loop visible in
# this file.
# NOTE(review): the field name is spelled "S_defintion" in every prompt.
# Presumably this matches the format the adapter was trained on; confirm
# before "correcting" it, since the literals are exact model inputs.
e_prompt = """###Construct: Extraversion
 ###C_definition: Extraversion is a personality characteristic capturing traits like being Talkative, Assertive, Active, Energetic and Outgoing.
 ###Subconstruct: NA
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

a_prompt = """###Construct: Agreeableness
 ###C_definition: Agreeableness is a personality characteristic capturing traits like being Sympathetic, Kind, Appreciative, Affectionate and Soft-hearted.
 ###Subconstruct: NA
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

c_prompt = """###Construct: Conscientiousness
 ###C_definition: Conscientiousness is a personality characteristic capturing traits like being Organized, Thorough, Planful, Efficient and Responsible.
 ###Subconstruct: NA
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

n_prompt = """###Construct: Neuroticism
 ###C_definition: Neuroticism is a personality characteristic capturing traits like being Tense, Anxious, Nervous, Moody, Worrying
 ###Subconstruct: NA
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

o_prompt = """###Construct: Openness
 ###C_definition: Openness is a personality characteristic capturing traits like having wide interests and being Imaginative, Intelligent, Original, Insightful.
 ###Subconstruct: NA
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""



# Need for Closure prompts. All share the same construct definition and differ
# only in the "###Subconstruct" field. nfc_1_na_prompt leaves the subconstruct
# as NA and is not part of the `prompts` list used by the generation loop in
# this file; nfc_1_prompt .. nfc_5_prompt name one subconstruct each.
# The generation loop rewrites the "###Inverted: FALSE" and "###Difficulty: NA"
# lines by exact string replacement, so those lines must stay as written.
nfc_1_na_prompt = """###Construct: Need for Closure
 ###C_definition: Need for closure can be defined as the desire for an answer on a given topic, any answer compared to confusion and ambiguity. People with a high Need for Closure prefer order, predictability and decisiveness. They feel discomfort with ambiguity and are closed-minded.
 ###Subconstruct: NA
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

# Subconstruct: Preference for Order
nfc_1_prompt = """###Construct: Need for Closure
 ###C_definition: Need for closure can be defined as the desire for an answer on a given topic, any answer compared to confusion and ambiguity. People with a high Need for Closure prefer order, predictability and decisiveness. They feel discomfort with ambiguity and are closed-minded.
 ###Subconstruct: Preference for Order
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

# Subconstruct: Preference for Predictability
nfc_2_prompt = """###Construct: Need for Closure
 ###C_definition: Need for closure can be defined as the desire for an answer on a given topic, any answer compared to confusion and ambiguity. People with a high Need for Closure prefer order, predictability and decisiveness. They feel discomfort with ambiguity and are closed-minded.
 ###Subconstruct: Preference for Predictability
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

# Subconstruct: Preference for Decisiveness
nfc_3_prompt = """###Construct: Need for Closure
 ###C_definition: Need for closure can be defined as the desire for an answer on a given topic, any answer compared to confusion and ambiguity. People with a high Need for Closure prefer order, predictability and decisiveness. They feel discomfort with ambiguity and are closed-minded.
 ###Subconstruct: Preference for Decisiveness
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

# Subconstruct: Discomfort with Ambiguity
nfc_4_prompt = """###Construct: Need for Closure
 ###C_definition: Need for closure can be defined as the desire for an answer on a given topic, any answer compared to confusion and ambiguity. People with a high Need for Closure prefer order, predictability and decisiveness. They feel discomfort with ambiguity and are closed-minded.
 ###Subconstruct: Discomfort with Ambiguity
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

# Subconstruct: Closed-Mindedness
nfc_5_prompt = """###Construct: Need for Closure
 ###C_definition: Need for closure can be defined as the desire for an answer on a given topic, any answer compared to confusion and ambiguity. People with a high Need for Closure prefer order, predictability and decisiveness. They feel discomfort with ambiguity and are closed-minded.
 ###Subconstruct: Closed-Mindedness
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""

# Organizational Commitment prompt, without a subconstruct; it is the last
# entry of the `prompts` list used by the generation loop.
# NOTE(review): the definition reads "Commitmentis" (missing space). It is part
# of the exact model input, so fixing it changes what the model sees — confirm
# whether that is intended before editing.
oc_1_prompt = """###Construct: Organizational Commitment
 ###C_definition: Organizational Commitmentis the strength of an individual’s identification with and involvement in a particular organization.
 ###Subconstruct: NA
 ###S_defintion: NA
 ###Difficulty: NA
 ###Inverted: FALSE
 ###Item:"""


## Generiere Items für Need for Closure
## Generiere Items für Organizational Commitment



In [None]:
# Prompts to expand: the five Need for Closure subconstruct prompts followed by
# the Organizational Commitment prompt.
prompts = [
    nfc_1_prompt,
    nfc_2_prompt,
    nfc_3_prompt,
    nfc_4_prompt,
    nfc_5_prompt,
    oc_1_prompt,
]

# Values written into the "###Inverted" and "###Difficulty" prompt fields, and
# the sampling temperatures to run.
invert_values = ["FALSE", "TRUE"]
difficulty_values = [1, 5]
temperature_values = [1]

# One single-key dict per parameter combination: {description: generated items}.
outputs = []

for i, prompt in enumerate(prompts):
    for invert in invert_values:
        for difficulty in difficulty_values:
            # A difficulty of None is rendered as the placeholder 'NA'.
            difficulty_label = 'NA' if difficulty is None else difficulty
            for temperature in temperature_values:
                # Fill in the inversion flag first, then the difficulty.
                updated_prompt = prompt.replace("###Inverted: FALSE", f"###Inverted: {invert}")
                updated_prompt = updated_prompt.replace("###Difficulty: NA", f"###Difficulty: {difficulty_label}")

                items = generate_items_sample_p(
                    updated_prompt,
                    max_new_tokens=40,
                    temperature=temperature,
                    num_return_sequences=10,
                    num_batches=1,
                    top_p=.90,
                )

                # The key records the 1-based prompt position and all parameters.
                key = f"prompt_{i+1}_invert_{invert}_difficulty_{difficulty_label}_temperature_{temperature}"
                outputs.append({key: items})



In [None]:
import json

# Destination file inside the Google Drive folder; Drive has to be mounted
# for this path to exist.
output_path = "/content/drive/MyDrive/llama-2-13b_lora/generated_items_nacherhebung_1.json"

# Serialize all collected items to one JSON document and write it out.
outputs_json = json.dumps(outputs)
with open(output_path, "w") as outfile:
    outfile.write(outputs_json)