# Notebook for an Example Gradio App for Model Control

To run this notebook you need a Hugging Face account that has accepted the LLaMA 2 license terms. Insert your Hugging Face access token in place of the "SECRET" placeholder in the login cell below.

## Execute to Free Memory in Case of an OOM Error



In [1]:
import locale
# Replace locale.getpreferredencoding with a stub that always reports "UTF-8".
# NOTE(review): presumably a workaround for environments (e.g. Colab) where later
# shell commands fail when the preferred encoding is not UTF-8 — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

# Uncomment the next line after a CUDA out-of-memory error: it kills every process
# that holds an NVIDIA device file open, which releases their GPU memory.
#!sudo fuser -v /dev/nvidia* -k

In [2]:
!pip install gradio
!pip install huggingface_hub
import huggingface_hub
huggingface_hub.login(token="SECRET")
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7



[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.4/321.4 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.9/116.9 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import gradio as gr

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig
)
from peft import LoraConfig, PeftModel





## Load the Model and the Adapter


In [None]:
# Base checkpoint on the Hugging Face Hub and the LoRA adapter that is applied on top of it.
model_name_or_path = "meta-llama/Llama-2-13b-hf"
adapter_path = "bgilles/PsychometricLLaMA"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
# Pin the beginning-of-sequence token id explicitly.
tokenizer.bos_token_id = 1

# Load the base model 4-bit quantized (NF4 with double quantization, fp16 compute).
# The quantization settings are passed only through `quantization_config`: also passing
# `load_in_4bit=True` as a separate keyword is redundant, and later transformers
# releases reject that combination.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.float16,
    device_map={"": 0},  # put the whole model on device 0
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
    )
)

# Wrap the quantized base model with the fine-tuned adapter weights and switch to
# inference mode. The trailing semicolon keeps the (very long) module repr out of the
# cell output.
model = PeftModel.from_pretrained(model, adapter_path)
model.eval();





## Load the Tokenizer and Create the Generation Function

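The token sequences in `bad_words` are banned during generation so that the model does not occasionally reproduce parts of the prompt format (such as the `###` markers or line breaks) in its output.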

In [6]:
# Second tokenizer instance for the same checkpoint, loaded with add_prefix_space=True.
# It is only used by get_tokens_as_list to turn the banned words into token ids.
# NOTE(review): presumably follows the transformers recommendation for building
# `bad_words_ids` — confirm the option has an effect for this tokenizer class.
tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(model_name_or_path, add_prefix_space=True)

def get_tokens_as_list(word_list):
    """Tokenize every entry of ``word_list`` separately.

    Each entry is passed on its own to ``tokenizer_with_prefix_space`` with
    special tokens disabled.

    Args:
        word_list: iterable of strings to tokenize.

    Returns:
        A list with one element per input string, each being the ``input_ids``
        produced for that string.
    """
    return [
        tokenizer_with_prefix_space([entry], add_special_tokens=False).input_ids[0]
        for entry in word_list
    ]

# Token-id sequences handed to generation as `bad_words_ids`: the "#"-style field
# markers used by the prompt format, alone and combined with a line break.
# NOTE(review): "/n" looks like a typo for "\n" — confirm before changing, since it
# alters which sequences are banned.
bad_words = get_tokens_as_list(["#","##","###","/n","\n###","###\n"])
# Additionally ban the single token id 13.
# NOTE(review): presumably the newline token of the LLaMA vocabulary — verify against the tokenizer.
bad_words.append([13])

def generate_items_sample_p(prompt, max_new_tokens=200, temperature=0.9, num_return_sequences=15, num_batches=1, top_p=0.90):
    """Sample continuations of ``prompt`` with nucleus (top-p) sampling.

    Runs ``model.generate`` ``num_batches`` times with ``num_return_sequences``
    samples each, decodes the results, removes the prompt text from every
    decoded string and returns the distinct continuations.

    Args:
        prompt: text the model should continue.
        max_new_tokens: upper bound on the number of generated tokens per sample.
        temperature: sampling temperature. Values below ``0.01`` (including 0)
            are raised to ``0.01`` because sampling needs a strictly positive
            temperature.
        num_return_sequences: samples drawn per ``generate`` call.
        num_batches: number of ``generate`` calls.
        top_p: cumulative-probability cut-off for nucleus sampling.

    Returns:
        list[str]: unique continuations in first-seen order (duplicates are
        removed across all batches, so the list can be shorter than
        ``num_return_sequences * num_batches``).
    """
    # Numeric arguments can arrive from UI widgets as int where a float is expected
    # (and vice versa). Normalise them up front: the temperature warper of the pinned
    # transformers release only accepts a strictly positive *float*, and range()
    # below needs an int.
    min_temperature = 1e-2
    temperature = max(float(temperature), min_temperature)
    num_return_sequences = int(num_return_sequences)
    num_batches = int(num_batches)

    config = GenerationConfig(
        max_new_tokens=int(max_new_tokens),  # Maximum length of the generated text
        do_sample=True,  # Sample instead of decoding greedily
        temperature=temperature,  # Rescales the next-token distribution
        num_return_sequences=num_return_sequences,
        remove_invalid_values=True,
        top_p=float(top_p),
        bad_words_ids=bad_words,  # Never emit the prompt's "#" markers / line breaks
    )

    # Named `encoded` rather than `input` so the builtin is not shadowed.
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded.to('cuda')

    collected = []
    try:
        for _ in range(num_batches):
            with torch.no_grad():
                out = model.generate(**encoded, generation_config=config)
                decoded = tokenizer.batch_decode(out, skip_special_tokens=True)

            # Release the generated tensor before the next batch is allocated.
            del out
            torch.cuda.empty_cache()

            # The decoded text still starts with the prompt; keep only the continuation.
            collected.extend(text.replace(prompt, "") for text in decoded)
    finally:
        # Free the encoded prompt even if generation failed part-way.
        del encoded
        torch.cuda.empty_cache()

    # dict.fromkeys de-duplicates while preserving order; doing it once over all
    # batches also removes repeats that occur in different batches.
    return list(dict.fromkeys(collected))

In [None]:
def format_prompt(construct="NA", definition="NA", subconstruct="NA", subdefinition="NA", difficulty="NA",inverted="FALSE"):
    """Build the generation prompt from the item specification.

    Every field is rendered as ``###<Label>: <value>``; fields are separated by
    ``" \\n "`` and the prompt ends with an open ``###Item:`` slot for the model
    to complete.

    Args:
        construct: name of the construct.
        definition: definition of the construct.
        subconstruct: name of the subconstruct.
        subdefinition: definition of the subconstruct.
        difficulty: item difficulty.
        inverted: whether the item is inverted (e.g. "TRUE"/"FALSE").

    Returns:
        str: the assembled prompt.
    """
    labelled_fields = (
        ("Construct", construct),
        ("C_Definition", definition),
        ("Subconstruct", subconstruct),
        ("S_Definition", subdefinition),
        ("Difficulty", difficulty),
        ("Inverted", inverted),
    )
    separator = " \n "
    header = separator.join(f"###{label}: {value}" for label, value in labelled_fields)
    return header + separator + "###Item:"

In [47]:
# Prompt with every field left at its default ("NA" / "FALSE").
# Not referenced by any other cell visible in this notebook.
na_prompt = format_prompt()

# Hand-written variant of the all-"NA" prompt; also not referenced by the visible cells.
# NOTE(review): its labels differ from what format_prompt emits ("C_definition" vs
# "C_Definition", "S_defintion" vs "S_Definition", no "###Difficulty" line, plain
# newlines as separators) — confirm which spelling matches the adapter's training format.
na_og_prompt = """###Construct: NA
###C_definition: NA
###Subconstruct: NA
###S_defintion: NA
###Inverted: FALSE
###Item:"""




Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a51f31308c4617c7fa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Start the Gradio App

In [None]:
def format_for_clipboard(items):
    """Join the generated items into one newline-separated string."""
    return "\n".join(items)

def format_output(items):
    """Render the generated items as an HTML panel with a "Copy All Items" button.

    Args:
        items: iterable of item strings.

    Returns:
        str: HTML markup consisting of a header row with the copy button and a
        ``<ul>`` that holds one ``<li>`` per item.
    """
    import html
    import json

    items_text = format_for_clipboard(items)
    # The clipboard text ends up inside a JavaScript call inside a double-quoted HTML
    # attribute. json.dumps produces a valid JS string literal (quotes, backslashes
    # and line breaks escaped); html.escape then makes that literal safe for the
    # attribute. Splicing the raw text into a `...` template literal broke the button
    # whenever an item contained a double quote, a backtick or "${".
    clipboard_literal = html.escape(json.dumps(items_text), quote=True)
    markup = f"""
    <div style="max-width: 800px; margin: 0 auto;">
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
            <h3 style="color: #2c3e50; margin: 0;">Generated Items:</h3>
            <button onclick="navigator.clipboard.writeText({clipboard_literal})"
                    style="padding: 8px 16px; background: #4CAF50; color: white; border: none;
                    border-radius: 4px; cursor: pointer;">
                Copy All Items
            </button>
        </div>
        <div style="background: #f8f9fa; padding: 20px; border-radius: 8px;">
            <ul>
    """
    # The item text is inserted verbatim: on_generate recovers the items by splitting
    # this markup on <li>/</li>, so the list entries must round-trip unchanged.
    markup += "".join(f"<li>{item}</li>" for item in items)
    markup += "</ul></div></div>"
    return markup

def generate_text(construct="Extraversion", definition="NA", subconstruct="NA", subdefinition="NA", difficulty="NA", inverted="FALSE", temperature=0.9, n_samples=10, batches=2, prepend_text=""):
    """Generate items for the given specification and return them as HTML.

    Args:
        construct, definition, subconstruct, subdefinition, difficulty, inverted:
            fields of the prompt (see ``format_prompt``).
        temperature: sampling temperature.
        n_samples: samples drawn per batch.
        batches: number of generation batches.
        prepend_text: text placed in front of the formatted prompt.

    Returns:
        str: HTML produced by ``format_output`` for the generated items.
    """
    # Pass the fields by keyword: the positional call handed `inverted` and
    # `difficulty` to format_prompt in swapped order, so the prompt stated the
    # inversion flag as the difficulty and vice versa.
    prompt = prepend_text + format_prompt(
        construct=construct,
        definition=definition,
        subconstruct=subconstruct,
        subdefinition=subdefinition,
        difficulty=difficulty,
        inverted=inverted,
    )
    items = generate_items_sample_p(prompt, temperature=temperature, max_new_tokens=30, num_return_sequences=n_samples, num_batches=batches)
    return format_output(items)

def refined_submit(selected_items, construct, definition, subconstruct, subdefinition, difficulty, inverted, temperature, n_samples, batches):
    """Generate again, with the selected items placed in front of the prompt.

    Args:
        selected_items: list of item strings ticked by the user (may be empty or None).
        construct, definition, subconstruct, subdefinition, difficulty, inverted,
        temperature, n_samples, batches: forwarded unchanged to ``generate_text``.

    Returns:
        str: HTML returned by ``generate_text``.
    """
    # One line per selected item. With nothing selected this is the empty string,
    # so the prompt is not prefixed with a stray line break.
    prepend_text = "".join(f"{item}\n" for item in (selected_items or []))
    return generate_text(construct, definition, subconstruct, subdefinition, difficulty, inverted, temperature, n_samples, batches, prepend_text)

# Build the Gradio UI: input widgets on the left, rendered items on the right, and
# below them a checkbox list of the generated items that can be fed back into the
# prompt via "Refined Submit".
with gr.Blocks() as demo:
    gr.Markdown("# Psychometric LLaMA - Detailed Transformer Based Item Generation")
    gr.Markdown("""To use this generator to create likert-items, fill in construct names and definitions. You can also leave parts blank to try the default generations. Examples are provided below.
    \n For more or less content variance try to change the temperature parameter. Best accuracy with limited creativity is achieved at t = .7 - .9.
    \n The number of generated samples is n_samples * number of batches.""")

    with gr.Row():
        with gr.Column():
            construct = gr.Textbox(label="Construct", value="Extraversion")
            definition = gr.Textbox(label="Definition", value="NA")
            subconstruct = gr.Textbox(label="Subconstruct", value="NA")
            subdefinition = gr.Textbox(label="Subdefinition", value="NA")
            difficulty = gr.Slider(label="Difficulty", minimum=1, maximum=5, step=1, value=3)
            inverted = gr.Textbox(label="Inverted", value="FALSE")
            # Sampling needs a strictly positive temperature, so the slider starts at
            # 0.1 rather than 0 (a value of 0 makes generation fail).
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=2, step=0.1, value=0.9)
            n_samples = gr.Slider(label="Number of Samples", minimum=1, maximum=15, step=1, value=10)
            batches = gr.Slider(label="Number of Batches", minimum=1, maximum=3, step=1, value=2)
            generate_btn = gr.Button("Generate")
        with gr.Column():
            output = gr.HTML()

    # Add CheckboxGroup for selecting items
    selected_items = gr.CheckboxGroup(label="Select Items", choices=[])

    # Add Refined Submit button (enabled once items have been generated)
    refined_btn = gr.Button("Refined Submit", interactive=False)

    def on_generate(construct, definition, subconstruct, subdefinition, difficulty, inverted, temperature, n_samples, batches):
        """Generate items and refresh the HTML view, the checkbox choices and the refine button.

        Returns a tuple of (HTML markup, update that replaces the checkbox choices
        and clears the selection, update that enables the "Refined Submit" button).
        """
        generated_html = generate_text(construct, definition, subconstruct, subdefinition, difficulty, inverted, temperature, n_samples, batches)
        # Extract items from the generated HTML: everything between <li> and </li>
        items = [item.strip() for item in generated_html.split('<li>')[1:]]
        items = [item.split('</li>')[0] for item in items]
        return generated_html, gr.update(choices=items, value=[]), gr.update(interactive=True)

    generate_btn.click(
        on_generate,
        inputs=[construct, definition, subconstruct, subdefinition, difficulty, inverted, temperature, n_samples, batches],
        outputs=[output, selected_items, refined_btn],
    )

    refined_btn.click(
        refined_submit,
        inputs=[selected_items, construct, definition, subconstruct, subdefinition, difficulty, inverted, temperature, n_samples, batches],
        outputs=output,
    )

    # Example rows fill the six prompt fields. The difficulty is given as a number
    # because it feeds the numeric "Difficulty" slider.
    gr.Examples(
        examples=[
            ["Emotional intelligence","Trait Emotional intelligence refers to self-reported emotion-related dispositions",
             "emotional regulation of the self","NA",3,"FALSE"],
            ["Conscientiousness","Conscientiousness describes socially prescribed impulse control that facilitates task- and goal-directed behavior, such as thinking before acting, delaying gratification, following normas and rules, and planning, organizing, and prioritizing tasks.",
             "NA","NA",3,"FALSE"],
            ["Germanness","This construct measures how German somebody is. It includes accents, food choices, hobbies like golfing etc, and tendencies like always being punctual.",
             "NA","NA",3,"FALSE"],
            ["Trust in AI","Trust in AI describes the individual trust somebody has into AI applications at the workspace.",
             "Trusting Data","Trusting data describes the degree to which people trust the used training data in a variety of contexts.",3,"FALSE"]
        ],
        inputs=[construct, definition, subconstruct, subdefinition, difficulty, inverted]
    )

demo.launch()

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

Keyboard interruption in main thread... closing server.


