# Granite Guardian : Quick Start Guide

Links to 🤗 models: [8B](https://huggingface.co/ibm-granite/granite-guardian-3.3-8b)

<span style="color: red;">Content Warning</span>: *The examples used in this page may contain offensive language, stereotypes, or discriminatory content.*


## What's new? ✨
* _Hybrid Reasoning Model_: Tackle complex tasks with the new hybrid reasoning model. Users can now toggle reasoning **on** or **off** (via the `think` flag) to best suit their needs.
* _Enhanced Performance_: We have made significant improvements in groundedness and function-call hallucination detection capabilities. 🚀
* _Updated Customization_: **'risk'** is now **'criteria'** to better fit a wide variety of use cases. 🎯 And with the new *'Bring Your Own Criteria'* feature, users can provide their own custom criteria and scoring schema, giving them the power to customize the results!

## Dependencies

In [1]:
# %pip install vllm torch transformers

In [2]:

from transformers import AutoTokenizer
from llama_cpp import Llama

# Load the locally downloaded GGUF checkpoint through llama.cpp.
# n_ctx=1024 is far below the model's training context (see the runtime
# warning printed below the cell), so long prompts will not fit.
llm = Llama(
    model_path="../model_gguf/granite-guardian-3.3-8b-Q8_0.gguf",
    n_ctx=1024,
    logits_all=True,
    verbose=False,
)

# The tokenizer is loaded only to render the chat template (prompt format);
# generation itself goes through `llm`.
tokenizer = AutoTokenizer.from_pretrained("../model")

# Single-turn smoke test: one user message, rendered to a plain-text prompt.
user_text = "How can I fine-tune this model for question answering?"
messages = [{"role": "user", "content": user_text}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Short completion at temperature 0.0; echo=False keeps the prompt out of
# the returned text.
generation_kwargs = {
    "max_tokens": 32,
    "temperature": 0.0,
    "logprobs": 20,
    "echo": False,
}
output = llm(prompt, **generation_kwargs)

first_choice = output["choices"][0]
print(first_choice["text"])

  from .autonotebook import tqdm as notebook_tqdm
llama_context: n_ctx_per_seq (1024) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


<think>
</think>
<score> no </score>


## Helper functions
A utility function to parse the model output (the text generated through llama.cpp).

In [3]:
import re


def parse_response(response):
    """Extract the score and the reasoning trace from a generated response.

    The response is searched for ``<think>...</think>`` and
    ``<score>...</score>`` spans; matching crosses newlines. When a tag
    occurs more than once, the content of its last occurrence is used.

    Args:
        response: Text generated by the model.

    Returns:
        A ``(score, trace)`` tuple. Each element is the text found between
        the corresponding tags, exactly as written (surrounding whitespace
        is not stripped), or ``None`` when that tag pair is absent.
    """
    trace_match = re.findall(r'<think>(.*?)</think>', response, re.DOTALL)
    score_match = re.findall(r'<score>(.*?)</score>', response, re.DOTALL)

    # The trace is the last <think> span. Previously this was the literal
    # list [-1], so the reasoning text was never returned.
    trace = trace_match[-1] if trace_match else None
    score = score_match[-1] if score_match else None

    return score, trace

In [4]:
import pandas as pd

# Input files, given relative to ../data/in/. Commented-out entries are
# simply not loaded.
csvs = [
    # "hiv/qa.csv",
    "library/angelica.csv",
    # "library/initial_questions.csv"
]

# One DataFrame per listed file, in the same order as `csvs`.
# The loop variable is deliberately not named `csv`: that name would shadow
# the stdlib `csv` module, which the saving code relies on (csv.QUOTE_ALL).
dfs = [pd.read_csv(f"../data/in/{csv_name}") for csv_name in csvs]

# Notebook display of the first loaded frame.
dfs[0]

Unnamed: 0,question,answer
0,What was John McCray's position in the Progres...,John McCray was chairman in 1944.
1,Did John McCray receive W. J. Hunter’s support...,"Yes, W.J. Hunter contributed to the Re-elect T..."
2,Did John McCray write to John Oscar Floyd on S...,"No, John McCray wrote to John Oscar Floyd on O..."
3,Did John McCray apply to work with the S.C. Di...,"Yes, he applied to work for the organization i..."
4,What year did John McCray resign as the editor...,John McCray resigned from his position as edit...
5,Was it necessary for John McCray to organize t...,"Yes, John McCray helped create the PDP because..."
6,Why did Richard J. Foster endorse John McCray’...,Foster endorsed McCray's application because h...
7,Did John McCray enjoy working for The Lighthou...,McCray was dedicated to working at The Lightho...
8,Did John McCray have any enemies?,McCray's collection reflects that he had sever...
9,"According to John McCray, were Black voters in...",Black residents in Marion County frequently ha...


In [5]:
# import os
# import csv

# criteria_ids = [
#     "harm",
#     "social_bias",
#     "profanity",
#     "sexual_content",
#     "unethical_behavior",
#     "violence",
# ]
# for idx, df in enumerate(dfs):
#     for criterion in criteria_ids:
#         guardian_config = {"criteria_id": criterion}
#         col_q = []
#         col_qa = []
#         for _, row in df.iterrows(): 
#             user_text = row['question']
#             response_text = row['answer']

#             messages = [{"role": "user", "content": user_text}]

#             prompt = tokenizer.apply_chat_template(messages, guardian_config = guardian_config, tokenize=False, add_generation_prompt=True)

#             output = llm(
#                 prompt,
#                 max_tokens=32,
#                 temperature=0.0,
#                 logprobs=20,
#                 echo=False
#             )

#             response = output["choices"][0]["text"]

#             score_q, _ = parse_response(response)

#             col_q.append(score_q)

#             messages.append({"role": "assistant", "content": response_text})

#             prompt = tokenizer.apply_chat_template(messages, guardian_config = guardian_config, tokenize=False, add_generation_prompt=True)

#             output = llm(
#                 prompt,
#                 max_tokens=32,
#                 temperature=0.0,
#                 logprobs=20,
#                 echo=False
#             )

#             response = output["choices"][0]["text"]

#             score_qa, _ = parse_response(response)

#             col_qa.append(score_qa)
        
#         # Add new columns
#         df[f"{criterion}_q"] = col_q
#         df[f"{criterion}_qa"] = col_qa
#     out_path = os.path.join("../data/out", csvs[idx])
#     os.makedirs(os.path.dirname(out_path), exist_ok=True)
#     df.to_csv(out_path, index=False, quoting=csv.QUOTE_ALL)

In [10]:
import os
import csv

import pandas as pd

# Files to process. Each is read from ../data/out/ and the augmented copy is
# written under ../data/out2/ with the same relative path.
csvs = [
    # "hiv/qa.csv",
    "library/angelica.csv",
    # "library/initial_questions.csv"
]

# Column layout of the saved files: question/answer first, then the
# <criterion>_q / _a / _qa triple for every criterion. Only the *_a columns
# are computed in this cell; the *_q and *_qa columns must already be present
# in the input files (presumably from the earlier, now commented-out, run).
new_order = [
    "question", "answer",
    "harm_q", "harm_a", "harm_qa",
    "social_bias_q", "social_bias_a", "social_bias_qa",
    "profanity_q", "profanity_a", "profanity_qa",
    "sexual_content_q", "sexual_content_a", "sexual_content_qa",
    "unethical_behavior_q", "unethical_behavior_a", "unethical_behavior_qa",
    "violence_q", "violence_a", "violence_qa",
]

dfs = [pd.read_csv(f"../data/out/{_csv}") for _csv in csvs]

# Bare expression: has no effect in the middle of a cell.
dfs[0]

criteria_ids = [
    "harm",
    "social_bias",
    "profanity",
    "sexual_content",
    "unethical_behavior",
    "violence",
]


def _score_answer(answer_text, guardian_config):
    """Return the parsed score for one answer judged on its own.

    The answer is sent as a lone assistant turn (no user question), rendered
    through the tokenizer's chat template with the given guardian config.
    """
    messages = [{"role": "assistant", "content": answer_text}]
    prompt = tokenizer.apply_chat_template(
        messages,
        guardian_config=guardian_config,
        tokenize=False,
        add_generation_prompt=True,
    )
    output = llm(
        prompt,
        max_tokens=32,
        temperature=0.0,
        logprobs=20,
        echo=False,
    )
    score, _trace = parse_response(output["choices"][0]["text"])
    return score


for file_idx, frame in enumerate(dfs):
    for criterion in criteria_ids:
        config = {"criteria_id": criterion}
        # Add the new "<criterion>_a" column: one score per row, in row order.
        frame[f"{criterion}_a"] = [
            _score_answer(record["answer"], config)
            for _, record in frame.iterrows()
        ]

    target_path = os.path.join("../data/out2", csvs[file_idx])
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    # Selecting new_order both reorders the columns and drops any others.
    frame[new_order].to_csv(target_path, index=False, quoting=csv.QUOTE_ALL)