In [2]:
import copy

from src.models.HFQwen72BModel import HFQwen72BModel
from src.models.gemini_model import GeminiModel
from src.models.prompt import Prompt

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Build one instance of each model wrapper, using the constructors' default
# arguments, so that later cells can reuse them instead of re-creating them.
gemini = GeminiModel()
# NOTE(review): constructing the Qwen-72B wrapper presumably loads or connects
# to a large model — confirm the cost before re-running this cell.
qwen72 = HFQwen72BModel()

In [16]:
def validate_response(
    response: str,
    batch_size: int,
    expected_answers: list[list[str]],
    threshold: float = 0.8,
):
    """Score a batched topic-classification reply and print a pass/fail report.

    The reply is expected to hold one segment per input text, separated by
    ``|``, each segment being a comma-separated list of topics. Topics are
    compared case-insensitively and with surrounding whitespace ignored.

    Three scores in ``[0, 1]`` are printed (closer to 1 is better). With
    ``total`` being the number of expected topics plus the number of returned
    topics over the whole batch:

    * Missing Topics Score: ``(total - missing) / total``
    * Extra Topics Score:   ``(total - extra) / total``
    * Mixed Score:          ``(total - extra - missing) / total``

    Args:
        response: Raw model reply.
        batch_size: Number of texts that were sent; segments beyond this count
            are ignored, fewer segments is reported as a failure.
        expected_answers: Reference topics, one list per text. Not modified.
        threshold: Minimum value each score must reach for the test to pass.

    Returns:
        None. The outcome is reported through ``print`` only.
    """
    if not isinstance(response, str):
        print("FAIL: Wrong response structure")
        return

    # Work on normalised copies so the caller's reference list stays untouched.
    expected_norm = [
        [topic.strip().lower() for topic in topics] for topics in expected_answers
    ]

    # str.split never raises for a non-empty separator, so no try/except is needed.
    batch = response.split('|')[:batch_size]
    if len(batch) != batch_size:
        print(f"FAIL: Wrong batch size, expected {batch_size}, got {len(batch)}")
        return

    count_all = 0
    count_extra = 0
    count_missing = 0
    for text, expected_topics in zip(batch, expected_norm):
        topics = [topic.strip().lower() for topic in text.split(',')]
        count_all += len(expected_topics) + len(topics)
        count_missing += sum(topic not in topics for topic in expected_topics)
        count_extra += sum(topic not in expected_topics for topic in topics)

    # Happens for batch_size == 0 or an empty reference list; avoid dividing by zero.
    if count_all == 0:
        print("FAIL: No topics to score")
        return

    missing_score = (count_all - count_missing) / count_all
    extra_score = (count_all - count_extra) / count_all
    mixed_score = (count_all - count_extra - count_missing) / count_all
    print(f"Missing Topics Score: {missing_score}")  # Closer to one is better
    print(f"Extra Topics Score: {extra_score}")
    print(f"Mixed Score: {mixed_score}")

    failed = False
    if missing_score < threshold:
        print("FAIL: Low Missing Topics Score")
        failed = True
    if extra_score < threshold:
        print("FAIL: Low Extra Topics Score")
        failed = True
    # The mixed score must be checked on its own value, not on missing_score.
    if mixed_score < threshold:
        print("FAIL: Low Mixed Score")
        failed = True

    if failed:
        return

    print(f"Batch Size: {batch_size} batch size test SUCCESSFUL")

In [5]:
# English prompt carrying a batch of 32 texts for the batch-size experiment.
prompt_en_32b = Prompt(
    # Adjacent string literals are concatenated verbatim, so every sentence
    # ends with an explicit newline to keep the instructions from fusing.
    instructions=(
        "Identify and list only the relevant topics from the provided list that "
        "relate to the content of the text. The topics are: Food, Cleanliness, Staff, Activities.\n"
        "Only respond with relevant topics. If no topics are relevant, respond with 'no relevant topics found.'\n"
        "Don't add a space after or before each topic\n"
        "For each comment that starts with Text{n} write the relevant topics, for separating topics use a comma, for separating each text use |"
    ),
    context=None,
    # NOTE(review): the example labels below ("food quality", "service", ...)
    # are not members of the topic list given in the instructions — confirm
    # whether that is intended.
    examples=(
        (
            "I didn't enjoy the food; it was bland and lacked variety.",
            "food quality",
        ),
        (
            "The check-in process was very slow and we had to wait for over an hour.",
            "customer service,wait time",
        ),
        (
            "The service was excellent; the staff were always polite, friendly, and eager to help.",
            "service",
        ),
        (
            "\nTEXT1: I didn't enjoy the food; it was bland and lacked variety.\n"
            "TEXT2: The check-in process was very slow and we had to wait for over an hour.\n"
            "TEXT3: The service was excellent; the staff were always polite, friendly, and eager to help.\n",
            "food quality|customer service,wait time|service",
        ),
    ),
    # 32 texts: a 5-text cycle repeated six times, followed by two more texts.
    input_text=(
        "Text{1}: This food is good, that guy treated me well\n"
        "Text{2}: the food isn't that good\n"
        "Text{3}: the food is perfect\n"
        "Text{4}: I wanted to play volleyball\n"
        "Text{5}: I love myself\n"
        "Text{6}: This food is good, that guy treated me well\n"
        "Text{7}: the food isn't that good\n"
        "Text{8}: the food is perfect\n"
        "Text{9}: I wanted to play voleyball\n"
        "Text{10}: I love myself\n"
        "Text{11}: This food is good, that guy treated me well\n"
        "Text{12}: the food isn't that good\n"
        "Text{13}: the food is perfect\n"
        "Text{14}: I wanted to play voleyball\n"
        "Text{15}: I love myself\n"
        "Text{16}: This food is good, that guy treated me well\n"
        "Text{17}: the food isn't that good\n"
        "Text{18}: the food is perfect\n"
        "Text{19}: I wanted to play voleyball\n"
        "Text{20}: I love myself\n"
        "Text{21}: This food is good, that guy treated me well\n"
        "Text{22}: the food isn't that good\n"
        "Text{23}: the food is perfect\n"
        "Text{24}: I wanted to play voleyball\n"
        "Text{25}: I love myself\n"
        "Text{26}: This food is good, that guy treated me well\n"
        "Text{27}: the food isn't that good\n"
        "Text{28}: the food is perfect\n"
        "Text{29}: I wanted to play voleyball\n"
        "Text{30}: I love myself\n"
        "Text{31}: I wanted to play voleyball\n"
        "Text{32}: I love myself\n"
    ),
)

In [5]:
# 16-text variant of the batch prompt. A plain assignment would only create a
# second name for the 32-text prompt object, so setting input_text here would
# silently overwrite that prompt as well; work on a shallow copy instead.
prompt_en_16b = copy.copy(prompt_en_32b)
# NOTE(review): these lines use the "Text1:" prefix while the 32- and 8-text
# prompts use "Text{1}:" — confirm which form is intended.
prompt_en_16b.input_text = (
    "Text1: This food is good, that guy treated me well\n"
    "Text2: the food isn't that good\n"
    "Text3: the food is perfect\n"
    "Text4: I wanted to play volleyball\n"
    "Text5: I love myself\n"
    "Text6: This food is good, that guy treated me well\n"
    "Text7: the food isn't that good\n"
    "Text8: the food is perfect\n"
    "Text9: I wanted to play voleyball\n"
    "Text10: I love myself\n"
    "Text11: This food is good, that guy treated me well\n"
    "Text12: the food isn't that good\n"
    "Text13: the food is perfect\n"
    "Text14: I wanted to play voleyball\n"
    "Text15: I love myself\n"
    "Text16: This food is good, that guy treated me well\n"
)

In [26]:
# 8-text variant of the batch prompt. Copy the 32-text prompt first: a plain
# assignment would alias the same object, and setting input_text would then
# replace the 32-text prompt's input too.
prompt_en_8b = copy.copy(prompt_en_32b)
prompt_en_8b.input_text = (
    "Text{1}: This food is good, that guy treated me well\n"
    "Text{2}: the food isn't that good\n"
    "Text{3}: the food is perfect\n"
    "Text{4}: I wanted to play volleyball\n"
    "Text{5}: I love myself\n"
    "Text{6}: This food is good, that guy treated me well\n"
    "Text{7}: the food isn't that good\n"
    "Text{8}: the food is perfect\n"
)

In [6]:
# Reference topics for the 32 input texts: the same five-text cycle repeated
# six times, then two trailing texts. Each entry is built as its own list so
# that no two entries share storage.
expected_answers = [
    list(topics)
    for _ in range(6)
    for topics in (
        ("Food", "Staff"),
        ("Food",),
        ("Food",),
        ("Activities",),
        ("no relevant topics found",),
    )
] + [
    ["Activities"],
    ["no relevant topics found"],
]

In [7]:
# Show the rendered prompt text; print() applies str() to its argument itself.
print(prompt_en_32b)

Instructions: Identify and list only the relevant topics from the provided list that relate to the content of the text. The topics are: Food, Cleanliness, Staff, Activities.
Only respond with relevant topics. If no topics are relevant, respond with 'no relevant topics found.'Don't add a space after or before each topicFor each comment that starts with Text{n} write the relevant topics, for separating topics use a comma, for separating each text use |
Examples:
When provided with I didn't enjoy the food; it was bland and lacked variety., expected output should be food quality
When provided with The check-in process was very slow and we had to wait for over an hour., expected output should be customer service,wait time
When provided with The service was excellent; the staff were always polite, friendly, and eager to help., expected output should be service
When provided with 
TEXT1: I didn't enjoy the food; it was bland and lacked variety.
TEXT2: The check-in process was very slow and we

In [13]:
# Send prompt_en_32b to the Qwen wrapper and lowercase the reply so that the
# later topic comparison does not depend on the model's capitalisation.
response = qwen72.generate_content(prompt_en_32b).lower()

In [14]:
# Show the raw (lowercased) reply for visual inspection.
print(response)

food,staff|food|food|activities|no relevant topics found|food,staff|food|food|activities|no relevant topics found|food,staff|food|food|activities|no relevant topics found|food,staff|food|food|activities|no relevant topics found|food,staff|food|food|activities|no relevant topics found|food,staff|food|food|activities|no relevant topics found|activities|no relevant topics found


In [17]:
# Score the reply as a batch of 32; the slice limits the reference labels to
# the same number of entries as the batch size.
validate_response(response, 32, expected_answers[:32])

Missing Topics Score: 1.0
Extra Topics Score: 1.0
Mixed Score: 1.0
Batch Size: 32 batch size test SUCCESSFUL
