In [4]:
import sys, json, re, collections
import pandas as pd
from pathlib import Path
from decouple import config
sys.path.append("../src/")
from llm_helpers import openai_client, chatgpt_ask, openai_ask_verbelizer, openai_ask_helper

# Raise pandas' display limits (columns, total width, rows) to 1000 each.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.width", 1000)
pd.set_option("display.max_rows", 1000)

# The data folder is a sibling of the current working directory.
data_dir = Path.cwd().parent / "data"


def ls(p):
    """Print every entry of directory `p`, one path per line."""
    print("\n".join(str(entry) for entry in p.iterdir()))


ls(data_dir)

C:\Users\TamirBracha\LLM\llm_workshop\data\sample_apps.parquet


In [5]:
# Draw a fixed-seed sample of nine apps so that Restart & Run All reproduces
# the same rows (and therefore the same downstream API calls) every time.
df = pd.read_parquet(data_dir / "sample_apps.parquet").sample(9, random_state=42)
df

Unnamed: 0,bundle_id,title,description,store_url,category_names,ios
26290,com.hwqgrhhjfd.idlefastfood,Eatventure,Are you looking to become a restaurant million...,https://play.google.com/store/apps/details?id=...,"GAME_SIMULATION,GAME",False
60235,net.wooga.junes_journey_hidden_object_mystery_...,June's Journey: Hidden Objects,Are you ready to go on an exciting journey to ...,https://play.google.com/store/apps/details?id=...,"GAME_ADVENTURE,GAME",False
1373,1138264921,Match Masters ‎- PvP Match 3,Match 3 games - reinvented! Now with online mu...,https://apps.apple.com/us/app/match-masters-pv...,"Games,Casual,Puzzle",True
6673,892521917,Tiki Solitaire TriPeaks,Tiki Solitaire TriPeaks: the classic Solitaire...,https://apps.apple.com/us/app/tiki-solitaire-t...,"Games,Card,Puzzle",True
59980,net.peakgames.toonblast,Toon Blast,From the creators of Toy Blast comes the ultim...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False
7478,com.alibaba.intl.android.apps.poseidon,Alibaba.com - B2B marketplace,What is Alibaba.com?\nAlibaba.com is one of th...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False
51356,com.walmart.android,Walmart Shopping & Grocery,Save money. Live better.\n\nThe Walmart app is...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False
27431,com.inspiredsquare.jupiter,2248 - Number Puzzle Game,2248 Number Block Puzzle Game2248 Puzzle Game:...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False
49136,com.tripledot.woodoku,Woodoku - Block Puzzle Games,Woodoku: a wood block puzzle game meets a sudo...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False


## Asking OpenAI directly?

In [6]:
def openai_ask(prompts):
    """Send `prompts` to the completions endpoint and return the cleaned answers.

    `prompts` is forwarded unchanged as the `prompt` argument of
    `openai_client.completions.create`, using the "text-davinci-003" model and
    no other request parameters.

    Returns a list with one entry per returned choice: the choice's text with
    surrounding whitespace stripped and converted to lower case.
    """
    completion = openai_client.completions.create(
        prompt=prompts,
        model="text-davinci-003",
    )
    answers = []
    for choice in completion.choices:
        answers.append(choice.text.strip().lower())
    return answers

openai_ask("Is a dog a mammal?")

['yes, a dog is a mammal.']

In [7]:
openai_ask("Is a dog a mammal? answer yes or no")

['yes']

## What about "fuzzier" questions?

In [8]:
openai_ask("I'm facing financial troubles, should I take a loan?")

['financial troubles can be a difficult and stressful experience. borrowing money']

In [9]:
openai_ask("I'm facing financial troubles, should I take a loan? answer yes or no")

['no']

In [10]:
openai_ask("I'm facing financial troubles, should I take a loan? answer yes or no")

['no']

In [11]:
openai_ask("I'm facing financial troubles, should I take a loan? answer yes or no")

['no']

## Popular approach: Retry

In [12]:
def yes_no_or_retry(prompt, max_retries=10):
    """Re-ask `prompt` until the model answers exactly "yes" or "no".

    Parameters:
        prompt: text passed to `openai_ask` on every attempt.
        max_retries: upper bound on the number of attempts. Without a bound a
            model that keeps answering e.g. "no." would loop forever, issuing
            one API call per iteration.

    Returns:
        A tuple `(attempts, is_yes)`: how many calls were made and whether the
        accepted answer was "yes".

    Raises:
        RuntimeError: if no attempt produced "yes" or "no" within
            `max_retries` attempts (including when `max_retries` < 1).
    """
    for attempt in range(1, max_retries + 1):
        # Only the first choice is inspected; lower-casing keeps the check
        # case-insensitive regardless of what the caller's helper returns.
        answer = openai_ask(prompt)[0].lower()
        if answer in {"yes", "no"}:
            return attempt, answer == "yes"
    raise RuntimeError(
        f"model did not answer 'yes' or 'no' within {max_retries} attempts"
    )

yes_no_or_retry("I'm facing financial troubles, should I take a loan? answer yes or no")

(1, False)

As we can see, the answer is not always one of the valid choices, "yes" or "no".
# Introducing verbalizers:

## Verbalizers in a sentence:

Mapping a **class** to a **token**, and then choosing the token with the highest logit
## The OpenAI tokenizer

https://platform.openai.com/tokenizer

In [13]:
# Token ids that are biased towards in the yes/no completion request.
# Presumably these are the ids of the spelling/casing variants of "yes" and
# "no" as shown by the tokenizer page linked above -- TODO confirm.
# Each id is listed once; a frozenset ignores repeats anyway.
YES_TOKEN = frozenset((5297, 3763, 3363, 8505, 43335, 21560))
NO_TOKEN = frozenset((2949, 645, 1400, 3919, 15285, 8005))

# Second pair of id sets; not referenced by any cell visible in this notebook.
GPT3_YES_TOKEN = frozenset((9642, 14410, 10035, 7566, 14331, 9891))
GPT3_NO_TOKEN = frozenset((2822, 5782, 912, 2201, 9173, 2360))


def openai_yes_or_no(prompts):
    """Ask the completions endpoint and map each answer to a boolean.

    The request uses model "text-davinci-003" with `temperature=0`,
    `max_tokens=1`, and a `logit_bias` of 100 on every id in
    `YES_TOKEN | NO_TOKEN`.

    Returns a list with one boolean per returned choice: True when the
    choice's stripped, lower-cased text equals "yes", False for anything else.
    """
    allowed_tokens = YES_TOKEN | NO_TOKEN
    bias = dict.fromkeys(allowed_tokens, 100)
    completion = openai_client.completions.create(
        model="text-davinci-003",
        prompt=prompts,
        temperature=0,
        logit_bias=bias,
        max_tokens=1,
    )
    verdicts = []
    for choice in completion.choices:
        verdicts.append(choice.text.strip().lower() == "yes")
    return verdicts

openai_yes_or_no("Is dog a mammal?")


[True]

In [14]:
openai_yes_or_no("I'm facing financial troubles, should I take a loan? answer yes or no")

[False]

In [15]:
# Pass the same prompt ten times in a single call and tally the booleans.
prompt = "I'm facing financial troubles, should I take a loan? answer yes or no"
prompts = [prompt for _ in range(10)]
answers = openai_yes_or_no(prompts)
collections.Counter(answers)

Counter({False: 10})

## Question:
Ask the following questions for each app
1. Is the app for kids?
2. Is the app a shopping app?
3. Is it a game?
4. Is this app a dating app?
5. Does this app have in-app purchases?
6. Is this app a match 3 game?

Compare the answers from the naive "answer yes or no" prompt with the answers obtained using logit bias.

In [28]:
# Exercise questions keyed by their 1-based position, as a string ("1".."6").
question_dict = {
    str(number): question
    for number, question in enumerate(
        (
            "Is the app for kids?",
            "Is the app a shopping app?",
            "Is it a game?",
            "Is this app a dating app?",
            "Does this app have in-app purchases?",
            "Is this app a match 3 game?",
        ),
        start=1,
    )
}

In [29]:
# For every question, add two columns to `df`: one filled by the naive helper
# and one by the verbalizer (logit bias) helper, so they can be compared.
for question in question_dict.values():
    print(question)
    # `{{description}}` is escaped so the literal `{description}` placeholder
    # survives the f-string; it is presumably filled per row by the helpers,
    # as in the single-question cells -- TODO confirm against llm_helpers.
    prompt_template = (
        f"Given the app description:\n{{description}}\n\n{question}\nAnswer only yes or no"
    )
    df[f"{question} naive"] = df.apply(
        openai_ask_helper(prompt_template, model_name="text-davinci-003"),
        axis=1,
    )
    df[f"{question} logit bias"] = df.apply(
        openai_ask_verbelizer(prompt_template),
        axis=1,
    )
    

Is the app for kids?
Is the app a shopping app?
Is it a game?
Is this app a dating app?
Does this app have in-app purchases?
Is this app a match 3 game?


In [16]:
# Naive approach for a single question: the column stores whatever the helper
# returns per row, unnormalised (the output below contains e.g. "no." and ":\nno").
df["Is the app for kids? naive"] = df.apply(
    openai_ask_helper(
        "Given the app description:\n{description}\n\nIs it for kids?\nAnswer only yes or no",
        model_name="text-davinci-003",
    ),
    axis=1,
)
df

Unnamed: 0,bundle_id,title,description,store_url,category_names,ios,Is the app for kids? naive
26290,com.hwqgrhhjfd.idlefastfood,Eatventure,Are you looking to become a restaurant million...,https://play.google.com/store/apps/details?id=...,"GAME_SIMULATION,GAME",False,[no.]
60235,net.wooga.junes_journey_hidden_object_mystery_...,June's Journey: Hidden Objects,Are you ready to go on an exciting journey to ...,https://play.google.com/store/apps/details?id=...,"GAME_ADVENTURE,GAME",False,[:\nno]
1373,1138264921,Match Masters ‎- PvP Match 3,Match 3 games - reinvented! Now with online mu...,https://apps.apple.com/us/app/match-masters-pv...,"Games,Casual,Puzzle",True,[no]
6673,892521917,Tiki Solitaire TriPeaks,Tiki Solitaire TriPeaks: the classic Solitaire...,https://apps.apple.com/us/app/tiki-solitaire-t...,"Games,Card,Puzzle",True,[no]
59980,net.peakgames.toonblast,Toon Blast,From the creators of Toy Blast comes the ultim...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False,[yes]
7478,com.alibaba.intl.android.apps.poseidon,Alibaba.com - B2B marketplace,What is Alibaba.com?\nAlibaba.com is one of th...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False,[no.]
51356,com.walmart.android,Walmart Shopping & Grocery,Save money. Live better.\n\nThe Walmart app is...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False,[:\nno]
27431,com.inspiredsquare.jupiter,2248 - Number Puzzle Game,2248 Number Block Puzzle Game2248 Puzzle Game:...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False,[:\nno]
49136,com.tripledot.woodoku,Woodoku - Block Puzzle Games,Woodoku: a wood block puzzle game meets a sudo...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False,[no]


In [18]:
# Same question through the verbalizer helper; the output below shows one
# boolean per row instead of free text.
df["Is the app for kids? logit bias"] = df.apply(
    openai_ask_verbelizer(
        "Given the app description:\n{description}\n\nIs it for kids?\nAnswer only yes or no"
    ),
    axis=1,
)
df

Unnamed: 0,bundle_id,title,description,store_url,category_names,ios,Is the app for kids? naive,Is the app for kids? logit bias
26290,com.hwqgrhhjfd.idlefastfood,Eatventure,Are you looking to become a restaurant million...,https://play.google.com/store/apps/details?id=...,"GAME_SIMULATION,GAME",False,[no.],[False]
60235,net.wooga.junes_journey_hidden_object_mystery_...,June's Journey: Hidden Objects,Are you ready to go on an exciting journey to ...,https://play.google.com/store/apps/details?id=...,"GAME_ADVENTURE,GAME",False,[:\nno],[False]
1373,1138264921,Match Masters ‎- PvP Match 3,Match 3 games - reinvented! Now with online mu...,https://apps.apple.com/us/app/match-masters-pv...,"Games,Casual,Puzzle",True,[no],[False]
6673,892521917,Tiki Solitaire TriPeaks,Tiki Solitaire TriPeaks: the classic Solitaire...,https://apps.apple.com/us/app/tiki-solitaire-t...,"Games,Card,Puzzle",True,[no],[False]
59980,net.peakgames.toonblast,Toon Blast,From the creators of Toy Blast comes the ultim...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False,[yes],[True]
7478,com.alibaba.intl.android.apps.poseidon,Alibaba.com - B2B marketplace,What is Alibaba.com?\nAlibaba.com is one of th...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False,[no.],[False]
51356,com.walmart.android,Walmart Shopping & Grocery,Save money. Live better.\n\nThe Walmart app is...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False,[:\nno],[False]
27431,com.inspiredsquare.jupiter,2248 - Number Puzzle Game,2248 Number Block Puzzle Game2248 Puzzle Game:...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False,[:\nno],[False]
49136,com.tripledot.woodoku,Woodoku - Block Puzzle Games,Woodoku: a wood block puzzle game meets a sudo...,https://play.google.com/store/apps/details?id=...,"GAME_PUZZLE,GAME",False,[no],[False]
