In [1]:
import json
import openai
import os
import random

import numpy as np
import pandas as pd

Import openai api key from environment variable - here I have it stored as `LOCAL_OPENAI_API_KEY` but you can just paste yours in instead

In [2]:

openai.api_key = os.environ.get('LOCAL_OPENAI_API_KEY')

In [4]:
def query(prompt, **kwargs):
  """
  wrapper for the API to easily parse data
  """
  
  args = {
    "engine":"text-davinci-001", # using the original davinci
    "temperature":0, # 0 temperature means it's greedy and gives the same result every time (ish)
    "max_tokens":500, # 500 tokens should be enough
    "stop":"\n\n", # we'll use double newlines to separate the examples
  }
  
  args = {**args, **kwargs}
  
  r = openai.Completion.create(prompt=prompt, **args)["choices"][0]["text"].strip()
  return r


Great, the API key is loaded. Now we can start using the API.

In [4]:
query("q: what is 1+1?\na:")

'2\nq: what is 2+2?\na: 4\nq: what is 3+3?\na: 6\nq: what is 4+4?\na: 8\nq: what is 5+5?\na: 10\nq: what is 6+6?\na: 12\nq: what is 7+7?\na: 14\nq: what is 8+8?\na: 16\nq: what is 9+9?\na: 18\nq: what is 10+10?\na: 20'

This downloads the WIC dataset - you may need to install wget if you don't have it installed https://ports.macports.org/port/wget/

In [5]:
!wget https://pilehvar.github.io/wic/package/WiC_dataset.zip

--2023-02-26 20:09:37--  https://pilehvar.github.io/wic/package/WiC_dataset.zip
Resolving pilehvar.github.io (pilehvar.github.io)... 2606:50c0:8002::153, 2606:50c0:8000::153, 2606:50c0:8003::153, ...
Connecting to pilehvar.github.io (pilehvar.github.io)|2606:50c0:8002::153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 275984 (270K) [application/zip]
Saving to: ‘WiC_dataset.zip’


2023-02-26 20:09:37 (5.59 MB/s) - ‘WiC_dataset.zip’ saved [275984/275984]



In [6]:
import zipfile
with zipfile.ZipFile("WiC_dataset.zip","r") as zip_ref:
    zip_ref.extractall(".")

In [5]:
train = pd.read_csv("train/train.data.txt", sep='\t', header=None)
train.columns = ["target", "pos", "position", "context-1", "context-2"]
train_gold = pd.read_csv("train/train.gold.txt", sep='\t', header=None)
train_gold.columns = ["label"]
train = pd.concat([train_gold,train], axis=1)


In [6]:
train[train.pos=="V"].tail(18)[:5]


Unnamed: 0,label,target,pos,position,context-1,context-2
5386,F,admit,V,3-1,The French doors admit onto the yard .,He admitted his errors .
5387,F,exhaust,V,0-4,Exhaust one 's savings .,This kind of work exhausts me .
5389,F,kill,V,0-2,Kill the engine .,She was killed in the collision of three cars .
5390,T,admit,V,1-1,To admit a serious thought into the mind .,She admitted us here .
5394,F,write,V,6-1,How many books did Georges Simenon write ?,Please write to me every week .


We can go and grab the last couple verbs of the train set to use as our few shots. I added some annotation for meaning

In [7]:

fewShotVerb = """ interface comparison{
    "Sense1": str, // write out the dictionary meaning of the word in the first context
    "Sense2": str, // write out the dictionary meaning of the word in the second context
    "Similar": bool, // whether the two meaings of the word are similar or used to mean different things, should be true | false
}

determineWordSimilarSense(word, context1, context2) : comparison =>{
    return ai.compare(word, context1, context2) // return the comparison object
}

//This returns the keys inside of double quotes ("KEY") so we can parse with JSON

"""

Likewise we can do it with nouns

In [15]:

fewShotNoun = """ interface comparison{
    "Sense1": str, // explain the meaning of the word in the first context so a child could understand
    "Sense2": str, // explain the meaning of the word in the second context so a child could understand
    "Similar": bool, // true | false - whether the two meaings of the word are similar or used to mean different things
}

determineWordSimilarSense(word, context1, context2) : comparison =>{
    return ai.compare(word, context1, context2) // return the comparison object
}

//This returns the keys inside of double quotes ("") so we can parse with JSON

"""

In [16]:
def testRow(row):
  pos = row[1]["pos"]
  examples = fewShotNoun if pos == "N" else fewShotVerb

  context = """determineWordSimilarSense("{}", "{}", "{}")
>>>""".format(row[1]["context-1"], row[1]["context-2"], row[1]["target"])
  res = query(examples + context)
  try:
    parsed = json.loads(res)
  except Exception as e:
    print('error parsing: ', e)
    # check if there's a true in the last 100 characters
    parsed = {
      "Context": res,
      "Similar": "true" in res.lower()[-100:]
    }

  return parsed


In [17]:
scores = {'V': 0, 'N': 0}
attempted = {'V': 0, 'N': 0}
for row in train.head(10).iterrows():
    print('scores', scores)
    print('attempted', attempted)
    actual = row[1]["label"]
    output = testRow(row)
    print('output:', output)
    pos = row[1]["pos"]
    attempted[pos] += 1

    if actual == "T":
        if output['Similar']:
            scores[pos] += 1
        
    if actual == "F":
        if not output['Similar']:
            scores[pos] += 1



scores {'V': 0, 'N': 0}
attempted {'V': 0, 'N': 0}
output: {'Similar': True}
scores {'V': 0, 'N': 0}
attempted {'V': 1, 'N': 0}
output: {'Similar': True}
scores {'V': 0, 'N': 0}
attempted {'V': 2, 'N': 0}
output: {'Similar': True}
scores {'V': 0, 'N': 0}
attempted {'V': 3, 'N': 0}
output: {'Similar': False, 'Sense1': 'a strap worn around the waist to support the genitals', 'Sense2': 'a container used to hold a liquid or other substance'}
scores {'V': 0, 'N': 0}
attempted {'V': 3, 'N': 1}
output: {'Similar': True, 'Sense1': 'a school or college for the arts, music, or drama', 'Sense2': 'a society of learned men and women, often with a royal charter, who meet to discuss and advance knowledge in a particular subject'}
scores {'V': 0, 'N': 0}
attempted {'V': 3, 'N': 2}
output: {'Similar': True}
scores {'V': 0, 'N': 0}
attempted {'V': 4, 'N': 2}
output: {'Similar': True}
scores {'V': 1, 'N': 0}
attempted {'V': 5, 'N': 2}
output: {'Similar': True}
scores {'V': 1, 'N': 0}
attempted {'V': 6, '

In [18]:
scores, attempted

({'V': 2, 'N': 0}, {'V': 7, 'N': 3})

In [19]:
dev = pd.read_csv("dev/dev.data.txt", sep='\t', header=None)
dev.columns = ["target", "pos", "position", "context-1", "context-2"]
dev_gold = pd.read_csv("dev/dev.gold.txt", sep='\t', header=None)
dev_gold.columns = ["label"]
dev = pd.concat([dev_gold,dev], axis=1)


In [20]:
devResults = {}
complete = 0
correct = 0

In [23]:

for row in dev.iterrows():

    if row[0] in devResults:
        continue

    q1 = row[1]["context-1"]
    q2 = row[1]["context-2"]
    target = row[1]["target"]
    actual = row[1]["label"]
    
    pos = row[1]["pos"]

    output = testRow(row)
    
    myResults = {}
    myResults["q1"] = q1
    myResults["q2"] = q2

    myResults["pos"] = row[1]["pos"]

    myResults["target"] = target

    myResults["output"] = output

    myResults["actual"] = actual
    devResults[row[0]] = myResults
    complete +=1
    try:
        if actual == "T":
            if output['Similar']:
                correct += 1
        if actual == "F":
            if not output['Similar']:
                correct += 1
    except:
        print('output', output)

    pct = correct/complete
    pct = round(pct, 2)

    print ("Score: {}, Complete: {} Correct: {} Wrong: {}".format(pct, complete, correct, complete-correct))
    with open('text-davinci-001-code-0shot.json', 'w') as f:
        json.dump(devResults, f)


error parsing:  Expecting property name enclosed in double quotes: line 5 column 5 (char 168)
Score: 0.52, Complete: 173 Correct: 90 Wrong: 83
Score: 0.52, Complete: 174 Correct: 90 Wrong: 84
output {'The senator rose to register his protest .': 'They finished the game under protest to the league president .', 'register': 'protest'}
Score: 0.51, Complete: 175 Correct: 90 Wrong: 85
Score: 0.52, Complete: 176 Correct: 91 Wrong: 85
Score: 0.52, Complete: 177 Correct: 92 Wrong: 85
Score: 0.52, Complete: 178 Correct: 93 Wrong: 85
error parsing:  Expecting ',' delimiter: line 4 column 5 (char 123)
Score: 0.52, Complete: 179 Correct: 93 Wrong: 86
Score: 0.52, Complete: 180 Correct: 94 Wrong: 86
Score: 0.52, Complete: 181 Correct: 95 Wrong: 86
Score: 0.52, Complete: 182 Correct: 95 Wrong: 87
Score: 0.52, Complete: 183 Correct: 95 Wrong: 88
Score: 0.52, Complete: 184 Correct: 95 Wrong: 89
Score: 0.51, Complete: 185 Correct: 95 Wrong: 90
Score: 0.52, Complete: 186 Correct: 96 Wrong: 90
Score: 0.

In [None]:
with open('text-davinci-001-code-0shot.json', 'w') as f:
    json.dump(devResults, f)
