In [1]:
from transformers import GPTNeoXForCausalLM, AutoTokenizer, PreTrainedTokenizerFast

model = GPTNeoXForCausalLM.from_pretrained(
  "EleutherAI/pythia-2.8B-deduped",
  revision="step143000",
  cache_dir="./pythia-2.8B-deduped/step143000",
).to("cuda")

In [5]:
tokenizer: PreTrainedTokenizerFast = AutoTokenizer.from_pretrained(
  "EleutherAI/pythia-2.8B-deduped",
  revision="step143000",
  cache_dir="./pythia-2.8B-deduped/step143000",
)

In [24]:
num_added_toks = tokenizer.add_tokens(["[","]","->"], special_tokens=True)
print(num_added_toks)

0


In [25]:
model.resize_token_embeddings(len(tokenizer))

Embedding(50281, 2560)

In [26]:
print(tokenizer.all_special_tokens) 
# doesn't show our new ones for some reason /shrug

['<|endoftext|>']


In [31]:
tokens = tokenizer("it is [NOW() -> 123]", return_tensors="pt").to("cuda")["input_ids"][0]
print(tokens)
print(tokenizer.convert_ids_to_tokens(tokens))

tensor([  262,   310,   209,    60, 22731,  1082,   209,  1168, 15567,    62],
       device='cuda:0')
['it', 'Ġis', 'Ġ', '[', 'NOW', '()', 'Ġ', '->', 'Ġ123', ']']


In [1]:
def list_windows():
    import psutil, win32process, win32gui, time
    def get_process_name(handle):
        pid = win32process.GetWindowThreadProcessId(handle) #This produces a list of PIDs active window relates to
        return (psutil.Process(pid[-1]).name()) #pid[-1] is the most likely to survive last longer

    rtn = []
    def winEnumHandler( hwnd, ctx ):
        nonlocal rtn
        if win32gui.IsWindowVisible( hwnd ):
            rtn.append((hwnd, win32gui.GetWindowText( hwnd ), get_process_name(hwnd)))
    win32gui.EnumWindows( winEnumHandler, None )
    return rtn
list_windows()

[(1904908, '', 'explorer.exe'),
 (65828, '', 'explorer.exe'),
 (65986, '', 'explorer.exe'),
 (66004, '', 'explorer.exe'),
 (132490, '● main.ipynb - toolformer - Visual Studio Code', 'Code.exe'),
 (187893914, 'pythia.ipynb - gptj - Visual Studio Code', 'Code.exe'),
 (265090, '', 'pwsh.exe'),
 (721110, 'PowerShell', 'WindowsTerminal.exe'),
 (460880,
  'TeamDman/toolformer: Experimenting with toolformer prompting, simple notebooks inside and 4 more pages - Personal - Microsoft\u200b Edge Dev',
  'msedge.exe'),
 (9047612, '', 'ApplicationFrameHost.exe'),
 (2099944, '', 'svchost.exe'),
 (200150, '', 'explorer.exe'),
 (459980, 'Groove Music', 'Music.UI.exe'),
 (262342, 'Groove Music', 'ApplicationFrameHost.exe'),
 (197516, 'Settings', 'SystemSettings.exe'),
 (263536, 'Settings', 'ApplicationFrameHost.exe'),
 (66582, 'Microsoft Text Input Application', 'TextInputHost.exe'),
 (66108, '', 'explorer.exe'),
 (66126, '', 'explorer.exe'),
 (66144, '', 'explorer.exe'),
 (66152, '', 'explorer.exe'),


In [3]:
def get_best_match(name):
    from fuzzywuzzy import fuzz
    x = sorted([(fuzz.ratio(str(x), name),x) for x in list_windows()], key=lambda x: x[0], reverse=True)
    print(x[0][1])
    return x[0][1][0]
get_best_match("VSCode")

(132490, '● main.ipynb - toolformer - Visual Studio Code', 'Code.exe')


132490

In [5]:
def pos2pos(pos: str) -> (int,int,int,int):
    from screeninfo import get_monitors
    monitors = get_monitors()
    left = sorted(monitors, key=lambda x: x.x)[0]
    right = sorted(monitors, key=lambda x: x.x, reverse=True)[0]
    main = [x for x in monitors if x.is_primary][0]
    corners = {
        "top left": lambda m: (m.x, m.y, m.width//2, m.height//2),
        "top right": lambda m: (m.x + m.width//2, m.y, m.width//2, m.height//2),
        "bottom left": lambda m: (m.x, m.y + m.height//2, m.width//2, m.height//2),
        "bottom right": lambda m: (m.x + m.width//2, m.y + m.height//2, m.width//2, m.height//2),
    }
    lookup = {}
    for corner, func in corners.items():
        lookup[f"left monitor, {corner}"] = func(left)
        lookup[f"third monitor, {corner}"] = func(left)
        lookup[f"main monitor, {corner}"] = func(main)
        lookup[f"second monitor, {corner}"] = func(right)
    lookup[f"main monitor, full"] = (main.x, main.y, main.width, main.height)
    lookup[f"left monitor, full"] = (left.x, left.y, left.width, left.height)
    lookup[f"third monitor, full"] = (left.x, left.y, left.width, left.height)
    lookup[f"right monitor, full"] = (right.x, right.y, right.width, right.height)
    lookup[f"second monitor, full"] = (right.x, right.y, right.width, right.height)

    from fuzzywuzzy import fuzz
    x = sorted([(fuzz.ratio(label, pos),data) for label,data in lookup.items()], key=lambda x: x[0], reverse=True)
    # print(x)
    return x[0][1]
    
pos2pos("right monitor, bottom left")

(-2048, 499, 1024, 576)

In [213]:
def snap(params):
    window, position = params.split(",")

    import ctypes
    user32 = ctypes.windll.user32

    user32.SetProcessDPIAware()
    res = (user32.GetSystemMetrics(0), user32.GetSystemMetrics(1))
    print("found resolution: ", res)
    handle = get_best_match(window)
    # handle = user32.FindWindowW(None, u'Untitled - Notepad')
    if handle == 0:
        return "could not find window"
    # move window using handle
    # MoveWindow(handle, x, y, height, width, repaint(bool))
    pos = pos2pos(position)
    user32.MoveWindow(handle, *pos, True)
    return f"moved {window} to {position}"

In [None]:
snap("notepad, second monitor bottom left")

In [None]:
known_functions = {
    "SNAP": lambda x: snap(x),
}

In [235]:
def invoke(response):
    import re
    pattern = r".*\[(\w+)\((.*?)\s*\)\s*->"
    print(re.findall(pattern, response, re.DOTALL))
    x = re.findall(pattern, response)
    if len(x) == 0:
        return "bad match"
    func, params = x[0]

    if func not in known_functions:
        return f"Unknown function {func}"
    print(f"Calling {func} with {params}")
    result = known_functions[func](params)
    return result

In [245]:
pre_prompt = r"""You are an AI assistant with several tools available to you. The tools are the following:
MATH: This allows you to evaluate mathematical expressions using the math.js library.
NOW: This returns the current date and time. You must not pass any arguments to this tool!
SEARCH: This returns a web search result for the given string argument.
REMEMBER: This stores a key-value pair in the assistant's memory. You must pass two arguments to this tool, the first being the key, and the second being the value.
RECALL: This returns a value from the assistant's memory. You must pass one argument to this tool, the key.
OPEN: This opens a file or a website. The only argument is the URI for the file or website.
SNAP: This moves a window to an area of the screen. The first argument is the program, and the second argument is the position.

DO NOT USE TOOLS WITHIN TOOLS! KEEP ALL TOOL CALLS SEPARATE FROM EACH OTHER!

User: What is 10 times 14?
Assistant:10 * 14 is [MATH(10 * 14) -> 140] 140.
User: Has Avatar the way of water been released yet?
Assistant:[SEARCH(Avatar the way of water release date) -> 22.11.2022] Avatar: The way of water was released on the 22nd of november 2022. Today is [NOW() -> Mon Feb 27 2023] the 13th of February 2023. Therefore, [MATH(2023 > 2022) -> true] it was released last year.
User: What day is it?
Assistant:It is [NOW() -> 13/02/2023, 20:01:35] the 13th of February 2023.
User: How old is Donald Trump?
Assistant:Donald Trump is [SEARCH(Donald Trump age) -> 76] 76 years old.
User: Where was the inventor of the light bulb born, and what is the last country his home country was at war with?
Assistant:Thomas Edison, the inventor of the lightbulb, was born in [SEARCH(Thomas Edison birthplace) -> Milan, Ohio] Milan, Ohio. The last country the United States was at war with was [SEARCH(last country US at war with) -> Iraq] Iraq.
User: Set my default browser to Firefox.
Assistant:Okay, I set your default browser to [REMEMBER(default browser, Firefox) -> Firefox].
User: Open youtube.
Assistant: Okay, opening YouTube... [OPEN(https://youtube.com) -> 0] done!
User: Open my csgo config.
Assistant: Okay, opening your csgo config... [OPEN(C:\Program Files (x86)\Steam\steamapps\common\Counter-Strike Global Offensive\csgo\cfg\autoexec.cfg) -> 0] done!
User: Move spotify to my left monitor.
Assistant: Okay, moving spotify... [SNAP(Spotify, left monitor) -> 0] done!
User: Move spotify to the top right corner of my second monitor.
Assistant: Okay, moving spotify... [SNAP(Spotify, right monitor top right) -> 0] done!
User: Move spotify to the main monitor, top right.
Assistant: Okay, moving spotify... [SNAP(Spotify, main monitor top right) -> 0] done!
User: Move spotify to my third monitor
Assistant: Okay, moving spotify... [SNAP(Spotify, third monitor) -> 0] done!
User: move notepad, second monitor, top right
Assistant: Okay, moving spotify... [SNAP(notepad, second monitor top right) -> 0] done!
User: 
"""

prompt = pre_prompt + "move notepad, main monitor"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
# print(prompt)
tokens = model.generate(
    **inputs,
    # do_sample=True,
    temperature=0.9,
    max_new_tokens=500,
    eos_token_id=tokenizer.encode("->")[0],
)
response = tokenizer.decode(tokens[:,inputs["input_ids"].shape[1]:][0])
output = invoke(response)
prompt = prompt + response + output + "]"
print(prompt)
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
# print(prompt)
tokens = model.generate(
    **inputs,
    # do_sample=True,
    temperature=0.9,
    max_new_tokens=100,
    eos_token_id=tokenizer.encode("\n")[0],
)
response = tokenizer.decode(tokens[:,inputs["input_ids"].shape[1]:][0])
print(response)


Setting `pad_token_id` to `eos_token_id`:1168 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:187 for open-end generation.


[('SNAP', 'notepad, main monitor top right')]
Calling SNAP with notepad, main monitor top right
found resolution:  (1920, 1080)
(264808, 'Untitled - Notepad', 'notepad.exe')
[(96, (960, 0, 960, 540)), (86, (960, 540, 960, 540)), (81, (-1024, -77, 1024, 576)), (80, (0, 0, 960, 540)), (79, (2880, 0, 960, 540)), (78, (-1024, -77, 1024, 576)), (72, (-1024, 499, 1024, 576)), (71, (0, 540, 960, 540)), (71, (2880, 540, 960, 540)), (69, (-1024, 499, 1024, 576)), (65, (-2048, -77, 1024, 576)), (64, (1920, 0, 960, 540)), (63, (0, 0, 1920, 1080)), (62, (-2048, -77, 1024, 576)), (57, (-2048, 499, 1024, 576)), (56, (1920, 540, 960, 540)), (54, (-2048, 499, 1024, 576)), (48, (-2048, -77, 2048, 1152)), (48, (1920, 0, 1920, 1080)), (47, (1920, 0, 1920, 1080)), (44, (-2048, -77, 2048, 1152))]
You are an AI assistant with several tools available to you. The tools are the following:
MATH: This allows you to evaluate mathematical expressions using the math.js library.
NOW: This returns the current date an

In [238]:
print(response)

 corner.
Assistant: Okay, moving notepad... 
[SNAP(Notepad, main monitor, top left corner) 
->
