In [1]:
# Install a pinned release of todocli (presumably the package providing the
# `todo` command that later cells run through bash -- TODO confirm).
!pip install todocli==5.0.0



In [46]:
"""
 * ==============================================
 * All credit to https://github.com/clue/json-stream/blob/master/src/StreamingJsonParser.php
 * ==============================================
"""

class StreamingJsonParser:
    """Incrementally extract complete JSON documents from a stream of text chunks.

    Feed arbitrary pieces of text to :meth:`parse`; every top-level JSON
    object (``{...}``) or array (``[...]``) that becomes complete is decoded
    and returned. Incomplete trailing text is kept in ``self.buffer`` until a
    later chunk completes it.
    """

    def __init__(self):
        # Text of the document currently being assembled (possibly incomplete).
        self.buffer = ''
        # Closing bracket expected for the current document; None between documents.
        self.end_character = None
        # Not read anywhere in this class.
        self.assoc = True

    def parse(self, chunk):
        """Consume ``chunk`` and return the list of documents completed by it.

        Args:
            chunk: Next piece of text from the stream (``str``).

        Returns:
            A list of decoded JSON values (dicts and/or lists), possibly empty.

        Raises:
            ValueError: If a new document starts with anything other than
                ``[`` or ``{`` (after surrounding whitespace is stripped).
        """
        # Imported here so the class does not depend on another notebook cell
        # having executed ``import json`` first.
        import json

        objects = []

        while chunk != '':
            if self.end_character is None:
                # Between documents: skip whitespace and detect the document type.
                chunk = chunk.strip()

                if chunk == '':
                    break
                elif chunk[0] == '[':
                    self.end_character = ']'
                elif chunk[0] == '{':
                    self.end_character = '}'
                else:
                    raise ValueError('Invalid start')

            # Every occurrence of the closing bracket is a *candidate* end of
            # the document; nested structures produce false candidates.
            pos = chunk.find(self.end_character)

            if pos == -1:
                # No candidate in this chunk: keep everything for later.
                self.buffer += chunk
                break

            self.buffer += chunk[:pos + 1]
            chunk = chunk[pos + 1:]

            try:
                json_obj = json.loads(self.buffer)
            except ValueError:
                # Not a complete document yet (json.JSONDecodeError is a
                # ValueError); keep scanning for the next candidate.
                continue

            if json_obj is not None:
                objects.append(json_obj)

                # Document finished: reset state for the next one.
                self.buffer = ''
                self.end_character = None

        return objects

    def is_empty(self):
        """Return True when no partial document is currently buffered."""
        return self.buffer == ''


In [36]:
import json
import requests

def api_query(query,model=False):
    """Send ``query`` to the local query API and return the first usable answer.

    The request is POSTed to ``http://127.0.0.1:8080/api/query`` and the
    streamed body is decoded line by line into JSON chunks.

    Args:
        query: Natural-language instruction to send.
        model: When True, return the text of the first chunk whose ``type``
            is ``"model"``; if that text contains at least two ``` fences,
            only the text between the first two fences is returned.

    Returns:
        * with ``model=True``: the (possibly fence-stripped) model response;
        * otherwise the ``commands`` of the first chunk whose payload
          ``action`` is ``"execute"``, joined with newlines (an empty string
          when the chunk carries no commands);
        * ``None`` when the stream ends without a matching chunk.
    """
    # Local import: at module level ``re`` is only imported by a later cell,
    # so relying on it would raise NameError on a fresh kernel.
    import re

    # The context manager releases the streamed connection even though the
    # function usually returns before the body is fully consumed.
    with requests.post(
        "http://127.0.0.1:8080/api/query",
        json={"payload":{"query":query,"feature":"calendar","commit":False}},
        stream=True,
    ) as r:
        parser = StreamingJsonParser()
        for line in r.iter_lines():
            if not line:
                continue

            decoded_line = line.decode('utf-8')
            for chunk in parser.parse(decoded_line):
                # The parser can also yield top-level JSON arrays; those have
                # no "type"/"payload" keys and are skipped.
                if not isinstance(chunk, dict):
                    continue

                if model and chunk.get("type")=="model":
                    response = chunk["payload"]["response"]
                    indexes = [m.start() for m in re.finditer('```', response)]
                    if len(indexes)>=2:
                        # Skip the opening fence and drop the single character
                        # right before the closing fence (presumably a newline).
                        return response[indexes[0]+3:indexes[1]-1]
                    return response

                payload = chunk.get("payload") or {}
                if payload.get("action")=="execute":
                    # A missing/None "commands" entry yields an empty script
                    # instead of a TypeError from str.join.
                    return "\n".join(payload.get("commands") or [])

    return None

In [37]:
import subprocess
import re

def exec_commands(commands):
    """Run ``commands`` as a bash script and return its stdout without ANSI escapes.

    Only stdout is returned; stderr and the exit status are captured by
    ``subprocess.run`` but not inspected.
    """
    completed = subprocess.run(
        ["bash", "-c", commands],
        capture_output=True,
        text=True,
    )
    # Verbose-mode pattern matching ESC-introduced terminal control sequences.
    escape_pattern = r'''
    \x1B  # ESC
    (?:   # 7-bit C1 Fe (except CSI)
        [@-Z\\-_]
    |     # or [ for CSI, followed by a control sequence
        \[
        [0-?]*  # Parameter bytes
        [ -/]*  # Intermediate bytes
        [@-~]   # Final byte
    )
'''
    return re.sub(escape_pattern, '', completed.stdout, flags=re.VERBOSE)

# student LLM will be put here for testing
def student_llm(task,model=False):
    """Hand ``task`` to the system under test and return its answer.

    Currently a direct pass-through to ``api_query``; ``model`` is forwarded
    unchanged.
    """
    answer = api_query(task, model=model)
    return answer

# Whatever `todo --location` prints (raw stdout, so a trailing newline, if any,
# is retained). Test cells interpolate it into `rm -r ...` to reset state.
TODO_LOC = exec_commands('todo --location')
# Gates the `if TEST:` assertion sections of the test cells.
TEST = True

# 1 Predefined Test Cases

#### Task 1) *Mark the first and third items on my homework_list as done*

In [47]:
# Natural-language instruction handed to the model under test.
task = "Mark the first and third items on my homework_list as done"

# Bash script: remove the path stored in TODO_LOC, add four homework items,
# then list the context so the starting state shows up in the cell output.
init_commands = f"""
rm -r {TODO_LOC}
todo add "LLM Homework" --context homework_list
todo add "NLP Homework" --context homework_list
todo add "Math Homework" --context homework_list
todo add "ML Homework"  --context homework_list

todo homework_list
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
final_state = exec_commands('todo --flat')

if TEST:
    # final_state should contain "ML Homework" and "NLP Homework" only
    assert "ML Homework" in final_state and "NLP Homework" in final_state
    assert "LLM Homework" not in final_state and "Math Homework" not in final_state
    # searching among done tasks must find both items that were marked as done
    assert "LLM Homework"  in exec_commands('todo search "LLM Homework" --done')
    assert "Math Homework" in exec_commands('todo search "Math Homework" --done')

 1 | LLM Homework
 2 | NLP Homework
 3 | Math Homework
 4 | ML Homework



#### Task 2) *Prioritize the first item in my shopping list*

In [48]:
# Natural-language instruction handed to the model under test.
task = "Prioritize the first item in my shopping list"

# Bash script: remove the path stored in TODO_LOC, add three shopping items,
# then list the context so the starting state shows up in the cell output.
init_commands = f"""
rm -r {TODO_LOC}

todo add "Two bottles of milk" --context shopping_list
todo add "Three cans of SinaCola" --context shopping_list
todo add "Fifty eggs" --context shopping_list


todo shopping_list
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
final_state = exec_commands('todo --flat')

if TEST:
    # Should have the first item with priority greater than zero
    # (title followed by ★ and a positive integer without leading zero)
    assert bool(re.search(r"Two bottles of milk ★[1-9]\d*", final_state))
    # Should be the only instance
    assert final_state.count("★") == 1

 1 | Two bottles of milk
 2 | Three cans of SinaCola
 3 | Fifty eggs



#### Task 3) *Move all completed tasks from my project_list to an archive_list*

In [49]:
# Natural-language instruction handed to the model under test.
task = "Move all completed tasks from my project_list to an archive_list"

# Bash script: remove the path stored in TODO_LOC, add three project tasks and
# one archived task, mark tasks 1 and 2 as done, then print both contexts and
# the done tasks of project_list so the starting state is visible.
init_commands = f"""
rm -r {TODO_LOC}

todo add "NLP Project" --context project_list
todo add "Math Project" --context project_list
todo add "ML Project" --context project_list

todo add "Algebra I Project" --context archive_list

todo done 1            
todo done 2            

todo project_list
todo archive_list
todo search '' --context project_list --done
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
# NOTE: final_state is not consulted by the assertions below; they query the
# todo CLI directly.
final_state = exec_commands('todo --flat')

if TEST:
    # The two completed tasks should now be in the archive list
    assert "[DONE] NLP Project #archive_list" in exec_commands("todo search '' --context archive_list --done")
    assert "[DONE] Math Project #archive_list" in exec_commands("todo search '' --context archive_list --done")
    # There should be no done tasks left in project_list
    assert exec_commands("todo search '' --context project_list --done") == ""
    # The one undone project must still be in project_list
    assert "ML Project" in exec_commands("todo search '' --context project_list --undone")

 3 | ML Project
 4 | Algebra I Project
 1 | [DONE] NLP Project #project_list
 2 | [DONE] Math Project #project_list



#### Task 4) *Prioritize all tasks that have to do with my studies*

In [50]:
# Natural-language instruction handed to the model under test.
task = "Prioritize all tasks that have to do with my studies"

# Bash script: remove the path stored in TODO_LOC, then add study tasks spread
# over two contexts (study_list1, study_list2) with shopping tasks in between,
# and print the flat listing as the starting state.
init_commands = f"""
rm -r {TODO_LOC}

todo add "Mathematics" --context study_list1

todo add "Buy chocolate" --context shopping_list
todo add "Buy bread" --context shopping_list

todo add "History" --context study_list2
todo add "Arts" --context study_list2

todo --flat
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
final_state = exec_commands('todo --flat')

if TEST:
    # The study related items should be prioritized
    # (title followed by ★ and a positive integer without leading zero)
    assert bool(re.search(r"Mathematics ★[1-9]\d*", final_state))
    assert bool(re.search(r"History ★[1-9]\d*", final_state))
    assert bool(re.search(r"Arts ★[1-9]\d*", final_state))
    # Only they should be prioritized
    assert final_state.count("★") == 3

 1 | Mathematics #study_list1
 2 | Buy chocolate #shopping_list
 3 | Buy bread #shopping_list
 4 | History #study_list2
 5 | Arts #study_list2



#### Task 5) *Merge my work_list and personal_list together into a combined_list*

In [51]:
# Natural-language instruction handed to the model under test.
task = "Merge my work_list and personal_list together into a combined_list"

# Bash script: remove the path stored in TODO_LOC, add two tasks to each of
# work_list and personal_list, and print the flat listing.
# The three `# todo ...` lines are inside the script string, so bash treats
# them as comments and does not run them (apparently a reference solution).
init_commands = f"""
rm -r {TODO_LOC}

todo add "Write these tests" --context work_list
todo add "Write more tests" --context work_list

todo add "Hang out with friends" --context personal_list
todo add "Go to the dentist" --context personal_list

# todo mv personal_list work_list
# todo rmctx personal_list --force
# todo ctx work_list --name "combined_list"

todo --flat
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
final_state = exec_commands('todo --flat')

if TEST:
    # work_list and personal_list should not be in final_state
    assert "#work_list" not in final_state
    assert "#personal_list" not in final_state
    # combined_list should appear four times in final_state
    assert final_state.count("#combined_list") == 4
    # spot-check one task: it must carry the combined_list tag
    assert "Hang out with friends #combined_list" in final_state

 1 | Write these tests #work_list
 2 | Write more tests #work_list
 3 | Hang out with friends #personal_list
 4 | Go to the dentist #personal_list



#### Task 6) *Set all items in my study_list to maximum importance*

In [52]:
# Natural-language instruction handed to the model under test.
task = "Set all items in my study_list to maximum importance"

# Bash script: remove the path stored in TODO_LOC, add three study_list tasks
# and three gaming_list tasks (one of which, "Study Quizzes", mentions
# studying in its title), and print the flat listing.
init_commands = f"""
rm -r {TODO_LOC}

todo add "Matrix Calculus" --context study_list
todo add "Convex Optimization" --context study_list
todo add "Differential Equations" --context study_list

todo add "League of Legends" --context gaming_list
todo add "Heros of the Storm" --context gaming_list
todo add "Study Quizzes" --context gaming_list

todo --flat
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
final_state = exec_commands('todo --flat')

# Test will assume maximum priority is 99
if TEST:
    # Check that all items in study_list have maximum priority
    assert bool(re.search(r"Matrix Calculus ★99", final_state))
    assert bool(re.search(r"Convex Optimization ★99", final_state))
    assert bool(re.search(r"Differential Equations ★99", final_state))
    # There must be only three ★99
    assert final_state.count("★99") == 3
    
    # gaming items must occur with no priority: the title is immediately
    # followed by the context tag, leaving no room for a ★ marker
    assert "League of Legends #gaming_list" in final_state
    assert "Heros of the Storm #gaming_list" in final_state
    assert "Study Quizzes #gaming_list" in final_state

 1 | Matrix Calculus #study_list
 2 | Convex Optimization #study_list
 3 | Differential Equations #study_list
 4 | League of Legends #gaming_list
 5 | Heros of the Storm #gaming_list
 6 | Study Quizzes #gaming_list



#### Task 7) *Prepare for the team meeting by moving all high priority tasks to the meeting_agenda_list*

In [53]:
# Natural-language instruction handed to the model under test.
task = "Prepare for the team meeting by moving all high priority tasks to the meeting_agenda_list"

# Bash script: remove the path stored in TODO_LOC, add tasks across several
# contexts, give tasks 5, 6, 7 and a priority 9, and print the flat listing.
init_commands = f"""
rm -r {TODO_LOC}

todo add "Eat lunch together" --context meeting_agenda_list

todo add "Review notes on quantum mechanics" --context study_list
todo add "Solve practice problems for organic chemistry" --context study_list
todo add "Watch tutorial videos on machine learning algorithms" --context study_list

todo add "Complete project proposal for client X" --context work_list
todo add "Respond to emails from stakeholders" --context work_list
todo add "Schedule follow-up meetings with collaborators" --context work_list

todo task 5 --priority 9
todo task 6 --priority 9
todo task 7 --priority 9

todo add "Go for a 30-minute jog" --context health_list
todo add "Do yoga for 20 minutes" --context health_list
todo add "Schedule a check-up appointment with the doctor" --context health_list

todo task a --priority 9

todo add "Organize closet and donate old clothes" --context personal_list
todo add "Start learning a new language with Duolingo" --context personal_list

todo --flat
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
final_state = exec_commands('todo --flat')

if TEST:
    # check that the final state has the high priority items in the agenda list
    assert "Complete project proposal for client X ★9 #meeting_agenda_list" in final_state
    assert "Respond to emails from stakeholders ★9 #meeting_agenda_list" in final_state
    assert "Schedule follow-up meetings with collaborators ★9 #meeting_agenda_list" in final_state
    assert "Schedule a check-up appointment with the doctor ★9 #meeting_agenda_list" in final_state
    # and that they no longer belong to their original lists
    assert "Complete project proposal for client X ★9 #work_list" not in final_state
    assert "Respond to emails from stakeholders ★9 #work_list" not in final_state
    assert "Schedule follow-up meetings with collaborators ★9 #work_list" not in final_state
    assert "Schedule a check-up appointment with the doctor ★9 #health_list" not in final_state
    # number of ★ should be 4 (no other priorities): count the bare marker so
    # that a stray priority of any value on another task is caught as well
    assert final_state.count("★") == 4

 5 | Complete project proposal for client X ★9 #work_list
 6 | Respond to emails from stakeholders ★9 #work_list
 7 | Schedule follow-up meetings with collaborators ★9 #work_list
 a | Schedule a check-up appointment with the doctor ★9 #health_list
 1 | Eat lunch together #meeting_agenda_list
 2 | Review notes on quantum mechanics #study_list
 3 | Solve practice problems for organic chemistry #study_list
 4 | Watch tutorial videos on machine learning algorithms #study_list
 8 | Go for a 30-minute jog #health_list
 9 | Do yoga for 20 minutes #health_list
 b | Organize closet and donate old clothes #personal_list
 c | Start learning a new language with Duolingo #personal_list



#### Task 8) *Move all high-importance items from my task_list to my priorities_list*

In [54]:
# Natural-language instruction handed to the model under test.
task = "Move all high-importance items from my task_list to my priorities_list"

# Bash script: remove the path stored in TODO_LOC, add one priorities_list
# task, three task_list tasks (priorities 99, 99, 90) and two travel_list
# tasks (priority 90 each), and print the flat listing.
init_commands = f"""
rm -r {TODO_LOC}

todo add "Go swimming" --context priorities_list

todo add "Complete project proposal for client X" --context task_list
todo add "Respond to emails from stakeholders" --context task_list
todo add "Schedule follow-up meetings with collaborators" --context task_list

todo task 2 --priority 99
todo task 3 --priority 99
todo task 4 --priority 90

todo add "Go to China and see the great wall" --context travel_list
todo add "Fly to Paris" --context travel_list

todo task 5 --priority 90
todo task 6 --priority 90

todo --flat
"""

# setup initial state
init_state = exec_commands(init_commands)
print(init_state)

# run commands from LLM and get final state
final_commands = student_llm(task)
exec_commands(final_commands)
final_state = exec_commands('todo --flat')

if TEST:
    # Check that the final state has the high priority items in priorities_list
    assert "Complete project proposal for client X ★99 #priorities_list" in final_state
    assert "Respond to emails from stakeholders ★99 #priorities_list" in final_state
    assert "Schedule follow-up meetings with collaborators ★90 #priorities_list" in final_state
    # Ensure priorities_list was not erased
    assert "Go swimming #priorities_list" in final_state
    # and that they no longer belong to their original lists
    assert "Complete project proposal for client X ★99 #task_list" not in final_state
    assert "Respond to emails from stakeholders ★99 #task_list" not in final_state
    assert "Schedule follow-up meetings with collaborators ★90 #task_list" not in final_state
    # NOTE(review): the ★90 travel_list tasks are not asserted to stay in
    # travel_list, so moving them as well would go unnoticed -- confirm intent.

 2 | Complete project proposal for client X ★99 #task_list
 3 | Respond to emails from stakeholders ★99 #task_list
 4 | Schedule follow-up meetings with collaborators ★90 #task_list
 5 | Go to China and see the great wall ★90 #travel_list
 6 | Fly to Paris ★90 #travel_list
 1 | Go swimming #priorities_list



<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=90ff4dcc-b342-4b11-93e0-7eecf5cb71a0' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>

# 2 Own Test Cases

## 2.1 Single Instruction

### 2.1.1 Date & Time Extraction

In [25]:
# Queries whose answers need relative dates/times turned into timestamps.
date_time_queries = [
    "What sport activities do I have to do next week?",
    "Add a daily task for exam revisions between 21:00 and 23:00",
    "Move tasks starting in 2 hours to priorities",
]

# Print each model answer, with a dashed rule between consecutive answers.
for position, query in enumerate(date_time_queries):
    if position > 0:
        print("-------------------------------------------------")
    print(student_llm(query, model=True))


show --context sport --start 2024-05-01 00:00:00 --deadline 2024-05-07 23:59:00
-------------------------------------------------

add --title exam revisions --outdoor-related false --start 2024-04-29 21:00:00 --deadline 2024-04-29 23:00:00 --period 1d
-------------------------------------------------

move --start 2024-04-29 03:28:46 --output context priority


### 2.1.2 Long query

In [27]:
# Queries that wrap the actual instruction in extra, irrelevant context.
long_queries = [
    "Today I am very busy so remove all tasks in my sport list",
    "Because the weather now is horrible set the priority of sport activities to lowest.",
    "We didn’t eat anything yesterday. Add a task for cooking today between 10:00 and 12:00. I will invite some friends.",
]

# Print each model answer, with a dashed rule between consecutive answers.
for position, query in enumerate(long_queries):
    if position > 0:
        print("-------------------------------------------------")
    print(student_llm(query, model=True))


remove --context sport
-------------------------------------------------

prioritize --context sport --priority lowest
-------------------------------------------------

add --title cooking --outdoor-related false --start 2024-04-29 10:00:00 --deadline 2024-04-29 12:00:00 --context personal


### 2.1.3 Ambiguous query

In [32]:
# Queries that mix or retract action words, so the intended command is unclear.
ambiguous_queries = [
    "Move no no no. Please mark all tasks in my work list as done",
    "Add task to sport list by merging cooking with sport list",
    "Prioritize the first task by moving it to priorities",
]

# Print each model answer, with a dashed rule between consecutive answers.
for position, query in enumerate(ambiguous_queries):
    if position > 0:
        print("-------------------------------------------------")
    print(student_llm(query, model=True))


mark --context work --done true
-------------------------------------------------

merge --context cooking --output context sport
-------------------------------------------------

move --title [1] --context all --output context priorities


## 2.2 Multiple Instruction

### 2.2.1 Instruction Segmentation

In [33]:
# Queries that pack several instructions into a single request.
multi_instruction_queries = [
    "Empty my sport list. Add a task for pizza delivery. Show all tasks.",
    "Prioritize and mark as done the first task.",
    "Before removing the first task show me all tasks.",
]

# Print each model answer, with a dashed rule between consecutive answers.
for position, query in enumerate(multi_instruction_queries):
    if position > 0:
        print("-------------------------------------------------")
    print(student_llm(query, model=True))


move --context sport --output context all
add --title pizza delivery --outdoor-related false
show
-------------------------------------------------

prioritize --id [1]
mark --id [1] --done true
-------------------------------------------------

show --context all


### 2.2.2 Co-reference Resolution

In [34]:
# Queries in which a later phrase refers back to something mentioned earlier.
coreference_queries = [
    "Show the tasks in cooking list and prioritize the first one of them.",
    "I have a cooking list merge it with my sport list.",
    "There are a cooking list and a sport list prioritize the second task in the latter.",
]

# Print each model answer, with a dashed rule between consecutive answers.
for position, query in enumerate(coreference_queries):
    if position > 0:
        print("-------------------------------------------------")
    print(student_llm(query, model=True))


show --context cooking
prioritize --id [1] --priority maximum --context cooking
-------------------------------------------------

merge --context cooking --output context sport
-------------------------------------------------

prioritize --context sport --id [2]
