In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("autodl-tmp/Mistral-7B-Instruct-v0.1", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("autodl-tmp/Mistral-7B-Instruct-v0.1", trust_remote_code=True)

len(tokenizer("hello")["input_ids"])

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


3

In [2]:
template_ = [
  {
    "instruction": "user instruction",
    "input": "user input",
    "output": [
      "chosen answer",
      "rejected answer"
    ]
  }
]

In [2]:
# zero-shot
instruction = ( 
    "You are a web navigation intelligence who interacts with webpage environments to achieve human user intent.\n"
    "You always generate the next ACTION based on the user's INTENT, current webpage HTML and ACTION_HISTORY sequence which recording the actions that have been performed.\n\n"
    "Given HTML and INTENT and ACTION_HISTORY, you should\n"
    "(1) Rely on your HTML code comprehension to analyze and understand what elements are on the current page.\n"
    "(2) Depend on your reasoning skills to parse the user's INTENT and infer the next action that should be taken in conjunction with the historical trajectory ACTION_HISTORY.\n"
    "(3) Select an element carefully from HTML code to interact with, thus bringing the goal closer to completion.\n\n"
    "Your output format should be strictly as follows\n"
    "Operation: ... (should be CLICK or TYPE)\n"
    "Value: ... (optional textual value for the operation TYPE)\n"
    "ID: ... (unique id number for the element to click or type into)\n\n"
    # "Thought: ... (A paragraph explaining why you chose this element to interact with, no more than 50 words)"
    "Now, begin!"
)

print(instruction)

You are a web navigation intelligence who interacts with webpage environments to achieve human user intent.
You always generate the next ACTION based on the user's INTENT, current webpage HTML and ACTION_HISTORY sequence which recording the actions that have been performed.

Given HTML and INTENT and ACTION_HISTORY, you should
(1) Rely on your HTML code comprehension to analyze and understand what elements are on the current page.
(2) Depend on your reasoning skills to parse the user's INTENT and infer the next action that should be taken in conjunction with the historical trajectory ACTION_HISTORY.
(3) Select an element carefully from HTML code to interact with, thus bringing the goal closer to completion.

Your output format should be strictly as follows
Operation: ... (should be CLICK or TYPE)
Value: ... (optional textual value for the operation TYPE)
ID: ... (unique id number for the element to click or type into)

Now, begin!


In [4]:
int(len(instruction) / 4)

236

In [3]:
user_input_template = "INTENT:\n{intent}\n\nHTML:\n{html}\n\nACTION_HISTORY:\n{action_history}"
output_template = "Operation: {op}\nValue: {value}\nID: {id}"

In [6]:
# unfold the trajectory 
# change the backend_node_id to id for token reduction...
# CL 1:3 percentage
# drop the too long html data sample

train_dataset = []

In [8]:
import json
import datasets

import random
from tqdm import tqdm

for ID in range(11):
    with open(f"autodl-tmp/train_dataset/train_{ID}.json") as file:
        data = json.load(file)

    pos_candidate_na = 0
    total_dataset_num = 0
    
    for dat in tqdm(data):
        intent = dat["confirmed_task"] # + f"(domain {dat['subdomain']})"
        action_history_all = dat["action_reprs"]
    
        for index, d in enumerate(dat["actions"]):
            cleaned_html = d["cleaned_html"].replace("backend_node_id", "id")
            token_num = len(tokenizer(cleaned_html)["input_ids"])
    
            if token_num > 120000:
                print(f"oversize intent: {intent}")
            else:
                html = cleaned_html
                action_history = action_history_all[:index]
    
                op = d["operation"]["op"]
                value = d["operation"]["value"]
                if len(d["pos_candidates"]) > 0:
                    chosen_answer_ = output_template.format(op=op, value=value, id=d["pos_candidates"][0]["backend_node_id"])
                else:
                    pos_candidate_na += 1
                    continue
                
                random.shuffle(d["neg_candidates"])
                rand_neg_candidates = d["neg_candidates"][:3]
                
                for c in rand_neg_candidates: # 1:3 proportion
                    if op != "CLICK" and random.uniform(0, 1) < 0.33: # 1/3 for type/select -> click
                        rejected_answer_ = output_template.format(op="CLICK", value="", id=c["backend_node_id"])
                    else:
                        rejected_answer_ = output_template.format(op=op, value=value, id=c["backend_node_id"])
                        
                    instruction_ = instruction
                    input_ = user_input_template.format(intent=intent, html=html, action_history=action_history)
                    output_ = [chosen_answer_, rejected_answer_]
                    
                    total_dataset_num += 1
                    train_dataset.append({
                        "instruction": instruction_,
                        "input": input_,
                        "output": output_
                    })

    print(ID, pos_candidate_na, total_dataset_num)

 18%|█▊        | 18/100 [00:48<02:47,  2.04s/it]

oversize intent: See today's deals on video games and find the best deal on the lowest-priced mouse.


 19%|█▉        | 19/100 [00:53<03:55,  2.91s/it]

oversize intent: Find Metformin 1000mg tablet price and details.
oversize intent: Find Metformin 1000mg tablet price and details.
oversize intent: Find Metformin 1000mg tablet price and details.


 28%|██▊       | 28/100 [01:08<01:38,  1.37s/it]

oversize intent: Get the lowest priced plus size one piece active swimsuit in color black.


 29%|██▉       | 29/100 [01:13<03:01,  2.56s/it]

oversize intent: Check for travel requirements when travelling from New York to Tokyo as US citizen.
oversize intent: Check for travel requirements when travelling from New York to Tokyo as US citizen.


 39%|███▉      | 39/100 [01:40<02:56,  2.90s/it]

oversize intent: Find climbing gear and sort the results by price high to low.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Add Western Digital internal SSD with 1T

 40%|████      | 40/100 [01:58<07:31,  7.53s/it]

oversize intent: Add Western Digital internal SSD with 1TB storage and 8GB Ram DDR to the cart.
oversize intent: Get the most popular documentary tv series sorted by IMDB rating, see the details of the top result.


 41%|████      | 41/100 [02:01<05:58,  6.08s/it]

oversize intent: Get the most popular documentary tv series sorted by IMDB rating, see the details of the top result.
oversize intent: Show top 50 Movies and TV shows by Sci fi genre.
oversize intent: Show top 50 Movies and TV shows by Sci fi genre.
oversize intent: Show top 50 Movies and TV shows by Sci fi genre.


 42%|████▏     | 42/100 [02:05<05:20,  5.52s/it]

oversize intent: Show top 50 Movies and TV shows by Sci fi genre.


 64%|██████▍   | 64/100 [02:38<00:39,  1.10s/it]

oversize intent: Find the cheapest power supply unit with at least 600W power output.
oversize intent: Find the cheapest power supply unit with at least 600W power output.
oversize intent: Find the cheapest power supply unit with at least 600W power output.
oversize intent: Find the cheapest power supply unit with at least 600W power output.


 65%|██████▌   | 65/100 [02:48<02:11,  3.75s/it]

oversize intent: Find the cheapest power supply unit with at least 600W power output.


 94%|█████████▍| 94/100 [03:29<00:06,  1.13s/it]

oversize intent: Find a wireless keyboard and mouse combo under $100.
oversize intent: Find a wireless keyboard and mouse combo under $100.
oversize intent: Find a wireless keyboard and mouse combo under $100.
oversize intent: Find a wireless keyboard and mouse combo under $100.


 95%|█████████▌| 95/100 [03:40<00:19,  3.99s/it]

oversize intent: Find a wireless keyboard and mouse combo under $100.


100%|██████████| 100/100 [03:48<00:00,  2.29s/it]


0 31 2121


  0%|          | 0/100 [00:00<?, ?it/s]

oversize intent: Look up the scores for the previous day's NBA games
oversize intent: Look up the scores for the previous day's NBA games


 32%|███▏      | 32/100 [01:00<02:29,  2.20s/it]

oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.
oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.
oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.
oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.
oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.
oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.
oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.


 33%|███▎      | 33/100 [01:11<05:29,  4.91s/it]

oversize intent: Add a pack of toilet paper and a bottle of laundry detergent to your Amazon cart with the lowest total price.


 49%|████▉     | 49/100 [01:33<00:51,  1.00s/it]

oversize intent: Get a Hasbro Hulk action figure manufactured in 1990 with the lowest price + shipping.
oversize intent: Get a Hasbro Hulk action figure manufactured in 1990 with the lowest price + shipping.


 58%|█████▊    | 58/100 [01:50<00:58,  1.38s/it]

oversize intent: Find Travel Pack for hiking.


 59%|█████▉    | 59/100 [01:53<01:15,  1.83s/it]

oversize intent: Find rental charges for bikes in 37863.


 66%|██████▌   | 66/100 [02:02<00:36,  1.06s/it]

oversize intent: Find a gaming pc which can run Call of Duty Warzone at 4k.
oversize intent: Find a gaming pc which can run Call of Duty Warzone at 4k.


 75%|███████▌  | 75/100 [02:25<01:31,  3.65s/it]

oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.
oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.
oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.
oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.
oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.
oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.
oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.


 76%|███████▌  | 76/100 [02:40<02:45,  6.88s/it]

oversize intent: add a motherboard under $200 and a compatible processor at any price to the shopping cart.


 83%|████████▎ | 83/100 [02:50<00:39,  2.30s/it]

oversize intent: Show me the newest on-demand releases.


 89%|████████▉ | 89/100 [03:19<00:53,  4.89s/it]

oversize intent: Find a hydropack and make  the results to show only items that have a capacity of 21 to 35L.


 90%|█████████ | 90/100 [03:22<00:44,  4.48s/it]

oversize intent: Search for kayaks and filter the results to show only those that are suitable for whitewater rapids.


100%|██████████| 100/100 [03:39<00:00,  2.20s/it]


1 52 2328


 24%|██▍       | 24/100 [00:49<02:58,  2.35s/it]

oversize intent: Search the  actor who won the Oscars for best actor  in a suuporing role ine 1990, and add his upcoming movies to my watchlist.
oversize intent: Search the  actor who won the Oscars for best actor  in a suuporing role ine 1990, and add his upcoming movies to my watchlist.
oversize intent: Search the  actor who won the Oscars for best actor  in a suuporing role ine 1990, and add his upcoming movies to my watchlist.
oversize intent: Search the  actor who won the Oscars for best actor  in a suuporing role ine 1990, and add his upcoming movies to my watchlist.


 25%|██▌       | 25/100 [00:53<03:30,  2.81s/it]

oversize intent: Search the  actor who won the Oscars for best actor  in a suuporing role ine 1990, and add his upcoming movies to my watchlist.


 27%|██▋       | 27/100 [00:55<02:20,  1.92s/it]

oversize intent: Find The Last of Us series and add it to my watch list.
oversize intent: Give me the IMDB recommendations of what to watch.


 28%|██▊       | 28/100 [00:56<02:04,  1.73s/it]

oversize intent: Give me the IMDB recommendations of what to watch.
oversize intent: Show me a list of comedy movies, sorted by user ratings.
oversize intent: Show me a list of comedy movies, sorted by user ratings.
oversize intent: Show me a list of comedy movies, sorted by user ratings.


 29%|██▉       | 29/100 [00:59<02:40,  2.26s/it]

oversize intent: Show me a list of comedy movies, sorted by user ratings.


 60%|██████    | 60/100 [01:56<00:56,  1.42s/it]

oversize intent: Show me RPG video games.
oversize intent: Show me RPG video games.
oversize intent: Show me RPG video games.
oversize intent: Show me RPG video games.


 64%|██████▍   | 64/100 [02:11<01:46,  2.95s/it]

oversize intent: Play Trailer of "The Flash" 2023.


 70%|███████   | 70/100 [02:20<00:52,  1.74s/it]

oversize intent: Add a dome tent to my shopping cart.


 75%|███████▌  | 75/100 [02:26<00:33,  1.35s/it]

oversize intent: Play a star wars movie trailer.
oversize intent: Add the second most popular horror series to my watchlist.


 76%|███████▌  | 76/100 [02:28<00:38,  1.62s/it]

oversize intent: Add the second most popular horror series to my watchlist.
oversize intent: Find videos from the Oscar 2023.
oversize intent: Find videos from the Oscar 2023.


 77%|███████▋  | 77/100 [02:30<00:40,  1.77s/it]

oversize intent: Find videos from the Oscar 2023.


 78%|███████▊  | 78/100 [02:32<00:42,  1.93s/it]

oversize intent: Find the list of critic reviews for the movie Creed III.
oversize intent: Find superhero sci-fi movies based on comic books rated 7 to 9, and add the three movies with the most votes to my watchlist.
oversize intent: Find superhero sci-fi movies based on comic books rated 7 to 9, and add the three movies with the most votes to my watchlist.
oversize intent: Find superhero sci-fi movies based on comic books rated 7 to 9, and add the three movies with the most votes to my watchlist.
oversize intent: Find superhero sci-fi movies based on comic books rated 7 to 9, and add the three movies with the most votes to my watchlist.
oversize intent: Find superhero sci-fi movies based on comic books rated 7 to 9, and add the three movies with the most votes to my watchlist.
oversize intent: Find superhero sci-fi movies based on comic books rated 7 to 9, and add the three movies with the most votes to my watchlist.
oversize intent: Find superhero sci-fi movies based on comic books r

 79%|███████▉  | 79/100 [02:46<01:52,  5.38s/it]

oversize intent: Find superhero sci-fi movies based on comic books rated 7 to 9, and add the three movies with the most votes to my watchlist.


 80%|████████  | 80/100 [02:49<01:31,  4.59s/it]

oversize intent: Find hotels in Las Vegas, NV that offer free airport shuttle service.


 93%|█████████▎| 93/100 [03:10<00:07,  1.14s/it]

oversize intent: Browse the women's hiking boots section and filter the results to show only those that are waterproof and have a rating of at least 4 stars and size 6.
oversize intent: Browse the women's hiking boots section and filter the results to show only those that are waterproof and have a rating of at least 4 stars and size 6.


 94%|█████████▍| 94/100 [03:15<00:13,  2.19s/it]

oversize intent: Browse the women's hiking boots section and filter the results to show only those that are waterproof and have a rating of at least 4 stars and size 6.


100%|██████████| 100/100 [03:18<00:00,  1.99s/it]


2 33 1701


  5%|▌         | 5/100 [00:05<01:56,  1.23s/it]

oversize intent: What are Jerry Trainor's upcoming projects?
oversize intent: What are Jerry Trainor's upcoming projects?
oversize intent: What are Jerry Trainor's upcoming projects?


  6%|▌         | 6/100 [00:07<02:26,  1.56s/it]

oversize intent: What are Jerry Trainor's upcoming projects?


 24%|██▍       | 24/100 [00:54<04:23,  3.46s/it]

oversize intent: Look for an English language book on roman empire history in the Kindle store released in the last 90 days, it should be displayed according to its latest publication date, and add the top result to the shopping list.


 25%|██▌       | 25/100 [01:00<05:06,  4.09s/it]

oversize intent: Look for an English language book on roman empire history in the Kindle store released in the last 90 days, it should be displayed according to its latest publication date, and add the top result to the shopping list.


 30%|███       | 30/100 [01:17<04:03,  3.48s/it]

oversize intent: Add 10 of the cheapest green shirt to my cart
oversize intent: Add 10 of the cheapest green shirt to my cart
oversize intent: Add 10 of the cheapest green shirt to my cart


 52%|█████▏    | 52/100 [01:48<01:15,  1.57s/it]

oversize intent: Search for used BMW X5 Crossovers and compare the mileage of the first two cars.
oversize intent: Search for used BMW X5 Crossovers and compare the mileage of the first two cars.
oversize intent: Search for used BMW X5 Crossovers and compare the mileage of the first two cars.
oversize intent: Search for used BMW X5 Crossovers and compare the mileage of the first two cars.
oversize intent: Search for used BMW X5 Crossovers and compare the mileage of the first two cars.
oversize intent: Search for used BMW X5 Crossovers and compare the mileage of the first two cars.
oversize intent: Search for used BMW X5 Crossovers and compare the mileage of the first two cars.


 54%|█████▍    | 54/100 [01:59<02:22,  3.10s/it]

oversize intent: Find a store in Los Angeles, California, and set Riverside as my store and add a card from that store to my favorites


 55%|█████▌    | 55/100 [01:59<01:41,  2.25s/it]

oversize intent: Search for an automatic grey sports car with the lowest price, gas fuel and free shipping manufactured between 2018 to 2022, compare the top two results and compare photos.
oversize intent: Search for an automatic grey sports car with the lowest price, gas fuel and free shipping manufactured between 2018 to 2022, compare the top two results and compare photos.
oversize intent: Search for an automatic grey sports car with the lowest price, gas fuel and free shipping manufactured between 2018 to 2022, compare the top two results and compare photos.
oversize intent: Search for an automatic grey sports car with the lowest price, gas fuel and free shipping manufactured between 2018 to 2022, compare the top two results and compare photos.
oversize intent: Search for an automatic grey sports car with the lowest price, gas fuel and free shipping manufactured between 2018 to 2022, compare the top two results and compare photos.
oversize intent: Search for an automatic grey spor

 70%|███████   | 70/100 [02:33<00:41,  1.39s/it]

oversize intent: Compare two wireless printers that are rated above 4 stars.
oversize intent: Compare two wireless printers that are rated above 4 stars.
oversize intent: Compare two wireless printers that are rated above 4 stars.
oversize intent: Compare two wireless printers that are rated above 4 stars.
oversize intent: Compare two wireless printers that are rated above 4 stars.
oversize intent: Compare two wireless printers that are rated above 4 stars.
oversize intent: Compare two wireless printers that are rated above 4 stars.


 71%|███████   | 71/100 [02:47<02:32,  5.26s/it]

oversize intent: Compare two wireless printers that are rated above 4 stars.


 80%|████████  | 80/100 [02:58<00:23,  1.19s/it]

oversize intent: Find flights from Washington airports to New York airports and only show those with scheduled status.


 81%|████████  | 81/100 [03:01<00:32,  1.71s/it]

oversize intent: Find flights from Washington airports to New York airports and only show those with scheduled status.


100%|██████████| 100/100 [03:42<00:00,  2.23s/it]


3 51 2139


  0%|          | 0/100 [00:00<?, ?it/s]

oversize intent: Browse new laptops from $400 to $500 that offers free shippping.


  4%|▍         | 4/100 [00:13<05:02,  3.15s/it]

oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.
oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.


  5%|▌         | 5/100 [00:32<13:58,  8.82s/it]

oversize intent: Find the cheapest unused wireless Logitech Keyboard under $70.


  6%|▌         | 6/100 [00:33<10:03,  6.42s/it]

oversize intent: Find a living History event to attend in in April .
oversize intent: Find a living History event to attend in in April .


  8%|▊         | 8/100 [00:37<05:44,  3.75s/it]

oversize intent: Find a living History event to attend in in April .


 13%|█▎        | 13/100 [00:41<01:48,  1.25s/it]

oversize intent: Show me a list of children's program events in Illinois.
oversize intent: Show me a list of children's program events in Illinois.
oversize intent: Show me a list of children's program events in Illinois.
oversize intent: Show me a list of children's program events in Illinois.


 24%|██▍       | 24/100 [01:08<02:28,  1.96s/it]

oversize intent: Search for hiking boots and filter the results to show only those with a waterproof rating of at least 3 stars.


 25%|██▌       | 25/100 [01:12<03:11,  2.56s/it]

oversize intent: Search for hiking boots and filter the results to show only those with a waterproof rating of at least 3 stars.


 37%|███▋      | 37/100 [01:34<02:01,  1.93s/it]

oversize intent: Add Prometheus movie to watchlist.
oversize intent: Add documentary  "The Elephant Whisperers" and "The Magician's Elephant" to the watchlist.
oversize intent: Add documentary  "The Elephant Whisperers" and "The Magician's Elephant" to the watchlist.


 38%|███▊      | 38/100 [01:37<02:12,  2.14s/it]

oversize intent: Add documentary  "The Elephant Whisperers" and "The Magician's Elephant" to the watchlist.


 47%|████▋     | 47/100 [01:51<01:20,  1.53s/it]

oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2 BMW vehicles in the store nearest to 07055
oversize intent: Find series 1 and series 2

 49%|████▉     | 49/100 [02:07<03:38,  4.28s/it]

oversize intent: Find an Xbox Wireless controller rated above 4 stars.
oversize intent: Find an Xbox Wireless controller rated above 4 stars.
oversize intent: Find an Xbox Wireless controller rated above 4 stars.
oversize intent: Find an Xbox Wireless controller rated above 4 stars.
oversize intent: Find an Xbox Wireless controller rated above 4 stars.


 50%|█████     | 50/100 [02:18<05:14,  6.29s/it]

oversize intent: Find an Xbox Wireless controller rated above 4 stars.


 57%|█████▋    | 57/100 [02:25<01:12,  1.68s/it]

oversize intent: Remove the SSD on my cart


 59%|█████▉    | 59/100 [02:28<01:00,  1.48s/it]

oversize intent: Remove the SSD on my cart


 68%|██████▊   | 68/100 [02:37<00:56,  1.77s/it]

oversize intent: Build a PC with Intel i9 Raptor Lake 24 cores, Asus ROG strix Z790 motherboard, Asus TUF Rtx 4090 and remaining all compatible products with SSD.
oversize intent: Build a PC with Intel i9 Raptor Lake 24 cores, Asus ROG strix Z790 motherboard, Asus TUF Rtx 4090 and remaining all compatible products with SSD.
oversize intent: Build a PC with Intel i9 Raptor Lake 24 cores, Asus ROG strix Z790 motherboard, Asus TUF Rtx 4090 and remaining all compatible products with SSD.
oversize intent: Build a PC with Intel i9 Raptor Lake 24 cores, Asus ROG strix Z790 motherboard, Asus TUF Rtx 4090 and remaining all compatible products with SSD.
oversize intent: Build a PC with Intel i9 Raptor Lake 24 cores, Asus ROG strix Z790 motherboard, Asus TUF Rtx 4090 and remaining all compatible products with SSD.
oversize intent: Build a PC with Intel i9 Raptor Lake 24 cores, Asus ROG strix Z790 motherboard, Asus TUF Rtx 4090 and remaining all compatible products with SSD.
oversize intent: Build

 69%|██████▉   | 69/100 [03:11<05:54, 11.45s/it]

oversize intent: Build a PC with Intel i9 Raptor Lake 24 cores, Asus ROG strix Z790 motherboard, Asus TUF Rtx 4090 and remaining all compatible products with SSD.


 97%|█████████▋| 97/100 [03:46<00:04,  1.58s/it]

oversize intent: Order one TAHE inflatable stand up paddle board using my default account/shipping info.
oversize intent: Order one TAHE inflatable stand up paddle board using my default account/shipping info.


100%|██████████| 100/100 [03:55<00:00,  2.35s/it]


4 30 1860


 22%|██▏       | 22/100 [00:44<02:54,  2.24s/it]

oversize intent: Find a Lenovo laptop under $800 and create a price alert for $400.
oversize intent: Find a Lenovo laptop under $800 and create a price alert for $400.
oversize intent: Find a Lenovo laptop under $800 and create a price alert for $400.
oversize intent: Find a Lenovo laptop under $800 and create a price alert for $400.
oversize intent: Find a Lenovo laptop under $800 and create a price alert for $400.


 23%|██▎       | 23/100 [00:53<05:10,  4.03s/it]

oversize intent: Find a Lenovo laptop under $800 and create a price alert for $400.
oversize intent: Browse laptops with at least 16gb of ram and under $700.
oversize intent: Browse laptops with at least 16gb of ram and under $700.
oversize intent: Browse laptops with at least 16gb of ram and under $700.
oversize intent: Browse laptops with at least 16gb of ram and under $700.
oversize intent: Browse laptops with at least 16gb of ram and under $700.
oversize intent: Browse laptops with at least 16gb of ram and under $700.
oversize intent: Browse laptops with at least 16gb of ram and under $700.


 24%|██▍       | 24/100 [01:11<10:22,  8.20s/it]

oversize intent: Browse laptops with at least 16gb of ram and under $700.


 43%|████▎     | 43/100 [01:36<01:24,  1.48s/it]

oversize intent: Find the cheapest 2017 Honda Civic within 100 miles of 07470 which has a sun roof and black exterior color while excluding out of market vehicles
oversize intent: Find the cheapest 2017 Honda Civic within 100 miles of 07470 which has a sun roof and black exterior color while excluding out of market vehicles
oversize intent: Find the cheapest 2017 Honda Civic within 100 miles of 07470 which has a sun roof and black exterior color while excluding out of market vehicles
oversize intent: Find the cheapest 2017 Honda Civic within 100 miles of 07470 which has a sun roof and black exterior color while excluding out of market vehicles
oversize intent: Find the cheapest 2017 Honda Civic within 100 miles of 07470 which has a sun roof and black exterior color while excluding out of market vehicles
oversize intent: Find the cheapest 2017 Honda Civic within 100 miles of 07470 which has a sun roof and black exterior color while excluding out of market vehicles
oversize intent: Find 

 63%|██████▎   | 63/100 [02:14<00:13,  2.82it/s]

oversize intent: check the 2022 highest goal scorer in the MLS league
oversize intent: check the 2022 highest goal scorer in the MLS league


 65%|██████▌   | 65/100 [02:19<00:44,  1.27s/it]

oversize intent: Show camping hammocks with lowest price first.


 66%|██████▌   | 66/100 [02:22<01:05,  1.94s/it]

oversize intent: Show camping hammocks with lowest price first.


 73%|███████▎  | 73/100 [02:33<00:35,  1.32s/it]

oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.
oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.
oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.
oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.
oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.
oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.
oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.
oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.


 74%|███████▍  | 74/100 [02:46<02:05,  4.84s/it]

oversize intent: Find the most expensive Sony controller compatible with PS5 with free shipping and add it to my cart.


 80%|████████  | 80/100 [02:51<00:27,  1.37s/it]

oversize intent: Find a Blue iPhone 12 Pro 128gb and add to cart.
oversize intent: Find a Blue iPhone 12 Pro 128gb and add to cart.


 84%|████████▍ | 84/100 [03:01<00:34,  2.15s/it]

oversize intent: Show me the Biography of the NFL player named Michael Jordan
oversize intent: Show me the Biography of the NFL player named Michael Jordan
oversize intent: Show me the Biography of the NFL player named Michael Jordan


 85%|████████▌ | 85/100 [03:04<00:36,  2.40s/it]

oversize intent: Find FC Barcelona's next fixture in the Spanish Copa de Rey
oversize intent: Find FC Barcelona's next fixture in the Spanish Copa de Rey


 86%|████████▌ | 86/100 [03:07<00:34,  2.47s/it]

oversize intent: Find the latest news about Mikal Bridges
oversize intent: Find the latest news about Mikal Bridges


 87%|████████▋ | 87/100 [03:09<00:33,  2.55s/it]

oversize intent: Find the latest news about Mikal Bridges
oversize intent: Play a podcast about F1
oversize intent: Play a podcast about F1


100%|██████████| 100/100 [03:29<00:00,  2.10s/it]


5 26 1968


  0%|          | 0/100 [00:00<?, ?it/s]

oversize intent: Select an event from the 2019 UFC schedule and show me the full profile from one of the athletes
oversize intent: Select an event from the 2019 UFC schedule and show me the full profile from one of the athletes


  4%|▍         | 4/100 [00:09<03:44,  2.34s/it]

oversize intent: Buy a GoBites Uno Spork in the color magenta and have it delivered to a store in Seattle, WA. Checkout with default address, and use credit number 123456789, CVV 123, expire 01, 2024
oversize intent: Buy a GoBites Uno Spork in the color magenta and have it delivered to a store in Seattle, WA. Checkout with default address, and use credit number 123456789, CVV 123, expire 01, 2024
oversize intent: Buy a GoBites Uno Spork in the color magenta and have it delivered to a store in Seattle, WA. Checkout with default address, and use credit number 123456789, CVV 123, expire 01, 2024
oversize intent: Buy a GoBites Uno Spork in the color magenta and have it delivered to a store in Seattle, WA. Checkout with default address, and use credit number 123456789, CVV 123, expire 01, 2024
oversize intent: Buy a GoBites Uno Spork in the color magenta and have it delivered to a store in Seattle, WA. Checkout with default address, and use credit number 123456789, CVV 123, expire 01, 2024


  5%|▌         | 5/100 [00:19<07:42,  4.87s/it]

oversize intent: Browse bluetooth headphones with active noise-cancellation.
oversize intent: Browse bluetooth headphones with active noise-cancellation.
oversize intent: Browse bluetooth headphones with active noise-cancellation.
oversize intent: Browse bluetooth headphones with active noise-cancellation.
oversize intent: Browse bluetooth headphones with active noise-cancellation.


  6%|▌         | 6/100 [00:29<10:39,  6.80s/it]

oversize intent: Browse bluetooth headphones with active noise-cancellation.


 16%|█▌        | 16/100 [00:45<01:47,  1.28s/it]

oversize intent: Find the schedule for the Dallas Mavericks.
oversize intent: Find the schedule for the Dallas Mavericks.


 35%|███▌      | 35/100 [01:09<01:17,  1.19s/it]

oversize intent: Open the most popular photos of the Adam A-500 model aircraft.


 36%|███▌      | 36/100 [01:11<01:28,  1.39s/it]

oversize intent: Look up Popular Photos in the Community featuring Aegean Airlines


 48%|████▊     | 48/100 [01:32<01:22,  1.59s/it]

oversize intent: Find forum threads which include the name Taylor Swift in the title within last month


 71%|███████   | 71/100 [01:55<00:33,  1.17s/it]

oversize intent: Show me the new arrivals on Easter home decor


 87%|████████▋ | 87/100 [02:23<00:28,  2.18s/it]

oversize intent: Search for flights from New York City to London and filter the results to show only non-stop flights.


 94%|█████████▍| 94/100 [02:48<00:18,  3.11s/it]

oversize intent: Upgrade the count of the current SSD in my cart to 10
oversize intent: Upgrade the count of the current SSD in my cart to 10


 95%|█████████▌| 95/100 [02:52<00:16,  3.25s/it]

oversize intent: Upgrade the count of the current SSD in my cart to 10


100%|██████████| 100/100 [02:58<00:00,  1.78s/it]


6 49 2043


  8%|▊         | 8/100 [00:07<02:02,  1.33s/it]

oversize intent: Find Kevin Durant's bio
oversize intent: Find Kevin Durant's bio
oversize intent: Find Kevin Durant's bio


  9%|▉         | 9/100 [00:10<02:52,  1.89s/it]

oversize intent: Find Golden State Warriors Player List.
oversize intent: Find Golden State Warriors Player List.
oversize intent: Find Golden State Warriors Player List.


 10%|█         | 10/100 [00:14<03:38,  2.43s/it]

oversize intent: Find the movie which won the jury prize in Cannes 2022, add the director of the movie to the new list named director and add "to watch" in the description of that list.
oversize intent: Find the movie which won the jury prize in Cannes 2022, add the director of the movie to the new list named director and add "to watch" in the description of that list.
oversize intent: Find the movie which won the jury prize in Cannes 2022, add the director of the movie to the new list named director and add "to watch" in the description of that list.
oversize intent: Find the movie which won the jury prize in Cannes 2022, add the director of the movie to the new list named director and add "to watch" in the description of that list.


 14%|█▍        | 14/100 [00:21<02:29,  1.74s/it]

oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.
oversize intent: Find the lowest-priced single pack of X

 15%|█▌        | 15/100 [00:35<07:27,  5.27s/it]

oversize intent: Find the lowest-priced single pack of Xerox genuine magenta toner sold by Newegg with free shipping.


 18%|█▊        | 18/100 [00:39<03:39,  2.67s/it]

oversize intent: Show me the NFL Scoreboard from the superbowl 2015


 21%|██        | 21/100 [00:46<02:59,  2.27s/it]

oversize intent: Find the NBA team with the highest average points scored in the current season
oversize intent: Find the NBA team with the highest average points scored in the current season


 32%|███▏      | 32/100 [01:00<01:22,  1.21s/it]

oversize intent: Show all the external solid state drives available with lowest price first.
oversize intent: Show all the external solid state drives available with lowest price first.
oversize intent: Show all the external solid state drives available with lowest price first.
oversize intent: Show all the external solid state drives available with lowest price first.


 34%|███▍      | 34/100 [01:08<02:43,  2.48s/it]

oversize intent: Add a smartwatch to the cart that is compatible with an iPhone.


 64%|██████▍   | 64/100 [01:47<01:15,  2.11s/it]

oversize intent: Find the cheapest new surge protector with 6 to 8 outlets under 25 dollars with customer reviews above 4+, add 2 pieces to the cart.
oversize intent: Find the cheapest new surge protector with 6 to 8 outlets under 25 dollars with customer reviews above 4+, add 2 pieces to the cart.


 65%|██████▌   | 65/100 [01:55<02:06,  3.62s/it]

oversize intent: Find the cheapest new surge protector with 6 to 8 outlets under 25 dollars with customer reviews above 4+, add 2 pieces to the cart.
oversize intent: Find a dog bed on Amazon that is machine-washable and has a diameter of at least 30 inches.
oversize intent: Find a dog bed on Amazon that is machine-washable and has a diameter of at least 30 inches.


 71%|███████   | 71/100 [02:08<01:09,  2.40s/it]

oversize intent: Find the cheapest full-sized Amazon basic comforter with 4+ average customer reviews and priced between 50 to 100 dollars, if red color availabe in the cheapest product then add one to the cart.
oversize intent: Find the cheapest full-sized Amazon basic comforter with 4+ average customer reviews and priced between 50 to 100 dollars, if red color availabe in the cheapest product then add one to the cart.


 72%|███████▏  | 72/100 [02:15<01:49,  3.91s/it]

oversize intent: Find the cheapest full-sized Amazon basic comforter with 4+ average customer reviews and priced between 50 to 100 dollars, if red color availabe in the cheapest product then add one to the cart.
oversize intent: Show me the stats of the Athletic Club from spanish Laliga


 73%|███████▎  | 73/100 [02:17<01:28,  3.27s/it]

oversize intent: Who were the NBA season leaders from the 2020-21 regular season?


 74%|███████▍  | 74/100 [02:19<01:14,  2.88s/it]

oversize intent: Find the Real Madrid team schedule of the UEFA Champions League championship.
oversize intent: Find the Real Madrid team schedule of the UEFA Champions League championship.
oversize intent: Find the Real Madrid team schedule of the UEFA Champions League championship.


 89%|████████▉ | 89/100 [02:51<00:33,  3.04s/it]

oversize intent: I want to see the best seller gender neutral skirts
oversize intent: I want to see the best seller gender neutral skirts


 97%|█████████▋| 97/100 [03:14<00:12,  4.18s/it]

oversize intent: Reserve the cheapest hotel with 5 stars and free wifi located in downtown dc in Washington on June 5 for 4 adults in 2 rooms.


100%|██████████| 100/100 [03:17<00:00,  1.97s/it]


7 50 2202


 10%|█         | 10/100 [00:16<02:20,  1.56s/it]

oversize intent: Find the cheapest Samsung-made and larger than 11-inch screen Android tablet, in the price range of 200 to 300 dollars and add it to the cart.
oversize intent: Find the cheapest Samsung-made and larger than 11-inch screen Android tablet, in the price range of 200 to 300 dollars and add it to the cart.
oversize intent: Find the cheapest Samsung-made and larger than 11-inch screen Android tablet, in the price range of 200 to 300 dollars and add it to the cart.
oversize intent: Find the cheapest Samsung-made and larger than 11-inch screen Android tablet, in the price range of 200 to 300 dollars and add it to the cart.
oversize intent: Find the cheapest Samsung-made and larger than 11-inch screen Android tablet, in the price range of 200 to 300 dollars and add it to the cart.
oversize intent: Find the cheapest Samsung-made and larger than 11-inch screen Android tablet, in the price range of 200 to 300 dollars and add it to the cart.
oversize intent: Find the cheapest Samsu

 11%|█         | 11/100 [00:33<09:43,  6.56s/it]

oversize intent: Find the cheapest Samsung-made and larger than 11-inch screen Android tablet, in the price range of 200 to 300 dollars and add it to the cart.


 94%|█████████▍| 94/100 [02:35<00:05,  1.07it/s]

oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march  for 2 adult and a toddler less than a year old.
oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march  for 2 adult and a toddler less than a year old.
oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march  for 2 adult and a toddler less than a year old.
oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march  for 2 adult and a toddler less than a year old.
oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march  for 2 adult and a toddler less than a year old.
oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march  for 2 adult and a toddler less than a year old.
oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march

 95%|█████████▌| 95/100 [02:44<00:16,  3.21s/it]

oversize intent: with a $200 budget check available in central parkto check in on 24th to 27th march  for 2 adult and a toddler less than a year old.


100%|██████████| 100/100 [02:56<00:00,  1.77s/it]


8 46 2208


  1%|          | 1/100 [00:01<01:55,  1.17s/it]

oversize intent: Create a new list named New that includes thr
ee albums, one each from the genres pop, rock and electronic.
oversize intent: Create a new list named New that includes thr
ee albums, one each from the genres pop, rock and electronic.


  2%|▏         | 2/100 [00:08<07:55,  4.86s/it]

oversize intent: Find a projector which accepts crypto currency as payment.
oversize intent: Find a projector which accepts crypto currency as payment.


  3%|▎         | 3/100 [00:14<08:28,  5.25s/it]

oversize intent: Find a projector which accepts crypto currency as payment.
oversize intent: Find camping tents that can fit 6 people and sort the results by price from low to high.


  4%|▍         | 4/100 [00:17<07:03,  4.41s/it]

oversize intent: Find camping tents that can fit 6 people and sort the results by price from low to high.


  5%|▌         | 5/100 [00:19<05:47,  3.66s/it]

oversize intent: Browse camping stoves that have an auto ignition feature.
oversize intent: Order the cheapest climbing shoes in today's deals.
oversize intent: Order the cheapest climbing shoes in today's deals.


  7%|▋         | 7/100 [00:25<04:41,  3.02s/it]

oversize intent: Find a gaming desktop with an rtx4000 GPU, available in stock.
oversize intent: Find a gaming desktop with an rtx4000 GPU, available in stock.
oversize intent: Find a gaming desktop with an rtx4000 GPU, available in stock.


  8%|▊         | 8/100 [00:31<06:09,  4.02s/it]

oversize intent: Find a gaming desktop with an rtx4000 GPU, available in stock.


 11%|█         | 11/100 [00:37<03:36,  2.43s/it]

oversize intent: Find a pair of mens running shoes in black, size 7 extra wide, 4+ stars and under $50 and add them to my cart
oversize intent: Find a pair of mens running shoes in black, size 7 extra wide, 4+ stars and under $50 and add them to my cart
oversize intent: Find a pair of mens running shoes in black, size 7 extra wide, 4+ stars and under $50 and add them to my cart


 12%|█▏        | 12/100 [00:41<04:39,  3.17s/it]

oversize intent: Find a pair of mens running shoes in black, size 7 extra wide, 4+ stars and under $50 and add them to my cart
oversize intent: Browse dog food for puppies that is rated at least 4 stars and sort the results by price from low to high.
oversize intent: Browse dog food for puppies that is rated at least 4 stars and sort the results by price from low to high.
oversize intent: Browse dog food for puppies that is rated at least 4 stars and sort the results by price from low to high.


 13%|█▎        | 13/100 [00:46<05:01,  3.46s/it]

oversize intent: Browse dog food for puppies that is rated at least 4 stars and sort the results by price from low to high.


 19%|█▉        | 19/100 [00:51<01:30,  1.12s/it]

oversize intent: Look for a recently posted part-time Job in Gamestop stores in Fresno, California, and apply.


 39%|███▉      | 39/100 [01:16<00:50,  1.20it/s]

oversize intent: Find an LED TV below $1000.
oversize intent: Find an LED TV below $1000.
oversize intent: Find an LED TV below $1000.
oversize intent: Find an LED TV below $1000.
oversize intent: Find an LED TV below $1000.
oversize intent: Find an LED TV below $1000.
oversize intent: Find an LED TV below $1000.


 40%|████      | 40/100 [01:29<04:29,  4.49s/it]

oversize intent: Find an LED TV below $1000.
oversize intent: Show the Recommended Gaming PCs for someone who plays Fortnite, Overwatch and GTA V at 4k
oversize intent: Show the Recommended Gaming PCs for someone who plays Fortnite, Overwatch and GTA V at 4k


 55%|█████▌    | 55/100 [02:03<01:28,  1.98s/it]

oversize intent: Find a black sleeping pad that is under $40.
oversize intent: Find a black sleeping pad that is under $40.


 56%|█████▌    | 56/100 [02:06<01:47,  2.44s/it]

oversize intent: Find a black sleeping pad that is under $40.


 98%|█████████▊| 98/100 [03:12<00:03,  1.82s/it]

oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price between 20k to 30k.
oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price between 20k to 30k.
oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price between 20k to 30k.
oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price between 20k to 30k.
oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price between 20k to 30k.
oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price between 20k to 30k.
oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price betwe

 99%|█████████▉| 99/100 [03:30<00:06,  6.41s/it]

oversize intent: Find a Chevrolet with lowest mileage in zip 08817 with shipping charges of 99 dollars or less, with a price between 20k to 30k.


100%|██████████| 100/100 [03:31<00:00,  2.11s/it]


9 28 2181


100%|██████████| 9/9 [00:09<00:00,  1.02s/it]

10 3 138





In [9]:
with open("autodl-tmp/mind2web_dpo_train.json", "w") as file:
    json.dump(train_dataset, file, indent=4)

In [10]:
test_task_dataset = []

In [11]:
import json
import datasets

import random
from tqdm import tqdm

for ID in range(3):
    with open(f"autodl-tmp/test_dataset/test_task/test_task_{ID}.json") as file:
        data = json.load(file)

    pos_candidate_na = 0
    total_dataset_num = 0
    
    for dat in tqdm(data):
        intent = dat["confirmed_task"] # + f"(domain {dat['subdomain']})"
        action_history_all = dat["action_reprs"]
    
        for index, d in enumerate(dat["actions"]):
            cleaned_html = d["cleaned_html"].replace("backend_node_id", "id")
            token_num = len(tokenizer(cleaned_html)["input_ids"])
    
            if token_num > 120000:
                print(f"oversize intent: {intent}")
            else:
                html = cleaned_html
                action_history = action_history_all[:index]
    
                op = d["operation"]["op"]
                value = d["operation"]["value"]
                if len(d["pos_candidates"]) > 0:
                    chosen_answer_ = output_template.format(op=op, value=value, id=d["pos_candidates"][0]["backend_node_id"])
                else:
                    pos_candidate_na += 1
                    continue
                
                # random.shuffle(d["neg_candidates"])
                # rand_neg_candidates = d["neg_candidates"][:3]
                
                # for c in rand_neg_candidates: # 1:3 proportion
                #     if op != "CLICK" and random.uniform(0, 1) < 0.33: # 1/3 for type/select -> click
                #         rejected_answer_ = output_template.format(op="CLICK", value="", id=c["backend_node_id"])
                #     else:
                #         rejected_answer_ = output_template.format(op=op, value=value, id=c["backend_node_id"])
                        
                instruction_ = instruction
                input_ = user_input_template.format(intent=intent, html=html, action_history=action_history)
                output_ = chosen_answer_
                
                total_dataset_num += 1
                test_task_dataset.append({
                    "instruction": instruction_,
                    "input": input_,
                    "output": output_
                })

    print(ID, pos_candidate_na, total_dataset_num)

  5%|▌         | 5/100 [00:11<03:43,  2.35s/it]

oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile
oversize intent: search gas pi

  6%|▌         | 6/100 [00:28<11:10,  7.13s/it]

oversize intent: search gas pickup truck in Fremont with 2010 and 2017 with less than 80000 mile


 12%|█▏        | 12/100 [00:43<04:26,  3.02s/it]

oversize intent: Browse the list of top 250 movies and add the first one to my watchlist.
oversize intent: Browse the list of top 250 movies and add the first one to my watchlist.


 13%|█▎        | 13/100 [00:46<04:13,  2.91s/it]

oversize intent: Browse the list of top 250 movies and add the first one to my watchlist.
oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512
oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512
oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512
oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512
oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512
oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512
oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512


 14%|█▍        | 14/100 [00:54<06:22,  4.45s/it]

oversize intent: see Nissan and Honda cars for sale near Kentwood, MI 49512
oversize intent: Build an entry-level pc with an windows 11 64 bit intel i7 CPU with a256gb ssd drive +  4gb ram and adding cheapest component and accessories available.
oversize intent: Build an entry-level pc with an windows 11 64 bit intel i7 CPU with a256gb ssd drive +  4gb ram and adding cheapest component and accessories available.
oversize intent: Build an entry-level pc with an windows 11 64 bit intel i7 CPU with a256gb ssd drive +  4gb ram and adding cheapest component and accessories available.
oversize intent: Build an entry-level pc with an windows 11 64 bit intel i7 CPU with a256gb ssd drive +  4gb ram and adding cheapest component and accessories available.
oversize intent: Build an entry-level pc with an windows 11 64 bit intel i7 CPU with a256gb ssd drive +  4gb ram and adding cheapest component and accessories available.
oversize intent: Build an entry-level pc with an windows 11 64 bit intel i

 15%|█▌        | 15/100 [01:49<28:16, 19.96s/it]

oversize intent: Build an entry-level pc with an windows 11 64 bit intel i7 CPU with a256gb ssd drive +  4gb ram and adding cheapest component and accessories available.
oversize intent: Find 32" Curved monitor and add the third one to the wish list.
oversize intent: Find 32" Curved monitor and add the third one to the wish list.
oversize intent: Find 32" Curved monitor and add the third one to the wish list.


 16%|█▌        | 16/100 [01:56<22:18, 15.94s/it]

oversize intent: Find 32" Curved monitor and add the third one to the wish list.


 27%|██▋       | 27/100 [02:13<01:40,  1.38s/it]

oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th
oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th
oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th
oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th
oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th
oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th
oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th

 28%|██▊       | 28/100 [02:18<03:12,  2.67s/it]

oversize intent: check available hotels with one room for two adult in Harlem less than $200 to check in on Mar 17th and check out on Mar 20th


 45%|████▌     | 45/100 [02:52<02:31,  2.75s/it]

oversize intent: Look for an intern career within Newegg in City of Industry, California, USA, and bookmark it.


 57%|█████▋    | 57/100 [03:10<00:48,  1.13s/it]

oversize intent: Find the most wanted contemporary Jazz album in the US from the last decade.


 64%|██████▍   | 64/100 [03:22<01:02,  1.73s/it]

oversize intent: Check travel requirements for trips between Tokyo and Guangzhou.
oversize intent: Check travel requirements for trips between Tokyo and Guangzhou.
oversize intent: Check travel requirements for trips between Tokyo and Guangzhou.
oversize intent: Check travel requirements for trips between Tokyo and Guangzhou.


 65%|██████▌   | 65/100 [03:26<01:23,  2.38s/it]

oversize intent: Check travel requirements for trips between Tokyo and Guangzhou.


 72%|███████▏  | 72/100 [03:40<00:56,  2.00s/it]

oversize intent: Find the top number one track by the artist "Alan Tam" of all time.


 85%|████████▌ | 85/100 [04:00<00:27,  1.86s/it]

oversize intent: Set the first recommended song on the homepage as a current obsession


 97%|█████████▋| 97/100 [04:21<00:05,  1.96s/it]

oversize intent: Find Chevrolet Silverado with black exterior color and maximum value of $30,000.
oversize intent: Find Chevrolet Silverado with black exterior color and maximum value of $30,000.
oversize intent: Find Chevrolet Silverado with black exterior color and maximum value of $30,000.
oversize intent: Find Chevrolet Silverado with black exterior color and maximum value of $30,000.
oversize intent: Find Chevrolet Silverado with black exterior color and maximum value of $30,000.
oversize intent: Find Chevrolet Silverado with black exterior color and maximum value of $30,000.


 98%|█████████▊| 98/100 [04:30<00:08,  4.01s/it]

oversize intent: Find Chevrolet Silverado with black exterior color and maximum value of $30,000.


100%|██████████| 100/100 [04:35<00:00,  2.76s/it]


0 56 680


  1%|          | 1/100 [00:03<06:21,  3.85s/it]

oversize intent: Add a pair of large Yaktrax traction cleats to the cart.


  2%|▏         | 2/100 [00:08<07:27,  4.56s/it]

oversize intent: Add a pair of large Yaktrax traction cleats to the cart.


  6%|▌         | 6/100 [00:12<02:35,  1.66s/it]

oversize intent: Show crazy credits for the movie " Prometheus".


  7%|▋         | 7/100 [00:14<02:18,  1.49s/it]

oversize intent: Find the most popular movies and showcase those with the highest IMDb ratings.


  8%|▊         | 8/100 [00:15<02:26,  1.59s/it]

oversize intent: Find the statistics of the player Cristiano Ronaldo for the national team of Portugal in all the world cups.
oversize intent: Find the statistics of the player Cristiano Ronaldo for the national team of Portugal in all the world cups.
oversize intent: Find the statistics of the player Cristiano Ronaldo for the national team of Portugal in all the world cups.


  9%|▉         | 9/100 [00:20<03:45,  2.48s/it]

oversize intent: Check the Italian Serie A Schedule fixtures on April 8th 2023
oversize intent: Check the Italian Serie A Schedule fixtures on April 8th 2023
oversize intent: Check the Italian Serie A Schedule fixtures on April 8th 2023
oversize intent: Check the Italian Serie A Schedule fixtures on April 8th 2023


 10%|█         | 10/100 [00:26<05:17,  3.53s/it]

oversize intent: Check the Italian Serie A Schedule fixtures on April 8th 2023
oversize intent: Make a build list with AI for a gaming pc with ryzen 9,rtx 6950xt and  price $2000-$2500.
oversize intent: Make a build list with AI for a gaming pc with ryzen 9,rtx 6950xt and  price $2000-$2500.
oversize intent: Make a build list with AI for a gaming pc with ryzen 9,rtx 6950xt and  price $2000-$2500.


 12%|█▏        | 12/100 [00:33<05:10,  3.53s/it]

oversize intent: Get the lowest priced women's plus size one piece swimsuit in color black with customer rating of atleat 5.


 28%|██▊       | 28/100 [01:06<02:14,  1.86s/it]

oversize intent: Search for job in Miami Florida in Human resources.
oversize intent: Search for job in Miami Florida in Human resources.
oversize intent: Search for job in Miami Florida in Human resources.


 39%|███▉      | 39/100 [01:31<02:00,  1.98s/it]

oversize intent: Find a new drone priced between 25 to 50 dollar and ships from USA with average customer rating of 4 and upwards and save the search at the end.
oversize intent: Find a new drone priced between 25 to 50 dollar and ships from USA with average customer rating of 4 and upwards and save the search at the end.
oversize intent: Find a new drone priced between 25 to 50 dollar and ships from USA with average customer rating of 4 and upwards and save the search at the end.
oversize intent: Find a new drone priced between 25 to 50 dollar and ships from USA with average customer rating of 4 and upwards and save the search at the end.
oversize intent: Find a new drone priced between 25 to 50 dollar and ships from USA with average customer rating of 4 and upwards and save the search at the end.
oversize intent: Find a new drone priced between 25 to 50 dollar and ships from USA with average customer rating of 4 and upwards and save the search at the end.
oversize intent: Find a new 

 40%|████      | 40/100 [01:53<08:09,  8.15s/it]

oversize intent: Find a new drone priced between 25 to 50 dollar and ships from USA with average customer rating of 4 and upwards and save the search at the end.


 41%|████      | 41/100 [01:56<06:17,  6.40s/it]

oversize intent: Find a resort near Patong beach in Phuket, Thailand nearest to the airport from May 9 to May 12 for two with a pay-later option.
oversize intent: Find a resort near Patong beach in Phuket, Thailand nearest to the airport from May 9 to May 12 for two with a pay-later option.
oversize intent: Find a resort near Patong beach in Phuket, Thailand nearest to the airport from May 9 to May 12 for two with a pay-later option.
oversize intent: Find a resort near Patong beach in Phuket, Thailand nearest to the airport from May 9 to May 12 for two with a pay-later option.
oversize intent: Find a resort near Patong beach in Phuket, Thailand nearest to the airport from May 9 to May 12 for two with a pay-later option.


 42%|████▏     | 42/100 [02:02<06:11,  6.40s/it]

oversize intent: Browse the clearance section and filter for women's dresses in size small.
oversize intent: Browse the clearance section and filter for women's dresses in size small.
oversize intent: Browse the clearance section and filter for women's dresses in size small.


 66%|██████▌   | 66/100 [02:37<00:39,  1.17s/it]

oversize intent: Find Toyota Corolla from the year 2018 to 2023 in red color.
oversize intent: Find Toyota Corolla from the year 2018 to 2023 in red color.
oversize intent: Find Toyota Corolla from the year 2018 to 2023 in red color.
oversize intent: Find Toyota Corolla from the year 2018 to 2023 in red color.
oversize intent: Find Toyota Corolla from the year 2018 to 2023 in red color.
oversize intent: Find Toyota Corolla from the year 2018 to 2023 in red color.


 67%|██████▋   | 67/100 [02:44<01:31,  2.78s/it]

oversize intent: Find Toyota Corolla from the year 2018 to 2023 in red color.


 70%|███████   | 70/100 [02:50<01:15,  2.51s/it]

oversize intent: What are the upcoming soccer events on ESPN2?
oversize intent: What are the upcoming soccer events on ESPN2?


 71%|███████   | 71/100 [02:53<01:09,  2.38s/it]

oversize intent: Find the team schedule of the Brooklyn Nets
oversize intent: Find the team schedule of the Brooklyn Nets


 73%|███████▎  | 73/100 [02:56<00:55,  2.06s/it]

oversize intent: Find travel requirements from Amsterdam to Cairo.
oversize intent: Find travel requirements from Amsterdam to Cairo.
oversize intent: Find travel requirements from Amsterdam to Cairo.
oversize intent: Find travel requirements from Amsterdam to Cairo.


 74%|███████▍  | 74/100 [03:00<01:08,  2.64s/it]

oversize intent: Find travel requirements from Amsterdam to Cairo.


 99%|█████████▉| 99/100 [03:39<00:01,  1.22s/it]

oversize intent: upvote a comment on the most relevant kiss chords & tabs


100%|██████████| 100/100 [03:40<00:00,  2.21s/it]


1 29 713


  6%|▌         | 3/52 [00:05<01:22,  1.67s/it]

oversize intent: Search for the playlists "Pop Workout mix" and filtered by tag #Dance & edm.


 35%|███▍      | 18/52 [00:42<01:18,  2.32s/it]

oversize intent: Find bluetooth vertical mouse with most reviews and add two to my shopping cart..
oversize intent: Find bluetooth vertical mouse with most reviews and add two to my shopping cart..
oversize intent: Find bluetooth vertical mouse with most reviews and add two to my shopping cart..
oversize intent: Find bluetooth vertical mouse with most reviews and add two to my shopping cart..
oversize intent: Find bluetooth vertical mouse with most reviews and add two to my shopping cart..
oversize intent: Find bluetooth vertical mouse with most reviews and add two to my shopping cart..


 37%|███▋      | 19/52 [00:53<02:40,  4.86s/it]

oversize intent: Find bluetooth vertical mouse with most reviews and add two to my shopping cart..


 56%|█████▌    | 29/52 [01:05<00:49,  2.15s/it]

oversize intent: Add a set of wireless headphones to your cart with a budget of $100 or less, that has an active noise-cancelling feature.


 96%|█████████▌| 50/52 [01:38<00:03,  1.51s/it]

oversize intent: Show me products from Calvin Klein brand menswear list.


 98%|█████████▊| 51/52 [01:38<00:01,  1.31s/it]

oversize intent: Add the cheapest SSD to my cart
oversize intent: Add the cheapest SSD to my cart
oversize intent: Add the cheapest SSD to my cart


100%|██████████| 52/52 [01:46<00:00,  2.06s/it]

oversize intent: Add the cheapest SSD to my cart
2 16 435





In [12]:
with open("autodl-tmp/mind2web_dpo_test_task.json", "w") as file:
    json.dump(test_task_dataset, file, indent=4)