# Action output Data

## Collect data from walkthrough to json

In [None]:
import json
from jericho import *

# Replay the walkthrough returned by the environment for Zork I and log one
# JSON record per state: what the player sees, what they carry, which actions
# the environment reports as valid, and the walkthrough ("gold") action taken
# from that state.
env = FrotzEnv("../../../z-machine-games-master/jericho-game-suite/zork1.z5")
initial_observation, info = env.reset()
walkthrough = env.get_walkthrough()
with open('raw/zork1_z5_walkthrough.jsonl', 'w', encoding='utf-8') as f:
    observation = initial_observation
    for step, act in enumerate(walkthrough):
        # Every record uses the same schema. The key is "candidates" (plural)
        # because that is what ZorkSFTDataset reads, and the inventory is
        # stored as item names so that json.dumps can always serialise it.
        data = {
            "observation": observation,
            "inventory": [item.name for item in env.get_inventory()],
            "candidates": env.get_valid_actions(),
            "gold_action": act
        }
        f.write(json.dumps(data) + '\n')  # One JSON object per line

        # Advance the game only after the pre-action state has been logged;
        # pairing each state with its own action avoids indexing past the end
        # of the walkthrough on the final step.
        return_ = env.step(act)
        print(return_)
        observation = return_[0]

## Transfer to LoRA format dataset

In [None]:
import json
from torch.utils.data import Dataset
import torch
import argparse
import os

# Fixed instruction header; ZorkSFTDataset.build_prompt starts every prompt with it.
SYSTEM_PROMPT = (
    "You are playing the interactive fiction game Zork.\n"
    "Your goal is to explore the world, collect treasures, solve puzzles, and maximize your score.\n"
    "You interact with the world by typing commands like \"go north\", \"open door\", \"take lamp\", etc.\n"
    "You see the world through textual descriptions (observations), and your inventory tells you what you're carrying.\n"
    "At each step, you will be shown some candidate actions for reference.\n"
    "You can either choose one of them, or generate your own appropriate action to proceed.\n\n"
)

class ZorkSFTDataset(Dataset):
    """Turn a walkthrough JSONL file into prompt/target pairs for fine-tuning.

    Each input line is a JSON object with the keys ``observation``,
    ``inventory``, ``candidates`` and ``gold_action``.
    """

    def __init__(self, path, tokenizer=None, max_history=3, max_length=1024):
        """Load ``path`` and build the sample list in ``self.data``.

        Args:
            path: Walkthrough JSONL file to read.
            tokenizer: Stored on the instance; not used by this class itself.
            max_history: Maximum number of preceding records kept as history.
            max_length: Stored on the instance; not used by this class itself.
        """
        self.tokenizer = tokenizer
        self.max_history = max_history
        self.max_length = max_length
        self.data = self.build_from_walkthrough(path)

    def build_from_walkthrough(self, path):
        """Read the walkthrough and return one sample dict per usable record.

        The first and the last record are not emitted as samples; the first
        one only ever appears as history of later samples.
        """
        # Explicit UTF-8 so reading does not depend on the platform default
        # encoding; blank lines (e.g. a trailing newline) are skipped instead
        # of crashing json.loads.
        with open(path, "r", encoding="utf-8") as f:
            lines = [json.loads(line) for line in f if line.strip()]

        samples = []
        for i in range(1, len(lines) - 1):
            current = lines[i]
            samples.append({
                "history": lines[max(0, i - self.max_history):i],
                "observation": current["observation"],
                "inventory": current["inventory"],
                "candidates": current["candidates"],
                "gold_action": current["gold_action"],
            })
        return samples

    def build_prompt(self, item):
        """Render one sample as the text prompt shown to the model."""
        parts = [SYSTEM_PROMPT]

        if item.get("history"):
            parts.append("--- Previous Actions & Observations ---\n")
            parts.extend(
                f"{step['observation'].strip()} \n>{step['gold_action'].strip()}\n"
                for step in item["history"]
            )
            parts.append("\n")

        parts.append("--- Current Observation ---\n")
        parts.append(f"Observation: {item['observation']}\n")
        parts.append(f"Inventory: {item['inventory']}\n")

        if item["candidates"]:
            parts.append("Choices:\n")
            parts.extend(
                f"({i}) {choice}\n"
                for i, choice in enumerate(item["candidates"], 1)
            )
        parts.append("\nWhat should you do next?\nAnswer:")
        return "".join(parts)

    def to_jsonl(self, output_path):
        """Write ``{"context": prompt, "target": gold_action}`` lines to ``output_path``."""
        # ensure_ascii=False emits raw non-ASCII characters, so the file is
        # opened as UTF-8 explicitly.
        with open(output_path, "w", encoding="utf-8") as f:
            for item in self.data:
                record = {
                    "context": self.build_prompt(item),
                    "target": item["gold_action"],
                }
                f.write(json.dumps(record, ensure_ascii=False) + "\n")


# Convert the raw Zork I walkthrough into context/target records for LoRA fine-tuning.
dataset = ZorkSFTDataset('raw/zork1_z5_walkthrough.jsonl')
dataset.to_jsonl('raw/zork1_wt_lora.jsonl')


## Tokenizer

In [None]:
import argparse
import json
from tqdm import tqdm

import datasets
import transformers


def preprocess(tokenizer, config, example, max_seq_length):
    """Tokenize one context/target record into a single id sequence.

    The context and the target are encoded separately (each truncated to
    ``max_seq_length``; the target without special tokens), concatenated, and
    terminated with ``config.eos_token_id``. The combined sequence can
    therefore be longer than ``max_seq_length``.

    Returns:
        dict with ``input_ids`` (the combined ids) and ``seq_len`` (number of
        context ids, i.e. the offset at which the target starts).
    """
    context, answer = example["context"], example["target"]
    limits = {"max_length": max_seq_length, "truncation": True}

    context_ids = tokenizer.encode(context, **limits)
    answer_ids = tokenizer.encode(answer, add_special_tokens=False, **limits)

    sequence = context_ids + answer_ids
    sequence.append(config.eos_token_id)
    return {"input_ids": sequence, "seq_len": len(context_ids)}


def read_jsonl(model_name, path, max_seq_length, skip_overlength=False,
               model_root="/data3/whr/zhk/huggingface"):
    """Yield tokenized features for every record of a context/target JSONL file.

    Args:
        model_name: Directory name of the model below ``model_root``.
        path: JSONL file whose lines hold ``context`` and ``target`` strings.
        max_seq_length: Maximum number of ids kept per example.
        skip_overlength: When true, examples longer than ``max_seq_length``
            are dropped; otherwise they are cut to ``max_seq_length`` ids.
        model_root: Directory containing the local model checkpoints.

    Yields:
        The dict produced by ``preprocess`` with ``input_ids`` capped at
        ``max_seq_length`` entries.
    """
    # Tokenizer and config must come from the same checkpoint; previously the
    # tokenizer was looked up by the bare model name while the config was read
    # from the local model directory.
    model_path = f"{model_root}/{model_name}"
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    # device_map is a model-loading option and has no meaning for a config.
    config = transformers.AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    # The JSONL file is written with ensure_ascii=False, so decode it as UTF-8
    # regardless of the platform default; skip blank lines.
    with open(path, "r", encoding="utf-8") as f:
        records = [line for line in f if line.strip()]
    for line in tqdm(records):
        example = json.loads(line)
        feature = preprocess(tokenizer, config, example, max_seq_length)
        if skip_overlength and len(feature["input_ids"]) > max_seq_length:
            continue
        feature["input_ids"] = feature["input_ids"][:max_seq_length]
        yield feature



# Tokenize the Zork I SFT records and store them as a Hugging Face dataset.
jsonl_path = "raw/zork1_wt_lora.jsonl"
max_seq_length = 1024
model_name = "qwen-2.5-3B-instruct"
skip_overlength = False
# Resolve the real output directory before logging it; previously the log
# showed "processed/" while the dataset was saved to processed/<model_name>.
save_path = f'processed/{model_name}'

print("#> Tokenizing dataset...")
print("#> Input path: {}".format(jsonl_path))
print("#> Output path: {}".format(save_path))
print("#> Max sequence length: {}".format(max_seq_length))
print("#> Skip overlength: {}".format(skip_overlength))



dataset = datasets.Dataset.from_generator(
    lambda: read_jsonl(model_name, jsonl_path, max_seq_length, skip_overlength)
)
dataset.save_to_disk(save_path)

print("#> Tokenization finished!", "Total examples:", len(dataset))



## Inference Data

In [2]:
import json
from torch.utils.data import Dataset
import torch
import argparse
import os

# Fixed instruction header placed at the start of every inference prompt by build_prompt.
SYSTEM_PROMPT = (
    "You are playing the interactive fiction game Zork.\n"
    "Your goal is to explore the world, collect treasures, solve puzzles, and maximize your score.\n"
    "You interact with the world by typing commands like \"go north\", \"open door\", \"take lamp\", etc.\n"
    "You see the world through textual descriptions (observations), and your inventory tells you what you're carrying.\n"
    "At each step, you will be shown some candidate actions for reference.\n"
    "You can either choose one of them, or generate your own appropriate action to proceed.\n\n"
)

class ZorkSFTDataset(Dataset):
    """Build inference prompts (context only, no target) from a walkthrough JSONL file.

    Each input line is a JSON object with the keys ``observation``,
    ``inventory``, ``candidates`` and ``gold_action``.
    """

    def __init__(self, path, tokenizer=None, max_history=3, max_length=1024):
        """Load ``path`` and build the sample list in ``self.data``.

        Args:
            path: Walkthrough JSONL file to read.
            tokenizer: Stored on the instance; not used by this class itself.
            max_history: Maximum number of preceding records kept as history.
            max_length: Stored on the instance; not used by this class itself.
        """
        self.tokenizer = tokenizer
        self.max_history = max_history
        self.max_length = max_length
        self.data = self.build_from_walkthrough(path)

    def build_from_walkthrough(self, path):
        """Return one sample dict per record, excluding the first and last record."""
        # Read as UTF-8 explicitly and ignore blank lines so a trailing
        # newline does not make json.loads fail.
        with open(path, "r", encoding="utf-8") as f:
            records = [json.loads(raw) for raw in f if raw.strip()]

        samples = []
        for idx in range(1, len(records) - 1):
            entry = records[idx]
            window_start = max(0, idx - self.max_history)
            samples.append({
                "history": records[window_start:idx],
                "observation": entry["observation"],
                "inventory": entry["inventory"],
                "candidates": entry["candidates"],
                "gold_action": entry["gold_action"],
            })
        return samples

    def build_prompt(self, item):
        """Render one sample as the text prompt shown to the model."""
        chunks = [SYSTEM_PROMPT]

        if item.get("history"):
            chunks.append("--- Previous Actions & Observations ---\n")
            for step in item["history"]:
                chunks.append(
                    f"{step['observation'].strip()} \n>{step['gold_action'].strip()}\n"
                )
            chunks.append("\n")

        chunks.append("--- Current Observation ---\n")
        chunks.append(f"Observation: {item['observation']}\n")
        chunks.append(f"Inventory: {item['inventory']}\n")

        if item["candidates"]:
            chunks.append("Choices:\n")
            for i, choice in enumerate(item["candidates"], 1):
                chunks.append(f"({i}) {choice}\n")
        chunks.append("\nWhat should you do next?\nAnswer:")
        return "".join(chunks)

    def to_jsonl(self, output_path):
        """Write one ``{"context": prompt}`` line per sample to ``output_path``."""
        # ensure_ascii=False emits raw non-ASCII characters, so write UTF-8 explicitly.
        with open(output_path, "w", encoding="utf-8") as f:
            for item in self.data:
                record = {"context": self.build_prompt(item)}
                f.write(json.dumps(record, ensure_ascii=False) + "\n")


# Write context-only prompts built from the Zork I walkthrough for inference.
dataset = ZorkSFTDataset('raw/zork1_z5_walkthrough.jsonl')
dataset.to_jsonl('raw/zork1_wt_inference.jsonl')


# Reward Data

In [1]:
import json
from jericho import *

# Create the game environment from the Zork I story file.
env = FrotzEnv("../../../z-machine-games-master/jericho-game-suite/zork1.z5")

In [6]:
# Notebook expression cell: show the name of the player's current location object.
env.get_player_location().name

'West House'

## walkthrough data

In [1]:
import json
from jericho import *

# Replay the Zork I walkthrough and log, for every action, the state it was
# taken in (location and inventory names) plus the reward returned by the step.
env = FrotzEnv("../../../z-machine-games-master/jericho-game-suite/zork1.z5")
initial_observation, info = env.reset()
walkthrough = env.get_walkthrough()

with open('raw/zork1_z5_walkthrough_reward.jsonl', 'w', encoding='utf-8') as f:
    for gold_action in walkthrough:
        # Capture the pre-action state first; the reward is only known after stepping.
        data = {
            "location": env.get_player_location().name,
            "inventory": [item.name for item in env.get_inventory()],
            "gold_action": gold_action,
        }

        observation, reward, done, info = env.step(gold_action)
        data["reward"] = reward

        f.write(json.dumps(data) + '\n')

        # Stop as soon as the environment reports the episode as finished.
        if done:
            break


I beg your pardon?


West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


Opening the small mailbox reveals a leaflet.


It is already open.


(leaflet)
(Taken)
"WELCOME TO ZORK!

ZORK is a game of adventure, danger, and low cunning. In it you will explore some of the most amazing territory ever seen by mortals. No computer should be without one!"



You already have that!


(leaflet)
"WELCOME TO ZORK!

ZORK is a game of adventure, danger, and low cunning. In it you will explore some of the most amazing territory ever seen by mortals. No computer should be without one!"



North of House
You are facing the north side of a white house. There is no door here, and all the windows are boarded up. To the north a narrow path winds through the trees.




## LoRA Format

In [8]:
import json
from torch.utils.data import Dataset

# Instruction header for the reward-model prompts built by ZorkRMHistoryDataset.build_prompt.
# NOTE(review): "You should penalty actions" reads as a typo for "penalize"; the text is
# left untouched here because it is part of the generated training data.
SYSTEM_PROMPT = (
    "You are a reward model trained to evaluate the quality of actions taken in the game Zork.\n"
    "You are given the player's current location, their inventory, and the action they just performed.\n"
    "Your task is to score how beneficial the action is on a scale from 0 to 1, where:\n"
    "- 0 means the action is useless or redundant\n"
    "- 1 means the action is extremely valuable for game progression\n\n"
    "You should reward actions that:\n"
    "- Collect or use key items (e.g., lamp, keys)\n"
    "- Explore new areas\n"
    "- Solve puzzles (e.g., moving rug, praying at mirror)\n"
    "- Defeat enemies\n"
    "- Store treasures\n"
    "- Progress the story\n\n"
    "You should penalty actions that:\n"
    "- Useless direction move and repeat\n"
)

class ZorkRMHistoryDataset(Dataset):
    """Build reward-model training records from a reward-labelled walkthrough.

    Each input line is a JSON object that may carry ``location``,
    ``inventory``, ``gold_action`` and ``reward``; missing keys fall back to
    ``"Unknown"``, ``[]``, ``""`` and ``0`` respectively.
    """

    def __init__(self, path, tokenizer=None, max_history=3, max_length=1024):
        """Load ``path`` and build the sample list in ``self.data``.

        Args:
            path: Reward-labelled walkthrough JSONL file.
            tokenizer: Stored on the instance; not used by this class itself.
            max_history: Maximum number of preceding records kept as history.
            max_length: Stored on the instance; not used by this class itself.
        """
        self.tokenizer = tokenizer
        self.max_history = max_history
        self.max_length = max_length
        self.data = self.build_from_walkthrough(path)

    def build_from_walkthrough(self, path):
        """Return samples whose reward is min-max normalised over the whole file.

        The first and the last record are not emitted as samples. An empty
        file yields an empty list.
        """
        with open(path, "r", encoding="utf-8") as f:
            # Skip blank lines so a trailing newline does not break json.loads.
            lines = [json.loads(line) for line in f if line.strip()]

        # min()/max() raise on an empty sequence; nothing to build in that case.
        if not lines:
            return []

        raw_rewards = [line.get("reward", 0) for line in lines]
        min_reward = min(raw_rewards)
        max_reward = max(raw_rewards)
        # Avoid dividing by zero when every reward is identical.
        reward_range = max_reward - min_reward if max_reward > min_reward else 1
        print(max_reward)

        samples = []
        for i in range(1, len(lines) - 1):
            current = lines[i]
            reward = current.get("reward", 0)
            samples.append({
                "history": lines[max(0, i - self.max_history):i],
                "location": current.get("location", "Unknown"),
                "inventory": current.get("inventory", []),
                "gold_action": current.get("gold_action", ""),
                "reward": (reward - min_reward) / reward_range,
            })
        return samples

    def build_prompt(self, item):
        """Render one sample as the scoring prompt shown to the reward model."""
        parts = [SYSTEM_PROMPT]

        if item.get("history"):
            parts.append("--- Previous Actions & Observations ---\n")
            parts.extend(
                f"location:{step['location'].strip()} \n inventory:{step['inventory']} \n>action:{step['gold_action'].strip()}\n\n"
                for step in item["history"]
            )
            parts.append("\n")

        parts.append("--- Current Observation ---\n")
        parts.append(f"Location: {item['location']}\n")
        parts.append(f"Inventory: {item['inventory']}\n")
        parts.append(f"Action: {item['gold_action']}\n\n")
        parts.append("Score this action from 0 to 1:\nAnswer:")
        return "".join(parts)

    def to_jsonl(self, output_path):
        """Write ``{"context": prompt, "target": str(reward)}`` lines to ``output_path``."""
        with open(output_path, "w", encoding="utf-8") as f:
            for item in self.data:
                record = {
                    "context": self.build_prompt(item),
                    "target": str(item["reward"]),
                }
                f.write(json.dumps(record, ensure_ascii=False) + "\n")


# Usage example
dataset = ZorkRMHistoryDataset('raw/zork1_walkthrough_reward_relabel.jsonl')
dataset.to_jsonl('raw/zork1_rm_lora_dataset_penalty.jsonl')


10


## Tokenizer

In [5]:
import argparse
import json
from tqdm import tqdm

import datasets
import transformers


def preprocess(tokenizer, config, example, max_seq_length):
    """Encode a context/target record as ``context ids + target ids + EOS``.

    Both parts are truncated to ``max_seq_length`` independently, and only the
    context is encoded with the tokenizer's special tokens. ``seq_len`` in the
    result is the number of context ids.
    """
    prompt, target = example["context"], example["target"]

    def _encode(text, **extra):
        # Shared truncation settings for both parts of the record.
        return tokenizer.encode(text, max_length=max_seq_length, truncation=True, **extra)

    prompt_ids = _encode(prompt)
    target_ids = _encode(target, add_special_tokens=False)
    return {
        "input_ids": prompt_ids + target_ids + [config.eos_token_id],
        "seq_len": len(prompt_ids),
    }


def read_jsonl(model_name, path, max_seq_length, skip_overlength=False,
               model_root="/data3/whr/zhk/huggingface"):
    """Yield tokenized features for every record of a context/target JSONL file.

    Args:
        model_name: Directory name of the model below ``model_root``.
        path: JSONL file whose lines hold ``context`` and ``target`` strings.
        max_seq_length: Maximum number of ids kept per example.
        skip_overlength: When true, examples longer than ``max_seq_length``
            are dropped; otherwise they are cut to ``max_seq_length`` ids.
        model_root: Directory containing the local model checkpoints.

    Yields:
        The dict produced by ``preprocess`` with ``input_ids`` capped at
        ``max_seq_length`` entries.
    """
    model_path = f"{model_root}/{model_name}"
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    # device_map is a model-loading option and has no meaning for a config.
    config = transformers.AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    # The input is written as UTF-8 with ensure_ascii=False, so decode it as
    # UTF-8 instead of relying on the platform default; skip blank lines.
    with open(path, "r", encoding="utf-8") as f:
        records = [line for line in f if line.strip()]
    for line in tqdm(records):
        example = json.loads(line)
        feature = preprocess(tokenizer, config, example, max_seq_length)
        if skip_overlength and len(feature["input_ids"]) > max_seq_length:
            continue
        feature["input_ids"] = feature["input_ids"][:max_seq_length]
        yield feature



# Tokenize the reward-model records and store them as a Hugging Face dataset.
jsonl_path = "raw/zork1_rm_lora_dataset_penalty.jsonl"
max_seq_length = 1024
model_name = "qwen-2.5-1.5B-instruct"
skip_overlength = False
# Resolve the real output directory before logging it; previously the log
# showed "processed/" while the dataset was saved to processed/<model_name>.
save_path = f'processed/{model_name}'

print("#> Tokenizing dataset...")
print("#> Input path: {}".format(jsonl_path))
print("#> Output path: {}".format(save_path))
print("#> Max sequence length: {}".format(max_seq_length))
print("#> Skip overlength: {}".format(skip_overlength))



dataset = datasets.Dataset.from_generator(
    lambda: read_jsonl(model_name, jsonl_path, max_seq_length, skip_overlength)
)
dataset.save_to_disk(save_path)

print("#> Tokenization finished!", "Total examples:", len(dataset))



#> Tokenizing dataset...
#> Input path: raw/zork1_rm_lora_dataset_penalty.jsonl
#> Output path: processed/
#> Max sequence length: 1024
#> Skip overlength: False


Saving the dataset (0/1 shards):   0%|          | 0/396 [00:00<?, ? examples/s]

#> Tokenization finished! Total examples: 396


## Data Augmentation

In [7]:
import json
from torch.utils.data import Dataset
import random

# Replacement verbs used for augmentation, keyed by the first word of an action.
# Lookup is an exact, case-sensitive match, hence the separate lower-case and
# capitalised entries.
ACTION_SYNONYMS = {
    "get": ["take", "grab", "pick up", "Take", "Grab", "Pick up"],
    "Get": ["Take", "Grab", "Pick up", "take", "grab", "pick up"],
    "put": ["place", "drop"],
    "Put": ["Place", "Drop"],
    "drop": ["leave", "release"],
    "Drop": ["Leave", "Release"],
    "read": ["look at", "examine"],
    "Read": ["Look at", "Examine"],
    "kill": ["attack", "strike"],
    "Kill": ["Attack", "Strike"],
    "light": ["turn on"],
    "Light": ["Turn on"],
    "douse": ["extinguish", "put out"],
    "Douse": ["Extinguish", "Put out"],
}


# Instruction header for the reward-model prompts built by ZorkRMHistoryDataset.build_prompt.
# NOTE(review): "You should penalty actions" reads as a typo for "penalize"; the text is
# left untouched here because it is part of the generated training data.
SYSTEM_PROMPT = (
    "You are a reward model trained to evaluate the quality of actions taken in the game Zork.\n"
    "You are given the player's current location, their inventory, and the action they just performed.\n"
    "Your task is to score how beneficial the action is on a scale from 0 to 1, where:\n"
    "- 0 means the action is useless or redundant\n"
    "- 1 means the action is extremely valuable for game progression\n\n"
    "You should reward actions that:\n"
    "- Collect or use key items (e.g., lamp, keys)\n"
    "- Explore new areas\n"
    "- Solve puzzles (e.g., moving rug, praying at mirror)\n"
    "- Defeat enemies\n"
    "- Store treasures\n"
    "- Progress the story\n\n"
    "You should penalty actions that:\n"
    "- Useless direction move and repeat\n"
)

class ZorkRMHistoryDataset(Dataset):
    """Build reward-model records from a walkthrough, with verb-synonym augmentation.

    For every sample whose action starts with a verb listed in
    ``ACTION_SYNONYMS``, one extra sample is added in which that verb is
    replaced by a randomly chosen synonym; the reward is kept unchanged.
    """

    def __init__(self, path, tokenizer=None, max_history=3, max_length=1024, seed=None):
        """Load ``path`` and build the sample list in ``self.data``.

        Args:
            path: Reward-labelled walkthrough JSONL file.
            tokenizer: Stored on the instance; not used by this class itself.
            max_history: Maximum number of preceding records kept as history.
            max_length: Stored on the instance; not used by this class itself.
            seed: Optional seed for the synonym choice. With a seed the
                augmented dataset is reproducible; without one the global
                ``random`` state is used, as before.
        """
        self.tokenizer = tokenizer
        self.max_history = max_history
        self.max_length = max_length
        self._rng = random.Random(seed) if seed is not None else random
        self.data = self.build_from_walkthrough(path)

    def build_from_walkthrough(self, path):
        """Return original and augmented samples with min-max normalised rewards.

        The first and the last record are not emitted as samples. An empty
        file yields an empty list.
        """
        with open(path, "r", encoding="utf-8") as f:
            # Skip blank lines so a trailing newline does not break json.loads.
            lines = [json.loads(line) for line in f if line.strip()]

        # min()/max() raise on an empty sequence; nothing to build in that case.
        if not lines:
            return []

        raw_rewards = [line.get("reward", 0) for line in lines]
        min_reward = min(raw_rewards)
        max_reward = max(raw_rewards)
        # Avoid dividing by zero when every reward is identical.
        reward_range = max_reward - min_reward if max_reward > min_reward else 1
        print(max_reward)

        rng = getattr(self, "_rng", random)
        samples = []
        for i in range(1, len(lines) - 1):
            current = lines[i]
            action = current.get("gold_action", "")
            sample = {
                "history": lines[max(0, i - self.max_history):i],
                "location": current.get("location", "Unknown"),
                "inventory": current.get("inventory", []),
                "gold_action": action,
                "reward": (current.get("reward", 0) - min_reward) / reward_range,
            }
            samples.append(sample)

            # Augmentation keys on the first whitespace-separated word only.
            words = action.split()
            if words and words[0] in ACTION_SYNONYMS:
                verb = words[0]
                synonym = rng.choice(ACTION_SYNONYMS[verb])
                # The verb is the first word, so replacing the first
                # occurrence swaps exactly that word.
                samples.append({**sample, "gold_action": action.replace(verb, synonym, 1)})
        return samples

    def build_prompt(self, item):
        """Render one sample as the scoring prompt shown to the reward model."""
        parts = [SYSTEM_PROMPT]

        if item.get("history"):
            parts.append("--- Previous Actions & Observations ---\n")
            parts.extend(
                f"location:{step['location'].strip()} \n inventory:{step['inventory']} \n>action:{step['gold_action'].strip()}\n\n"
                for step in item["history"]
            )
            parts.append("\n")

        parts.append("--- Current Observation ---\n")
        parts.append(f"Location: {item['location']}\n")
        parts.append(f"Inventory: {item['inventory']}\n")
        parts.append(f"Action: {item['gold_action']}\n\n")
        parts.append("Score this action from 0 to 1:\nAnswer:")
        return "".join(parts)

    def to_jsonl(self, output_path):
        """Write ``{"context": prompt, "target": str(reward)}`` lines to ``output_path``."""
        with open(output_path, "w", encoding="utf-8") as f:
            for item in self.data:
                record = {
                    "context": self.build_prompt(item),
                    "target": str(item["reward"]),
                }
                f.write(json.dumps(record, ensure_ascii=False) + "\n")


# Usage example
dataset = ZorkRMHistoryDataset('raw/zork1_walkthrough_reward_relabel.jsonl')
dataset.to_jsonl('raw/zork1_rm_lora_dataset_penalty_argumented.jsonl')


10


## Zork-Ⅱ

In [10]:
import json
from jericho import *

# Replay the walkthrough returned by the environment for Zork II and log one
# JSON record per state, paired with the (lower-cased) walkthrough action
# taken from that state.
env = FrotzEnv("../../../z-machine-games-master/jericho-game-suite/zork2.z5")
initial_observation, info = env.reset()
walkthrough = env.get_walkthrough()

with open('raw/zork2_z5_walkthrough.jsonl', 'w', encoding='utf-8') as f:
    observation = initial_observation
    for act in walkthrough:
        # The inventory is stored as item names in every record (including the
        # first one), so json.dumps never receives raw game objects.
        data = {
            "observation": observation,
            "inventory": [item.name for item in env.get_inventory()],
            "candidates": env.get_valid_actions(),
            "gold_action": act.lower()
        }
        f.write(json.dumps(data) + '\n')

        # Step with the original action text; pairing each state with its own
        # action avoids any out-of-range lookup, also for an empty walkthrough.
        return_ = env.step(act)
        observation = return_[0]

In [12]:
import json
from torch.utils.data import Dataset
import torch
import argparse
import os

# Fixed instruction header placed at the start of every Zork II prompt by build_prompt.
SYSTEM_PROMPT = (
    "You are playing the interactive fiction game Zork.\n"
    "Your goal is to explore the world, collect treasures, solve puzzles, and maximize your score.\n"
    "You interact with the world by typing commands like \"go north\", \"open door\", \"take lamp\", etc.\n"
    "You see the world through textual descriptions (observations), and your inventory tells you what you're carrying.\n"
    "At each step, you will be shown some candidate actions for reference.\n"
    "You can either choose one of them, or generate your own appropriate action to proceed.\n\n"
)

class ZorkSFTDataset(Dataset):
    """Convert a walkthrough JSONL file into context/target fine-tuning records.

    Each input line is a JSON object with the keys ``observation``,
    ``inventory``, ``candidates`` and ``gold_action``.
    """

    def __init__(self, path, tokenizer=None, max_history=3, max_length=1024):
        """Load ``path`` and build the sample list in ``self.data``.

        Args:
            path: Walkthrough JSONL file to read.
            tokenizer: Stored on the instance; not used by this class itself.
            max_history: Maximum number of preceding records kept as history.
            max_length: Stored on the instance; not used by this class itself.
        """
        self.tokenizer = tokenizer
        self.max_history = max_history
        self.max_length = max_length
        self.data = self.build_from_walkthrough(path)

    def build_from_walkthrough(self, path):
        """Return one sample dict per record, excluding the first and last record."""
        # The walkthrough file is written as UTF-8, so read it as UTF-8 rather
        # than with the platform default; blank lines are ignored.
        with open(path, "r", encoding="utf-8") as f:
            lines = [json.loads(line) for line in f if line.strip()]

        copied_keys = ("observation", "inventory", "candidates", "gold_action")
        samples = []
        for i in range(1, len(lines) - 1):
            sample = {"history": lines[max(0, i - self.max_history):i]}
            # Plain indexing keeps the KeyError for records missing a field.
            sample.update((key, lines[i][key]) for key in copied_keys)
            samples.append(sample)
        return samples

    def build_prompt(self, item):
        """Render one sample as the text prompt shown to the model."""
        parts = [SYSTEM_PROMPT]

        if item.get("history"):
            parts.append("--- Previous Actions & Observations ---\n")
            parts.extend(
                f"{step['observation'].strip()} \n>{step['gold_action'].strip()}\n"
                for step in item["history"]
            )
            parts.append("\n")

        parts.append("--- Current Observation ---\n")
        parts.append(f"Observation: {item['observation']}\n")
        parts.append(f"Inventory: {item['inventory']}\n")

        if item["candidates"]:
            parts.append("Choices:\n")
            parts.extend(
                f"({i}) {choice}\n"
                for i, choice in enumerate(item["candidates"], 1)
            )
        parts.append("\nWhat should you do next?\nAnswer:")
        return "".join(parts)

    def to_jsonl(self, output_path):
        """Write ``{"context": prompt, "target": gold_action}`` lines to ``output_path``."""
        # ensure_ascii=False emits raw non-ASCII characters, so write UTF-8 explicitly.
        with open(output_path, "w", encoding="utf-8") as f:
            for item in self.data:
                record = {
                    "context": self.build_prompt(item),
                    "target": item["gold_action"],
                }
                f.write(json.dumps(record, ensure_ascii=False) + "\n")


# Convert the raw Zork II walkthrough into context/target records for LoRA fine-tuning.
dataset = ZorkSFTDataset('raw/zork2_z5_walkthrough.jsonl')
dataset.to_jsonl('raw/zork2_wt_lora.jsonl')


In [None]:
import argparse
import json
from tqdm import tqdm

import datasets
import transformers


def preprocess(tokenizer, config, example, max_seq_length):
    """Build the token id sequence for one context/target record.

    Returns a dict with ``input_ids`` (context ids, then target ids encoded
    without special tokens, then ``config.eos_token_id``) and ``seq_len``
    (the number of context ids). Each part is truncated to ``max_seq_length``
    on its own, so the combined sequence may exceed that length.
    """
    source_text = example["context"]
    answer_text = example["target"]

    source_ids = tokenizer.encode(
        source_text, max_length=max_seq_length, truncation=True
    )
    answer_ids = tokenizer.encode(
        answer_text, max_length=max_seq_length, truncation=True, add_special_tokens=False
    )

    feature = {"input_ids": source_ids + answer_ids, "seq_len": len(source_ids)}
    feature["input_ids"] += [config.eos_token_id]
    return feature


def read_jsonl(model_name, path, max_seq_length, skip_overlength=False,
               model_root="/data3/whr/zhk/huggingface"):
    """Yield tokenized features for every record of a context/target JSONL file.

    Args:
        model_name: Directory name of the model below ``model_root``.
        path: JSONL file whose lines hold ``context`` and ``target`` strings.
        max_seq_length: Maximum number of ids kept per example.
        skip_overlength: When true, examples longer than ``max_seq_length``
            are dropped; otherwise they are cut to ``max_seq_length`` ids.
        model_root: Directory containing the local model checkpoints.

    Yields:
        The dict produced by ``preprocess`` with ``input_ids`` capped at
        ``max_seq_length`` entries.
    """
    # Tokenizer and config must come from the same checkpoint; previously the
    # tokenizer was looked up by the bare model name while the config was read
    # from the local model directory.
    model_path = f"{model_root}/{model_name}"
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    # device_map is a model-loading option and has no meaning for a config.
    config = transformers.AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    # The JSONL file is written with ensure_ascii=False, so decode it as UTF-8
    # regardless of the platform default; skip blank lines.
    with open(path, "r", encoding="utf-8") as f:
        records = [line for line in f if line.strip()]
    for line in tqdm(records):
        example = json.loads(line)
        feature = preprocess(tokenizer, config, example, max_seq_length)
        if skip_overlength and len(feature["input_ids"]) > max_seq_length:
            continue
        feature["input_ids"] = feature["input_ids"][:max_seq_length]
        yield feature



# Tokenize the Zork II SFT records and store them as a Hugging Face dataset.
jsonl_path = "raw/zork2_wt_lora.jsonl"
max_seq_length = 1024
model_name = "qwen-2.5-0.5B-instruct"
skip_overlength = False
# Resolve the real output directory before logging it; previously the log
# showed "processed/" while the dataset was saved to processed/<model_name>_z2.
save_path = f'processed/{model_name}_z2'

print("#> Tokenizing dataset...")
print("#> Input path: {}".format(jsonl_path))
print("#> Output path: {}".format(save_path))
print("#> Max sequence length: {}".format(max_seq_length))
print("#> Skip overlength: {}".format(skip_overlength))



dataset = datasets.Dataset.from_generator(
    lambda: read_jsonl(model_name, jsonl_path, max_seq_length, skip_overlength)
)
dataset.save_to_disk(save_path)

print("#> Tokenization finished!", "Total examples:", len(dataset))

