# **Split**

In [18]:
import json
import random
from collections import defaultdict

# Seed the global RNG so every shuffle below is reproducible.
SEED = 42
random.seed(SEED)

# Read both source files and merge them into a single list of samples.
with open('data-RandomCurrentDate-Tell.json', 'r', encoding='utf-8') as day_file:
    data_DateByday = json.load(day_file)
with open('data-dateBytime-Tell.json', 'r', encoding='utf-8') as text_file:
    data_DateByText = json.load(text_file)
data = data_DateByday + data_DateByText

# Bucket the samples by their target action (item['output']['action']).
actions = defaultdict(list)
for sample in data:
    actions[sample['output']['action']].append(sample)

# Stratified split: every action bucket is shuffled in place and sliced
# on its own, so each action is divided in the same proportions.
# The train (70%) and validation (20%) sizes are truncated by int(); the
# remainder (about 10%, plus anything lost to truncation) goes to test.
train_data, valid_data, test_data = [], [], []
for bucket in actions.values():
    random.shuffle(bucket)
    train_end = int(len(bucket) * 0.7)
    valid_end = train_end + int(len(bucket) * 0.2)
    train_data += bucket[:train_end]
    valid_data += bucket[train_end:valid_end]
    test_data += bucket[valid_end:]

# Shuffle each subset so samples of the same action are no longer contiguous.
for subset in (train_data, valid_data, test_data):
    random.shuffle(subset)

In [19]:
# Sizes of the train / validation / test subsets, in that order.
tuple(map(len, (train_data, valid_data, test_data)))

(2040, 584, 296)

In [20]:
# Preview only the first few training samples; displaying the whole list
# would render every record into the notebook output.
train_data[:5]

[{'current_date': '2025-02-08',
  'current_day': 'Saturday',
  'input': 'ลบนัดดีลลูกค้าวันนี้',
  'output': {'action': 'delete_event_date',
   'date': '2025-02-08',
   'title': 'ดีลลูกค้า'}},
 {'current_date': '2025-02-01',
  'current_day': 'Saturday',
  'input': 'เอานัดทำการบ้านกับเพื่อนพฤหัสที่จะถึงนี้ออก',
  'output': {'action': 'delete_event_date',
   'date': '2025-02-06',
   'title': 'ทำการบ้านกับเพื่อน'}},
 {'current_date': '2025-09-05',
  'current_day': 'Friday',
  'input': 'ลบนัดติดตามงานวันที่ 20 มกรา',
  'output': {'action': 'delete_event_date',
   'date': '2026-01-20',
   'title': 'ติดตามงาน'}},
 {'current_date': '2025-06-22',
  'current_day': 'Sunday',
  'input': 'เอานัดดีลลูกค้าวันที่ 2 สิงหา ออกให้หน่อย',
  'output': {'action': 'delete_event_date',
   'date': '2025-08-02',
   'title': 'ดีลลูกค้า'}},
 {'current_date': '2025-05-16',
  'current_day': 'Friday',
  'input': 'เพิ่มนัดคุยโปรเจกต์วันที่ 13 พฤษภา ตอนทุ่มนึง',
  'output': {'action': 'add_event_date',
   'date': '202

# **Convert**

In [21]:
def convert_data(data):
    """Convert raw samples into chat-format fine-tuning records.

    Each input item must provide the keys ``current_date``, ``current_day``,
    ``input`` and ``output``. ``output`` must contain ``action``; every other
    field of ``output`` becomes a tool-call argument.

    Args:
        data: Iterable of sample dicts as described above.

    Returns:
        A list with one ``{"messages": [...]}`` dict per sample. Each message
        list holds a system, a user and an assistant message, in that order.
        The assistant content is the tool call serialized as JSON and wrapped
        in ``<tool_call>`` tags.

    Raises:
        KeyError: If a sample lacks one of the required keys.
    """
    converted_data = []
    for item in data:
        current_date = item["current_date"]
        current_day = item["current_day"]
        input_text = item["input"]
        output = item["output"]
        action = output["action"]
        # Everything except the action name is passed through as an argument.
        arguments = {k: v for k, v in output.items() if k != "action"}

        instruction_text = (
            "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."
            f"\n\nCurrent Date: {current_date}."
            f"\n\nCurrent Day: {current_day}."
        )

        answer = {"name": action, "arguments": arguments}
        # Serialize with json.dumps instead of interpolating the dict: str()
        # on a dict yields a single-quoted Python repr, which is not valid
        # JSON. ensure_ascii=False keeps non-ASCII text readable instead of
        # turning it into \uXXXX escapes.
        answer_json = json.dumps(answer, ensure_ascii=False)

        message = [
            {"role": "system", "content": instruction_text},
            {"role": "user", "content": input_text},
            {
                "role": "assistant",
                "content": f"<tool_call>\n{answer_json}\n</tool_call>",
            },
        ]
        converted_data.append({"messages": message})

    return converted_data

# Replace each raw split with its chat-format conversion.
# NOTE: the same names are rebound, so this cell is not idempotent: running it
# a second time feeds already-converted records (which only have a "messages"
# key) back into convert_data, which then fails on item["current_date"].
# Re-run the split cell first if this cell must be executed again.
train_data = convert_data(train_data)
valid_data = convert_data(valid_data)
test_data = convert_data(test_data)

In [22]:
# Total number of source samples (both input files combined), for comparison
# with the sizes of the three splits.
len(data)

2920

In [23]:
# Sizes of the train / validation / test subsets, in that order.
tuple(len(split) for split in (train_data, valid_data, test_data))

(2040, 584, 296)

# **Save**

In [24]:
# Bundle the three splits under the keys used in the output file.
data_finetune = {
    'train': train_data,
    'valid': valid_data,
    'test': test_data,
}

# ensure_ascii=False writes non-ASCII text as-is instead of \uXXXX escapes.
with open('Data_RandomDate_finetune-Tell.json', mode='w', encoding='utf-8') as out_file:
    json.dump(data_finetune, out_file, indent=2, ensure_ascii=False)

print("Done")

Done
