In [101]:
import os
import re
import json
import pandas as pd

def extract_text_from_sheet(sheet_df):
    """Collect the cleaned, non-empty text lines found in a worksheet.

    Cells are visited in the order produced by ``sheet_df.values.flatten()``.
    Only string cells contribute. Each one is normalised, stripped of
    bracketed remarks and then split on newlines.

    Args:
        sheet_df: DataFrame holding the parsed worksheet.

    Returns:
        list[str]: Stripped, non-empty lines in the order they were found.
    """
    # Full-width brackets/colon are mapped to ASCII so the bracket regex below
    # sees a single spelling.
    fullwidth_to_ascii = str.maketrans({'\uff08': '(', '\uff09': ')', '\uff1a': ':'})

    lines = []
    for cell in sheet_df.values.flatten():
        if not (pd.notna(cell) and isinstance(cell, str)):
            continue

        cleaned = cell.translate(fullwidth_to_ascii)
        # The markers are removed wherever they occur, in this order.
        for marker in ("AK", "ES"):
            cleaned = cleaned.replace(marker, "")
        # Drop "(...)" remarks; "." does not match "\n", so a bracket pair
        # spanning several lines is left untouched.
        cleaned = re.sub(r'\(.*?\)', '', cleaned)

        for piece in cleaned.split('\n'):
            piece = piece.strip()
            if piece:
                lines.append(piece)
    return lines

def process_excel_to_json(file_path, output_folder):
    """Dump the text of a workbook's "Programming Details" sheet to JSON.

    A sub-folder named after the workbook (file name without extension) is
    created inside ``output_folder``. When at least one sheet whose name
    contains "Programming Details" exists, its extracted text is written to
    ``input_data.json`` in that sub-folder under the key
    "programming details". If several sheets match, the last one wins.

    Args:
        file_path: Path of the Excel workbook to read.
        output_folder: Directory under which the per-workbook folder is made.

    Returns:
        str | None: Path of the written JSON file, or None when no sheet
        name contains "Programming Details".
    """
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    specific_output_folder = os.path.join(output_folder, base_name)

    all_text_data = {}
    # The context manager closes the workbook handle even when parsing fails;
    # a handle left open keeps the file locked on Windows.
    with pd.ExcelFile(file_path) as xl:
        for sheet_name in xl.sheet_names:
            if "Programming Details" in sheet_name:
                df = xl.parse(sheet_name)
                all_text_data["programming details"] = extract_text_from_sheet(df)

    # The folder is created whether or not a matching sheet was found.
    os.makedirs(specific_output_folder, exist_ok=True)

    if not all_text_data:
        return None

    json_output_path = os.path.join(specific_output_folder, 'input_data.json')
    with open(json_output_path, 'w') as json_file:
        json.dump(all_text_data, json_file, indent=4)

    return json_output_path

In [102]:
def process_devices(split_data, output_folder):
    """Turn the "devices" section into shortname/device pairs and save them.

    The section is scanned with two indices. Whenever the probed line starts
    with "QTY:", the line at the name index becomes an appearance shortname
    and the lines that follow are gathered as its device names until the next
    "QTY:" line, an already known shortname, or the end of the section.

    Args:
        split_data: Mapping whose optional "devices" entry is the list of
            text lines of the device section.
        output_folder: Existing directory that receives ``devices.json``.

    Returns:
        None. The file holds ``{"devices": [{"appearanceShortname": ...,
        "deviceName": ...}, ...]}``.
    """
    lines = split_data.get("devices", [])
    total = len(lines)
    by_shortname = {}

    name_idx = 0   # line taken as the appearance shortname
    probe_idx = 1  # line tested for the "QTY:" prefix

    while probe_idx < total:
        name_line = lines[name_idx].strip()
        probe_line = lines[probe_idx].strip()

        if not probe_line.startswith("QTY:"):
            # Keep looking; the name index deliberately stays where it is.
            probe_idx += 1
            continue

        bucket = by_shortname[name_line] = []

        # Gather everything after the "QTY:" line for this shortname.
        probe_idx += 1
        while probe_idx < total:
            candidate = lines[probe_idx].strip()
            if candidate.startswith("QTY:") or candidate in by_shortname:
                break
            bucket.append(candidate)
            probe_idx += 1

        # The line right before the stop position is the next shortname.
        name_idx = probe_idx - 1

    # Each bucket may end with the following shortname; anything that is
    # itself a shortname key is filtered out here.
    records = [
        {"appearanceShortname": shortname, "deviceName": device}
        for shortname, devices in by_shortname.items()
        for device in devices
        if device not in by_shortname
    ]

    # Write devices.json
    target = os.path.join(output_folder, "devices.json")
    with open(target, 'w') as file:
        json.dump({"devices": records}, file, indent=4)

In [103]:
def process_groups(split_data, output_folder):
    """Parse the "groups" section into group records and write ``groups.json``.

    A group starts where a line containing "DEVICE CONTROL" is found; the
    line held by ``p1`` at that moment is used as the group name and the
    lines that follow are collected as its devices. Leading lines containing
    "KASTA GROUP" or "TOTAL" are skipped as headers.

    An empty or missing "groups" section produces an empty list instead of
    raising.

    Args:
        split_data: Mapping whose optional "groups" entry is the list of
            text lines of the group section.
        output_folder: Existing directory that receives ``groups.json``.

    Returns:
        None. The file holds ``{"groups": [{"groupName": ...,
        "devices": [...]}, ...]}``.
    """
    groups_content = split_data.get("groups", [])
    groups_data = []
    groups_dict = {}

    p1 = 0  # line taken as the groupName
    p2 = 1  # line tested for "DEVICE CONTROL"

    while p2 < len(groups_content):
        line_p1 = groups_content[p1].strip()
        line_p2 = groups_content[p2].strip()

        # Skip unrelated header lines.
        if "KASTA GROUP" in line_p1 or "TOTAL" in line_p1:
            p1 += 1
            p2 = p1 + 1
            continue

        if "DEVICE CONTROL" in line_p2:
            current_group = line_p1
            members = groups_dict[current_group] = []

            # Advance p2 until the next "DEVICE CONTROL" marker, an already
            # known group name, or the end of the section.
            p2 += 1
            while p2 < len(groups_content) and "DEVICE CONTROL" not in groups_content[p2].strip():
                next_line = groups_content[p2].strip()

                # A known group name ends this group's device list.
                if next_line in groups_dict:
                    break

                members.append(next_line)
                p2 += 1

            # Only when the scan stopped on a following "DEVICE CONTROL"
            # marker is the last collected line really the next group's name.
            # Popping in the other cases (end of section, known group name)
            # would discard a genuine device.
            stopped_at_marker = (
                p2 < len(groups_content)
                and "DEVICE CONTROL" in groups_content[p2].strip()
            )
            if stopped_at_marker and members:
                members.pop()

            # Point p1 at the line expected to be the next groupName.
            p1 = p2 - 1
            if p1 < len(groups_content) and groups_content[p1].strip() in groups_dict:
                p1 += 1

        else:
            # Keep looking; p1 deliberately stays where it is.
            p2 += 1

    # Convert the mapping into the list layout stored in the file.
    for group_name, devices in groups_dict.items():
        groups_data.append({
            "groupName": group_name,
            "devices": devices
        })

    # Write groups.json
    groups_output_path = os.path.join(output_folder, "groups.json")
    with open(groups_output_path, 'w') as file:
        json.dump({"groups": groups_data}, file, indent=4)

In [104]:
def split_json_file(input_file_path, output_folder):
    """Split the extracted text into sections and write the per-section files.

    The "programming details" list of the JSON file is walked line by line.
    A line equal to one of the section markers switches the active section;
    every other line is stored under the active section, and lines that
    appear before the first marker are ignored. The device and group
    sections are then handed to ``process_devices`` and ``process_groups``.

    Args:
        input_file_path: JSON file holding a "programming details" list.
        output_folder: Directory for the generated files; created if missing.

    Returns:
        None.
    """
    with open(input_file_path, 'r') as handle:
        payload = json.load(handle)

    section_markers = {
        "devices": "KASTA DEVICE",
        "groups": "KASTA GROUP",
        "scenes": "KASTA SCENE",
        "remoteControls": "REMOTE CONTROL LINK"
    }
    sections = {name: [] for name in section_markers}

    active_section = None
    for entry in payload.get("programming details", []):
        matches = [name for name, marker in section_markers.items() if marker == entry]
        if matches:
            # The marker line itself is not stored.
            active_section = matches[0]
        elif active_section:
            sections[active_section].append(entry)

    os.makedirs(output_folder, exist_ok=True)

    # Device section -> devices.json
    process_devices(sections, output_folder)

    # Group section -> groups.json
    process_groups(sections, output_folder)

def test_process_excel(input_folder, output_folder):
    for file_name in os.listdir(input_folder):
        if file_name.endswith('.xlsx'):
            file_path = os.path.join(input_folder, file_name)
            result = process_excel_to_json(file_path, output_folder)
            if result:
                print(f"Processed {file_name} into {result}")
                split_json_file(result, os.path.dirname(result))
            else:
                print(f"No matching worksheets found in {file_name}")

# Input and output folder paths (relative paths)
input_folder = 'MPH_input'
output_folder = 'MPH_output'

# Run the conversion over the .xlsx files found in the input folder
test_process_excel(input_folder, output_folder)

Processed Kasta programming - MPH - Type M - 20240807.xlsx into MPH_output\Kasta programming - MPH - Type M - 20240807\input_data.json


PermissionError: [Errno 13] Permission denied: 'MPH_input\\~$Kasta programming - MPH - Type M - 20240807.xlsx'