# 목록 추출

In [131]:
import json, os
from DeviceObjects import ALL_DEVICE_TYPES
import devices
from typing import Any

def _extract_enums(enums_holder: Any) -> dict[str, Any]:
    """Collect the descriptor and the member names of every enum on *enums_holder*.

    Every attribute that is a class (other than ``type`` itself, which shows up
    as ``__class__``) is iterated and its members' ``name`` values are listed.
    """
    enums = {'descriptor': enums_holder.descriptor}
    for attr_name in dir(enums_holder):
        attr = getattr(enums_holder, attr_name)
        if isinstance(attr, type) and attr is not type:
            enums[attr_name] = [e.name for e in attr]
    return enums


def serialize_device_types(
    device_types: "dict[str, Any] | None" = None,
) -> dict[str, Any]:
    """Serialize the skills of every device type into plain dictionaries.

    Args:
        device_types: Mapping of device name to an object exposing an iterable
            ``skills`` attribute. When omitted, ``ALL_DEVICE_TYPES`` is used.

    Returns:
        ``{device_name: {"skills": [...]}}``. Each skill entry is the result of
        ``skill.dict()``; when the skill has an ``Enums`` attribute an ``"enum"``
        key is added holding its descriptor and enum member names. A skill whose
        serialization raises is replaced by ``{"id": ..., "error": ...}``.
    """
    import warnings

    if device_types is None:
        device_types = ALL_DEVICE_TYPES

    result = {}

    for device_name, device_cls in device_types.items():
        skill_data = []
        for skill in device_cls.skills:
            try:
                skill_dict = skill.dict()
                if hasattr(skill, 'Enums'):
                    try:
                        skill_dict["enum"] = _extract_enums(skill.Enums)
                    except Exception as enum_err:
                        # Enum extraction stays best-effort (the skill is still
                        # exported), but the failure is reported instead of
                        # being swallowed silently.
                        warnings.warn(
                            "enum extraction failed for skill "
                            f"{getattr(skill, 'id', 'unknown')!r}: {enum_err}"
                        )
                skill_data.append(skill_dict)
            except Exception as e:
                skill_data.append({
                    "id": getattr(skill, 'id', 'unknown'),
                    "error": str(e)
                })

        result[device_name] = {
            "skills": skill_data
        }

    return result



# Serialize every device type and persist the result as pretty-printed,
# non-ASCII-preserving JSON next to the notebook.
data = serialize_device_types()
output_path = "0.1.0_exported_device_skills.json"
with open(output_path, mode="w", encoding="utf-8") as f:
    f.write(json.dumps(data, indent=2, ensure_ascii=False))

print(f"\n✅ JSON 파일 저장 완료: {os.path.abspath(output_path)}")


✅ JSON 파일 저장 완료: /home/endermaru/Project/ServiceExtraction/integration/exported_device_skills.json


# 분리

In [132]:
import json

# Load the full export; the input handle is released before anything is written.
with open("0.1.0_exported_device_skills.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Per device, keep only the id and descriptor of each skill.
extracted = {
    device_name: {
        "skills": [
            {"id": skill["id"], "descriptor": skill["descriptor"]}
            for skill in device_info.get("skills", [])
        ]
    }
    for device_name, device_info in data.items()
}

output_path = "0.1.1_exported_devices.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(extracted, f, indent=2, ensure_ascii=False)

print(f"\n✅ JSON 파일 저장 완료: {os.path.abspath(output_path)}")


✅ JSON 파일 저장 완료: /home/endermaru/Project/ServiceExtraction/integration/exported_devices.json


In [134]:
import json
import os

with open("0.1.0_exported_device_skills.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# De-duplicate by skill id: the first occurrence wins and insertion order
# (order of first appearance across devices) is preserved.
unique_skills = {}
for device_info in data.values():
    for skill in device_info.get("skills", []):
        unique_skills.setdefault(skill["id"], skill)

# Output format: a flat list of the unique skill dictionaries.
extracted_skills = [*unique_skills.values()]

output_path = "0.1.1_exported_skills.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(extracted_skills, f, indent=2, ensure_ascii=False)

print(f"\n✅ 중복 제거된 스킬 JSON 저장 완료: {os.path.abspath(output_path)}")


✅ 중복 제거된 스킬 JSON 저장 완료: /home/endermaru/Project/ServiceExtraction/integration/exported_skills.json


## 압축

In [152]:
import json

with open("0.1.0_exported_device_skills.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Collapse each device to a {skill id: skill descriptor} mapping.
extracted = {
    device_name: {
        skill["id"]: skill["descriptor"]
        for skill in device_info.get("skills", [])
    }
    for device_name, device_info in data.items()
}

output_path = "0.1.2_compressed_devices.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(extracted, f, indent=2, ensure_ascii=False)

print(f"\n✅ JSON 파일 저장 완료: {os.path.abspath(output_path)}")


✅ JSON 파일 저장 완료: /home/endermaru/Project/ServiceExtraction/integration/0.1.2_compressed_devices.json


In [146]:
import json
import os

# Key renames applied to every value/function entry.
# NOTE: "arguements" is kept as spelled because the later cells in this file
# look function arguments up under this exact key.
words = {
    "descriptor": "info",
    "values": "values",
    "functions": "functions",
    "arguments": "arguements",
    "return_type": "return_type",
}


def rename_keys(item: dict) -> dict:
    """Return a copy of *item* without its "id" and without ``None`` values.

    Keys listed in ``words`` are renamed; all other keys are kept unchanged.
    """
    return {
        words.get(k, k): v
        for k, v in item.items()
        if k != "id" and v is not None
    }


def compress_skill(skill: dict) -> dict:
    """Build the compact form of one exported skill without mutating it.

    The result holds every extra key of *skill* unchanged, ``"info"`` (the
    former ``"descriptor"``), and - only when non-empty - ``"values"`` and
    ``"functions"`` re-keyed by the entries' ids. When the skill has a truthy
    ``"enum"`` entry, each enum becomes ``{"enums": members, "info": descriptor}``.

    Raises:
        KeyError: if "descriptor", "values" or "functions" is missing.
    """
    reserved = ("id", "descriptor", "values", "functions", "enum")
    compact = {k: v for k, v in skill.items() if k not in reserved}
    compact["info"] = skill["descriptor"]

    values = {item["id"]: rename_keys(item) for item in skill["values"]}
    if values:
        compact[words["values"]] = values

    functions = {}
    for func in skill["functions"]:
        converted = rename_keys(func)
        # Entries carrying an "args" list get it re-keyed by argument id.
        # NOTE(review): presumably an alternative to "arguments" - confirm
        # against the exporter's schema.
        if "args" in converted:
            converted["args"] = {
                arg["id"]: rename_keys(arg) for arg in converted["args"]
            }
        functions[func["id"]] = converted
    if functions:
        compact[words["functions"]] = functions

    original_enum = skill.get("enum")
    if original_enum:
        # The one descriptor is shared by every enum of the skill.
        descriptor = original_enum["descriptor"]
        compact["enum"] = {
            name: {"enums": members, "info": descriptor}
            for name, members in original_enum.items()
            if name != "descriptor"
        }

    return compact


# Notebook cells execute as "__main__", so this still runs on cell execution
# while keeping the helpers above importable.
if __name__ == "__main__":
    with open("0.1.0_exported_device_skills.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    # De-duplicate by skill id; only the first occurrence is converted.
    unique_skills = {}
    for device_info in data.values():
        for skill in device_info.get("skills", []):
            skill_id = skill["id"]
            if skill_id not in unique_skills:
                unique_skills[skill_id] = compress_skill(skill)

    # Output format: {skill id: compact skill}.
    extracted_skills = unique_skills

    output_path = "0.1.2_compressed_skills.json"
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(extracted_skills, f, indent=2, ensure_ascii=False)

    print(f"\n✅ 중복 제거된 스킬 JSON 저장 완료: {os.path.abspath(output_path)}")


✅ 중복 제거된 스킬 JSON 저장 완료: /home/endermaru/Project/ServiceExtraction/integration/compressed_skills.json


# Docstring(txt)

In [198]:
import json
import yaml

def parse_enum_descriptions(enum_id: str, enum_info: dict) -> dict:
    """Return ``{member name: description}`` for one enum.

    ``alarmEnum`` uses a hand-written table and ``alarmVolumeEnum`` /
    ``weekdayEnum`` get no descriptions. Every other enum is parsed from the
    bullet lines (``• name - description``) of ``enum_info["info"]``.
    """
    if enum_id == "alarmEnum":
        return {
            "both": "if the alarm is strobing and sounding the alarm",
            "off": "if the alarm is turned off",
            "siren": "if the alarm is sounding the siren",
            "strobe": "if the alarm is strobing",
        }
    if enum_id in ("alarmVolumeEnum", "weekdayEnum"):
        return {}

    enum_desc = {}
    for line in enum_info.get('info', '').split('\n'):
        line = line.strip()
        if line.startswith('• ') and '-' in line:
            # Drop the bullet, then split on the first dash only.
            key, _, value = line[2:].partition('-')
            enum_desc[key.strip()] = value.strip()
    return enum_desc


def build_skill_docs(skills: dict) -> dict:
    """Render every skill as docstring-style text, printing it as it goes.

    Args:
        skills: ``{skill id: compact skill}`` as stored in
            ``0.1.2_compressed_skills.json``.

    Returns:
        ``{skill id: {"info": str, "enum": [...], "Attributes": [...],
        "Methods": [...]}}``; the three list keys are present only when the
        skill has the matching ``"enum"`` / ``"values"`` / ``"functions"`` key.
        ``"enum"`` holds one text block per enum of the skill.
    """
    docs = {}
    for skill_id, skill_info in skills.items():
        entry = {"info": skill_info.get("info", '')}
        print("###" + skill_id)

        if "enum" in skill_info:
            print('Enums:')
            enum_texts = []
            for enum_id, enum_info in skill_info["enum"].items():
                enum_desc = parse_enum_descriptions(enum_id, enum_info)
                lines = [f"{enum_id} (Enum):"]
                for member in enum_info.get('enums', []):
                    desc = enum_desc.get(member, '')
                    lines.append(f"{member}: {desc}" if desc != '' else f"{member}")
                text = "\n\t\t".join(lines)
                print(text)
                # Appended per enum, so skills with several enums keep them all.
                enum_texts.append(text)
            entry["enum"] = enum_texts

        if "values" in skill_info:
            print('\nAttributes:')
            attr_texts = []
            for val, d in skill_info["values"].items():
                # "format" (e.g. an enum name) wins over the generic "type".
                value_type = d.get('format', d.get('type', ''))
                text = f"{skill_id}_{val} ({value_type}): {d.get('info', '')}"
                print(text)
                attr_texts.append(text)
            entry["Attributes"] = attr_texts

        if "functions" in skill_info:
            print('\nMethods:')
            method_texts = []
            for fun, d in skill_info["functions"].items():
                ret = d.get('return_type', 'VOID')
                arg_list = []
                for arg in d.get('arguements', []):
                    arg_type = arg.get('format')
                    if arg_type is None:
                        arg_type = arg.get('type', '')
                    arg_list.append(
                        (arg.get('id', ''), arg_type, arg.get('descriptor', ''))
                    )

                signature = ", ".join(f"{a[0]}: {a[1]}" for a in arg_list)
                method = f"{skill_id}_{fun}({signature}) -> {ret}:"
                method += f"\n\t\t{d.get('info', '')}\n"
                if arg_list:
                    method += "\n\t\tArgs:\n"
                    for arg_name, arg_type, arg_desc in arg_list:
                        method += f"\t\t\t{arg_name} ({arg_type}): {arg_desc}\n"
                print(method)
                method_texts.append(method)
            entry["Methods"] = method_texts

        docs[skill_id] = entry
        print("==========================")
    return docs


# Notebook cells execute as "__main__", so this still runs on cell execution
# while keeping the helpers above importable.
if __name__ == "__main__":
    with open("0.1.2_compressed_skills.json", "r", encoding="utf-8") as f:
        skills = json.load(f)

    ret1 = build_skill_docs(skills)

    with open("0.1.3_compressed_skills_row.json", "w", encoding="utf-8") as f:
        json.dump(ret1, f, indent=2, ensure_ascii=False)

###airConditionerMode
Enums:
airConditionerModeEnum (Enum):
		auto: The fan is on auto
		cool: The fan is in sleep mode to reduce noise
		heat: The fan is on low
		fanonly
		dehumidify

Attributes:
airConditionerMode_airConditionerMode (airConditionerModeEnum): Current mode of the air conditioner
airConditionerMode_supportedAcModes (list[airConditionerModeEnum]): Supported states for this air conditioner to be in
airConditionerMode_targetTemperature (INTEGER): Current temperature status of the air conditioner

Methods:
airConditionerMode_setAirConditionerMode(mode: airConditionerModeEnum) -> VOID:
		Set the air conditioner mode

		Args:
			mode (airConditionerModeEnum): Set the air conditioner mode

airConditionerMode_setTemperature(temperature: INTEGER) -> VOID:
		Set the air conditioner temperature

		Args:
			temperature (INTEGER): Set the air conditioner temperature

###switch
Enums:
switchEnum (Enum):
		on: The value of the ``switch`` attribute if the switch is on
		off: The value

In [None]:
# Print one class-docstring skeleton per device, assembled from the per-skill
# text blocks produced by the previous cell.
with open("0.1.2_compressed_devices.json", "r", encoding="utf-8") as f, \
        open("0.1.3_compressed_skills_row.json", "r", encoding="utf-8") as f2:
    lst = []
    data = json.load(f)
    skills = json.load(f2)
    for device_name, device_info in data.items():
        parts = [
            f'class {device_name}:\n\t"""\n\tNote: This class has a pre-existing instance named `(#{device_name})`. Do NOT create a new instance.\n'
        ]

        # Gather the text blocks of every skill the device has.
        attrs, methods, enums = [], [], []
        for skill_id in device_info:
            skill_doc = skills[skill_id]
            attrs.extend(skill_doc.get("Attributes", []))
            methods.extend(skill_doc.get("Methods", []))
            enums.extend(skill_doc.get("enum", []))

        if enums:
            parts.append("\n\tEnums:\n")
            for enum in enums:
                # Push continuation lines one tab deeper.
                indented = enum.replace('\n', '\n\t')
                parts.append(f"\t\t{indented}\n")
        if attrs:
            parts.append("\n\tAttributes:\n")
            for attr in attrs:
                parts.append(f"\t\t{attr}\n")
        if methods:
            parts.append("\n\tMethods:\n")
            for method in methods:
                indented = method.replace('\n', '\n\t')
                parts.append(f"\t\t{indented}\n")

        parts.append('\t"""\n')
        desc = "".join(parts)
        print(desc)


class AirConditioner:
	"""
	Note: This class has a pre-existing instance named `(#AirConditioner)`. Do NOT create a new instance.

	Enums:
		airConditionerModeEnum (Enum):
			auto: The fan is on auto
			cool: The fan is in sleep mode to reduce noise
			heat: The fan is on low
			fanonly
			dehumidify
		switchEnum (Enum):
			on: The value of the ``switch`` attribute if the switch is on
			off: The value of the ``switch`` attribute if the switch is off

	Attributes:
		airConditionerMode_airConditionerMode (airConditionerModeEnum): Current mode of the air conditioner
		airConditionerMode_supportedAcModes (list[airConditionerModeEnum]): Supported states for this air conditioner to be in
		airConditionerMode_targetTemperature (INTEGER): Current temperature status of the air conditioner
		switch_switch (switchEnum): A string representation of whether the switch is on or off

	Methods:
		airConditionerMode_setAirConditionerMode(mode: airConditionerModeEnum) -> VOID:
			Set the air conditioner

# row - json

In [25]:
import json

def parse_enum_descriptions(enum_id: str, enum_info: dict) -> dict:
    """Return ``{member name: description}`` for one enum.

    ``alarmEnum`` uses a hand-written table and ``alarmVolumeEnum`` /
    ``weekdayEnum`` get no descriptions. Every other enum is parsed from the
    bullet lines (``• name - description``) of ``enum_info["info"]``.
    """
    if enum_id == "alarmEnum":
        return {
            "both": "if the alarm is strobing and sounding the alarm",
            "off": "if the alarm is turned off",
            "siren": "if the alarm is sounding the siren",
            "strobe": "if the alarm is strobing",
        }
    if enum_id in ("alarmVolumeEnum", "weekdayEnum"):
        return {}

    enum_desc = {}
    for line in enum_info.get('info', '').split('\n'):
        line = line.strip()
        if line.startswith('• ') and '-' in line:
            # Drop the bullet, then split on the first dash only.
            key, _, value = line[2:].partition('-')
            enum_desc[key.strip()] = value.strip()
    return enum_desc


def build_row_skills(skills: dict) -> tuple[dict, dict]:
    """Convert compact skills into structured attribute/method records.

    Args:
        skills: ``{skill id: compact skill}`` as stored in
            ``0.1.2_compressed_skills.json``.

    Returns:
        ``(row_skills, enums)``. ``row_skills`` maps each skill id to its
        ``"info"`` plus optional ``"Attributes"`` and ``"Methods"`` dicts keyed
        by ``"<skill id>_<name>"``. ``enums`` maps every enum id seen in any
        skill to ``{member: description}`` (empty string when unknown).
    """
    row_skills = {}
    enums = {}
    for skill_id, skill_info in skills.items():
        entry = {"info": skill_info.get("info", '')}

        for enum_id, enum_info in skill_info.get("enum", {}).items():
            enum_desc = parse_enum_descriptions(enum_id, enum_info)
            # Stored per enum, so skills with several enums keep them all.
            enums[enum_id] = {
                member: enum_desc.get(member, '')
                for member in enum_info.get('enums', [])
            }

        if "values" in skill_info:
            entry["Attributes"] = {
                f"{skill_id}_{val}": {
                    "info": d.get('info', ''),
                    # "format" (e.g. an enum name) wins over the generic "type".
                    "type": d.get('format', d.get('type', '')),
                }
                for val, d in skill_info["values"].items()
            }

        if "functions" in skill_info:
            methods = {}
            for fun, d in skill_info["functions"].items():
                args_dict = {}
                for arg in d.get('arguements', []):
                    arg_type = arg.get('format')
                    if arg_type is None:
                        arg_type = arg.get('type', '')
                    # `or ''` tolerates explicit None values before .strip().
                    args_dict[arg.get('id', '')] = {
                        "info": (arg.get('descriptor') or '').strip(),
                        "type": (arg_type or '').strip(),
                    }
                methods[f"{skill_id}_{fun}"] = {
                    "info": f"{d.get('info', '')}",
                    "args": args_dict,
                    "return_type": d.get('return_type', 'VOID'),
                }
            entry["Methods"] = methods

        row_skills[skill_id] = entry
    return row_skills, enums


# Notebook cells execute as "__main__", so this still runs on cell execution
# while keeping the helpers above importable.
if __name__ == "__main__":
    with open("0.1.2_compressed_skills.json", "r", encoding="utf-8") as f:
        skills = json.load(f)

    ret1, enums = build_row_skills(skills)

    with open("0.1.4_row_skills.json", "w", encoding="utf-8") as f:
        json.dump(ret1, f, indent=2, ensure_ascii=False)
    with open("0.1.4_row_enums.json", "w", encoding="utf-8") as f:
        json.dump(enums, f, indent=2, ensure_ascii=False)

In [26]:
import json

def build_interfaces(devices: dict, skills: dict) -> dict:
    """Flatten per-skill attributes and methods into per-device interface keys.

    Args:
        devices: ``{device name: {skill id: ...}}``; only the skill ids are used.
        skills: ``{skill id: {"Attributes": {...}, "Methods": {...}}}``.

    Returns:
        A dict keyed by ``"(#Device).attr"`` for attributes and
        ``"(#Device).method(arg1, arg2)"`` for methods, each mapping to the
        original attribute/method record.

    Raises:
        KeyError: if a device references a skill id missing from *skills*.
    """
    interfaces = {}
    for device_name, device_info in devices.items():
        attrs_dict = {}
        methods_dict = {}
        for skill_id in device_info:
            attrs_dict.update(skills[skill_id].get("Attributes", {}))
            methods_dict.update(skills[skill_id].get("Methods", {}))

        for attr_name, attr_info in attrs_dict.items():
            interfaces[f"(#{device_name}).{attr_name}"] = attr_info

        for method_name, method_info in methods_dict.items():
            # Default to an empty mapping so a method without "args" renders
            # as "name()" instead of raising.
            arg_names = list(method_info.get("args", {}))
            interfaces[f"(#{device_name}).{method_name}({', '.join(arg_names)})"] = method_info
    return interfaces


# Notebook cells execute as "__main__", so this still runs on cell execution
# while keeping the helper above importable.
if __name__ == "__main__":
    with open("0.1.2_compressed_devices.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    with open("0.1.4_row_skills.json", "r", encoding="utf-8") as f2:
        skills = json.load(f2)

    ret = build_interfaces(data, skills)

    with open("0.1.4_row_json.json", "w", encoding="utf-8") as f:
        json.dump(ret, f, indent=2, ensure_ascii=False)
            

In [27]:
# enum과 interface를 합침
import json

# Bundle the flattened interfaces and the enum table into one JSON document.
with open("0.1.4_row_json.json", "r", encoding="utf-8") as f, \
        open("0.1.4_row_enums.json", "r", encoding="utf-8") as f2:
    data = json.load(f)
    enums = json.load(f2)

ret = {"interfaces": data, "enums": enums}

with open("0.1.4_row_all.json", "w", encoding="utf-8") as f:
    json.dump(ret, f, indent=2, ensure_ascii=False)

# 임베딩용

In [63]:
import json


def parse_enum_descriptions(enum_id: str, enum_info: dict) -> dict:
    """Return ``{member name: description}`` for one enum.

    ``alarmEnum`` uses a hand-written table and ``alarmVolumeEnum`` /
    ``weekdayEnum`` get no descriptions. Every other enum is parsed from the
    bullet lines (``• name - description``) of ``enum_info["info"]``.
    """
    if enum_id == "alarmEnum":
        return {
            "both": "if the alarm is strobing and sounding the alarm",
            "off": "if the alarm is turned off",
            "siren": "if the alarm is sounding the siren",
            "strobe": "if the alarm is strobing",
        }
    if enum_id in ("alarmVolumeEnum", "weekdayEnum"):
        return {}

    enum_desc = {}
    for line in enum_info.get('info', '').split('\n'):
        line = line.strip()
        if line.startswith('• ') and '-' in line:
            # Drop the bullet, then split on the first dash only.
            key, _, value = line[2:].partition('-')
            enum_desc[key.strip()] = value.strip()
    return enum_desc


def _strip_list_wrapper(type_name: str) -> str:
    """Turn ``"list[X]"`` into ``"X"``; return any other type name unchanged."""
    if type_name.startswith("list["):
        return type_name[5:-1]
    return type_name


def build_embedding_skills(skills: dict) -> dict:
    """Build, per skill, the word lists used as embedding input.

    Args:
        skills: ``{skill id: compact skill}`` as stored in
            ``0.1.2_compressed_skills.json``.

    Returns:
        ``{skill id: {key: [words...]}}``. Attribute keys are
        ``"<skill id>_<value id>"`` with ``[skill id, value id, info]``; method
        keys are the bare function id with ``[info, arg id, arg descriptor, ...]``.
        Whenever an attribute, return or argument type names one of the skill's
        own enums, that enum's ``member, description`` pairs are appended.
    """
    ret = {}
    for skill_id, skill_info in skills.items():
        ret[skill_id] = {}

        # Rebuilt for every skill so enum text never leaks between skills.
        enum_dict = {}
        for enum_id, enum_info in skill_info.get("enum", {}).items():
            enum_lst = []
            for member, description in parse_enum_descriptions(enum_id, enum_info).items():
                enum_lst += [member, description]
            enum_dict[enum_id] = enum_lst

        for val, d in skill_info.get("values", {}).items():
            value_type = _strip_list_wrapper(d.get('format', d.get('type', '')))
            # Enum-typed attributes additionally carry the enum's member text.
            ret[skill_id][f"{skill_id}_{val}"] = (
                [skill_id, val, d.get('info', '')] + enum_dict.get(value_type, [])
            )

        for fun, d in skill_info.get("functions", {}).items():
            method_lst = [d.get("info", "")]

            ret_type = _strip_list_wrapper(d.get('return_type', 'VOID'))
            if 'enum' in ret_type or 'Enum' in ret_type:
                method_lst += enum_dict.get(ret_type, [])

            for arg in d.get('arguements', []):
                arg_type = arg.get('format')
                if arg_type is None:
                    arg_type = arg.get('type', '')
                arg_type = _strip_list_wrapper(arg_type)

                method_lst += [arg.get('id', ''), arg.get('descriptor', '')]
                # Enum-typed arguments additionally carry the enum's member text.
                if 'enum' in arg_type or 'Enum' in arg_type:
                    method_lst += enum_dict.get(arg_type, [])
            ret[skill_id][fun] = method_lst
    return ret


# Notebook cells execute as "__main__", so this still runs on cell execution
# while keeping the helpers above importable.
if __name__ == "__main__":
    with open("0.1.2_compressed_skills.json", "r", encoding="utf-8") as f:
        skills = json.load(f)

    ret = build_embedding_skills(skills)

    with open("0.1.5_embedding_skills.json", "w", encoding="utf-8") as f:
        json.dump(ret, f, indent=2, ensure_ascii=False)

In [21]:
import re, json

def filter_words(s, device_lst):
    """Normalize *s* into space-separated alphanumeric words.

    Non-strings yield ``""``. Every character outside ``[a-zA-Z0-9]`` becomes a
    separator. A token equal (case-insensitively) to a name in *device_lst* is
    replaced by that name as spelled in the list; any other token is split at
    its CamelCase boundaries.
    """
    if not isinstance(s, str):
        return ""

    # Keep ASCII letters and digits only, then collapse the whitespace runs.
    cleaned = re.sub(r'\s+', ' ', re.sub(r'[^a-zA-Z0-9]', ' ', s)).strip()

    # Lower-cased device name -> canonical spelling.
    canonical = {d.lower(): d for d in device_lst}

    pieces = []
    for token in cleaned.split():
        device_name = canonical.get(token.lower())
        if device_name is not None:
            # Exact device match: keep the device name in one piece.
            pieces.append(device_name)
            continue
        # Otherwise break the token at its CamelCase boundaries.
        pieces.extend(re.findall(r'[A-Z]?[a-z0-9]+|[A-Z]+(?![a-z])', token))

    return ' '.join(pieces)

def _dedupe_keep_last(items):
    """Drop duplicates, keeping each item's LAST occurrence in original order."""
    return list(dict.fromkeys(reversed(items)))[::-1]


# Join, per device and per attribute/method, the de-duplicated normalized
# words into a single comma-separated string.
with open("0.1.2_compressed_devices.json", "r", encoding="utf-8") as f, \
        open("0.1.5_embedding_skills.json", "r", encoding="utf-8") as f2:
    ret = {}
    data = json.load(f)
    skills = json.load(f2)

    device_lst = list(data.keys())

    for device_name, device_info in data.items():
        for skill_id in device_info:
            for k, v in skills[skill_id].items():
                # De-duplicate once on the raw entries and once more after
                # normalization, since different raw entries can normalize to
                # the same text.
                raw_items = _dedupe_keep_last([device_name, skill_id, k] + v)
                normalized = [filter_words(i, device_lst) for i in raw_items]
                ret[f"{device_name}_{k}"] = ", ".join(_dedupe_keep_last(normalized))

print(len(ret))
with open("0.1.5_embedding_all.json", "w", encoding="utf-8") as f:
    json.dump(ret, f, indent=2, ensure_ascii=False)

288
