In [1]:
import csv
import json
import xml.etree.ElementTree as ET
import xml.dom.minidom
import pandas as pd

# Convert data/data.csv into a flat <Records><Record>...</Record></Records>
# XML file, driven by data/mapping.json, and keep the mapped rows as an
# intermediate CSV for inspection.
#
# Paths are written with forward slashes: they resolve on Windows, macOS and
# Linux alike, and avoid backslash sequences such as "\d" or "\o" being parsed
# as (invalid) string escapes, which newer Python versions warn about.

# Load structured mapping: {xml_tag: {"csvColumn": <CSV header>, ...}}
with open('data/mapping.json', encoding='utf-8') as f:
    mapping = json.load(f)

# Create XML root
root = ET.Element('Records')

# One dict per CSV row, keyed by XML tag; mirrors what goes into the XML.
intermediate_data = []

# Read CSV and apply mapping
with open('data/data.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        row_data = {}
        record_elem = ET.SubElement(root, 'Record')
        for xml_tag, info in mapping.items():
            csv_col = info["csvColumn"]
            # A column absent from the CSV header yields an empty element
            # rather than an error.
            value = row.get(csv_col, "")
            child = ET.SubElement(record_elem, xml_tag)
            child.text = value
            row_data[xml_tag] = value
        intermediate_data.append(row_data)

# Save intermediate data to CSV
df = pd.DataFrame(intermediate_data)
df.to_csv('data/intermediate.csv', index=False)

# Convert XML to pretty format (ElementTree output is re-parsed with minidom
# purely to obtain indentation).
rough_string = ET.tostring(root, encoding='utf-8')
reparsed = xml.dom.minidom.parseString(rough_string)
pretty_xml = reparsed.toprettyxml(indent="  ")

# Save to file
with open('data/output.xml', 'w', encoding='utf-8') as f:
    f.write(pretty_xml)


## To run: sam build && sam local invoke CsvToXmlFunction

In [18]:
import pandas as pd
import json
from lxml import etree
import os

def csv_to_nested_xml(csv_file_path, mapping_json, output_folder):
    """Convert a CSV file into a nested ``ddash`` XML document.

    Every CSV row is first translated into a dict keyed by XML tag name using
    ``mapping_json``. The translated rows are saved as
    ``intermediate_output.csv`` and then rendered to ``output.xml``; both files
    are written into ``output_folder``.

    XML layout: the root ``ddash`` element receives one
    ``order_template_attributes`` element per row (containing
    ``product_template_attributes``, which in turn contains
    ``part_template_attributes``), followed by one
    ``delivery_template_attributes`` element per row.

    Args:
        csv_file_path: Path of the input CSV. All cells are read as strings and
            missing cells become "".
        mapping_json: Mapping from CSV column name to XML tag name, given as a
            dict or as a JSON string. A value may itself be a dict
            ``{keyword: xml_tag}``: the whole cell is stored under ``xml_tag``
            when ``keyword`` occurs as a substring of the cell.
        output_folder: Directory for the generated files. It is created when
            missing; an empty string means the current working directory.

    Returns:
        None. The two output paths are printed.
    """
    order_tags = (
        "order_id", "order_previous_id", "order_accept_number",
        "order_accept_datetime", "order_customer_master_id", "order_customer_name",
        "order_customer_zip_code", "order_customer_phone_number",
        "order_sales_user_department", "order_sales_user_name",
        "order_subtotal_amount", "order_estimation_number", "order_memo",
    )
    product_tags = (
        "product_id", "product_management_user_id", "product_management_user_name",
        "product_name", "product_name_kana", "product_master_id",
        "product_final_size_master_id", "product_final_size_master_name",
        "product_vertical_final_size_mm", "product_horizontal_final_size_mm",
        "product_weight_g", "product_quantities", "product_ordered_quantities",
        "product_external_aux_quantities", "product_internal_aux_quantities",
        "product_postpress_type_name", "product_extended_size_master_id",
        "product_extended_size_master_name", "product_vertical_extended_size",
        "product_horizontal_extended_size",
    )
    part_tags = (
        "part_seq", "part_type_id", "part_type_name", "part_original_file_path",
        "part_original_data_pages", "part_quantities", "part_external_aux_quantities",
        "part_internal_aux_quantities", "part_final_size_master_id",
        "part_final_size_master_name", "part_media_master_name",
        "part_colorant_front_number", "part_colorant_back_number", "part_memo",
        "platemaking_process_template_attributes", "prepress_process_template_attributes",
        "print_process_template_attributes", "postpress_process_template_attributes",
    )
    delivery_tags = (
        "delivery_seq", "delivery_id", "delivery_planned_shipping_start_datetime",
        "delivery_planned_shipping_end_datetime", "delivery_planned_delivery_datetime",
        "delivery_customer_name", "delivery_zip_code", "delivery_phone_number",
        "delivery_supplier_master_id", "delivery_supplier_master_name",
        "delivery_case_quantities", "delivery_packing_type_master_id",
        "delivery_packing_type_master_name", "delivery_destination",
        "delivery_requester", "delivery_memo",
    )

    def add_fields(parent, record, tags):
        # Emit a child element only for tags that were actually mapped,
        # preserving the order given by ``tags``.
        for tag in tags:
            if tag in record:
                etree.SubElement(parent, tag).text = record[tag]

    # Load CSV as text; NaN (empty cells) become empty strings.
    df = pd.read_csv(csv_file_path, dtype=str).fillna("")

    # Accept the mapping either as a JSON string or as an already-parsed dict.
    mapping = json.loads(mapping_json) if isinstance(mapping_json, str) else mapping_json

    # Translate every CSV row into {xml_tag: value}.
    mapped_data = []
    for record in df.to_dict(orient="records"):
        mapped_row = {}
        for csv_key, xml_key in mapping.items():
            # A CSV column that is absent is treated as an empty cell, for
            # nested and plain mappings alike.
            cell = record.get(csv_key, "")
            if isinstance(xml_key, dict):  # nested key mapping
                for nested_key, nested_value in xml_key.items():
                    if nested_key in cell:
                        mapped_row[nested_value] = cell
            else:
                mapped_row[xml_key] = cell
        mapped_data.append(mapped_row)

    mapped_df = pd.DataFrame(mapped_data)

    # Make sure the destination exists before writing anything into it.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Save intermediate file (BOM-prefixed UTF-8).
    intermediate_file = os.path.join(output_folder, "intermediate_output.csv")
    mapped_df.to_csv(intermediate_file, index=False, encoding="utf-8-sig")

    # Start building the XML structure
    root = etree.Element("ddash")

    for data in mapped_data:
        # --- Level 1: Order-level tags ---
        order_elem = etree.SubElement(root, "order_template_attributes")
        add_fields(order_elem, data, order_tags)

        # --- Level 2: Product nested inside Order ---
        product_elem = etree.SubElement(order_elem, "product_template_attributes")
        add_fields(product_elem, data, product_tags)

        # --- Level 3: Part nested inside Product ---
        part_elem = etree.SubElement(product_elem, "part_template_attributes")
        add_fields(part_elem, data, part_tags)

    # --- Level 1: delivery_template_attributes at the root, one per row,
    # appended after all order elements ---
    for data in mapped_data:
        delivery_elem = etree.SubElement(root, "delivery_template_attributes")
        add_fields(delivery_elem, data, delivery_tags)

    # Save the final XML
    xml_output_path = os.path.join(output_folder, "output.xml")
    tree = etree.ElementTree(root)
    tree.write(xml_output_path, pretty_print=True, xml_declaration=True, encoding="UTF-8")

    print(f" Intermediate CSV saved at: {intermediate_file}")
    print(f" Final XML saved at: {xml_output_path}")

# CSV header -> XML tag. A dict value maps a keyword looked up inside the cell
# to the XML tag that receives the whole cell when the keyword is present.
mapping_template = {
    "受注番号": "order_id",
    "元受注番号": "order_previous_id",
    "得意先": "order_customer_name",
    "品名": "product_name",
    "部品": "part_type_name",
    "仕様": {
        "製品形状": "product_postpress_type_name"
    }
}

# Build paths with os.path.join instead of hand-written backslashes: sequences
# such as "\l" and "\i" are invalid string escapes, and a backslash is only a
# path separator on Windows.
csv_file = os.path.join("data", "lambda-function", "input_job.csv")
output_dir = os.path.join("data", "lambda-function")
csv_to_nested_xml(csv_file, mapping_template, output_dir)



 Intermediate CSV saved at: data\lambda-function\intermediate_output.csv
 Final XML saved at: data\lambda-function\output.xml


In [27]:
import pandas as pd
import json
from lxml import etree
import os

def csv_to_nested_xml(csv_file_path, mapping_json_path, output_folder):
    """Convert a CSV file into a nested ``ddash`` XML document.

    Every CSV row is translated into a dict keyed by XML tag name using the
    mapping stored in ``mapping_json_path``. The translated rows are saved as
    ``intermediate_output.csv`` and then rendered to ``output.xml``; both files
    are written into ``output_folder``.

    XML layout per CSV row (under the ``ddash`` root)::

        order_template_attributes
        ├── <order-level tags>
        ├── product_template_attributes
        │   ├── <product-level tags>
        │   ├── product_delivery_template_attributes
        │   └── part_template_attributes
        └── delivery_template_attributes

    Args:
        csv_file_path: Path of the input CSV. All cells are read as strings and
            missing cells become "".
        mapping_json_path: Path of a UTF-8 JSON file mapping CSV column name to
            XML tag name. A value may itself be an object
            ``{keyword: xml_tag}``: the whole cell is stored under ``xml_tag``
            when ``keyword`` occurs as a substring of the cell.
        output_folder: Directory for the generated files. It is created when
            missing; an empty string means the current working directory.

    Returns:
        None. The two output paths are printed.
    """
    order_tags = (
        "order_id", "order_previous_id", "order_accept_number",
        "order_accept_datetime", "order_customer_master_id", "order_customer_name",
        "order_customer_zip_code", "order_customer_phone_number",
        "order_sales_user_department", "order_sales_user_name",
        "order_subtotal_amount", "order_estimation_number", "order_memo",
    )
    product_tags = (
        "product_id", "product_management_user_id", "product_management_user_name",
        "product_name", "product_name_kana", "product_master_id",
        "product_final_size_master_id", "product_final_size_master_name",
        "product_vertical_final_size_mm", "product_horizontal_final_size_mm",
        "product_weight_g", "product_quantities", "product_ordered_quantities",
        "product_external_aux_quantities", "product_internal_aux_quantities",
        "product_postpress_type_name", "product_extended_size_master_id",
        "product_extended_size_master_name", "product_vertical_extended_size",
        "product_horizontal_extended_size",
    )
    product_delivery_tags = ("delivery_id", "product_delivery_quantities")
    part_tags = (
        "part_seq", "part_type_id", "part_type_name", "part_original_file_path",
        "part_original_data_pages", "part_quantities", "part_external_aux_quantities",
        "part_internal_aux_quantities", "part_final_size_master_id",
        "part_final_size_master_name", "part_media_master_name",
        "part_colorant_front_number", "part_colorant_back_number", "part_memo",
        "platemaking_process_template_attributes", "prepress_process_template_attributes",
        "print_process_template_attributes", "postpress_process_template_attributes",
    )
    delivery_tags = (
        "delivery_seq", "delivery_id", "delivery_planned_shipping_start_datetime",
        "delivery_planned_shipping_end_datetime", "delivery_planned_delivery_datetime",
        "delivery_customer_name", "delivery_zip_code", "delivery_phone_number",
        "delivery_supplier_master_id", "delivery_supplier_master_name",
        "delivery_case_quantities", "delivery_packing_type_master_id",
        "delivery_packing_type_master_name", "delivery_destination",
        "delivery_requester", "delivery_memo",
    )

    def add_fields(parent, record, tags):
        # Emit a child element only for tags that were actually mapped,
        # preserving the order given by ``tags``.
        for tag in tags:
            if tag in record:
                etree.SubElement(parent, tag).text = record[tag]

    # Load CSV as text; NaN (empty cells) become empty strings.
    df = pd.read_csv(csv_file_path, dtype=str).fillna("")

    # Load mapping JSON from file path
    with open(mapping_json_path, 'r', encoding='utf-8') as f:
        mapping = json.load(f)

    # Translate every CSV row into {xml_tag: value}.
    mapped_data = []
    for record in df.to_dict(orient="records"):
        mapped_row = {}
        for csv_key, xml_key in mapping.items():
            # Read the cell once; a CSV column that is absent counts as "".
            cell = record.get(csv_key, "")
            if isinstance(xml_key, dict):  # nested key mapping
                for nested_key, nested_value in xml_key.items():
                    if nested_key in cell:
                        mapped_row[nested_value] = cell
            else:
                mapped_row[xml_key] = cell
        mapped_data.append(mapped_row)

    mapped_df = pd.DataFrame(mapped_data)

    # Save intermediate file. os.makedirs("") raises, so only create the
    # folder when one was actually named.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    intermediate_file = os.path.join(output_folder, "intermediate_output.csv")
    mapped_df.to_csv(intermediate_file, index=False, encoding="utf-8")

    # Build XML
    # Level 0: Root
    root = etree.Element("ddash")

    for data in mapped_data:
        # Level 1: order_template_attributes
        order_elem = etree.SubElement(root, "order_template_attributes")
        add_fields(order_elem, data, order_tags)

        # Level 2: product_template_attributes (inside order_template_attributes)
        product_elem = etree.SubElement(order_elem, "product_template_attributes")
        add_fields(product_elem, data, product_tags)

        # Level 3: product_delivery_template_attributes (inside product_template_attributes)
        delivery_in_product_elem = etree.SubElement(product_elem, "product_delivery_template_attributes")
        add_fields(delivery_in_product_elem, data, product_delivery_tags)

        # Level 3: part_template_attributes (inside product_template_attributes)
        part_elem = etree.SubElement(product_elem, "part_template_attributes")
        add_fields(part_elem, data, part_tags)

        # Level 2: delivery_template_attributes, attached under
        # order_template_attributes (i.e. a sibling of product_template_attributes).
        # NOTE(review): the previous comment described it as a sibling of
        # order_template_attributes; confirm the placement against the target schema.
        delivery_elem = etree.SubElement(order_elem, "delivery_template_attributes")
        add_fields(delivery_elem, data, delivery_tags)

    # Save final XML
    xml_output_path = os.path.join(output_folder, "output.xml")
    tree = etree.ElementTree(root)
    tree.write(xml_output_path, pretty_print=True, xml_declaration=True, encoding="UTF-8")

    print(f"Intermediate CSV saved at: {intermediate_file}")
    print(f"Final XML saved at: {xml_output_path}")

# Build paths with os.path.join instead of hand-written backslashes: sequences
# such as "\l", "\i" and "\o" are invalid string escapes, and a backslash is
# only a path separator on Windows.
csv_file = os.path.join("data", "lambda-function", "input_job.csv")
mapping_file = os.path.join("data", "lambda-function", "input_mapping_dict.json")
output_dir = os.path.join("data", "lambda-function", "output")

csv_to_nested_xml(csv_file, mapping_file, output_dir)


Intermediate CSV saved at: data\lambda-function\output\intermediate_output.csv
Final XML saved at: data\lambda-function\output\output.xml
