In [None]:
from lxml import etree
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import time, sys, re, json
import os
import xmltodict
from os.path import dirname
# CURRENT_DIR = dirname(__file__)
# sys.path.append(os.path.abspath(CURRENT_DIR + '/../'))
from datetime import datetime

# from ..ami import constant, func, lp_config
from ami.constant import LOADPROFILE, QUALITYCODE
from ami import constant, lp_config, func

from pydantic import (
    BaseModel,
    NegativeInt,
    PositiveInt,
    conint,
    conlist,
    constr,
    ValidationError,
    Field,
    condecimal,
    validator,
    root_validator,
)

In [6]:
xml = '''
    <EventMessage xmlns="http://iec.ch/TC57/2011/schema/message">
        <Header>
          <Verb>created</Verb>
          <Noun>MeterReadings</Noun>
          <Revision>1</Revision>
          <Context>PRODUCTION</Context>
          <Timestamp>2022-11-18T08:42:21.539+08:00</Timestamp>
          <Source>HES-CHT20190919</Source>
          <MessageID>1cbd57c9-1ddc-42c2-bb22-07da17f6ad92</MessageID>
          <CorrelationID>1cbd57c9-1ddc-42c2-bb22-07da17f6ad92</CorrelationID>
          <Property>
            <Name>ReadingTypeGroup</Name>
            <Value>LP</Value>
          </Property>
          <Property>
            <Name>QOSLevel</Name>
            <Value>LEVEL-2</Value>
          </Property>
          <Property>
            <Name>MessageIndex</Name>
            <Value>1/1</Value>
          </Property>
        </Header>
    </EventMessage>
    '''

In [7]:
pxml = '''
    <EventMessage xmlns="http://iec.ch/TC57/2011/schema/message">
      <Header>
        <Verb>created</Verb>
        <Noun>MeterReadings</Noun>
        <Revision>1</Revision>
        <Context>PRODUCTION</Context>
        <Timestamp></Timestamp>
        <Source>HES-CHT20190919</Source>
        <MessageID>1cbd57c9-1ddc-42c2-bb22-07da17f6ad92</MessageID>
        <CorrelationID>1cbd57c9-1ddc-42c2-bb22-07da17f6ad92</CorrelationID>
        <Property>
          <Name>ReadingTypeGroup</Name>
          <Value>LP</Value>
        </Property>
        <Property>
          <Name>QOSLevel</Name>
          <Value>LEVEL-2</Value>
        </Property>
        <Property>
          <Name>MessageIndex</Name>
          <Value>1/1</Value>
        </Property>
      </Header>
      <Payload>
        <MeterReadings xmlns="http://iec.ch/TC57/2011/MeterReadings#">
          <MeterReading>
            <IntervalBlocks>
              <IntervalReadings>
                <timeStamp>2022-11-18T06:45:00.000+08:00</timeStamp>
                <value>6519.0955</value>
                <ReadingQualities>
                  <source>HES</source>
                  <timeStamp>2022-11-18T06:45:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="1.0.0"></ReadingQualityType>
                </ReadingQualities>
                <ReadingQualities>
                  <comment>4057</comment>
                  <source>HES</source>
                  <timeStamp>2022-11-18T06:45:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="5.4.260"></ReadingQualityType>
                </ReadingQualities>
              </IntervalReadings>
              <ReadingType ref="0.0.2.9.1.2.12.0.0.0.0.0.0.0.0.3.72.0"></ReadingType>
            </IntervalBlocks>
            <IntervalBlocks>
              <IntervalReadings>
                <timeStamp>2022-11-18T06:45:00.000+08:00</timeStamp>
                <value>0.8358</value>
                <ReadingQualities>
                  <source>HES</source>
                  <timeStamp>2022-11-18T06:45:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="1.0.0"></ReadingQualityType>
                </ReadingQualities>
                <ReadingQualities>
                  <comment>4057</comment>
                  <source>HES</source>
                  <timeStamp>2022-11-18T06:45:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="5.4.260"></ReadingQualityType>
                </ReadingQualities>
              </IntervalReadings>
              <ReadingType ref="0.0.2.9.15.2.164.0.0.0.0.0.0.0.0.3.73.0"></ReadingType>
            </IntervalBlocks>
            <Meter>
              <mRID>WT20069447</mRID>
              <Names>
                <name>WT20069447</name>
                <NameType>
                  <name>MeterUniqueID</name>
                </NameType>
              </Names>
            </Meter>
          </MeterReading>
          <MeterReading>
            <IntervalBlocks>
              <IntervalReadings>
                <timeStamp>2022-11-18T07:00:00.000+08:00</timeStamp>
                <value>6519.1396</value>
                <ReadingQualities>
                  <source>HES</source>
                  <timeStamp>2022-11-18T07:00:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="1.0.0"></ReadingQualityType>
                </ReadingQualities>
                <ReadingQualities>
                  <comment>4058</comment>
                  <source>HES</source>
                  <timeStamp>2022-11-18T07:00:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="5.4.260"></ReadingQualityType>
                </ReadingQualities>
              </IntervalReadings>
              <ReadingType ref="0.0.2.9.1.2.12.0.0.0.0.0.0.0.0.3.72.0"></ReadingType>
            </IntervalBlocks>
            <IntervalBlocks>
              <IntervalReadings>
                <timeStamp>2022-11-18T07:00:00.000+08:00</timeStamp>
                <value>0.8358</value>
                <ReadingQualities>
                  <source>HES</source>
                  <timeStamp>2022-11-18T07:00:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="1.0.0"></ReadingQualityType>
                </ReadingQualities>
                <ReadingQualities>
                  <comment>4058</comment>
                  <source>HES</source>
                  <timeStamp>2022-11-18T07:00:00.000+08:00</timeStamp>
                  <ReadingQualityType ref="5.4.260"></ReadingQualityType>
                </ReadingQualities>
              </IntervalReadings>
              <ReadingType ref="0.0.2.9.15.2.164.0.0.0.0.0.0.0.0.3.73.0"></ReadingType>
            </IntervalBlocks>
            <Meter>
              <mRID>WT20069447</mRID>
              <Names>
                <name>WT20069447</name>
                <NameType>
                  <name>MeterUniqueID</name>
                </NameType>
              </Names>
            </Meter>
          </MeterReading>
        </MeterReadings>
      </Payload>
    </EventMessage>
'''

In [10]:
def parse_xml(xml: str) -> tuple:
    """
    Parse the xml file and return the dataframe

    Args:
        xml (str): xml

    Returns:
        tuple: dataframe, error dataframe, warning dataframe, file info
    """
    exitcode = 0

    # stored 讀表資料
    header_tmp = {}
    read_map = {}

    # create element tree object
    try:
        log_start_time = datetime.now()
        doc = xmltodict.parse(xml)

        # ---------------------------------------------------------------------------- #
        #                                Get Header Info                               #
        # ---------------------------------------------------------------------------- #
        source = doc["EventMessage"]["Header"]["Source"]
        msg_id = doc["EventMessage"]["Header"]["MessageID"]
        corr_id = doc["EventMessage"]["Header"]["CorrelationID"]
        msg_time = doc["EventMessage"]["Header"]["Timestamp"]
        verb = doc["EventMessage"]["Header"]["Verb"]
        noun = doc["EventMessage"]["Header"]["Noun"]
        context = doc["EventMessage"]["Header"]["Context"]
        rev = doc["EventMessage"]["Header"]["Revision"]
        read_group = doc["EventMessage"]["Header"]["Property"][0]["Value"]
        qos = doc["EventMessage"]["Header"]["Property"][1]["Value"]
        msg_idx = doc["EventMessage"]["Header"]["Property"][2]["Value"]
        print(msg_time)
        
        if msg_time is not None:
            msg_time = msg_time[0:19].replace("T", " ")
        else:
            file_log = {
                "file_type": 'test',
                "raw_gzfile": 'test',
                "raw_file": 'test',
                "rec_time": 'test',
                "file_path": 'test',
                "file_dir_ym": 'test',
                "file_dir_date": 'test',
                "file_seqno": 'test',
                "msg_id": 'test',
                "source": 'test',
                "read_group": 'test',
                "total_cnt": 'test',
                "warn_cnt": 'test',
                "main_succ_cnt": 'test',
                "dedup_cnt": 'test',
                "err_cnt": 'test',
                "dup_cnt": 'test',
                "hist_cnt": 'test',
                "wait_cnt": 'test',
                "proc_type": 'test',
                "file_batch_no": 'test',
                "batch_mk": 'test',
                "log_start_time": 'test',
                "log_upd_time": 'test',
                "log_end_time": 'test',
                "dw_update_time": 'test'
        } 
            

        header_tmp = {
            "source": source,
            "msg_id": msg_id,
            "corr_id": corr_id,
            "msg_time": msg_time,
            "read_group": read_group,
            "verb": verb,
            "noun": noun,
            "context": context,
            "msg_idx": msg_idx,
            "rev": rev,
            "qos": qos
        }
        print(header_tmp)
        
        try:
            lp_config.header_map(**header_tmp)
        except ValidationError as e:
            for v in e.errors():
                warn_type = constant.warn_cd[v["type"]]
                column = v["loc"][0]
                o_val = header_tmp[column]
                new_val = constant.warn_cd[v["type"]]["func"](o_val)
                # 格式修訂
                header_tmp[column] = new_val

                print(header_tmp)

                # 格式錯誤 | 資料異常告警 Log
                warn_log = {
                    "file_type": file_type,
                    "raw_gzfile": "",
                    "raw_file": file,
                    "rec_time": rec_time,
                    "file_path": path,
                    "source": header_tmp["source"],
                    "read_group": "LP",
                    "meter_id": meter_id,
                    "read_time": read_time,
                    "type_cd": warn_type,
                    "col_nm": column,
                    "o_val": o_val,
                }
                # print("warn_log: ",warn_log)
        # print(header_tmp)
    except Exception as e:
        file_log = {
            "file_type": 'test',
            "raw_gzfile": 'test',
            "raw_file": 'test',
            "rec_time": 'test',
            "file_path": 'test',
            "file_dir_ym": 'test',
            "file_dir_date": 'test',
            "file_seqno": 'test',
            "msg_id": 'test',
            "source": 'test',
            "read_group": 'test',
            "total_cnt": 'test',
            "warn_cnt": 'test',
            "main_succ_cnt": 'test',
            "dedup_cnt": 'test',
            "err_cnt": 'test',
            "dup_cnt": 'test',
            "hist_cnt": 'test',
            "wait_cnt": 'test',
            "proc_type": 'test',
            "file_batch_no": 'test',
            "batch_mk": 'test',
            "log_start_time": 'test',
            "log_upd_time": 'test',
            "log_end_time": 'test',
            "dw_update_time": 'test'
        } 
        # print(file_log)
        print("2", e)
        exitcode = 1
    finally:
        print("Duration: {}".format(datetime.now() - log_start_time))
        sys.exit(exitcode)


In [None]:
test = parse_xml(pxml)
display(test)

<class 'NoneType'>
{'source': 'HES-CHT20190919', 'msg_id': '1cbd57c9-1ddc-42c2-bb22-07da17f6ad92', 'corr_id': '1cbd57c9-1ddc-42c2-bb22-07da17f6ad92', 'msg_time': None, 'read_group': 'LP', 'verb': 'created', 'noun': 'MeterReadings', 'context': 'PRODUCTION', 'msg_idx': '1/1', 'rev': '1', 'qos': 'LEVEL-2'}
2 module 'ami.lp_config' has no attribute 'header_map'
Duration: 0:00:00


SystemExit: 1

In [44]:
file_log = FileLog(file_type='1')
dict(file_log)

{'file_type': '1', 'raw_gzfile': None}