In [0]:
# Enable the %run below once DataFabricCommonFunctions (currently in Simoun's Workspace) has been promoted to this folder
# %run "./DataFabricCommonFunctions"

In [0]:
%run "./DataFabricCommonFunctions"

In [0]:
# import sys
# from pyspark.sql.utils import AnalysisException
# from delta.tables import DeltaTable


# def append_or_create_table(df, target_table):
#     """Append to an existing table or create a new one."""
#     try:
#         spark.read.table(target_table)
#         df.write.format("delta").mode("append").option(
#             "mergeSchema", "true"
#         ).saveAsTable(target_table)
#         return True
#     except AnalysisException:
#         df.write.format("delta").mode("overwrite").option(
#             "overwriteSchema", "true"
#         ).saveAsTable(target_table)
#         return True
#     except Exception as e:
#         print(f"Error processing {target_table}: {str(e)}")
#         return False

In [0]:
import sys
from pyspark.sql.utils import AnalysisException
from delta.tables import DeltaTable

def append_or_create_table(param_df, param_target_table):
    """Append ``param_df`` to a Delta table, creating the table if it is missing.

    The table's existence is probed with ``spark.read.table``; an
    ``AnalysisException`` from that probe is treated as "table does not exist".

    * Table exists  -> write with mode ``append`` and ``mergeSchema=true``.
    * Table missing -> write with mode ``overwrite`` and ``overwriteSchema=true``.

    Args:
        param_df: DataFrame to persist.
        param_target_table: Name of the target table passed to ``saveAsTable``.

    Returns:
        True when the write succeeded, False when any error occurred (the error
        is printed, not raised).
    """
    try:
        # Probe for existence on its own. Previously the append shared this
        # try-block, so an AnalysisException raised by the append itself (for
        # example a schema conflict) was mistaken for "table missing" and the
        # existing table was overwritten.
        try:
            spark.read.table(param_target_table)
            v_table_exists = True
        except AnalysisException:
            v_table_exists = False

        v_writer = param_df.write.format("delta")
        if v_table_exists:
            v_writer = v_writer.mode("append").option("mergeSchema", "true")
        else:
            v_writer = v_writer.mode("overwrite").option("overwriteSchema", "true")

        v_writer.saveAsTable(param_target_table)
        return True
    except Exception as v_error:
        # Any write failure (append or create) is reported through the return
        # value, so callers never have to handle an exception from this helper.
        print(f"Error processing {param_target_table}: {str(v_error)}")
        return False


In [0]:
# from pyspark.sql import DataFrame
# from pyspark.sql.functions import expr, current_timestamp

# def add_audit_fields(df, clientid, facilityid):
#     result = (
#         df.withColumn("client_id", clientid)
#         .withColumn("facility_id", facilityid)
#         .withColumn("created_by_user", expr("current_user()"))
#         .withColumn("process_timestamp", current_timestamp())
#         .withColumn("modified_by_user",  expr("current_user()"))
#         .withColumn("datetime_last_modified", current_timestamp())
#     )
#     return result

In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.functions import expr, current_timestamp

def add_audit_fields(param_df, param_client_id, param_facility_id):
    """Return ``param_df`` extended with six audit columns.

    Columns are added in this order: ``client_id``, ``facility_id``,
    ``created_by_user``, ``process_timestamp``, ``modified_by_user`` and
    ``datetime_last_modified``.

    Args:
        param_df: DataFrame to decorate.
        param_client_id: Value handed unchanged to ``withColumn`` for
            ``client_id`` (presumably a Column expression - confirm callers).
        param_facility_id: Value handed unchanged to ``withColumn`` for
            ``facility_id`` (same assumption as above).

    Returns:
        The result of chaining ``withColumn`` for every audit column.
    """
    # (column name, column value) pairs, listed in the order they are applied.
    v_audit_columns = [
        ("client_id", param_client_id),
        ("facility_id", param_facility_id),
        ("created_by_user", expr("current_user()")),
        ("process_timestamp", current_timestamp()),
        ("modified_by_user", expr("current_user()")),
        ("datetime_last_modified", current_timestamp()),
    ]

    v_audited_df = param_df
    for v_column_name, v_column_value in v_audit_columns:
        v_audited_df = v_audited_df.withColumn(v_column_name, v_column_value)
    return v_audited_df


In [0]:
# import pandas as pd
# import json

# from pyspark.sql.functions import pandas_udf
# from pyspark.sql.types import ArrayType, StringType

# @pandas_udf("array<string>")
# def parse_edi_udf(content_series: pd.Series, filename_series: pd.Series) -> pd.Series:
#     results = []
#     for content, filename in zip(content_series, filename_series):
#         try:
#             edi_obj = EDI(content, strict_transactions=False)
#             flattened = hm.flatten2(edi_obj, filename=filename)
#             json_rows = [json.dumps(hm.flatten_to_json3(row)) for row in flattened]
#             results.append(json_rows)
#         except Exception as e1:
#             try:
#                 edi_obj = EDI(content)
#                 flattened = hm.flatten2(edi_obj, filename=filename)
#                 json_rows = [json.dumps(hm.flatten_to_json3(row)) for row in flattened]
#                 results.append(json_rows)
#             except Exception as e2:
#                 error_info = {
#                     "filename": filename,
#                     "error_first_attempt": str(e1),
#                     "error_second_attempt": str(e2),
#                     "quarantine": True
#                 }
#                 results.append([json.dumps(error_info)])
#     return pd.Series(results)

In [0]:
import pandas as pd
import json

from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import ArrayType, StringType

@pandas_udf("array<string>")
def parse_edi_udf(param_content_series: pd.Series, param_filename_series: pd.Series) -> pd.Series:
    """Parse EDI payloads into lists of JSON strings, one list per input row.

    Each payload is parsed with ``EDI(..., strict_transactions=False)`` first;
    if anything in that attempt fails, it is retried with ``EDI(...)`` and its
    default arguments. When both attempts fail, the row yields a single JSON
    record holding the filename, both error messages and ``"quarantine": true``.

    NOTE(review): ``EDI`` and ``hm`` are not defined in the visible cells -
    presumably supplied by the ``%run`` notebook; confirm.

    Args:
        param_content_series: Series of raw EDI payloads.
        param_filename_series: Series of filenames, aligned with the payloads.

    Returns:
        Series whose elements are lists of JSON strings.
    """

    def _to_json_rows(v_payload, v_source_name, **v_edi_options):
        # Parse -> flatten -> serialize each flattened row to a JSON string.
        v_parsed = EDI(v_payload, **v_edi_options)
        v_flat_rows = hm.flatten2(v_parsed, filename=v_source_name)
        return [json.dumps(hm.flatten_to_json3(v_item)) for v_item in v_flat_rows]

    def _parse_one(v_payload, v_source_name):
        try:
            return _to_json_rows(v_payload, v_source_name, strict_transactions=False)
        except Exception as v_lenient_error:
            try:
                return _to_json_rows(v_payload, v_source_name)
            except Exception as v_default_error:
                # Both attempts failed: emit one quarantine record for the row.
                v_failure = {
                    "filename": v_source_name,
                    "error_first_attempt": str(v_lenient_error),
                    "error_second_attempt": str(v_default_error),
                    "quarantine": True
                }
                return [json.dumps(v_failure)]

    return pd.Series(
        [
            _parse_one(v_payload, v_source_name)
            for v_payload, v_source_name in zip(param_content_series, param_filename_series)
        ]
    )


In [0]:
import pandas as pd
import json

from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import ArrayType, StringType

@pandas_udf("array<string>")
def parse_edi_udf0(param_content_series: pd.Series, param_filename_series: pd.Series) -> pd.Series:
    """Parse EDI payloads into lists of JSON strings using ``hm.flatten``.

    Two attempts are made per payload, in this order:
    ``EDI(payload, strict_transactions=False)`` and then ``EDI(payload)``.
    The first attempt that parses, flattens and serializes without raising
    supplies the row's output. If both raise, the row yields a single JSON
    record with the filename, both error messages and ``"quarantine": true``.

    NOTE(review): ``EDI`` and ``hm`` are not defined in the visible cells -
    presumably supplied by the ``%run`` notebook; confirm.

    Args:
        param_content_series: Series of raw EDI payloads.
        param_filename_series: Series of filenames, aligned with the payloads.

    Returns:
        Series whose elements are lists of JSON strings.
    """
    # Keyword arguments for each EDI() attempt, in the order they are tried.
    v_attempt_options = ({"strict_transactions": False}, {})

    v_output = []
    for v_payload, v_source_name in zip(param_content_series, param_filename_series):
        v_attempt_errors = []
        for v_edi_options in v_attempt_options:
            try:
                v_parsed = EDI(v_payload, **v_edi_options)
                v_flat_rows = hm.flatten(v_parsed, filename=v_source_name)
                v_output.append(
                    [json.dumps(hm.flatten_to_json(v_item)) for v_item in v_flat_rows]
                )
                break
            except Exception as v_attempt_error:
                # Keep the exception itself; it is only stringified if every
                # attempt fails.
                v_attempt_errors.append(v_attempt_error)
        else:
            # No attempt succeeded: emit one quarantine record for the row.
            v_failure = {
                "filename": v_source_name,
                "error_first_attempt": str(v_attempt_errors[0]),
                "error_second_attempt": str(v_attempt_errors[1]),
                "quarantine": True
            }
            v_output.append([json.dumps(v_failure)])

    return pd.Series(v_output)


In [0]:
# from pyspark.sql.types import *
# import json


# def parse_and_flatten_edi(content, filename):
#     try:
#         try:
#             edi = EDI(content)
#         except Exception:
#             edi = EDI(content, strict_transactions=False)

#         if not edi or (isinstance(edi, list) and len(edi) == 0):
#             raise ValueError("Parsed EDI content is empty.")

#         flattened = hm.flatten(edi, filename=filename)

#         if not flattened or (isinstance(flattened, list) and len(flattened) == 0):
#             raise ValueError("Flattened output is empty.")

#         return [json.dumps(hm.flatten_to_json(x)) for x in flattened]

#     except Exception as e:
#         return [
#             json.dumps({"filename": filename, "error": str(e), "quarantined": True})
#         ]


# parse_and_flatten_udf = udf(parse_and_flatten_edi, ArrayType(StringType()))


# from pyspark.sql.utils import AnalysisException


# def process_table(df, target_table):
#     """Append to an existing table or create a new one."""
#     try:
#         spark.read.table(target_table)

#         df.write.format("delta").mode("append").option(
#             "mergeSchema", "true"
#         ).saveAsTable(target_table)
#         return True
#     except AnalysisException:
#         df.write.format("delta").mode("overwrite").saveAsTable(target_table)
#         return True
#     except Exception as e:
#         print(f"Error processing {target_table}: {str(e)}")
#         return False

In [0]:
from pyspark.sql.types import *
import json
from pyspark.sql.utils import AnalysisException

def parse_and_flatten_edi(param_content, param_filename):
    """Parse one EDI payload and return its flattened rows as JSON strings.

    ``EDI(param_content)`` is tried first; if it raises, parsing is retried
    with ``strict_transactions=False``. An empty parse result or an empty
    flatten result is treated as an error. Any error yields a one-element list
    holding a JSON record with the filename, the error text and
    ``"quarantined": true`` instead of raising.

    NOTE(review): ``EDI`` and ``hm`` are not defined in the visible cells -
    presumably supplied by the ``%run`` notebook; confirm.

    Args:
        param_content: Raw EDI payload.
        param_filename: Filename forwarded to ``hm.flatten`` and echoed in the
            quarantine record.

    Returns:
        list[str]: JSON strings, one per flattened row, or a single quarantine
        record.
    """

    def _is_blank(v_value):
        # Falsy values and zero-length lists both count as "nothing parsed".
        return (not v_value) or (isinstance(v_value, list) and len(v_value) == 0)

    def _quarantine_record(v_reason):
        v_record = {
            "filename": param_filename,
            "error": str(v_reason),
            "quarantined": True
        }
        return [json.dumps(v_record)]

    try:
        try:
            v_parsed = EDI(param_content)
        except Exception:
            # Default parse failed; retry with strict_transactions disabled.
            v_parsed = EDI(param_content, strict_transactions=False)

        if _is_blank(v_parsed):
            raise ValueError("Parsed EDI content is empty.")

        v_flat_rows = hm.flatten(v_parsed, filename=param_filename)
        if _is_blank(v_flat_rows):
            raise ValueError("Flattened output is empty.")

        v_json_rows = []
        for v_item in v_flat_rows:
            v_json_rows.append(json.dumps(hm.flatten_to_json(v_item)))
        return v_json_rows
    except Exception as v_failure:
        return _quarantine_record(v_failure)

# Wrap parse_and_flatten_edi as a Spark UDF whose declared return type is
# array<string>.
# NOTE(review): `udf` is not imported in the visible cells (the wildcard import
# from pyspark.sql.types does not provide it) - presumably it comes from the
# notebook runtime or the %run notebook; confirm, or import it explicitly from
# pyspark.sql.functions.
parse_and_flatten_udf = udf(parse_and_flatten_edi, ArrayType(StringType()))

def process_table(param_df, param_target_table):
    """Append to an existing Delta table or create a new one.

    The table's existence is probed with ``spark.read.table``; an
    ``AnalysisException`` from that probe is treated as "table does not exist".

    * Table exists  -> write with mode ``append`` and ``mergeSchema=true``.
    * Table missing -> write with mode ``overwrite`` (no extra options).

    Args:
        param_df: DataFrame to persist.
        param_target_table: Name of the target table passed to ``saveAsTable``.

    Returns:
        True when the write succeeded, False when any error occurred (the error
        is printed, not raised).
    """
    try:
        # Probe for existence on its own. Previously the append shared this
        # try-block, so an AnalysisException raised by the append itself (for
        # example a schema conflict) was mistaken for "table missing" and the
        # existing table was overwritten.
        try:
            spark.read.table(param_target_table)
            v_table_exists = True
        except AnalysisException:
            v_table_exists = False

        v_writer = param_df.write.format("delta")
        if v_table_exists:
            v_writer = v_writer.mode("append").option("mergeSchema", "true")
        else:
            v_writer = v_writer.mode("overwrite")

        v_writer.saveAsTable(param_target_table)
        return True
    except Exception as v_error:
        # Any write failure (append or create) is reported through the return
        # value, so callers never have to handle an exception from this helper.
        print(f"Error processing {param_target_table}: {str(v_error)}")
        return False
