In [None]:
# Path of the eCR datastore Delta table loaded by this notebook.
ECR_DELTA_TABLE_FILE_PATH = "/delta-tables/ecr_datastore"

# Maps eCR datastore column names to the column headers used in the TSV export.
# Columns that the export fills with literals instead of datastore values
# ("Section action", "Section instance", "Comments", "Note") have no entry
# here; the placeholder comments below only mark where they sit in the output.
iris_tsv_column_aliases = {
    "incident_id": "Incident ID",
    # Fixed: the header previously carried an accidental leading space
    # (" Last name"), unlike every other header in this mapping.
    "last_name": "Last name",
    "first_name": "First name",
    # Section action (literal column, no datastore source)
    # Section instance (literal column, no datastore source)
    "rr_id": "RR ID",
    "status": "Status",
    "conditions": "Conditions",
    "eicr_id": "eICR ID",
    "eicr_version_number": "eICR Version Number",
    "authoring_datetime": "Authoring date/time",
    "provider_id": "Provider ID",
    "facility_id_number": "Facility ID Number",
    "facility_name": "Facility Name",
    "facility_type": "Facility Type/Hospital unit",
    "encounter_type": "Encounter Details: type",
    "encounter_start_date": "Encounter Details: date (from)",
    "encounter_end_date": "Encounter Details: date (to)",
    "active_problem_1": "Active Problem 1",
    "active_problem_date_1": "Active Problem Noted Date 1",
    "active_problem_2": "Active Problem 2",
    "active_problem_date_2": "Active Problem Noted Date 2",
    "active_problem_3": "Active Problem 3",
    "active_problem_date_3": "Active Problem Noted Date 3",
    "active_problem_4": "Active Problem 4",
    "active_problem_date_4": "Active Problem Noted Date 4",
    "active_problem_5": "Active Problem 5",
    "active_problem_date_5": "Active Problem Noted Date 5",
    "reason_for_visit": "Reason for visit",
    # Comments (literal column, no datastore source)
    "test_type_1": "Test Type 1",
    "test_result_1": "Test Result 1",
    # "test_result_interp_1" is not mapped yet
    "specimen_type_1": "Specimen Type 1",
    "performing_lab_1": "Performing Lab 1",
    "specimen_collection_date_1": "Specimen Collection Date 1",
    "result_date_1": "Result Date 1",
    "test_type_2": "Test Type 2",
    "test_result_2": "Test Result 2",
    # "test_result_interp_2" is not mapped yet
    "specimen_type_2": "Specimen Type 2",
    "performing_lab_2": "Performing Lab 2",
    "specimen_collection_date_2": "Specimen Collection Date 2",
    "result_date_2": "Result Date 2",
    # Note (literal column, no datastore source)
}


from pyspark.sql import SparkSession
from delta.tables import *
from pyspark.sql.functions import *
from pyspark.sql.types import StructField, StructType

spark = SparkSession.builder.getOrCreate()


def _aliased(name):
    """Return datastore column `name` renamed to its TSV export header."""
    return col(name).alias(iris_tsv_column_aliases[name])


def _null_string_column(header):
    """Return an all-null column named `header`, typed as string.

    A bare `lit(None)` produces a void-typed column, which Spark's CSV/TSV
    writer cannot serialize; casting to string keeps the column exportable.
    """
    return lit(None).cast("string").alias(header)


# Read in data and rename columns for TSV export. The order of the entries
# below is the column order of the resulting DataFrame.
ecr = spark.read.format("delta").load(ECR_DELTA_TABLE_FILE_PATH).select(
    _aliased("incident_id"),
    # Identifier columns kept under their datastore names (no export alias).
    col("iris_id"),
    col("patient_id"),
    col("person_id"),
    _aliased("first_name"),
    _aliased("last_name"),
    lit("[ListSectionInsert]").alias("Section action"),
    _null_string_column("Section instance"),
    _aliased("rr_id"),
    _aliased("status"),
    _aliased("conditions"),
    _aliased("eicr_id"),
    _aliased("eicr_version_number"),
    _aliased("authoring_datetime"),
    _aliased("provider_id"),
    _aliased("facility_id_number"),
    _aliased("facility_name"),
    _aliased("facility_type"),
    _aliased("encounter_type"),
    _aliased("encounter_start_date"),
    _aliased("encounter_end_date"),
    _aliased("active_problem_1"),
    _aliased("active_problem_date_1"),
    _aliased("active_problem_2"),
    _aliased("active_problem_date_2"),
    _aliased("active_problem_3"),
    _aliased("active_problem_date_3"),
    _aliased("active_problem_4"),
    _aliased("active_problem_date_4"),
    _aliased("active_problem_5"),
    _aliased("active_problem_date_5"),
    _aliased("reason_for_visit"),
    _null_string_column("Comments"),
    _aliased("test_type_1"),
    _aliased("test_result_1"),
    _aliased("specimen_type_1"),
    _aliased("performing_lab_1"),
    _aliased("specimen_collection_date_1"),
    _aliased("result_date_1"),
    _aliased("test_type_2"),
    _aliased("test_result_2"),
    _aliased("specimen_type_2"),
    _aliased("performing_lab_2"),
    _aliased("specimen_collection_date_2"),
    # NOTE(review): "result_date_2" was already left out of the select even
    # though "result_date_1" is included and an alias exists for it --
    # confirm whether the omission is intentional before adding
    # _aliased("result_date_2") here.
    lit("eCR data added to UDF through import utility").alias("Note"),
)

def flatten(row, id_field_count=3):
    """Expand one record into a header row followed by the full data row.

    The header row repeats the first `id_field_count` fields of `row` and is
    padded with `None` so that it is exactly as wide as `row`. The padding
    used to be a hard-coded run of `None` values, which produced header rows
    of a different width than the data rows whenever the selected column
    list changed.

    NOTE(review): the original code named positions 1 and 2 "last_name" and
    "first_name", but with the current select order those positions hold
    "iris_id" and "patient_id" -- confirm which identifiers the header row
    is supposed to carry.

    Args:
        row: A tuple-like record (e.g. a Spark `Row`) supporting `len()` and
            slicing.
        id_field_count: Number of leading fields copied into the header row.
            Defaults to 3, matching the original behavior.

    Returns:
        An iterator yielding the padded header tuple and then `row` itself.
        If `row` has no more than `id_field_count` fields, the header is a
        plain copy of `row` with no padding.
    """
    leading = tuple(row[:id_field_count])
    # A non-positive multiplier yields an empty tuple, so short rows are safe.
    padding = (None,) * (len(row) - len(leading))
    return iter([leading + padding, row])

# Each input record becomes two output records (see `flatten`).
flattend_rdd = ecr.rdd.flatMap(flatten)

# Pass an explicit schema instead of bare column names: with names only,
# Spark must infer every column type from the Python values, and inference
# fails for columns that contain nothing but None. Every field is marked
# nullable because the header rows produced by `flatten` are None-padded,
# including in columns that `ecr` itself reports as non-nullable literals.
flattened_schema = StructType(
    [StructField(field.name, field.dataType, True) for field in ecr.schema.fields]
)
df3 = spark.createDataFrame(flattend_rdd, flattened_schema)
df3.show(6)

# Separate ecrs with incident IDs and ones without using conditional .filter()

# df3.write.option("header","true").option("sep","\t").csv("/test/test.tsv")
        