## Configuration Variables for Borg Warner (L14)

This script contains the configuration details needed to transform data from the Borg Warner format into our data cube format. Although the configuration details are hard-coded in this prototype, in production they will be stored in a database and loaded at runtime. This configuration-based approach keeps the pipeline flexible across a wide range of differently formatted source data.

### Configuration variables defined in this script:

1. configuration and authentication info to connect to Kafka topic on Confluent cluster
2. the assembly line for which the cube is generated
3. the target test station for Root Cause Analysis (RCA)
4. the checkpoint location, output directories, and trigger interval for the data stream
5. the output directory of DF_RUN (for RCA)
6. the technical schema for Kafka messages
7. the sensor measurement data schema
8. the business logic mapping
9. sensor mapping table created from a csv file
10. the pivot table schema and pivoted features list

In [0]:
import json
import os

import pyspark.sql.types as T

### configuration details to connect to Kafka source on Confluent cluster
# SECURITY NOTE(review): the SASL credentials below are committed in plain text.
# They can be overridden through the KAFKA_SASL_USERNAME / KAFKA_SASL_PASSWORD
# environment variables; the literals are kept only as a backward-compatible
# fallback and should be rotated and moved to a secret store.
conf = {
    'bootstrap.servers': 'pkc-4rn2p.canadacentral.azure.confluent.cloud:9092',
    'security.protocol': 'SASL_SSL',
    'sasl.mechanisms': 'PLAIN',
    'sasl.username': os.environ.get('KAFKA_SASL_USERNAME', 'D27IHIL45XD46XTF'),
    'sasl.password': os.environ.get(
        'KAFKA_SASL_PASSWORD',
        'UB3NYoxYI1NYvLMUEZrHuu5nYO9ZFR4jwAJMxQckb10QxvWtjU3zP1363Y2Akgcg',
    ),
    'startingOffsets': 'earliest',
    # Alternative kept for reference: start from an explicit offset instead.
    # NOTE(review): this example names "data_test_topic", which differs from the
    # 'topic' value below - confirm the intended topic before enabling it.
    #'startingOffsets':'{"data_test_topic":{"0":252}}',
    'topic': 'kv_data_test_topic',
}


# NOTE: the following attributes should be loaded from a sql table or config file during production
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Assembly line the cube is generated for, and the target test station for RCA.
line, target = "BW_L14", 'L14_Final_04'


def _line_dir(parent):
    """Return the directory for the configured `line` located under `parent`."""
    return f'{parent}/{line}/'


# Data stream settings: trigger interval and checkpoint location.
triggerProcessingInterval = "30 seconds"
checkpointPath = 'dbfs:/acerta/checkpoint/test-stream-2/'

# Cube directories (staging, pivot, and the cube itself), one per line.
outputStagingPath = _line_dir('dbfs:/acerta/output/cube/staging')
outputPivotPath = _line_dir('dbfs:/acerta/output/cube/pivot')
outputPathDir = _line_dir('dbfs:/acerta/output/cube')
# The input directory is the same location the cube is written to.
inputPathDir = _line_dir('dbfs:/acerta/output/cube')

# RCA directories: the DF_RUN output and its history.
outputDfRunDir = _line_dir('dbfs:/acerta/output/rca/run')
outputDfRunHistoryDir = _line_dir('dbfs:/acerta/output/rca/history')

# 

### define data schema and business logic mapping configurations

# DDL of the sensor measurement rows; every column is declared as STRING.
# Built from adjacent string literals instead of a backslash continuation inside
# the literal, which embedded the source indentation in the runtime value.
sensor_data_schema_ddl = (
    "`Line` STRING, `Station` STRING, `Part_Number` STRING, "
    "`Database_Code` STRING, `Serial_Number` STRING, `Time` STRING, "
    "`Gun` STRING, `Job` STRING, `Pass` STRING, `Torque` STRING, "
    "`Ang__deg__` STRING"
)

# Business logic mapping as a JSON document: each logical field name maps to the
# source column(s) listed in the DDL above ("measured_time" maps to one column
# name, every other field to a list of column names).
sensor_data_logic_mapping = (
    '{"line":["Line"], '
    '"station_config":["Line", "Station"], '
    '"sensor_config":["Line", "Station", "Gun", "Job"], '
    '"part_number": ["Part_Number"], '
    '"serial_number": ["Serial_Number"], '
    '"measurement":["Torque", "Ang__deg__"], '
    '"measured_time":"Time"}'
)
# Parsed (dict) form of the mapping above.
logic_mapping_json = json.loads(sensor_data_logic_mapping)


# ingress data configuration
# Column definitions of the ingress schema: three STRING fields plus `rows`,
# an array of string-to-string maps.
_ingress_columns = (
    "`dataSourceId` STRING",
    "`sourceFileId` STRING",
    "`schemaVersion` STRING",
    "`rows` ARRAY<MAP<STRING, STRING>>",
)
ingress_schema_ddl = ", ".join(_ingress_columns)
# NOTE(review): `_parse_datatype_string` is a private pyspark helper (leading
# underscore) and presumably needs an active Spark context - confirm.
ingress_schema = T._parse_datatype_string(ingress_schema_ddl)


### Load sensor to uuid mapping table
# Read the mapping CSV with the header option enabled.
_sensor_mapping_csv = '/FileStore/tables/sensor_mapping_bw_l14.csv'
sensorDf = spark.read.csv(_sensor_mapping_csv, header=True)


# Identifiers prefixing the pivoted feature columns, in column order.
# NOTE(review): presumably the sensor UUIDs from the sensor mapping table - confirm.
_pivot_sensor_ids = (
    "acc8bdf3-c872-4c77-bc1d-f8d3134aa77f",
    "4fae6029-da79-4ac1-b03c-98f3e5f33b4a",
    "8515451f-c865-4cd5-9d55-279578065db5",
    "002f3aa6-fc1a-4030-b92f-d7ef66704225",
    "4ad3b2c5-5198-4c11-ac65-86aef08f3db3",
    "30963d81-db2e-467f-8a4c-28a37a15da8c",
    "f269a193-9198-434b-bc4c-fb46f7cdb1f3",
    "e6b8d50e-a042-4a6a-8d75-6519b3c02934",
    "a65c94ea-4616-4ece-ac87-d919cc97d035",
    "1f100034-a532-4a58-8b6c-f4444e074afe",
    "b8d46933-5220-4bad-8221-c54715a835fd",
)
# Measurement suffixes emitted for each identifier, in column order.
_pivot_measurements = ("Ang__deg__", "Torque")

# Pivoted feature columns: one "<id>_<measurement>" name per identifier/measurement pair.
pivot_feature_cols = [
    f"{sensor_id}_{measurement}"
    for sensor_id in _pivot_sensor_ids
    for measurement in _pivot_measurements
]

# Pivot table DDL: four key columns followed by every feature column, all STRING.
# The feature columns are joined with a bare comma (no space).
pivot_schema_ddl = (
    "`part_number` STRING, `serial_number` STRING, `timestamp` STRING, `station` STRING, "
    + ",".join(f"`{feature}` STRING" for feature in pivot_feature_cols)
)