In [0]:
%run "./config/configurations-bw"

In [0]:
from pyspark.sql.functions import col, concat, concat_ws, lit, collect_list, array_join, desc, first, last, when
import pyspark.sql.functions as f
from pyspark.sql.window import Window

In [0]:
# Read the pivoted Delta table and mark it for caching; the cells below query it repeatedly.
pivot_load_df = spark.read.format('delta').load(outputPivotPath).cache()
# count() is an action, so it populates the cache; the row count is shown as the cell output.
pivot_load_df.count()

Out[4]: 297

In [0]:
def col_in_df(col_name, df):
  """Return True when `col_name` is one of the names listed in `df.columns`."""
  return col_name in df.columns

def _limit_column(source_col, fixed_value):
  """Build one limit column expression, always typed as double.

  Precedence: the run-time column `source_col` if given, else the constant
  `fixed_value`, else a typed null (no limit on that side).
  """
  if source_col is not None:
    # Cast to double, not 'float': Spark's float is 32-bit, and a limit rounded to
    # 32 bits no longer equals the same number held as a double, so a reading that
    # sits exactly on the limit could fail an inclusive (<= / >=) check.
    return col(source_col).cast('double')
  if fixed_value is not None:
    # A Python float literal becomes a double column.
    return lit(float(fixed_value))
  # Typed null instead of a bare lit(None), so the column has the same type in every case.
  return lit(None).cast('double')


def set_min_max_cols(min_col, max_col, sensor_min, sensor_max, feature, df):
  """Add the lower/upper limit columns for `feature` to `df`.

  Args:
    min_col: name of an existing column in `df` holding the lower limit, or None.
    max_col: name of an existing column in `df` holding the upper limit, or None.
    sensor_min: fixed lower limit, used only when `min_col` is None; may be None.
    sensor_max: fixed upper limit, used only when `max_col` is None; may be None.
    feature: feature column name; the new columns are named
      `<feature>_00min00` and `<feature>_00max00`.
    df: input DataFrame (not modified; a new DataFrame is returned).

  Returns:
    Tuple `(feature_min_col, feature_max_col, limits_df)`: the two new column
    names and the DataFrame with both columns added. A side with neither a
    limit column nor a fixed value gets an all-null column.

  Raises:
    AssertionError: if a given `min_col`/`max_col` is not in `df`, or if either
      new column name already exists in `df`.
  """
  assert min_col is None or col_in_df(min_col, df), f"{feature} min column ({min_col}) not found in dataframe."
  assert max_col is None or col_in_df(max_col, df), f"{feature} max column ({max_col}) not found in dataframe."

  feature_min_col, feature_max_col = feature+'_00min00', feature+'_00max00'
  assert not col_in_df(feature_min_col, df) and not col_in_df(feature_max_col, df), "Min/Max column name in conflict with existing columns."

  limits_df = (df
               .withColumn(feature_min_col, _limit_column(min_col, sensor_min))
               .withColumn(feature_max_col, _limit_column(max_col, sensor_max))
              )

  return feature_min_col, feature_max_col, limits_df
  

In [0]:
# SVD - select feature and value validation
# Alternative features, kept for switching data sets:
#feature = '003a5adb-84ac-42cc-8a40-8ab769e37793_data_value' #ASC1
#feature = '01c7ae65-eb2f-4c3b-adda-47909f3607e0_data_value'  #ASC1
feature = '30963d81-db2e-467f-8a4c-28a37a15da8c_Torque' #bw
# The sensor UUID is the first 36 characters of the feature column name.
sensor_uuid = feature[:36]

# Filter out values that fall outside the sensor limits.
# First, get the sensor min/max information:
# -> if the limits are fixed and come from a configuration file, they are in the 'min' / 'max' columns;
# -> if the limits change at run time, 'min_col' / 'max_col' give the names of the columns that hold them.
sensor_limits = (sensorDf
                 .filter(col('sensor_uuid') == sensor_uuid)
                 .select('min_col', 'max_col', "min", "max")
                 .first()
                )
# first() yields None when no row matches; fail with a clear message instead of an unpacking error.
if sensor_limits is None:
  raise ValueError(f"No limits found in sensorDf for sensor_uuid '{sensor_uuid}'.")
limit_min_col, limit_max_col, sensor_min, sensor_max = sensor_limits
# Uncomment to override the fixed limits while experimenting:
# sensor_min=50
# sensor_max=60

# From here on, min_col / max_col are the names of the limit columns added to limits_df.
min_col, max_col, limits_df = set_min_max_cols(limit_min_col, limit_max_col, sensor_min, sensor_max, feature, pivot_load_df)

# A null limit means that side is unbounded; both bounds are inclusive.
above_min = col(min_col).isNull() | (col(min_col) <= col(feature))
below_max = col(max_col).isNull() | (col(max_col) >= col(feature))
isvalid_col = feature + "_isvalid"

display(limits_df
        .withColumn(isvalid_col, when(above_min & below_max, 1).otherwise(0))
        .filter(col(isvalid_col) == 1)
        .select("part_number", "serial_number", "timestamp", feature)
        # Drop rows where any of the selected columns is null.
        .na.drop()
       )

part_number,serial_number,timestamp,30963d81-db2e-467f-8a4c-28a37a15da8c_Torque
12639700098,632112004400054,2021-03-22T20:43:02.977+0000,13.72
12639700098,482110035100049,2021-03-16T07:44:08.900+0000,13.69
12639700098,882110240400046,2021-04-12T16:24:32.410+0000,13.71
12639700098,542106411600017,2021-03-15T12:59:10.457+0000,13.68
12639700098,842119445100130,2021-04-12T13:19:58.800+0000,13.69
12769700033,338751200150,2021-04-09T20:53:00.197+0000,13.72
12639700098,742114003600085,2021-03-30T15:46:53.213+0000,13.64
12639700098,882111550700060,2021-04-12T05:15:10.297+0000,13.78
12639700098,882111554700061,2021-04-12T05:17:09.563+0000,13.68
12639700098,882111554700061,2021-04-12T05:17:24.037+0000,13.78


In [0]:
# MVD: filter and aggregate (currently under development)

#query_time = '2020-09-07T16:27:56.785+0000' #ASC1
query_time = '2021-04-12T13:20:05.370+0000'  #BW

def _latest_value(feature_col):
  """Aggregate expression: the non-null value of `feature_col` with the greatest timestamp.

  Replaces last(feature_col, ignorenulls=True), whose result depends on row order
  and is therefore not deterministic after the groupBy shuffle.
  """
  stamped = f.struct(col('timestamp').alias('ts'), col(feature_col).alias('value'))
  # Structs are compared field by field, so max() selects the greatest timestamp
  # (ties are broken by the greater value). Rows where the feature is null become
  # null structs, which max() skips; a group with no value at all yields null.
  return f.max(when(col(feature_col).isNotNull(), stamped)).getField('value').alias(feature_col)

# For every part/serial pair, take the most recent non-null value of each feature
# recorded at or before query_time.
df_path_MVD = (pivot_load_df
               .filter(f"timestamp<='{query_time}'")
               .groupBy('part_number', 'serial_number')
               .agg(*[_latest_value(c) for c in pivot_feature_cols])
              )

# TODO: add timestamp column
#display(df_path_MVD)

part_number,serial_number,acc8bdf3-c872-4c77-bc1d-f8d3134aa77f_Ang__deg__,acc8bdf3-c872-4c77-bc1d-f8d3134aa77f_Torque,4fae6029-da79-4ac1-b03c-98f3e5f33b4a_Ang__deg__,4fae6029-da79-4ac1-b03c-98f3e5f33b4a_Torque,8515451f-c865-4cd5-9d55-279578065db5_Ang__deg__,8515451f-c865-4cd5-9d55-279578065db5_Torque,002f3aa6-fc1a-4030-b92f-d7ef66704225_Ang__deg__,002f3aa6-fc1a-4030-b92f-d7ef66704225_Torque,4ad3b2c5-5198-4c11-ac65-86aef08f3db3_Ang__deg__,4ad3b2c5-5198-4c11-ac65-86aef08f3db3_Torque,30963d81-db2e-467f-8a4c-28a37a15da8c_Ang__deg__,30963d81-db2e-467f-8a4c-28a37a15da8c_Torque,f269a193-9198-434b-bc4c-fb46f7cdb1f3_Ang__deg__,f269a193-9198-434b-bc4c-fb46f7cdb1f3_Torque,e6b8d50e-a042-4a6a-8d75-6519b3c02934_Ang__deg__,e6b8d50e-a042-4a6a-8d75-6519b3c02934_Torque,a65c94ea-4616-4ece-ac87-d919cc97d035_Ang__deg__,a65c94ea-4616-4ece-ac87-d919cc97d035_Torque,1f100034-a532-4a58-8b6c-f4444e074afe_Ang__deg__,1f100034-a532-4a58-8b6c-f4444e074afe_Torque,b8d46933-5220-4bad-8221-c54715a835fd_Ang__deg__,b8d46933-5220-4bad-8221-c54715a835fd_Torque
12639700098,0472120380800151,,,,,,,,,,,,,,,,,,,,,18.0,13.7
12639700098,0472121005800156,,,,,,,,,,,,,,,,,,,,,12.0,13.67
12639700098,0482106335400003,,,,,,,,,,,,,,,56.0,10.49,,,,,,
12639700098,0482106351600004,,,,,,,,,,,,,,,,,90.0,143.05,,,,
12639700098,0482106452300007,,,,,,,,,,,65.0,13.73,992.0,17.08,,,,,,,,
12639700098,0482109230400041,,,,,,,,,,,,,1056.0,17.01,,,,,,,,
12639700098,0482109333800043,,,,,,,,,,,,,,,,,90.0,158.88,,,,
12639700098,0482110031800048,,,,,,,,,,,,,,,,,90.0,147.74,,,,
12639700098,0482110035100049,,,,,,,,,,,55.0,13.72,1057.0,17.03,,,,,,,,
12639700098,0482110114800050,,,,,,,,,,,,,,,54.0,10.47,,,,,,


In [0]:
#display(pivot_load_df)

In [0]:
#display(df_path_MVD.filter(f"serial_number='1265-2009016318'"))

In [0]:
#display(sensorDf)