In [0]:
%run "./config/v4/configurations-bw-kv-json"

In [0]:
%run "./config/v4/common"

data_source_id,schema_version,schema_ddl,logic_mapping
0.1.0,0.1.1,"`Line` STRING, `Station` STRING, `Part Number` STRING, `Database Code` STRING, `Serial Number` STRING, `Time` STRING, `Gun` STRING, `Job` STRING, `Pass` STRING, `Torque` STRING, `Ang (deg.)` STRING","{""line"":[""Line""], ""station_config"":[""Line"", ""Station""], ""sensor_config"":[""Line"", ""Station"", ""Gun"", ""Job""], ""part_number"": [""Part_Number""], ""serial_number"": [""Serial_Number""], ""measurement"":[""Torque"", ""Ang__deg__""], ""measured_time"":""Time""}"


data_source_id,client,location,line,source_type,folder_location
0.1.0,Borg Warner,XYZ,L14,csv,server://path


In [0]:
from pyspark.sql.functions import *
import pyspark.sql.functions as f
from pyspark.sql.window import Window
from pyspark.sql.utils import AnalysisException

In [0]:
# Read the line-specific pivot table (Delta format) and mark it for caching.
pivot_load_df = (
    spark.read
    .format('delta')
    .load(pivotRootDir + f"{line}/")
    .cache()
)
# cache() alone is lazy; running an action (count) is what actually
# populates the cache so later queries hit memory instead of storage.
pivot_load_df.count()
# display(pivot_load_df)  # uncomment to inspect the loaded table

In [0]:
# run this function to return a time series dataframe

def get_ts_data(signal,
                start_time=None, end_time=None,
                part_group=[], part_number=[], serial_number=[],
                sample_size=-1):
  """
    Returns time series measurement data for a given signal.

    This function reads a slice from the line-specific pivot table (the
    notebook-level ``pivot_load_df``) and generates time series data filtered
    by time range, part group, part number, and serial number. The number of
    rows returned is optionally limited by a given sample size.

    Part groups are expanded to part numbers through the notebook-level
    ``part_group_dict`` and merged with ``part_number``. When both part
    numbers (given directly or via part groups) and serial numbers are
    specified, the function returns the union of the data, i.e. rows that
    match either filter.

    Rows containing a null in any of the four selected columns are dropped.

    The filter arguments are never modified: the function works on private
    copies, so neither the caller's lists nor the shared default lists are
    mutated between calls.

    Parameters
    ----------
    signal : string
        The signal (pivot table column name) for which the time series data
        will be returned.
    start_time : string
        The start time of the time series data (inclusive). None for no
        lower bound.
    end_time : string
        The end time of the time series data (inclusive). None for no
        upper bound.
    part_group : array
        The list of part groups for which data is to be returned.
    part_number : array
        The list of part numbers for which data is to be returned.
    serial_number : array
        The list of serial numbers for which data is to be returned.
    sample_size : int
        The maximum number of rows to return. Use -1 for unlimited rows.
        When a limit is given, the rows with the latest timestamps are
        returned, sorted by timestamp in descending order.

    Returns
    -------
    return_code : string
        Return code of the function: '200' - success; '0' - error.
    rows : array
        A list of row objects with four columns: serial_number, part_number,
        timestamp, [signal]. Empty when return_code is '0'.

    Raises
    ------
    KeyError : A name in ``part_group`` is not a key of ``part_group_dict``
        (assuming ``part_group_dict`` is a plain dict; it is defined outside
        this cell).

    Notes
    -----
    An AnalysisException caused by a non-existent signal name is handled
    inside the function: a message is printed and ('0', []) is returned.

    Examples
    --------
    >>> get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', start_time='2021-03-16')
    >>> get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', part_group=['0.0_HS_Tolerance'], part_number=['12639700098'])
    """

  def _as_list(values):
    # Return a fresh list so the caller's argument (or the shared default
    # list in the signature) is never mutated. A bare string is kept as a
    # single value rather than being split into characters.
    if values is None:
      return []
    if isinstance(values, str):
      return [values]
    return list(values)

  try:
    # An unresolvable column (e.g. an unknown signal name) surfaces here as
    # an AnalysisException.
    ts_df = (pivot_load_df
             .select("serial_number", "part_number", "timestamp", signal)
             .withColumn("timestamp", to_timestamp("timestamp")))
  except AnalysisException:
    print(f"operation or signal not found in dataframe for line {line}")
    return ('0', [])

  if start_time is not None:
    ts_df = ts_df.filter(ts_df.timestamp >= start_time)

  if end_time is not None:
    ts_df = ts_df.filter(ts_df.timestamp <= end_time)

  # Private working copies: extending `part_number` in place would leak the
  # expanded part numbers into later calls that rely on the default list.
  part_numbers = _as_list(part_number)
  serial_numbers = _as_list(serial_number)

  # Expand each requested part group into its member part numbers.
  for group in _as_list(part_group):
    part_numbers.extend(part_group_dict[group])

  if part_numbers and serial_numbers:
    # Both filter kinds given: keep rows matching either one (union).
    ts_df = ts_df.filter(ts_df.part_number.isin(part_numbers)
                         | ts_df.serial_number.isin(serial_numbers))
  elif part_numbers:
    ts_df = ts_df.filter(ts_df.part_number.isin(part_numbers))
  elif serial_numbers:
    ts_df = ts_df.filter(ts_df.serial_number.isin(serial_numbers))

  # Drop rows with a null in any selected column.
  ts_df = ts_df.na.drop()

  if sample_size != -1:
    # Latest rows first, capped at sample_size.
    rows = ts_df.sort(col("timestamp").desc()).take(sample_size)
  else:
    rows = ts_df.collect()

  return '200', rows

In [0]:
### TESTS FOR get_ts_data ###
## keep exactly one call active at a time; uncomment the scenario you want to run

## scenario: all data for a signal
return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque')

# ## scenario: restrict by time range
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', start_time='2021-03-16')
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', end_time='2021-04-01')
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', start_time='2021-03-16', end_time='2021-04-01')

# ## scenario: restrict by part group
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', part_group=['0.0_HS_Tolerance'])
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', part_group=['0.7_HS_Tolerance'])

# ## scenario: restrict by part number
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', part_number=['12639700098'])

# ## scenario: restrict by serial number
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', serial_number=['0542106411600017', '0532108455300046', '0000338751200150'])

# ## scenario: composite selection (combination of part group, part number, and serial numbers)
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', part_group=['0.0_HS_Tolerance'], part_number=['12639700098'])
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', part_number=['12639700098'], serial_number=['0000338751200150'])
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', part_number=['12769700033'], serial_number=['0532108455300046'])

# ## scenario: cap the number of rows with sample_size
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque', sample_size=99)
# return_code, ts_df = get_ts_data('30963d81-db2e-467f-8a4c-28a37a15da8c_Torque',  part_number=['12639700098'], sample_size=30)

# ## scenario: error handling
# return_code, ts_df = get_ts_data('_Torque', sample_size=99)   # non-existent feature name

# show the result: rich display when rows came back, plain print of the empty list otherwise
if ts_df:
  display(ts_df)
else:
  print(ts_df)

serial_number,part_number,timestamp,30963d81-db2e-467f-8a4c-28a37a15da8c_Torque
842119511100133,12639700098,2021-04-12T13:22:59.397+0000,13.65
902114333500078,12639700098,2021-04-12T16:26:44.150+0000,13.6
842119445100130,12639700098,2021-04-12T13:19:44.753+0000,13.81
842119445100130,12639700098,2021-04-12T13:19:52.707+0000,13.64
842119511100133,12639700098,2021-04-12T13:23:12.657+0000,13.61
882111550700060,12639700098,2021-04-12T05:15:00.607+0000,13.76
842119511100133,12639700098,2021-04-12T13:23:06.057+0000,13.77
882110240400046,12639700098,2021-04-12T16:24:32.410+0000,13.71
882111554700061,12639700098,2021-04-12T05:17:24.037+0000,13.78
882110240400046,12639700098,2021-04-12T16:24:04.113+0000,13.62
