In [1]:
import pickle
import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import Sequence
import numpy as np
from numpy import nan
from model import *
from utils import *
import collections
from matplotlib import pyplot as plt
from collections import defaultdict 
from os import listdir
from os.path import isfile, join

### Retrieve classes associated with each experiment

In [2]:
# Folder holding the experiment CSVs (named "class_ N_X_data...") next to other files.
mypath = './/data//release1//'  #change it to your path

# Experiment files are recognised by this prefix; the class label is the first
# CLASS_LABEL_LEN characters of the file name (e.g. "class_ 0_").
EXPERIMENT_PREFIX = 'class_'
CLASS_LABEL_LEN = 9

# Regular files only. Sorted because os.listdir returns entries in arbitrary
# order, which would make the positional indices stored in `idxs` differ
# between machines and runs.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))

# Derive the labels from experiment files only, so that any other file in the
# folder can never be mistaken for a class (and no real class is dropped when
# the folder content changes).
classes = np.unique(
    [f[:CLASS_LABEL_LEN] for f in onlyfiles if f.startswith(EXPERIMENT_PREFIX)]
)
print('Unique classes in the dataset: ', classes)
print()

# Map each class label to the positions (into `onlyfiles`) of its experiments.
# A prefix match is used because the label is, by construction, the start of
# the file name.
idxs = {
    label: np.where([f.startswith(label) for f in onlyfiles])[0]
    for label in classes
}

Unique classes in the dataset:  ['class_ 0_' 'class_ 2_' 'class_ 3_' 'class_ 5_' 'class_ 7_' 'class_ 9_']



### Get an example experiment from class 0 (healthy)
As we can see, the DataFrame has two columns: the first reports the name of the measurement (e.g. CpuTemperature), the second the corresponding values. The notation "class_ N_X_data" stands for the Xth experiment for class N. 

In [3]:
# Draw one experiment at random among those of the first class and load its CSV.
# The draw uses NumPy's global random state, so the selected file changes from
# run to run unless that state is seeded beforehand.
healthy_candidates = idxs[classes[0]]
index = np.random.choice(healthy_candidates, size=1).item()

# Resolve the chosen position to a file name, then to a full path.
file = onlyfiles[index]
path_csv = join(mypath, file)

data = pd.read_csv(path_csv)
data

Unnamed: 0.1,Unnamed: 0,class_ 0_75_data
0,CpuTemperature,"[[63.25, 56.75, 1.56444878471627, 59.3], [58.0..."
1,DurationPickToPick,"[[0, 0.0, nan, nan, nan, nan, nan], [4, 0.3977..."
2,DurationRobotFromFeederToTestBench,"[[1, 0.0997286821365267, 0.682, 0.682, 0.0, na..."
3,DurationRobotFromTestBenchToFeeder,"[[1, 0.0997286771636219, 0.695, 0.695, 0.0, na..."
4,DurationTestBenchClosed,"[[1, 0.0997287845784753, 0.116, 0.116, 0.0, na..."
5,EPOSCurrent,"[[55, 5.4850815107546, 1.0, -12.0, 2.146994824..."
6,EPOSPosition,"[[55, 5.48508025260809, 0.0, 0.0, 0.0, 0.0, 0...."
7,EPOSVelocity,"[[55, 5.48507899446215, 0.0, 0.0, 0.0, 0.0, 0...."
8,ErrorFrame,"[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0..."
9,FeederAction1,"[[0], [0], [0], [0], [0], [0], [0], [0], [0], ..."


### Get fields for each measurement
Get information about the features available for each measurement. The number of features (or fields) available changes across measurements.

In [5]:
# Load the table listing, for every measurement, the names of its fields.
# A dedicated variable is used for the file path so that `mypath` keeps
# pointing at the data folder: overwriting it here would make a later re-run
# of the experiment-loading cell join file names onto the fields.csv path.
fields_path = './/data//release1//fields.csv'  #change it to your path
fields = pd.read_csv(fields_path)
fields


Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6
0,CpuTemperature,vMax,vMin,vStd,value,,,
1,DurationPickToPick,vCnt,vFreq,vMax,vMin,vStd,vTrend,value
2,DurationRobotFromFeederToTestBench,vCnt,vFreq,vMax,vMin,vStd,vTrend,value
3,DurationRobotFromTestBenchToFeeder,vCnt,vFreq,vMax,vMin,vStd,vTrend,value
4,DurationTestBenchClosed,vCnt,vFreq,vMax,vMin,vStd,vTrend,value
5,EPOSCurrent,vCnt,vFreq,vMax,vMin,vStd,vTrend,value
6,EPOSPosition,vCnt,vFreq,vMax,vMin,vStd,vTrend,value
7,EPOSVelocity,vCnt,vFreq,vMax,vMin,vStd,vTrend,value
8,ErrorFrame,vCnt,vFreq,,,,,
9,FeederAction1,vCnt,,,,,,


### Get 'Pressure' measurement for the extracted experiment

In [6]:
# Positional row of the measurement inspected here, shared by `data` and `fields`.
PRESSURE_ROW = 35
# The duration formula below counts each time step as 10 s (3600 s per hour).
SECONDS_PER_STEP = 10

# The cell holds the time series serialised as text (a list of lists).
# NOTE(review): eval() executes whatever the CSV contains -- only use it on
# trusted files. It also needs the name `nan` to be in scope to parse missing
# values.
# `.iloc[row, col]` is used instead of `.iloc[row][col]`, which relies on the
# deprecated positional fallback of Series.__getitem__.
pressure = eval(data.iloc[PRESSURE_ROW, 1])
print('Experiment duration: ', len(pressure)*SECONDS_PER_STEP/3600, 'h.')
print()
# Column 0 of `fields` is the measurement name; the remaining ones are its fields.
print('Fileds associated with pressure', fields.values[PRESSURE_ROW,1:])
print()
print('Example time step: ', pressure[4])


Experiment duration:  1.0027777777777778 h.

Fileds associated with pressure ['vCnt' 'vFreq' 'vMax' 'vMin' 'vStd' 'vTrend' 'value']

Example time step:  [39, 3.88239377131042, 4.78286080507828, 4.77309488204596, 0.00283995450478673, -4.94226874181436e-06, 4.78023151810804]


### IMPORTANT 
Participants are free to use the fields they prefer for each measurement (e.g. only "value" and "vCnt") and the measurements they consider most important. They may also use only a subset of the measurements (e.g. by discarding environmental measurements such as Temperature or Humidity).

### IMPORTANT
Note that for "NumberFuseDetected", the number of keys might vary from 6 to 7. The reason is that when the field "vCnt" is never larger than 1, "vTrend" is not calculated for any element of the time series and is therefore eliminated.

In [7]:
# Positional row of NumberFuseDetected, shared by `data` and `fields`.
NFD_ROW = 33
# The duration formula below counts each time step as 10 s (3600 s per hour).
SECONDS_PER_STEP = 10

# The cell holds the time series serialised as text (a list of lists).
# NOTE(review): eval() executes whatever the CSV contains -- only use it on
# trusted files. It also needs the name `nan` to be in scope to parse missing
# values.
# `.iloc[row, col]` is used instead of `.iloc[row][col]`, which relies on the
# deprecated positional fallback of Series.__getitem__.
nfd = eval(data.iloc[NFD_ROW, 1])
print('Experiment duration: ', len(nfd)*SECONDS_PER_STEP/3600, 'h.')

# The label names the measurement actually shown (it previously said "pressure").
print('Fields associated with NumberFuseDetected', fields.values[NFD_ROW,1:])
print('Example time step')
nfd

Experiment duration:  1.0027777777777778 h.
Fileds associated with pressure ['vCnt' 'vFreq' 'vMax' 'vMin' 'vStd' 'vTrend' 'value']
Example time step


[[1, 0.0997287796055603, 17.0, 17.0, 0.0, nan, 17.0],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [1, 0.0995156899770593, 23.0, 23.0, 0.0, nan, 23.0],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [1, 0.0995364240355182, 26.0, 26.0, 0.0, nan, 26.0],
 [1, 0.0995725539447007, 23.0, 23.0, 0.0, nan, 23.0],
 [0, 0.0, nan, nan, nan, nan, nan],
 [1, 0.0996645331679381, 21.0, 21.0, 0.0, nan, 21.0],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan, nan],
 [1, 0.0996931753112762, 19.0, 19.0, 0.0, nan, 19.0],
 [0, 0.0, nan, nan, nan, nan, nan],
 [0, 0.0, nan, nan, nan, nan

### Extract either "value" or "vCnt"

In [8]:
# Positional row of NumberFuseDetected in `fields` / `data`.
NFD_ROW = 33

# For every measurement decide which field to keep: "value" when available,
# otherwise "vCnt". The two lists stay aligned with the row numbers:
#   is_value[i] == [i, column]  and  is_vcnt[i] == 'no'   -> "value" is used
#   is_vcnt[i]  == [i, column]  and  is_value[i] == 'no'  -> "vCnt" is used
is_value = []
is_vcnt = []
for i, j in enumerate(fields.values[:, 1:]):
    if 'value' in j:
        # NumberFuseDetected can carry 6 or 7 entries per time step, so its
        # field is addressed from the end (-1) instead of by its listed position.
        w_v = -1 if i == NFD_ROW else np.where('value' == j)[0]
        is_value.append([i, w_v])
        is_vcnt.append('no')
    elif 'vCnt' in j:
        w_v = np.where('vCnt' == j)[0]
        is_vcnt.append([i, w_v])
        is_value.append('no')
    else:
        # Skipping the row silently would shift every following entry of the
        # two lists and pair measurements with the wrong selector.
        raise ValueError(
            f"Measurement at row {i} has neither a 'value' nor a 'vCnt' field"
        )


# One row per measurement, one column per time step. The number of time steps
# is taken from the first measurement; every row is expected to have the same
# length. Cells start as NaN so anything left unfilled is visibly missing.
# NOTE(review): eval() executes whatever the CSV contains -- only use it on
# trusted files. It also needs the name `nan` to be in scope.
n_steps = len(eval(data.values[0, 1]))
data_fields_selected = np.full([len(data), n_steps], np.nan)
for i in range(len(data)):
    # Exactly one of the two lists holds an [i, column] selector for row i.
    selector = is_value[i] if isinstance(is_value[i], list) else is_vcnt[i]
    series = np.array(eval(data.values[i, 1]))
    # A one-element index array yields a (steps, 1) slice; squeeze flattens it.
    data_fields_selected[i, :] = series[:, selector[1]].squeeze()


# Label each row with the measurement name stored in the first column of `fields`.
selected_data = pd.DataFrame(data_fields_selected)
selected_data.index = fields['Unnamed: 0'].values
selected_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,351,352,353,354,355,356,357,358,359,360
CpuTemperature,59.3,56.325,57.7,58.35,58.975,57.7,58.125,58.9,58.25,58.425,...,60.325,58.725,62.2,58.625,60.15,59.5,61.025,60.225,60.375,59.25
DurationPickToPick,,3.05125,3.232,3.255333,3.156667,3.267667,4.938,3.02025,3.038,3.065667,...,3.041,3.226,4.843,3.014667,3.32875,3.14,3.201,3.1655,,
DurationRobotFromFeederToTestBench,0.682,0.692,0.676333,0.657,0.7035,0.698,0.677,0.706,0.69575,0.676333,...,0.683,0.72,0.6855,0.653,0.660333,0.709333,0.6635,0.71,,
DurationRobotFromTestBenchToFeeder,0.695,0.708333,0.707667,0.691333,0.718,0.741667,0.667,0.71925,0.721,0.695,...,0.71375,0.741,0.651,0.664333,0.672,0.723,0.680667,0.7285,,
DurationTestBenchClosed,0.116,0.110667,0.107667,0.107333,0.114,0.11475,0.112,0.114667,0.111667,0.1135,...,0.107667,0.109,0.114,0.115,0.108333,0.11,0.111333,0.112333,,
EPOSCurrent,-0.436364,4.103896,5.789474,14.946667,7.026316,2.355263,8.103896,5.350649,7.948052,3.87013,...,7.578947,11.533333,4.064103,8.858974,8.763158,5.597403,9.960526,-1.71875,,
EPOSPosition,0.0,10.753247,14.065789,14.133333,14.092105,16.223684,3.805195,10.675325,11.038961,10.25974,...,1.171053,14.373333,5.576923,8.628205,11.210526,16.25974,16.118421,24.9375,,
EPOSVelocity,0.0,2.558442,-3.184211,24.026667,11.447368,-7.565789,-11.454545,5.363636,18.155844,-19.987013,...,-15.526316,-10.066667,-1.807692,1.089744,5.407895,-9.454545,15.539474,-12.59375,,
ErrorFrame,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FeederAction1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
