# Case study
#### Using RISF on real-life data from bus sensors

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
import datetime
import pickle
from csutils import *

In [2]:
# Timestamp conversion copied from buses-data-demo.ipynb
# Reference point for the raw sensor timestamps, written as a naive wall-clock time.
VACT_EPOCH = datetime.datetime(year=2011,month=6,day=16,hour=5,minute=23,second=0)
# The same instant as a Unix timestamp (seconds since 1970-01-01 UTC).
VACT_TIMESTAMP = 1308194580
# 1308194580 is 03:23:00 UTC, so VACT_EPOCH is a UTC+02:00 wall-clock time.
# The offset is pinned explicitly: a bare fromtimestamp() uses the local zone of
# the machine running the notebook, which made this check fail outside UTC+2.
_VACT_TZ = datetime.timezone(datetime.timedelta(hours=2))
assert datetime.datetime.fromtimestamp(VACT_TIMESTAMP, tz=_VACT_TZ).replace(tzinfo=None) == VACT_EPOCH
# Duration units expressed in milliseconds (the unit getVactDate works in).
MILISECOND = 1
SECOND = 1000 * MILISECOND
HOUR = 3600*SECOND
DAY = 24*HOUR
def getVactDate(value):
    """Convert a raw timestamp value into a naive ``datetime``.

    Parameters
    ----------
    value : number or numeric string
        Offset from ``VACT_EPOCH`` in milliseconds. The value is passed through
        ``int()``, so any fractional part is truncated.

    Returns
    -------
    datetime.datetime
        ``VACT_EPOCH`` shifted forward by ``value`` milliseconds.
    """
    return VACT_EPOCH + datetime.timedelta(milliseconds=int(value))

In [3]:
# Loading the pickled pandas DataFrames with recorded values from the bus sensors
# (the column with the transformed timestamp is added in the next cell).
# Note that this data is ignored by git, so you have to make a copy on your own
# Also the excel file seems to only contain data about the bus number 369
# TODO Remove these links
# Link to data:
# https://halmstaduniversity.box.com/s/rtm3o8dzdt4o0hxr5sredhb1y06qsi8u

def _load_bus_data(bus_id):
    """Unpickle the recording of one bus from ``../data/bus/data-<bus_id>.pickle``.

    Parameters
    ----------
    bus_id : int or str
        Bus number used in the file name.

    Returns
    -------
    object
        Whatever object the pickle file contains.
    """
    path = "../data/bus/data-{}.pickle".format(bus_id)
    # SECURITY: pickle.load can execute arbitrary code while unpickling;
    # only load files obtained from a trusted source.
    # The context manager closes the file handle as soon as loading finishes
    # (the previous bare open(...) calls left all five handles open).
    with open(path, "rb") as handle:
        return pickle.load(handle)

DATA_369 = _load_bus_data(369)
DATA_370 = _load_bus_data(370)
DATA_371 = _load_bus_data(371)
DATA_372 = _load_bus_data(372)
DATA_375 = _load_bus_data(375)


In [4]:
# Give every bus frame a leading 'tstamp' column holding its raw `Timestamp`
# values converted with getVactDate. The frames are modified in place.
for _bus_frame in (DATA_369, DATA_370, DATA_371, DATA_372, DATA_375):
    _bus_frame.insert(0, 'tstamp', _bus_frame.Timestamp.apply(getVactDate))

In [5]:
# Make 'tstamp' the index of every bus frame and sort the rows by that index
# (ascending). Each name is rebound to the new, sorted frame.
DATA_369 = DATA_369.set_index('tstamp').sort_index()
DATA_370 = DATA_370.set_index('tstamp').sort_index()
DATA_371 = DATA_371.set_index('tstamp').sort_index()
DATA_372 = DATA_372.set_index('tstamp').sort_index()
DATA_375 = DATA_375.set_index('tstamp').sort_index()

In [6]:
# Remove the raw 'Timestamp' column from every bus frame, in place.
for _bus_frame in (DATA_369, DATA_370, DATA_371, DATA_372, DATA_375):
    _bus_frame.drop(columns=['Timestamp'], inplace = True)

In [7]:
# Display the smallest value in the index of the bus-369 frame.
# The Experiments section hard-codes the displayed value as its START.
DATA_369.index.min()

Timestamp('2012-04-15 08:23:47.763000')

In [8]:
# Representations calculations
def calculate_all_reps(data, window_size=None, freq=None, n_bins=10, avg_size='1S', min_rows=20):
    """Compute every representation for each time window of ``data``.

    The windows come from ``extract_time_windows`` and span ``data.index.min()``
    to ``data.index.max()``. For each window with at least ``min_rows`` rows,
    one row is written to the result, labelled with the value that
    ``extract_time_windows`` yields alongside the window.

    Parameters
    ----------
    data : pandas.DataFrame
        Sensor recording; its index supplies the start and end of the range.
    window_size : pandas.Timedelta, optional
        Forwarded to ``extract_time_windows`` as the window size.
        Defaults to 4 minutes.
    freq : pandas.Timedelta, optional
        Forwarded to ``extract_time_windows`` as ``freq``.
        Defaults to ``window_size``.
    n_bins : int, optional
        Value returned by the ``way_of_binarization`` callback for every column.
    avg_size : str, optional
        Forwarded to ``get_time_series_from_window`` as ``AVG_SIZE``.
        NOTE(review): if the helper treats this as a pandas frequency alias,
        pandas >= 2.2 prefers ``'1s'`` over ``'1S'`` - confirm before changing.
    min_rows : int, optional
        Windows with fewer rows than this are skipped.

    Returns
    -------
    pandas.DataFrame
        Object-dtype frame with the columns ``Numeric``, ``HistVal``,
        ``HistDens`` and ``TimeSeries``.
    """
    # Built from keywords instead of the '4T' string: the 'T' unit alias is
    # deprecated in recent pandas releases, the resulting Timedelta is the same.
    if window_size is None:
        window_size = pd.Timedelta(minutes=4)
    if freq is None:
        freq = window_size

    def fixed_bin_count(column):
        # Same number of bins regardless of the column being binned.
        return n_bins

    rdf = pd.DataFrame(columns=['Numeric', 'HistVal', 'HistDens', 'TimeSeries'], dtype=object)

    windows = extract_time_windows(
        data,
        window_size,
        start=data.index.min(),
        end=data.index.max(),
        freq=freq,
        timestamp=True,
    )
    for t, win in windows:
        # Skip windows with too few rows.
        if win.shape[0] < min_rows:
            continue
        rdf.loc[t,'Numeric'] = get_numeric_from_window(win)
        rdf.loc[t,'HistVal'] = get_histograms_from_window(win, way_of_binarization=fixed_bin_count)
        rdf.loc[t,'HistDens'] = get_histograms_from_window(win, dens = True, way_of_binarization=fixed_bin_count)
        rdf.loc[t,'TimeSeries'] = get_time_series_from_window(win,t,window_size,AVG_SIZE=avg_size)

    return rdf


In [9]:
# Compute the Numeric / HistVal / HistDens / TimeSeries representations for bus 369.
RDF_369 = calculate_all_reps(DATA_369)

  skewness = skew(window[sensor])
  kurt = kurtosis(window[sensor])


In [11]:
# Persist the bus-369 representations as a pickle file.
RDF_369.to_pickle('../data/bus/rep-369.pickle')

: 

: 

In [None]:
# Also export the bus-369 representations as CSV.
# NOTE(review): the frame is created with dtype=object, so the CSV presumably
# holds only the text form of each cell - confirm before reading it back.
RDF_369.to_csv('../data/bus/rep-369.csv')

## Experiments

In [None]:
# Frames used to build the X_* lists below, and the frame listed for y.
X_dfs = [DATA_370,DATA_371,DATA_372,DATA_375]
y_dfs = [DATA_369]

# START equals the minimum index value displayed for DATA_369 earlier in the notebook.
START = pd.Timestamp('2012-04-15 08:23:47.763000')
END = START + pd.Timedelta(weeks=4)

# Built from keywords instead of the '4T' string: the 'T' unit alias is
# deprecated in recent pandas releases, the resulting Timedelta is the same.
FREQ = pd.Timedelta(minutes=4)
WINDOW_SIZE = pd.Timedelta(minutes=4)

def bin10(column):
    """Binarization callback: always use 10 bins, whatever the column."""
    return 10

# Forwarded to get_time_series_from_window as AVG_SIZE.
AVG_SIZE = '1S'

# One entry per accepted window, in the order the frames appear in X_dfs.
X_NUMERIC = []
X_HISTOGRAM = []
X_TSIRES = []

# NOTE(review): these y_* containers (and y_dfs) are created here but nothing
# in this cell fills them - confirm whether a later cell is expected to do so.
y_NUMERIC = []
y_HISTOGRAM = []
y_TSIRES = []
y_TSTAMPS = []

for X in X_dfs:
    for t, win in extract_time_windows(X, WINDOW_SIZE, start=START, end=END, freq=FREQ, timestamp = True):
        n = win.shape[0]
        # Skip windows with fewer than 20 rows.
        if n < 20:
            continue
        X_NUMERIC.append(get_numeric_from_window(win))
        X_HISTOGRAM.append(get_histograms_from_window(win, way_of_binarization=bin10))
        X_TSIRES.append(get_time_series_from_window(win,t,WINDOW_SIZE,AVG_SIZE=AVG_SIZE))


  skewness = skew(window[sensor])
  kurt = kurtosis(window[sensor])
  skewness = skew(window[sensor])
  kurt = kurtosis(window[sensor])


In [None]:
#Nr. Mode        Start time End time   Failure alarm
#1  planned     2012-02-02 2012-02-09 2012-02-01
#2  planned     2012-03-01 2012-03-02 2012-02-29
#3  unplanned   2012-03-05 2012-03-05 2012-03-03
#4  unplanned   2012-03-16 2012-03-19 2012-03-14
#5  unplanned   2012-04-01 2012-04-13 2012-03-31
#6  planned     2012-05-29 2012-05-29 2012-05-29
#7  unplanned   2012-07-10 2012-07-23 2012-07-10
#8  planned     2012-08-21 2012-08-23 2012-08-21
#9  unplanned   2012-10-08 2012-10-08 2012-10-08
#10 planned     2012-12-27 2012-12-28 2012-12-25
#11 planned     2013-02-19 2013-03-06 2013-02-19
#12 unplanned   2013-04-23 2013-04-24 2013-04-22
#13 unplanned   2013-04-29 2013-04-29 -
#14 unplanned   2013-07-11 2013-07-19 2013-07-11
#15 planned     2013-08-12 2013-08-16 2013-08-12
#16 unplanned   2013-12-11 2013-12-19 2013-12-10
#17 planned     2014-01-09 2014-01-09 2014-01-09
#18 planned     2014-01-31 2014-02-11 2014-01-31
#19 unplanned   2014-02-23 2014-02-23 2014-02-22
#20 unplanned   2014-03-13 2014-03-20 2014-03-12
#21 unplanned   2014-04-14 2014-04-14 2014-04-13
#22 unplanned   2014-04-20 2014-04-20 -
#23 unplanned   2014-06-08 2014-06-08 2014-06-05
#24 unplanned   2014-08-14 2014-08-15 -
#25 planned     2014-09-03 2014-09-05 2014-09-03
#26 planned     2014-11-28 2014-11-28 -