In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()


# Load the input DataFrame from a pickle in the working directory; the cells
# below read its 'id', 'timestamp' and 'duration' columns.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
df_data = pd.read_pickle('small_set.pkl')

In [None]:
%matplotlib inline 

# Keep only rows with a finite duration (drops NaN/inf readings), then sort so
# that each measurement location's rows are contiguous and ordered by timestamp.
# The explicit .copy() makes `data` an independent frame, so adding a column
# below cannot raise SettingWithCopyWarning and the warning does not have to be
# switched off globally.
data = (
    df_data[np.isfinite(df_data['duration'])]
    .sort_values(by=['id', 'timestamp'])
    .copy()
)


def _interpolate_missing(durations):
    """Fill the non-positive entries of one location's durations by linear interpolation.

    Entries > 0 are treated as valid samples; everything else (e.g. the -1
    placeholder) is filled by interpolating over the row position within the
    series. Gaps before the first / after the last valid sample take that
    sample's value (np.interp's edge behaviour).

    Parameters
    ----------
    durations : pd.Series
        Durations of a single measurement location, ordered by timestamp.

    Returns
    -------
    np.ndarray
        Float array of the same length as `durations`; all-NaN when the
        location has no valid sample at all (np.interp cannot run on an empty
        set of sample points).
    """
    values = durations.to_numpy(dtype=float)
    is_valid = values > 0
    if not is_valid.any():
        return np.full(values.size, np.nan)
    positions = np.arange(values.size)
    return np.interp(positions, positions[is_valid], values[is_valid])


# Interpolate per location. Running np.interp once over the whole sorted column
# would fill gaps at the start/end of one location from the neighbouring
# location's readings.
data['duration_interpolation'] = (
    data.groupby('id', sort=False)['duration'].transform(_interpolate_missing)
)

#Show the interpolation for one of the measurement locations
#X = range(data[(data.id == 'RWS01_MONIBAS_0581hrr0137ra0')].duration.size)
#plt.figure(figsize=(18,10))
#plt.plot(X, data[(data.id == 'RWS01_MONIBAS_0581hrr0137ra0')].duration)
#plt.plot(X, data[(data.id == 'RWS01_MONIBAS_0581hrr0137ra0')].duration_interpolation)
#plt.show()

# Per-location median of the interpolated duration; it serves as the baseline
# ("normal hours") duration for that location.
# The column is selected *before* aggregating so the median is computed for
# this one column only -- aggregating the whole frame is wasted work and fails
# on non-numeric columns in pandas versions where numeric_only defaults to False.
df_median = (
    data.groupby('id')[['duration_interpolation']]
    .median()
    .rename(columns={'duration_interpolation': 'duration_interpolation_median'})
)

# Attach each row's location median (df_median is indexed by 'id').
df = pd.merge(data, df_median, right_index=True, left_on='id')

# Deviation of every (interpolated) duration from its location's median.
df['duration_deviation'] = df['duration_interpolation'] - df['duration_interpolation_median']

# EXAMPLE: select a subset of the locations which are in a row
selected_location_ids = [
    'RWS01_MONIBAS_0581hrr0192ra0',
    'RWS01_MONIBAS_0581hrr0195ra0',
    'RWS01_MONIBAS_0581hrr0207ra0',
    'RWS01_MONIBAS_0581hrr0215ra0',
    'RWS01_MONIBAS_0581hrr0221ra0',
    'RWS01_MONIBAS_0581hrr0226ra0',
    'RWS01_MONIBAS_0581hrr0233ra0',
    'RWS01_MONIBAS_0581hrr0239ra0',
    'RWS01_MONIBAS_0581hrr0244ra0',
    'RWS01_MONIBAS_0581hrr0251ra0',
    'RWS01_MONIBAS_0581hrr0257ra0',
    'RWS01_MONIBAS_0581hrr0263ra0',
    'RWS01_MONIBAS_0581hrr0269ra0',
    'RWS01_MONIBAS_0581hrr0275ra0',
    'RWS01_MONIBAS_0581hrr0281ra0',
    'RWS01_MONIBAS_0581hrr0287ra0',
    'RWS01_MONIBAS_0581hrr0293ra0',
    'RWS01_MONIBAS_0581hrr0298ra0',
    'RWS01_MONIBAS_0581hrr0309ra0',
    'RWS01_MONIBAS_0581hrr0315ra0',
    'RWS01_MONIBAS_0581hrr0320ra0',
    'RWS01_MONIBAS_0581hrr0326ra0',
    'RWS01_MONIBAS_0581hrr0332ra0',
    'RWS01_MONIBAS_0581hrr0337ra0',
    'RWS01_MONIBAS_0581hrr0342ra0',
    'RWS01_MONIBAS_0581hrr0346ra0',
    'RWS01_MONIBAS_0581hrr0352ra0',
    'RWS01_MONIBAS_0581hrr0356ra0',
    'RWS01_MONIBAS_0581hrr0361ra0',
    'RWS01_MONIBAS_0581hrr0364ra0',
    'RWS01_MONIBAS_0581hrr0369ra0',
    'RWS01_MONIBAS_0581hrr0376ra0',
    'RWS01_MONIBAS_0581hrr0383ra0',
    'RWS01_MONIBAS_0581hrr0388ra0',
    'RWS01_MONIBAS_0581hrr0394ra0',
]
df_filtered = df[df['id'].isin(selected_location_ids)]

# Or, select all locations
#df_filtered = df

# Reshape to a wide matrix: one row per timestamp, one column per location id,
# cells holding the duration deviation. Deviations are then clipped to
# [-60, 60] so extreme values do not dominate the colour scale.
df_pivot = df_filtered.pivot(index='timestamp', columns='id', values='duration_deviation')
df_pivot = df_pivot.clip(lower=-60, upper=60)

# Render the clipped deviations as a heatmap on a tall 25x100 figure.
# The colour map is centred on 0; tick labels and the colour bar are hidden.
fig, ax = plt.subplots(figsize=(25, 100))
heatmap_options = dict(
    center=0,
    xticklabels=False,
    yticklabels=False,
    cbar=False,
    cmap="YlGnBu",
)
# Draw on the axes created above explicitly rather than on the implicit current axes.
ax = sns.heatmap(df_pivot, ax=ax, **heatmap_options)

In [None]:
# Show the filtered subset as this cell's output for inspection.
df_filtered