# Temeperature example (smart home simulation)

In this example, data is loaded from a CSV file. This file corresponds to the result of basic threshold based event detection on smart home simulation data, generated using the iCasa simulator.

In [1]:
import sys
sys.path.append("../")

In [2]:
import pandas as pd
from abduction_memorability.event import Event, Label
from abduction_memorability.abduction_module import SurpriseAbductionModule
from abduction_memorability.predicate import Predicate, MonthPredicate, DayPredicate, HasLabelPredicate, AxisRankPredicate, RandomChoicePredicate, DevicePredicate, LocationPredicate
from abduction_memorability.memory import Memory
from abduction_memorability.predicate_filter import OptimizedFilter
import random
import math
import datetime as dt
from matplotlib.patches import Ellipse

from typing import Tuple

import plotly.io as pio
import plotly.express as px
import plotly.offline as py

import matplotlib.pyplot as plt

In [3]:
time_series = pd.read_csv("../time_series/data.csv")
fig = px.line(time_series, x="time", y=["room2.myTemperature", "outdoor.myTemperature"])


Columns (47,48,51,53,55,57,126,127,130,132,134) have mixed types.Specify dtype option on import or set low_memory=False.



In [4]:
fig.update_layout(
    showlegend=False,
    yaxis_title="Temperature (°C)"
)
fig

In [5]:
# Loading the corresponding events
mem = Memory.load_csv("../events/420_events.csv")

Index(['index', 'label', 'timestamp', 'duration', 'max_temp', 'min_temp',
       'device', 'location', 'some_property'],
      dtype='object')
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241

In [6]:
# printing labels
mem.labels()

[event,
 event/day,
 event/temperature,
 event/temperature/hot,
 event/temperature/cold,
 event/device,
 event/device/removed,
 event/device/addition]

In [6]:
label_day_filter = OptimizedFilter(HasLabelPredicate(mem, 1))
day_events = [event.get_id() for event in label_day_filter(mem)]
hot_events = [event.get_id() for event in OptimizedFilter(HasLabelPredicate(mem, 3))(mem)]
cold_events = [event.get_id() for event in OptimizedFilter(HasLabelPredicate(mem, 4))(mem)]
removed_events = [event.get_id() for event in OptimizedFilter(HasLabelPredicate(mem, 6))(mem)]
added_events = [event.get_id() for event in OptimizedFilter(HasLabelPredicate(mem, 7))(mem)]


In [7]:
module = SurpriseAbductionModule(
    memory=mem,
    predicates=[
        HasLabelPredicate,
        DayPredicate,
        MonthPredicate,
        # RandomChoicePredicate,
        AxisRankPredicate,
        DevicePredicate,
        LocationPredicate
    ]
)

Loaded the memory with 578 items!
Computing complexities with 4 passes
Starting pass 0 with 1 memories to explore
Finished pass 0 in 0.14209771156311035s.
Improved complexity for 3 event(s)
Starting pass 1 with 10 memories to explore
Finished pass 1 in 0.48718690872192383s.
Improved complexity for 634 event(s)
Starting pass 2 with 296 memories to explore
Finished pass 2 in 1.5400075912475586s.
Improved complexity for 135 event(s)
Starting pass 3 with 171 memories to explore
Finished pass 3 in 0.43112921714782715s.
Improved complexity for 0 event(s)
Computing surprise scores for all events !


In [8]:
candidates = module.abduction(410)

	prog=0, pred=HasLabelPredicate : 80 events found, with complexity 2
	prog=2, pred=HasLabelPredicate : 75 events found, with complexity 6.0
	prog=3, pred=HasLabelPredicate : 420 events found, with complexity 7.325249204613158
	prog=4, pred=HasLabelPredicate : 3 events found, with complexity 8.169925001442312
	prog=0, pred=DayPredicate : 6 events found, with complexity 2
	prog=1, pred=DayPredicate : 2 events found, with complexity 2
	prog=2, pred=DayPredicate : 2 events found, with complexity 6.0
	prog=3, pred=DayPredicate : 3 events found, with complexity 7.325249204613158
		Score improved for 158: 8.169925001442312
	prog=5, pred=DayPredicate : 5 events found, with complexity 8.7859697861766
		Score improved for 142: 9.268878557093467
		Score improved for 247: 9.664933050786377
		Score improved for 392: 10.0
		Score improved for 339: 10.289967873808441
	prog=10, pred=DayPredicate : 5 events found, with complexity 10.545278234648993
		Score improved for 179: 10.773151302054865
		Score i

In [9]:
top_ids = [pair[0].get_id() for pair in candidates[:5]]
top_ids
candidates[:5]

[(Event(timestamp=1642806054.857, characteristics={'index': 158, 'max_temp': 19.71762658492622, 'min_temp': 10.708706113934989}, label=event/day, duration=86400.0, _id=158),
  15.218792915599526),
 (Event(timestamp=1642633969.857, characteristics={'index': 142, 'max_temp': 23.86566423813997, 'min_temp': 12.697877586933355}, label=event/day, duration=86400.0, _id=142),
  14.122449700935663),
 (Event(timestamp=1643065489.857, characteristics={'index': 561, 'max_temp': 17.01944068716544, 'min_temp': 11.341677650386586}, label=event/day, duration=86400.0, _id=561),
  14.020261618319982),
 (Event(timestamp=1643153169.857, characteristics={'index': 458, 'max_temp': 26.0, 'min_temp': 11.341677650386586}, label=event/day, duration=86400.0, _id=458),
  13.852698136414507),
 (Event(timestamp=1642548294.857, characteristics={'index': 247, 'max_temp': 19.42669067490336, 'min_temp': 10.090165101810388}, label=event/day, duration=86400.0, _id=247),
  13.727335954258649)]

In [10]:
df = module.dataframe_output()
df.index = df["id"]
df



Unnamed: 0_level_0,id,time,label,complexity,recipe,memorability,date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
315,315,1.631225e+09,event/day,26.779221,"[label(event/day), rank(max_temp, 105)]",0.870346,2021-09-09 22:05:39.857000192
502,502,1.628328e+09,event/temperature/hot,28.137832,"[device(thermo_3), day(40)]",3.276912,2021-08-07 09:12:34.857000192
316,316,1.629585e+09,event/day,25.398167,"[label(event/day), rank(max_temp, 53)]",0.602608,2021-08-21 22:25:04.857000192
317,317,1.644103e+09,event/day,26.202666,"[label(event/day), day(218)]",0.702962,2022-02-05 23:16:19.857000192
318,318,1.644535e+09,event/day,26.158154,"[label(event/day), day(213)]",1.120629,2022-02-10 23:18:29.857000192
...,...,...,...,...,...,...,...
309,309,1.642893e+09,event/day,26.321862,"[label(event/day), day(232)]",0.902452,2022-01-22 23:02:54.857000192
310,310,1.649542e+09,event/day,25.543960,"[label(event/day), day(155)]",0.713855,2022-04-09 22:13:54.857000192
311,311,1.655850e+09,event/day,22.754888,"[label(event/day), rank(max_temp, 1), day(2)]",0.729133,2022-06-21 22:24:34.857000192
312,312,1.641770e+09,event/day,24.128713,"[label(event/day), rank(max_temp, 29)]",1.348213,2022-01-09 23:17:59.857000192


In [11]:
fig = px.scatter(df, x="date", y="complexity", color="label", hover_data=["id", "recipe"], height=800, color_discrete_sequence=["gray", "blue", "red", "orange", "cyan"])
fig.update_traces(marker={'size':13})
fig.update_layout(
    showlegend=False,
    font=dict(
        size=25
    )
)

In [12]:
fig2 = px.scatter(df, x="date", y="memorability", color="label", height=800, hover_data=["id"],title="", color_discrete_sequence=["gray", "blue", "red", "orange", "cyan"])
fig2.update_layout(
    showlegend=False,
    font=dict(
        size=28
    )
)
for event_id in [20, 183, 329, 149]:
    x_coord = df["date"][event_id]
    y_coord = df["memorability"][event_id]
    if event_id == 183:
        fig2.add_annotation(text=f'{event_id}', x=x_coord, y=y_coord, ax=20, ay=-40, showarrow=True, arrowsize=1.5, arrowwidth=3, arrowhead=1)
    elif event_id == 149:
        fig2.add_annotation(text=f'{event_id}', x=x_coord, y=y_coord, showarrow=True, ax=-60, ay=0, arrowhead=1, arrowsize=1.5, arrowwidth=3)
    elif event_id == 329:
        fig2.add_annotation(text=f'{event_id}', x=x_coord, y=y_coord, ax=-20, ay=-40, arrowsize=1.5, arrowwidth=3, arrowhead=1, showarrow=True)
    else:
        fig2.add_annotation(text=f'{event_id}', x=x_coord, y=y_coord, showarrow=True, ax=-60, arrowsize=1.5, arrowwidth=3, arrowhead=1, ay=0)
    fig2.add_shape(
        type="circle", 
        xref="x",
        yref="y",
        x0=x_coord - dt.timedelta(days=1.5),
        y0=y_coord - 0.1,
        x1=x_coord + dt.timedelta(days=1.5),
        y1=y_coord + 0.1)
fig2.update_traces(marker={'size':13})


In [14]:
fig3 = px.scatter(df, x="date", y="memorability", color="label", height=800, title="Memorability of events", color_discrete_sequence=["blue", "gray", "red", "orange", "cyan"], hover_data=["id", "recipe"])
fig3.update_layout(
    showlegend=False,
    font=dict(
        size=28
    )
)
for event_id in top_ids:
    x_coord = df["date"][event_id]
    y_coord = df["memorability"][event_id]
    if event_id == 183:
        fig3.add_annotation(text=f'{event_id}', x=x_coord, y=y_coord, showarrow=True, yshift=15, xshift=20)
    elif event_id == 149:
        fig3.add_annotation(text=f'{event_id}', x=x_coord, y=y_coord, showarrow=False, yshift=-10, xshift=-20)
    else:
        fig3.add_annotation(text=f'{event_id}', x=x_coord, y=y_coord, showarrow=True, arrowcolor="yellow")
fig3.update_traces(marker={'size':13})


### Using memorability as an event detector tool
While the first purpose of memorability is not to provide an event detector, it is interesting to see how "real" events, in the sense that they were handmade introduced rather than being generated by the random background noise. 

To illustrate this, we propose the following methodoly: given a fixed threshold $m$, we consider as "true events" events with a computed memorability $M(e) \ge m$. 
Since data was generated, ground truth events are identified as $\{337, 126, 210, 260, 20, 183, 339, 329, 577, 198, 205, 571, 260, 271, 238, 214, 213, 149, 253, 88, 517\}$ (for instance, days where the outdoor temperature was set to be abnormally high or low, devices removals and addtions) . This knowledge allows to compute for each threshold value the True Positive, False Positive, False Negative and True Negative rates, which can then be used to coompute a ROC curve of the detector

In [50]:
thresholds = [i/10 for i in range(0, int(10*max(df["memorability"])), 1)]
true_events = set([337, 126, 210, 260, 20, 183, 339, 329, 577, 198, 205, 571, 260, 271, 238, 214, 213, 149, 253, 88, 517])
other_events = set(df["id"]) - true_events
print (f"{len(true_events)} events have been flagged as ground truth remarkable events.")
number_events = len(df)
roc_data = {"FPR": [], "TPR": []}
for m in thresholds:
    detected_events = set(df[df["memorability"] > m]["id"])
    undetected_events = set(df["id"]) - detected_events
    true_positive = len(true_events.intersection(detected_events))
    false_positive = len(detected_events) - true_positive
    true_negative = len(undetected_events.intersection(other_events))
    false_negative = len(true_events.intersection(undetected_events))
    roc_data["FPR"].append(false_positive / (false_positive + true_negative))
    roc_data["TPR"].append(true_positive / (true_positive + false_negative))
    # print(f"{m} -> TP={true_positive}, FP={false_negative}, TN={true_negative}, FN={false_negative}")
roc_df = df.from_dict(roc_data)
roc_curve = px.line(roc_df, x='FPR', y='TPR')
roc_curve.update_layout(
    height=900,
    font=dict(
        size=30
    )
)
    

20 events have been flagged as ground truth remarkable events.
