Skip to content

Commit 75efe3c

Browse files
committed
feat: added redact_sensitive.py example
1 parent 120b1f7 commit 75efe3c

File tree

1 file changed

+139
-0
lines changed

1 file changed

+139
-0
lines changed

examples/redact_sensitive.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
"""
2+
Interactive script to redact sensitive data.
3+
Be careful not to redact data you want to keep!
4+
5+
Issues/improvements:
6+
- If an event matches the sensitive pattern, only the matching fields are redacted (e.g. if the title matches but the URL does not, the URL is left unredacted).
7+
- One might not want to redact to the non-informative 'REDACTED', but instead to a string with secret meaning.
8+
- No preview of the events/strings to be redacted.
9+
"""
10+
11+
import re
12+
import sys
13+
from typing import List, Set, Pattern, Union
14+
from copy import deepcopy
15+
16+
from aw_core import Event
17+
from aw_client import ActivityWatchClient
18+
19+
# Shared client instance; declared here, assigned in main() and used by _redact_bucket().
aw: ActivityWatchClient
20+
21+
# Replacement value written over every string data field that matches the sensitive pattern.
REDACTED = "REDACTED"
22+
# When True (the default) no events are written back; main() sets it to False when "--wet" is in sys.argv.
DRYRUN = True
23+
24+
25+
def main():
    """Interactively find and redact sensitive events in ActivityWatch buckets.

    Lists the available buckets, asks which bucket to process (``*`` for all)
    and whether to search by plain string or regex, then hands every selected
    bucket to ``_redact_bucket``. Runs as a dry run unless ``--wet`` is
    present in ``sys.argv``.

    Raises:
        SystemExit: If an answer to one of the prompts is not a valid option.
    """
    global DRYRUN
    if "--wet" in sys.argv:
        DRYRUN = False

    global aw
    # NOTE(review): testing=True presumably targets a testing server
    # instance rather than the production one -- confirm before real use.
    aw = ActivityWatchClient(testing=True)

    buckets = aw.get_buckets()
    print("Buckets: ")
    print("\n".join([" - " + bid for bid in buckets.keys()]) + "\n")

    bid_to_redact = input(
        "In which bucket are the events you want to redact? (* for all): "
    )
    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would let an invalid bucket id through.
    if bid_to_redact != "*" and bid_to_redact not in buckets:
        sys.exit("Not a valid option")

    regex_or_string = input(
        "Do you want to search by regex or string? (regex/string): "
    )
    if regex_or_string not in ("regex", "string"):
        sys.exit("Not a valid option")

    print("\nNOTE: Matching is not case sensitive!")
    pattern: Union[str, Pattern]
    if regex_or_string == "string":
        pattern = input("Enter a string indicating sensitive content: ").lower()
    else:
        # Do not lowercase the regex source: that would silently turn
        # escapes such as \S, \D, \W or \B into their opposites. The
        # IGNORECASE flag keeps matching case-insensitive instead.
        pattern = re.compile(
            input("Enter a regex indicating sensitive content: "),
            re.IGNORECASE,
        )

    print("")
    if DRYRUN:
        print(
            "NOTE: Performing a dry run, no events will be modified. Run with --wet to modify events."
        )
    else:
        print(
            "WARNING: Note that this performs an operation that cannot be undone. We strongly recommend that you backup/export your data before proceeding."
        )
    input("Press ENTER to continue, or Ctrl-C to abort")

    if bid_to_redact == "*":
        for bucket_id in buckets.keys():
            if bucket_id.startswith("aw-watcher-afk"):
                # Skip AFK-watcher buckets but keep going: returning here
                # would abort the whole run at the first AFK bucket and
                # leave every later bucket unchecked.
                continue
            _redact_bucket(bucket_id, pattern)
    else:
        _redact_bucket(bid_to_redact, pattern)
74+
75+
76+
def _redact_bucket(bucket_id: str, pattern: Union[str, Pattern]):
    """Find the sensitive events in one bucket and, after confirmation, redact them.

    Fetches the bucket's events through the module-level ``aw`` client,
    reports how many match ``pattern`` and asks whether to redact them. Each
    redacted event is printed before and after; it is only sent back with
    ``aw.insert_event`` when ``DRYRUN`` is false.

    Args:
        bucket_id: Id of the bucket to check.
        pattern: Lowercase search string or compiled regex marking sensitive
            content.
    """
    print("\nChecking bucket: {}".format(bucket_id))

    all_events = aw.get_events(bucket_id, limit=-1)
    flagged_ids = _find_sensitive(all_events, pattern)
    print(f"Found {len(flagged_ids)} sensitive events")

    # Nothing matched: do not bother the user with a prompt.
    if len(flagged_ids) == 0:
        return

    answer = input(
        f"Do you want to replace all the matching strings with '{REDACTED}'? (y/N): "
    )
    # Only an exact 'y' confirms; anything else leaves the bucket untouched.
    if answer != "y":
        return

    for original in (ev for ev in all_events if ev.id in flagged_ids):
        redacted = _redact_event(original, pattern)
        print(f"\nData before: {original.data}")
        print(f"Data after: {redacted.data}")

        if DRYRUN:
            print("DRYRUN, would do: aw.insert_event(bucket_id, e)")
        else:
            aw.insert_event(bucket_id, redacted)
            print("Redacted event")
102+
103+
104+
def _check_event(e: Event, pattern: Union[str, Pattern]) -> bool:
105+
for k, v in e.data.items():
106+
if isinstance(v, str):
107+
if isinstance(pattern, str):
108+
if pattern in v.lower():
109+
return True
110+
else:
111+
if pattern.findall(v.lower()):
112+
return True
113+
return False
114+
115+
116+
def _redact_event(e: Event, pattern: Union[str, Pattern]) -> Event:
    """Return a copy of the event with every matching string value redacted.

    The input event is not modified: a deep copy is made, and each string
    value in its ``data`` whose lowercased form matches ``pattern`` is
    replaced by ``REDACTED``. Non-matching and non-string values are kept.

    Args:
        e: Event to redact.
        pattern: A plain string (tested as a substring) or a compiled regex
            (tested with ``search``).

    Returns:
        The redacted deep copy of ``e``.
    """
    redacted = deepcopy(e)
    # Only existing keys are reassigned, so the mapping keeps its size and
    # it is safe to write to it while iterating over items().
    for key, value in redacted.data.items():
        if not isinstance(value, str):
            continue
        lowered = value.lower()
        if isinstance(pattern, str):
            is_sensitive = pattern in lowered
        else:
            # search() stops at the first hit; findall() would collect
            # every match just for a truthiness test.
            is_sensitive = pattern.search(lowered) is not None
        if is_sensitive:
            redacted.data[key] = REDACTED
    return redacted
127+
128+
129+
def _find_sensitive(el: List[Event], pattern: Union[str, Pattern]) -> Set[int]:
    """Collect the ids of all events in ``el`` that ``_check_event`` flags.

    Args:
        el: Events to scan.
        pattern: Search string or compiled regex passed on to
            ``_check_event``.

    Returns:
        The set of ``id`` values of the matching events (empty if none match).
    """
    return {event.id for event in el if _check_event(event, pattern)}
136+
137+
138+
# Run the interactive redaction flow only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)