-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
exporttojson.py
executable file
·81 lines (62 loc) · 2.34 KB
/
exporttojson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
import sys
import re
import json
class Message:
    """Hold the date, time, sender and text of one parsed message."""

    def __init__(self, date, time, sender, message):
        # Tuple assignment binds targets left to right, so the instance
        # __dict__ keeps the date/time/sender/message key order.
        self.date, self.time, self.sender, self.message = (
            date,
            time,
            sender,
            message,
        )

    def append(self, toappend):
        """Concatenate *toappend* onto the end of the stored message text."""
        extended = self.message + toappend
        self.message = extended
# Cheap test for "this line starts a new message": the line must begin with a
# timestamp of the form `N/N/N, H:MM AM|PM`, where each date component is one
# or two digits. Nothing after the timestamp is checked here.
message_scan_regex = re.compile(r"^(\d{1,2}/\d{1,2}/\d{1,2}, \d+:\d+ (A|P)M)")
# Full pattern for a message with an author: `<date>, <time> - <sender>: <message>`.
# The sender is matched non-greedily, so it ends at the first ": " after the
# " - " separator; the message group takes everything that follows, including
# newlines.
message_with_sender_regex = re.compile(r"^((?P<date>(\d{1,2}/\d{1,2}/\d{1,2})), (?P<time>(\d+:\d+ (A|P)M))) - (?P<sender>(.+?)): (?P<message>[\s\S]*)")
# Full pattern for a message without an author: `<date>, <time> - <message>`.
# This also matches every line the sender pattern matches, so it only
# identifies sender-less lines when it is tried after the sender pattern.
message_system_regex = re.compile(r"^((?P<date>(\d{1,2}/\d{1,2}/\d{1,2})), (?P<time>(\d+:\d+ (A|P)M))) - (?P<message>[\s\S]*)")
def is_empty_string(s):
    """Return True when *s* is None, empty, or contains only whitespace.

    The `not s` check comes first so that None (or any other falsy value)
    short-circuits before `.strip()` is called on it.
    """
    # `strip` must be *called*: the bare bound method `s.strip` is always
    # truthy, which made whitespace-only strings look non-empty.
    return not s or not s.strip()
def process_message(s):
    """Parse one timestamped line into a Message.

    Args:
        s: A line of text beginning with a `<date>, <time> - ` prefix.

    Returns:
        A Message whose sender is the captured author, or the literal
        "System" when the line has no `<sender>: ` part.

    Raises:
        ValueError: If *s* matches neither message pattern. The offending
            line and its length are included in the exception message.
    """
    # Try the sender form first: the system pattern accepts any text after
    # " - ", so it would also match lines that do carry a sender.
    matcher = message_with_sender_regex.match(s)
    if matcher is not None:
        sender = matcher.group("sender")
    else:
        matcher = message_system_regex.match(s)
        if matcher is None:
            # Report the bad input through the exception itself rather than
            # printing to stdout, which may be carrying the JSON output.
            raise ValueError(
                "Line does not match any known message format "
                "(length {}): {!r}".format(len(s), s)
            )
        sender = "System"

    return Message(
        matcher.group("date"),
        matcher.group("time"),
        sender,
        matcher.group("message"),
    )
def _parse_messages(lines):
    """Group raw text lines into a list of Message objects.

    A line that begins with a timestamp opens a new message; every other
    line is appended to the message that precedes it.

    Raises:
        ValueError: If non-blank text appears before the first timestamped
            line, or if process_message rejects a timestamped line.
    """
    messages = []
    for line in lines:
        # A line matching the timestamp pattern starts with a digit, so it
        # can never be blank; no separate emptiness check is needed.
        if message_scan_regex.match(line):
            messages.append(process_message(line))
        elif messages:
            messages[-1].append(line)
        elif line.strip():
            # There is no earlier message to attach this text to. Fail with
            # a clear explanation instead of an IndexError from messages[-1].
            raise ValueError(
                "Text found before the first timestamped message: "
                "{!r}".format(line)
            )
        # Blank lines ahead of the first message carry no content; skip them.
    return messages


if len(sys.argv) not in (2, 3):
    # Usage goes to stderr so stdout only ever carries JSON.
    print("Wrong format", file=sys.stderr)
    print("Format: exporttojson.py [source txt file] [result JSON file]", file=sys.stderr)
    print("\tOR", file=sys.stderr)
    print("Format: exporttojson.py [source txt file]", file=sys.stderr)
    print("This prints out the JSON to stdout", file=sys.stderr)
    # sys.exit is always available; the bare `exit` helper is only injected
    # by the `site` module.
    sys.exit(-1)

try:
    with open(sys.argv[1], "r") as f:
        messages = _parse_messages(f)

    # Convert to dicts for JSON serialization
    message_dicts = [m.__dict__ for m in messages]
    # Serialize before touching the destination so a serialization failure
    # cannot leave a truncated output file behind.
    serialized = json.dumps(message_dicts)

    if len(sys.argv) == 3:
        with open(sys.argv[2], "w") as fw:
            fw.write(serialized)
    else:
        print(serialized)
except Exception as e:
    # Top-level boundary: report on stderr and exit non-zero so callers and
    # shell pipelines can tell the conversion failed.
    print("Error occured while parsing", file=sys.stderr)
    print(e, file=sys.stderr)
    sys.exit(1)