/
calc_holes_in_db_data.py
44 lines (37 loc) · 985 Bytes
/
calc_holes_in_db_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import psycopg2
import functools
import datetime
# Fetch every distinct tweet creation timestamp from the twitter-geo database.
# The cursor and the connection are released even if the query raises, so a
# failed run does not leave a database session open.
conn = psycopg2.connect("dbname=twitter-geo")
try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cur:
        cur.execute('select distinct(created_at) from tweets;')
        # Each row is a 1-tuple holding a single created_at value.
        result = cur.fetchall()
finally:
    conn.close()
# Collapse the fetched rows into a sorted list of unique minute-level
# timestamps: unwrap each single-column row, zero out its seconds and
# microseconds, let a set drop the duplicates, then sort ascending.
minutes = {row[0].replace(second=0, microsecond=0) for row in result}
result = sorted(minutes)
# Report every gap in the minute-level timeline.
# `result` is a sorted list of unique datetimes truncated to the minute, so
# two neighbours that are not exactly one minute apart mark a hole.
# The gap is measured with datetime subtraction rather than .timestamp():
# for naive datetimes .timestamp() interprets the value in the machine's
# local timezone, so a DST transition could distort the difference.
one_minute = datetime.timedelta(minutes=1)
# Pair each entry with its successor; an empty or single-element list yields
# no pairs, so nothing is printed.
for start, end in zip(result, result[1:]):
    if end - start != one_minute:
        print("FOUND HOLE")
        print("Last insert time: {}".format(start))
        print("First insert time: {}".format(end))
        print("")