-
Notifications
You must be signed in to change notification settings - Fork 33
/
statistics.py
48 lines (40 loc) · 1.68 KB
/
statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""Statistical summary of store statements mixin"""
from sqlalchemy.orm.session import Session
from sqlalchemy.sql import func
def get_group_by_count(session, group_by_column):
    """
    Count the rows for each distinct value of a column with a GROUP BY query.

    Args:
        session (~sqlalchemy.orm.session.Session): session the query is issued on
        group_by_column (~sqlalchemy.schema.Column): column that is both grouped and counted

    Returns:
        dict: dictionary mapping each distinct column value to its count
    """
    # SELECT <column>, count(<column>) ... GROUP BY <column>
    counts_query = session.query(group_by_column, func.count(group_by_column))
    value_count_rows = counts_query.group_by(group_by_column).all()

    # Every row is a (value, count) pair, which dict() consumes directly.
    return dict(value_count_rows)
class StatisticsMixin:
    """Mixin providing a statistical summary of the statements in a store.

    The host class must supply ``engine`` (an object whose ``connect()``
    returns a context-managed connection), ``tables`` (a mapping from table
    name to table object) and ``__len__`` (total number of statements).
    """

    def statistics(self, asserted_statements=True, literals=True, types=True):
        """Store statistics.

        Args:
            asserted_statements (bool): include per-predicate counts from the
                ``asserted_statements`` table
            literals (bool): include per-predicate counts from the
                ``literal_statements`` table
            types (bool): include per-class counts from the
                ``type_statements`` table

        Returns:
            dict: always contains ``"store"`` (with ``total_num_statements``);
            additionally contains ``"asserted_statements"``, ``"literals"``
            and ``"types"`` for each enabled flag, each mapping a column
            value to its count.
        """
        statistics = {
            "store": dict(total_num_statements=len(self)),
        }

        # (enabled, result key, table name, column to group by) in output order.
        sections = (
            (asserted_statements, "asserted_statements", "asserted_statements", "predicate"),
            (literals, "literals", "literal_statements", "predicate"),
            (types, "types", "type_statements", "klass"),
        )
        requested = [section[1:] for section in sections if section[0]]
        if not requested:
            # Nothing to query: do not open a database connection at all.
            return statistics

        with self.engine.connect() as connection:
            session = Session(bind=connection)
            try:
                for result_key, table_name, column_name in requested:
                    table = self.tables[table_name]
                    group_by_column = getattr(table.c, column_name)
                    statistics[result_key] = get_group_by_count(session, group_by_column)
            finally:
                # Release the session's resources before the connection
                # itself is closed by the enclosing ``with``.
                session.close()

        return statistics