# FB5M & FB2M KG DB Numbers

In [1]:
import sys
sys.path.insert(0, '../../')
from lib.data import FB5M_KG
from lib.data import FB2M_KG
from lib.data import FB2M_KG_TABLE
from lib.data import FB5M_KG_TABLE
from lib.connect import get_connection 

# Open a database connection through the project's helper and create a single
# cursor; the query helpers defined below all execute on this shared cursor.
connection = get_connection()
cursor = connection.cursor()

def sql_fetchone(sql):
    """Run ``sql`` on the shared cursor and return a single scalar.

    The statement is executed on the module-level ``cursor``; the value
    returned is the first column of the first row of the result.
    """
    cursor.execute(sql)
    first_row = cursor.fetchone()
    return first_row[0]

In [2]:
# Row count of each KG table (FB2M is queried first, then FB5M).
n_unique_facts_fb2m, n_unique_facts_fb5m = (
    sql_fetchone('SELECT count(*) FROM ' + kg_table)
    for kg_table in (FB2M_KG_TABLE, FB5M_KG_TABLE)
)

In [3]:
# Number of distinct (subject_mid, relation) combinations in each KG table,
# i.e. facts grouped by subject and relation (FB2M first, then FB5M).
n_grouped_facts_fb2m, n_grouped_facts_fb5m = (
    sql_fetchone('SELECT count(DISTINCT(subject_mid, relation)) FROM ' + kg_table)
    for kg_table in (FB2M_KG_TABLE, FB5M_KG_TABLE)
)

In [4]:
# Number of distinct relation values in each KG table (FB2M first, then FB5M).
n_relations_fb2m, n_relations_fb5m = (
    sql_fetchone('SELECT count(DISTINCT(relation)) FROM ' + kg_table)
    for kg_table in (FB2M_KG_TABLE, FB5M_KG_TABLE)
)

In [5]:
def get_all_entities(kg_table_name):
    """Return the set of all entity MIDs that appear in a KG table.

    Every value found in the ``object_mid`` or ``subject_mid`` column of
    ``kg_table_name`` is collected, using the module-level ``cursor``.

    Args:
        kg_table_name: Name of the table to scan. It is spliced directly into
            the SQL text, so it must be a trusted identifier (here it is one
            of the project's table-name constants).

    Returns:
        A set holding each distinct value that occurs in either column.
    """
    all_entities = set()
    for column in ['object_mid', 'subject_mid']:
        # DISTINCT lets the database drop duplicates, so only unique MIDs are
        # sent to the client instead of one value per fact row. The resulting
        # set is the same as when every row is fetched.
        cursor.execute('SELECT DISTINCT ' + column + ' FROM ' + kg_table_name)
        all_entities.update(mid for (mid,) in cursor.fetchall())
    return all_entities

# Count the distinct entity MIDs of each KG table (FB2M first, then FB5M).
n_entities_fb2m, n_entities_fb5m = (
    len(get_all_entities(kg_table))
    for kg_table in (FB2M_KG_TABLE, FB5M_KG_TABLE)
)

In [7]:
from IPython.display import display, Markdown

# Render the counts computed above as a markdown list, each next to the
# reference figures quoted in the template (original paper / CFO repository).
display(
    Markdown(
        """#### FB5M and FB2M Numbers:
- FB5M Unique Facts: %d (Original Paper: 22,441,880) (https://github.com/zihangdai/CFO: 17,872,174)
- FB2M Unique Facts: %d (Original Paper: 14,180,937)
- FB5M Group Facts: %d (Original Paper: 12,010,500)
- FB2M Group Facts: %d (Original Paper: 10,843,106)
- FB5M Relations: %d (Original Paper: 7,523)
- FB2M Relations: %d (Original Paper: 6,701)
- FB5M Entities: %d (Original Paper: 4,904,397) (https://github.com/zihangdai/CFO: 3,988,105)
- FB2M Entities: %d (Original Paper: 2,150,604)
"""
        % (
            # One FB5M/FB2M pair per metric, in the same order as the %d
            # slots in the template above.
            n_unique_facts_fb5m, n_unique_facts_fb2m,
            n_grouped_facts_fb5m, n_grouped_facts_fb2m,
            n_relations_fb5m, n_relations_fb2m,
            n_entities_fb5m, n_entities_fb2m,
        )
    )
)

#### FB5M and FB2M Numbers:
- FB5M Unique Facts: 17872174 (Original Paper: 22,441,880) (https://github.com/zihangdai/CFO: 17,872,174)
- FB2M Unique Facts: 14174246 (Original Paper: 14,180,937)
- FB5M Group Facts: 7688234 (Original Paper: 12,010,500)
- FB2M Group Facts: 7188636 (Original Paper: 10,843,106)
- FB5M Relations: 7523 (Original Paper: 7,523)
- FB2M Relations: 6701 (Original Paper: 6,701)
- FB5M Entities: 3988105 (Original Paper: 4,904,397) (https://github.com/zihangdai/CFO: 3,988,105)
- FB2M Entities: 1963130 (Original Paper: 2,150,604)
