In [1]:
# Move the kernel's working directory two levels up from the notebook's folder.
# NOTE(review): presumably so the `lectern` imports and project paths below resolve — confirm.
%cd ../..

/Users/brewer/Code/Eric/lectern/analyze


# 23_ref_meta

In [2]:
import sqlite3

import pandas as pd
from matplotlib import pyplot as plt

from lectern.analyze import namespaces
from lectern.analyze.trie import TrieNode

In [3]:
# Load every row of the General Conference address-reference table, ordered by
# conference and then by ordinal.
table = namespaces.TABLE_GENERAL_CONFERENCE_ADDRESS_REF
# SQLite cannot bind an identifier as a query parameter, so the table name
# (a project constant, not user input) is formatted into the statement.
query = 'SELECT * FROM {} ORDER BY conference ASC, ordinal ASC;'.format(table)
# `with sqlite3.connect(...)` only scopes a transaction; it does NOT close the
# connection. Close it explicitly so the database handle is not leaked.
con = sqlite3.connect(namespaces.DATABASE_APP_DEFAULT)
try:
  df = pd.read_sql_query(query, con)
finally:
  con.close()
print(len(df))
df.head()

43851


Unnamed: 0,conference,ordinal,url,lines,notes
0,1971-04,0,https://www.churchofjesuschrist.org/study/scri...,17,
1,1971-04,0,https://www.churchofjesuschrist.org/study/scri...,19,
2,1971-04,1,https://www.churchofjesuschrist.org/study/scri...,37,
3,1971-04,1,https://www.churchofjesuschrist.org/study/scri...,36,
4,1971-04,1,https://www.churchofjesuschrist.org/study/scri...,12,


## Url

To which domains do the reference URLs cited in the addresses point?

In [4]:
# Build a trie holding every reference URL.
root = TrieNode.root()
# Only the `url` column is needed, so iterate it directly; `iterrows()` would
# build a full Series for each row just to read a single field.
for url in df['url']:
  root.insert(url)
# Display the trie's length.
len(root)

43851

In [5]:
# Fetch the complete URLs stored in the trie, each paired with a count.
# NOTE(review): the displayed result appears to be sorted by descending count — confirm in TrieNode.completions.
completions = root.completions()
# Show only the first three entries.
completions[:3]

[('https://www.churchofjesuschrist.org/study/scriptures/pgp/moses/1.39?lang=eng',
  206),
 ('https://www.churchofjesuschrist.org/study/scriptures/bofm/mosiah/3.19?lang=eng',
  113),
 ('https://www.churchofjesuschrist.org/study/scriptures/bofm/2-ne/31.20?lang=eng',
  90)]

In [8]:
# Fetch the URL prefixes stored in the trie, each paired with a count.
# NOTE(review): with default arguments the result appears to be sorted by descending count — confirm in TrieNode.prefixes.
prefixes = root.prefixes()
# Show only the first twenty entries.
prefixes[:20]

[('http', 43851),
 ('https://', 43754),
 ('https://w', 42305),
 ('https://www.', 42303),
 ('https://www.c', 42218),
 ('https://www.church', 42217),
 ('https://www.churchofjesuschrist.org/', 42215),
 ('https://www.churchofjesuschrist.org/s', 42189),
 ('https://www.churchofjesuschrist.org/study/', 42186),
 ('https://www.churchofjesuschrist.org/study/scriptures/', 40979),
 ('https://www.churchofjesuschrist.org/study/scriptures/nt/', 12203),
 ('https://www.churchofjesuschrist.org/study/scriptures/b', 11964),
 ('https://www.churchofjesuschrist.org/study/scriptures/bofm/', 11918),
 ('https://www.churchofjesuschrist.org/study/scriptures/dc-testament/', 10028),
 ('https://www.churchofjesuschrist.org/study/scriptures/dc-testament/dc/',
  10021),
 ('https://www.churchofjesuschrist.org/study/scriptures/ot/', 4388),
 ('https://www.churchofjesuschrist.org/study/scriptures/dc-testament/dc/1',
  3880),
 ('https://www.churchofjesuschrist.org/study/scriptures/nt/ma', 3510),
 ('https://www.churchofjesus

In [10]:
# URL prefixes used to narrow the trie queries below. Each one extends its
# parent, so the site root is spelled out only once.
PREFIX_CHURCH = 'https://www.churchofjesuschrist.org/'
PREFIX_CHURCH_STUDY = f'{PREFIX_CHURCH}study/'
# Sub-sections of the study area.
PREFIX_CHURCH_STUDY_ENSIGN = f'{PREFIX_CHURCH_STUDY}ensign/'
PREFIX_CHURCH_STUDY_CONFERENCE = f'{PREFIX_CHURCH_STUDY}general-conference/'
PREFIX_CHURCH_STUDY_LIAHONA = f'{PREFIX_CHURCH_STUDY}liahona/'

In [11]:
# Restrict the prefix listing to URLs under the Ensign prefix.
# NOTE(review): order='len' appears to sort by prefix length, shortest first — confirm in TrieNode.prefixes.
ensign_prefixes = root.prefixes(prefix=PREFIX_CHURCH_STUDY_ENSIGN, order='len')
# Show only the first ten entries.
ensign_prefixes[:10]

[('https://www.churchofjesuschrist.org/study/ensign/', 58),
 ('https://www.churchofjesuschrist.org/study/ensign/19', 40),
 ('https://www.churchofjesuschrist.org/study/ensign/20', 18),
 ('https://www.churchofjesuschrist.org/study/ensign/197', 7),
 ('https://www.churchofjesuschrist.org/study/ensign/198', 18),
 ('https://www.churchofjesuschrist.org/study/ensign/199', 15),
 ('https://www.churchofjesuschrist.org/study/ensign/200', 8),
 ('https://www.churchofjesuschrist.org/study/ensign/201', 8),
 ('https://www.churchofjesuschrist.org/study/ensign/1974/', 3),
 ('https://www.churchofjesuschrist.org/study/ensign/1980/', 3)]

In [12]:
# Restrict the prefix listing to URLs under the general-conference prefix.
# NOTE(review): order='len' appears to sort by prefix length, shortest first — confirm in TrieNode.prefixes.
conference_prefixes = root.prefixes(prefix=PREFIX_CHURCH_STUDY_CONFERENCE, order='len')
# Show only the first ten entries.
conference_prefixes[:10]

[('https://www.churchofjesuschrist.org/study/general-conference/', 739),
 ('https://www.churchofjesuschrist.org/study/general-conference/19', 142),
 ('https://www.churchofjesuschrist.org/study/general-conference/20', 597),
 ('https://www.churchofjesuschrist.org/study/general-conference/197', 13),
 ('https://www.churchofjesuschrist.org/study/general-conference/198', 38),
 ('https://www.churchofjesuschrist.org/study/general-conference/199', 91),
 ('https://www.churchofjesuschrist.org/study/general-conference/200', 136),
 ('https://www.churchofjesuschrist.org/study/general-conference/201', 387),
 ('https://www.churchofjesuschrist.org/study/general-conference/202', 74),
 ('https://www.churchofjesuschrist.org/study/general-conference/1974/', 4)]

In [13]:
# Restrict the prefix listing to URLs under the Liahona prefix.
# NOTE(review): order='len' appears to sort by prefix length, shortest first — confirm in TrieNode.prefixes.
liahona_prefixes = root.prefixes(prefix=PREFIX_CHURCH_STUDY_LIAHONA, order='len')
# Show only the first ten entries.
liahona_prefixes[:10]

[('https://www.churchofjesuschrist.org/study/liahona/', 115),
 ('https://www.churchofjesuschrist.org/study/liahona/19', 7),
 ('https://www.churchofjesuschrist.org/study/liahona/20', 108),
 ('https://www.churchofjesuschrist.org/study/liahona/198', 2),
 ('https://www.churchofjesuschrist.org/study/liahona/199', 4),
 ('https://www.churchofjesuschrist.org/study/liahona/200', 21),
 ('https://www.churchofjesuschrist.org/study/liahona/201', 83),
 ('https://www.churchofjesuschrist.org/study/liahona/202', 4),
 ('https://www.churchofjesuschrist.org/study/liahona/2010/', 11),
 ('https://www.churchofjesuschrist.org/study/liahona/2012/', 8)]