In [None]:
# Move the working directory two levels up from wherever the kernel started
# (presumably to the project root so `lectern` imports and relative paths resolve — confirm).
# NOTE: the path is relative, so re-running this cell in the same kernel moves up two more levels.
%cd ../..

# 22_conference_address_meta

In [None]:
import operator
import sqlite3
from collections import defaultdict

import pandas as pd
from matplotlib import pyplot as plt

from lectern.analyze import namespaces, plot_util

In [None]:
def levenshtein(a, b, case_sensitive=False):
  """Return the Levenshtein (edit) distance between two strings.

  The distance is the minimum number of single-character insertions,
  deletions and substitutions needed to turn `a` into `b`.

  Args:
    a: First string.
    b: Second string.
    case_sensitive: When False (the default), both strings are lower-cased
      before they are compared.

  Returns:
    The edit distance as a non-negative integer.
  """
  if not case_sensitive:
    a = a.lower()
    b = b.lower()

  # Only the previous row of the DP table is needed to compute the next one.
  # Row 0 is the cost of building each prefix of `b` from the empty string.
  previous_row = list(range(len(b) + 1))

  for row_index, char_a in enumerate(a, start=1):
    # Column 0 is the cost of deleting the first `row_index` characters of `a`.
    current_row = [row_index]
    for col_index, char_b in enumerate(b, start=1):
      delete_cost = previous_row[col_index] + 1
      insert_cost = current_row[col_index - 1] + 1
      substitute_cost = previous_row[col_index - 1] + (char_a != char_b)
      current_row.append(min(delete_cost, insert_cost, substitute_cost))
    previous_row = current_row

  return previous_row[-1]

In [None]:
# Load every row of the general conference address table, ordered by conference and
# then by ordinal. Later cells plot one bar per row, so this row order is their x-axis.
table = namespaces.TABLE_GENERAL_CONFERENCE_ADDRESS
# sqlite3's connection context manager only commits or rolls back the transaction;
# it does NOT close the connection, so close it explicitly once the query is done.
con = sqlite3.connect(namespaces.DATABASE_APP_DEFAULT)
try:
  # The table name comes from a project constant; SQL identifiers cannot be bound
  # as query parameters, hence the string formatting.
  df = pd.read_sql_query('SELECT * FROM {} ORDER BY conference ASC, ordinal ASC;'.format(table), con)
finally:
  con.close()
print(len(df))
df.head(3)

In [None]:
# Tick data derived from the conference column. The plotting cells unpack it with
# plt.xticks(*zip(*conference_ticks)), i.e. they treat it as a sequence of tuples
# (presumably (position, label) pairs — confirm against plot_util.get_ticks).
conference_ticks = plot_util.get_ticks(df['conference'])

## Session

In [None]:
# Number of addresses per distinct session value. By default value_counts() sorts
# by count (largest first) and leaves out missing values.
session_counts = df['session'].value_counts()
len(session_counts)  # how many distinct session values exist

In [None]:
# The five most frequent session values (the counts are sorted largest first).
session_counts.head(5)

In [None]:
# The eight least frequent session values (the tail of the sorted counts).
session_counts.tail(8)

## Speaker

In [None]:
# Addresses with no speaker recorded: print how many there are, then list their titles.
df_no_speaker = df.loc[df['speaker'].isna()]
print(len(df_no_speaker))
df_no_speaker['title']

In [None]:
# Number of addresses per speaker; rows without a speaker are excluded (dropna=True).
speaker_counts = df['speaker'].value_counts(dropna=True)
len(speaker_counts)  # how many distinct speaker names exist

In [None]:
# The five speakers with the most addresses (the counts are sorted largest first).
speaker_counts.head(5)

In [None]:
# Look for probable spelling variants of the same speaker: sort the names
# alphabetically, compare each name with its successor, and report every pair
# whose case-insensitive edit distance is at most 4.
speakers_sorted = sorted(speaker_counts.dropna().keys())
for prev, curr in zip(speakers_sorted, speakers_sorted[1:]):
  dist = levenshtein(prev, curr)
  if dist > 4:
    continue
  print('Dist={:d} for {} and {}.'.format(dist, prev, curr))

Christoffel Golden was [called in 2001](https://www.churchofjesuschrist.org/study/ensign/2001/05/news-of-the-church/elder-christoffel-golden-jr-of-the-seventy?lang=eng) and [held the Jr. suffix](https://www.churchofjesuschrist.org/study/general-conference/2013/04/the-father-and-the-son) until he [spoke in 2021](https://www.churchofjesuschrist.org/study/general-conference/2021/10/27golden), when the suffix was dropped, so both spellings refer to the same person. The discrepancy is minor, so I am leaving the two names unmerged.

LeGrand R. Curtis (Sr.) was [called in 1990](https://www.churchofjesuschrist.org/study/ensign/1990/05/news-of-the-church/elder-legrand-r-curtis-of-the-seventy?lang=eng). His son by the same name (Jr.) was [called in 2011](https://www.churchofjesuschrist.org/study/ensign/2011/05/news-of-the-church/elder-legrand-r-curtis-jr?lang=eng). These are two different speakers, so the near-identical names are expected.

## Title

In [None]:
# Addresses with no title recorded.
df.loc[df['title'].isna()]

## Description & Kicker

How often and when were these two values the same?

In [None]:
# Flag the addresses whose description is exactly equal to the kicker and print
# how many there are.
eq_desc_kicker = df['description'] == df['kicker']
df_eq_desc_kicker = df.loc[eq_desc_kicker]
print(len(df_eq_desc_kicker))

# One unit-width bar per address, in DataFrame row order: height 1 where the two
# values match and 0 elsewhere.
fig, ax = plt.subplots(figsize=(16, 1.6))
ax.bar(list(range(len(df))), eq_desc_kicker.astype(int), width=1)
ax.set_title('Addresses Where Description is Identical to Kicker')
ax.set_ylabel('Identical')
plt.xticks(*zip(*conference_ticks))
plt.show()

How often and when did neither value exist?

In [None]:
# Flag the addresses that have neither a description nor a kicker and print how
# many there are.
no_desc_kicker = df['description'].isna() & df['kicker'].isna()
df_no_desc_kicker = df.loc[no_desc_kicker]
print(len(df_no_desc_kicker))

# One unit-width bar per address, in DataFrame row order: height 1 where both
# values are missing and 0 elsewhere.
fig, ax = plt.subplots(figsize=(16, 1.6))
ax.bar(list(range(len(df))), no_desc_kicker.astype(int), width=1)
ax.set_title('Addresses Without Description or Kicker')
ax.set_ylabel('Neither')
plt.xticks(*zip(*conference_ticks))
plt.show()

## Role

In [None]:
# Addresses with no role recorded, and how many of them there are.
df_no_role = df.loc[df['role'].isna()]
len(df_no_role)

In [None]:
# Break the role-less addresses down by category; dropna=False keeps a separate
# count for rows whose category is missing as well.
category_counts_no_role = df_no_role['category'].value_counts(dropna=False)
category_counts_no_role

In [None]:
# Addresses that have neither a role nor a category.
df_no_role.loc[df_no_role['category'].isna()]