## Imports

In [1]:
import numpy as np
import pandas as pd

from sherlock_helpers.constants import DATA_DIR
from sherlock_helpers.functions import (
    get_topic_words, 
    multicol_display, 
    show_source
)

Helper functions and variables used across multiple notebooks can be found in `/mnt/code/sherlock_helpers/sherlock_helpers`, or [on GitHub](https://github.com/ContextLab/sherlock-topic-model-paper/tree/master/code/sherlock_helpers).<br />You can also view a helper's source code directly from the notebook with:<br /><pre>    from sherlock_helpers.functions import show_source<br />    show_source(foo)</pre>

## Inspect `get_topic_words` function

In [2]:
# View the source code of the `get_topic_words` helper (imported from
# sherlock_helpers.functions) directly in the notebook before it is used below.
show_source(get_topic_words)

## Load data

In [3]:
# Locations of the saved objects inside DATA_DIR.
video_model_path = DATA_DIR.joinpath('models_t100_v50_r10.npy')
count_vectorizer_path = DATA_DIR.joinpath('count_vectorizer_model.npy')
topic_model_path = DATA_DIR.joinpath('topic_model.npy')

# NOTE: allow_pickle=True lets NumPy unpickle object arrays stored in these
# files, which can execute arbitrary code -- only load files you trust.

# The first entry of the saved models array is kept as the video model.
video_model = np.load(video_model_path, allow_pickle=True)[0]

# .item() pulls the single Python object out of each loaded array.
cv = np.load(count_vectorizer_path, allow_pickle=True).item()
lda = np.load(topic_model_path, allow_pickle=True).item()

## Find topics used to describe video content

In [4]:
# Variance of the video model along axis 0.
dim_variance = video_model.var(axis=0)

# Keep the indices whose variance exceeds a small threshold (1e-5); these are
# reported below as the topics the video model utilizes.
exceeds_threshold = dim_variance > 1e-5
used_dims = np.where(exceeds_threshold)[0]

n_used_dims = len(used_dims)
print(f'Number of topics utilized by video model: {n_used_dims}')

Number of topics utilized by video model: 32


## Get top words from each topic

In [5]:
# Top 10 words for each topic index in `used_dims`. The result is consumed
# below through `.items()` as (topic ID, top words) pairs.
topic_words = get_topic_words(cv, lda, used_dims, n_words=10)

# Flatten the (topic ID, top words) pairs into a single list
# [id_0, words_0, id_1, words_1, ...] so the values can be unpacked as
# positional arguments to `multicol_display`.
display_vals = [
    cell
    for id_and_words in topic_words.items()
    for cell in id_and_words
]

# Show the flattened values under the two column headers given here.
multicol_display(*display_vals,
                 caption='Top 10 words by topic',
                 col_headers=('Topic ID', 'Top words'),
                 # CSS for the table: separated borders with 1.5em of
                 # vertical spacing (and no horizontal spacing).
                 table_css={'border-collapse': 'separate',
                            'border-spacing': '0 1.5em'})

0,1
1,"john, outdoor, yes, phone, road, brixton, box, medium, donovan, street"
2,"sherlock, john, indoor, laboratory, hospital, st, bartholomew, medium, yes, mike"
4,"man, john, warehouse, indoor, yes, medium, shoulder, says, hand, asks"
5,"john, mike, sherlock, medium, molly, park, russell, square, outdoor, bench"
7,"yes, jeffrey, sir, jimmy, indoor, aide, medium, helen, woman, gary"
9,"sherlock, floor, room, crime, scene, lauriston, indoor, gardens, john, yes"
17,"sherlock, lestrade, john, indoor, gardens, lauriston, room, medium, floor, scene"
20,"soldiers, singers, cartoon, background, medium, indoor, world, yes, afghanistan, lobby"
22,"sherlock, john, street, baker, 221b, indoor, mrs, hudson, suite, yes"
27,"sherlock, john, outdoor, medium, taxi, road, yes, says, phone, lauriston"
