# Topic Modeling the 1-Hop Monster Scrape

Build a topic model of the ~7k Wikipedia pages generated by the 1-hop spider scrape of the manually selected monster seed pages.

In [1]:
# Directory whose .txt files (the 1-hop spider scrape) are read into the corpus
spider_pages_1_dir = './spider-pages-depth-1'

# Labeled-data directories (train/test x positive/negative).
# NOTE(review): not referenced by the cells visible here — presumably used by later work; confirm.
train_pos_dir = './labeled/train/positive'
train_neg_dir = './labeled/train/negative'
test_pos_dir = './labeled/test/positive'
test_neg_dir = './labeled/test/negative'

To keep the token count down, we'll ignore rare words - ones that appear this many times or fewer in the corpus

In [2]:
# A word is kept only if its total count across the corpus is strictly greater than this value
rare_word_threshold = 50

Number of topics to model the Wikipedia 1-hop data set

In [3]:
# Passed as num_topics when the LDA model is built, and again when its topics are printed
num_topics = 250

### Read the whole article text into a dictionary keyed on filename

In [4]:
import os
from io import open

ext = '.txt'
file_dict = {}

# Collect the directory entries whose extension is exactly `ext`
txt_files = []
for entry in os.listdir(spider_pages_1_dir):
    _, entry_ext = os.path.splitext(entry)
    if entry_ext == ext:
        txt_files.append(entry)

# Map each filename to the full text of that file, decoded as UTF-8
for f in txt_files:
    article_path = os.path.join(spider_pages_1_dir, f)
    with open(article_path, encoding="utf-8") as article_file:
        file_dict[f] = article_file.read()

print ('Read ', len(file_dict), ' text files')

Read  7067  text files


In [5]:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
# NLTK's English stop-word list, held as a set for the membership tests in tokenize().
# NOTE(review): presumably requires the NLTK 'stopwords' corpus to be installed already — confirm.
stop_words = set(stopwords.words('english'))

From working with the dataset, I've learned that we should have some data-set-specific stopwords...

In [6]:
# Data-set-specific stop words; tokenize() drops these in addition to the
# NLTK English stop words.
wikipedia_stop_words = {
    
    "retrieved",  # the references on each page often have a retrieval date as part of the listing
    "edit",       # the text of the label of the edit button often appears inline in the article text
    "isbn",       # appears frequently in references
    "archived",   # the phrase 'archived from the original' appears frequently in the references
    "original",   # presumably the other half of 'archived from the original'
    "url"         # lowercase of the "URL" placeholder tokenize() substitutes for links
}

In [7]:
import re
import string

def tokenize(text):
    """Turn raw article text into a list of lowercase tokens for LDA.

    Every run of non-whitespace characters starting with "http" is first
    replaced by the placeholder "URL". The text is then split with NLTK's
    ``word_tokenize`` and a token is kept only if it

    * is purely alphabetic,
    * has at least 4 characters, and
    * once lowercased, is in neither ``stop_words`` nor
      ``wikipedia_stop_words``.

    Args:
        text: The article text as a single string.

    Returns:
        A list of the surviving tokens, lowercased, in their original order.
    """
    lda_tokens = []
    # Raw string: "\S" inside a plain string literal is an invalid escape
    # sequence, which current Python versions warn about.
    text = re.sub(r"http\S*", "URL", text)
    for token in word_tokenize(text):
        if not token.isalpha():
            continue
        if len(token) < 4:
            continue
        lc = token.lower()
        if lc in stop_words or lc in wikipedia_stop_words:
            continue
        lda_tokens.append(lc)
    return lda_tokens


In [8]:
# Tokenize the corpus
tokenized_docs = []
tokenized_docs_filenames = []

for f in file_dict:
    tokenized_docs_filenames.append(f)
    tokens = tokenize(file_dict[f])
    tokenized_docs.append(tokens)

In [9]:
# Peek into the tokenized data as a sanity check
print(tokenized_docs_filenames[0])
print(tokenized_docs[0])

Samuel Marinus Zwemer.txt
['american', 'missionary', 'traveler', 'scholar', 'samuel', 'marinus', 'zwemer', 'born', 'april', 'vriesland', 'michigan', 'united', 'states', 'died', 'april', 'aged', 'york', 'city', 'united', 'states', 'education', 'hope', 'college', 'brunswick', 'theological', 'seminary', 'spouse', 'elizabeth', 'wilkes', 'samuel', 'marinus', 'zwemer', 'april', 'april', 'nicknamed', 'apostle', 'islam', 'american', 'missionary', 'traveler', 'scholar', 'born', 'vriesland', 'michigan', 'received', 'hope', 'college', 'holland', 'received', 'brunswick', 'theological', 'seminary', 'brunswick', 'degrees', 'include', 'hope', 'college', 'muskingum', 'college', 'rutgers', 'college', 'ordained', 'reformed', 'church', 'ministry', 'pella', 'iowa', 'classis', 'missionary', 'busrah', 'bahrein', 'locations', 'arabia', 'member', 'arabian', 'mission', 'founder', 'american', 'mission', 'hospital', 'bahrain', 'zwemer', 'served', 'egypt', 'also', 'traveled', 'widely', 'asia', 'minor', 'elected',

In [10]:
def sortFreqDict(freqdict):
    """Return the entries of ``freqdict`` as (value, key) pairs, largest first.

    Args:
        freqdict: Mapping from a key to its count.

    Returns:
        A list of ``(count, key)`` tuples in descending order. Pairs with
        equal counts are ordered by key, also descending.
    """
    pairs = ((count, key) for key, count in freqdict.items())
    return sorted(pairs, reverse=True)

In [11]:
# Count how many times each token occurs across all tokenized documents
word_freqs = {}
for doc_tokens in tokenized_docs:
    for token in doc_tokens:
        word_freqs[token] = word_freqs.get(token, 0) + 1
            

Let's take a look at the word list prior to filtering out rare words...

In [12]:
sorted_word_freqs = sortFreqDict(word_freqs)
print('number of words: ', len(word_freqs))
# Display only the most frequent words: the full list holds one entry per
# distinct word, which is far too long to render usefully as cell output.
sorted_word_freqs[:100]

number of words:  405820


[(71687, 'film'),
 (59745, 'also'),
 (42839, 'help'),
 (39967, 'first'),
 (37351, 'world'),
 (35949, 'target'),
 (35063, 'error'),
 (30437, 'history'),
 (29737, 'university'),
 (29457, 'time'),
 (29453, 'march'),
 (29264, 'press'),
 (29011, 'october'),
 (28610, 'series'),
 (28427, 'films'),
 (28251, 'january'),
 (27863, 'april'),
 (27665, 'july'),
 (26894, 'december'),
 (26869, 'june'),
 (26781, 'september'),
 (26325, 'august'),
 (25523, 'november'),
 (25157, 'february'),
 (24811, 'york'),
 (24415, 'united'),
 (24082, 'game'),
 (23598, 'john'),
 (23137, 'states'),
 (22515, 'used'),
 (22458, 'american'),
 (22193, 'many'),
 (21816, 'city'),
 (21509, 'would'),
 (21217, 'king'),
 (20037, 'book'),
 (19548, 'science'),
 (19362, 'godzilla'),
 (19005, 'times'),
 (18992, 'early'),
 (18866, 'years'),
 (18704, 'life'),
 (18564, 'century'),
 (18211, 'list'),
 (18122, 'article'),
 (18036, 'people'),
 (17272, 'story'),
 (17014, 'later'),
 (16792, 'news'),
 (16672, 'national'),
 (16640, 'main'),
 (16

Filter out rare words

In [13]:
# Keep only the words whose corpus-wide count exceeds rare_word_threshold;
# document order and token order within each document are preserved.
highly_filtered_tokenized_docs = [
    [token for token in doc_tokens if word_freqs[token] > rare_word_threshold]
    for doc_tokens in tokenized_docs
]

# The surviving vocabulary: every distinct word kept in at least one document
filtered_words = set()
for kept_tokens in highly_filtered_tokenized_docs:
    filtered_words.update(kept_tokens)

In [14]:
# Report how many distinct words survived the rare-word filter
print('Vocabulary size after rare word removal:', len(filtered_words), sep='\n')

Vocabulary size after rare word removal:
26583


In [15]:
# Peek into the tokenized data as a sanity check
print(tokenized_docs_filenames[0])
print(highly_filtered_tokenized_docs[0])

Samuel Marinus Zwemer.txt
['american', 'missionary', 'traveler', 'scholar', 'samuel', 'marinus', 'zwemer', 'born', 'april', 'michigan', 'united', 'states', 'died', 'april', 'aged', 'york', 'city', 'united', 'states', 'education', 'hope', 'college', 'brunswick', 'theological', 'seminary', 'spouse', 'elizabeth', 'wilkes', 'samuel', 'marinus', 'zwemer', 'april', 'april', 'nicknamed', 'apostle', 'islam', 'american', 'missionary', 'traveler', 'scholar', 'born', 'michigan', 'received', 'hope', 'college', 'holland', 'received', 'brunswick', 'theological', 'seminary', 'brunswick', 'degrees', 'include', 'hope', 'college', 'college', 'rutgers', 'college', 'ordained', 'reformed', 'church', 'ministry', 'pella', 'iowa', 'missionary', 'locations', 'arabia', 'member', 'arabian', 'mission', 'founder', 'american', 'mission', 'hospital', 'bahrain', 'zwemer', 'served', 'egypt', 'also', 'traveled', 'widely', 'asia', 'minor', 'elected', 'fellow', 'royal', 'geographical', 'society', 'london', 'appointed', '

In [16]:
from gensim import corpora

# The rare-word-filtered documents are the texts the model is built from
texts = highly_filtered_tokenized_docs
# Dictionary assigning an integer id to each distinct token
id2word = corpora.Dictionary(texts)
# Bag-of-words form of every document, in the same order as `texts`
corpus = list(map(id2word.doc2bow, texts))
# Show the first 30 bag-of-words entries of the first document
print(corpus[0][:30])



[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 5), (6, 2), (7, 1), (8, 1), (9, 1), (10, 2), (11, 2), (12, 2), (13, 2), (14, 1), (15, 1), (16, 1), (17, 1), (18, 4), (19, 1), (20, 1), (21, 1), (22, 1), (23, 8), (24, 4), (25, 2), (26, 7), (27, 2), (28, 3), (29, 3)]


In [17]:
import gensim.models.ldamodel
from pathlib import Path
from gensim.test.utils import datapath

filename = "./lda-model"

lda_file = Path(filename)
# The trained model is cached on disk and reused on later runs.
# NOTE: the cache is keyed on the path only. After changing the corpus or any
# training parameter below, delete the saved model file to force a rebuild.
if not lda_file.is_file():
    print("Building model")
    lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                       id2word=id2word,
                                       num_topics=num_topics,
                                       iterations=1000,
                                       alpha=.1,
                                       eta=.1,
                                       update_every=0,
                                       passes=12,
                                       # Fixed seed so that training is repeatable for a given corpus
                                       random_state=42)
    lda_model.save(filename)
else:
    print("Loading pre-built model from ", filename)
    lda_model = gensim.models.ldamodel.LdaModel.load(filename)


Loading pre-built model from  ./lda-model


In [18]:
from pprint import pprint
# Pretty-print the model's topics, requesting as many as the model was configured with
pprint(lda_model.print_topics(num_topics=num_topics))

[(0,
  '0.036*"cross" + 0.017*"temple" + 0.010*"memes" + 0.010*"temples" + '
  '0.008*"columns" + 0.005*"meme" + 0.005*"used" + 0.005*"also" + '
  '0.004*"force" + 0.004*"century"'),
 (1,
  '0.069*"virginia" + 0.014*"israel" + 0.011*"kingdom" + 0.010*"richmond" + '
  '0.009*"judah" + 0.008*"little" + 0.006*"house" + 0.005*"chesapeake" + '
  '0.004*"northern" + 0.004*"march"'),
 (2,
  '0.014*"detective" + 0.011*"steampunk" + 0.010*"malaysia" + '
  '0.010*"jewellery" + 0.008*"fiction" + 0.007*"mystery" + 0.007*"stories" + '
  '0.006*"zone" + 0.006*"gold" + 0.006*"twilight"'),
 (3,
  '0.045*"norse" + 0.019*"edda" + 0.016*"saga" + 0.015*"cologne" + '
  '0.012*"elves" + 0.010*"snorri" + 0.009*"mythology" + 0.008*"sagas" + '
  '0.007*"prose" + 0.007*"poetic"'),
 (4,
  '0.225*"best" + 0.066*"awards" + 0.051*"award" + 0.047*"year" + '
  '0.017*"movie" + 0.014*"golden" + 0.014*"trailer" + 0.013*"poster" + '
  '0.011*"winners" + 0.010*"spot"'),
 (5,
  '0.014*"science" + 0.009*"scientific" + 0.00

### Look at the top topics for each document as a sanity check

In [19]:
# Build each bag-of-words with the dictionary stored on the model, so token
# ids always match the ones the model was trained with. A model loaded from
# disk may have been trained with ids that differ from the freshly built
# `id2word`. Fall back to `id2word` if the model carries no usable dictionary.
model_dictionary = getattr(lda_model, 'id2word', None)
if not hasattr(model_dictionary, 'doc2bow'):
    model_dictionary = id2word

# The loop variable is `doc_filename`, not `filename`, so the saved-model path
# held in `filename` is not overwritten by the last document's name.
for doc_filename, text in zip(tokenized_docs_filenames, highly_filtered_tokenized_docs):
    bow_doc = model_dictionary.doc2bow(text)
    # Topics with probability below 0.05 are left out of the result
    doc_to_topic = lda_model.get_document_topics(bow = bow_doc, minimum_probability = 0.05)
    print(doc_filename)
    print(doc_to_topic)
    print()

Samuel Marinus Zwemer.txt
[(80, 0.6232163), (95, 0.339967)]

P-51 Dragon Fighter.txt
[(24, 0.10588387), (35, 0.22130865), (133, 0.055055667), (168, 0.15108147), (173, 0.33093238)]

Wired News.txt
[(60, 0.06901993), (61, 0.28659058), (113, 0.06089103), (203, 0.52486503)]

Voyage to the Prehistoric Planet.txt
[(11, 0.053142507), (173, 0.765843), (186, 0.06655265)]

Succubi in fiction.txt
[(22, 0.13468148), (157, 0.17143306), (169, 0.06414659), (193, 0.15128891)]

Woolworth Building.txt
[(174, 0.6710343), (176, 0.20722085), (218, 0.057340115)]

Susumu Kurobe.txt
[(77, 0.3990855), (88, 0.1837519), (169, 0.11706006), (238, 0.117064655)]

Motif-Index of Folk-Literature.txt
[(5, 0.07552465), (26, 0.38567877), (146, 0.10263426), (157, 0.1183052), (206, 0.14352149), (239, 0.117486775)]

Terminator 2-COLON- Judgment Day.txt
[(32, 0.5421219), (38, 0.31270164)]

Playmobil-COLON- The Movie.txt
[(38, 0.07148438), (116, 0.8477647)]

Musicarello.txt
[(122, 0.69152087), (191, 0.21471652)]

NES.txt
[(60

Umberto Eco.txt
[(35, 0.07903835), (65, 0.22441661), (98, 0.21699856), (226, 0.15146314)]

Star Wars-COLON- The Force Awakens.txt
[(38, 0.16791849), (130, 0.500486), (220, 0.3263997)]

Northern (genre).txt
[(97, 0.05889945), (122, 0.17705616), (146, 0.0782095), (191, 0.32171157), (202, 0.12669894), (237, 0.14381832)]

Soul.txt
[(199, 0.99556714)]

Spectrophilia.txt
[(82, 0.79259706), (223, 0.06015044)]

Shotacon.txt
[(19, 0.50835115), (122, 0.34038273), (191, 0.055407826)]

Sandman.txt
[(17, 0.29534188), (105, 0.051636677), (111, 0.053018313), (137, 0.08329053), (157, 0.20272918), (169, 0.060527567), (173, 0.08969742)]

Ángel Sala.txt
[(38, 0.15379141), (47, 0.37210736), (150, 0.05001998), (191, 0.15289561), (213, 0.08717143)]

Systema Naturae.txt
[(104, 0.055334978), (117, 0.48645887), (232, 0.43456635)]

Nangoku no hada.txt
[(123, 0.5705539), (173, 0.11336358), (237, 0.0694678), (238, 0.097108975)]

Prototype (1992 film).txt
[(21, 0.79332954), (92, 0.11113758)]

Religion.txt
[(192, 0

Vörðr.txt
[(3, 0.06993439), (59, 0.057173252), (82, 0.053124115), (130, 0.067203596), (157, 0.50141394), (199, 0.07049148)]

Peripheral vision.txt
[(5, 0.079473466), (29, 0.27218932), (84, 0.19149229), (195, 0.081366226), (205, 0.14952081)]

Walt Disney Television (production company).txt
[(21, 0.084674224), (45, 0.8333336)]

Wulfings.txt
[(3, 0.5246554), (103, 0.242965), (121, 0.12159586)]

Monsters (2010 film).txt
[(38, 0.7496754), (88, 0.08657693), (173, 0.07896732)]

Times Square.txt
[(22, 0.57994735), (174, 0.093755685), (176, 0.20705941)]

Polish Film School.txt
[(122, 0.7416928), (191, 0.16085956)]

Turok-COLON- Son of Stone (film).txt
[(21, 0.22863406), (35, 0.05402566), (67, 0.07968633), (105, 0.06612409), (173, 0.38324207)]

WolfCop.txt
[(173, 0.68783444), (221, 0.099696085)]

Sephora.txt
[(46, 0.12799226), (60, 0.07544503), (69, 0.41853645), (134, 0.05113207), (176, 0.19780114)]

Prima Games.txt
[(60, 0.118096575), (110, 0.28233802), (113, 0.14307474), (126, 0.08859358), (20

OED.txt
[(206, 0.95038784)]

Taper Jean Girl.txt
[(175, 0.96692044)]

Waterloo Station.txt
[(64, 0.37768942), (80, 0.28694758), (176, 0.15230568), (218, 0.054476477), (230, 0.051859416)]

Succubous.txt
[(29, 0.16420846), (59, 0.064638294), (66, 0.1227748), (84, 0.06912401), (140, 0.11224065), (234, 0.09470029)]

Template talk-COLON-Kimiyoshi Yasuda.txt
[(126, 0.19637004)]

Republic of China.txt
[(44, 0.18248183), (52, 0.21816365), (56, 0.13181195), (74, 0.3460321), (118, 0.05625126)]

Wendigo.txt
[(65, 0.06502914), (137, 0.2640729), (157, 0.14776303), (218, 0.07022334), (237, 0.058203854), (239, 0.06485107)]

On Translating Beowulf.txt
[(59, 0.066488035), (121, 0.8500486)]

Monotheism.txt
[(95, 0.25812557), (139, 0.22324212), (178, 0.08244909), (192, 0.14022996), (199, 0.20514652)]

Regarding Henry.txt
[(35, 0.18984638), (38, 0.13443068), (39, 0.09186698), (173, 0.3833837)]

Space opera.txt
[(97, 0.6800191), (172, 0.15807468), (220, 0.07514093)]

Solar variation.txt
[(25, 0.9267744), (

Night at the Museum-COLON- Secret of the Tomb.txt
[(38, 0.9641465)]

Tremors (1990 film).txt
[(173, 0.4810078), (219, 0.4258898)]

Tarzan (2013 film).txt
[(111, 0.4579254), (173, 0.34853354)]

Turksploitation.txt
[(122, 0.665286), (191, 0.18575142)]

Xing Tian.txt
[(44, 0.58005416), (98, 0.06735306), (103, 0.07209743), (157, 0.16353333)]

Tech noir.txt
[(97, 0.3039273), (122, 0.42065695), (191, 0.17436363)]

Salafism.txt
[(74, 0.29568437), (95, 0.08063132), (161, 0.39408), (192, 0.18503596)]

Playmobil.txt
[(29, 0.08220945), (55, 0.17460872), (60, 0.1904706), (116, 0.19504844)]

Zolpidem.txt
[(43, 0.19000666), (137, 0.33053857), (195, 0.38931918)]

Perception.txt
[(5, 0.2817513), (29, 0.124408014), (205, 0.46422258)]

Stanford University Press.txt
[(61, 0.081572995), (65, 0.16868544), (125, 0.4919148), (214, 0.11623377)]

The Leviathan (2015 film).txt
[]

Spy film.txt
[(122, 0.23446406), (191, 0.4876668), (202, 0.18841502)]

Venus.txt
[(25, 0.089917324), (186, 0.765303), (231, 0.062647

The Lost World-COLON- Jurassic Park.txt
[(138, 0.79681194), (173, 0.12000598)]

No-budget film.txt
[(20, 0.2831489), (35, 0.060337663), (122, 0.56014055)]

Proto-Indo-European.txt
[(59, 0.44746295), (63, 0.22491011), (216, 0.10942053)]

The Dragon (Beowulf).txt
[(121, 0.52023256), (168, 0.29698053), (218, 0.11206082)]

The Underdog (song).txt
[(67, 0.4111926), (111, 0.2754037), (120, 0.06681485), (169, 0.06269258)]

Sexual penetration.txt
[(223, 0.91966784)]

Pulp noir.txt
[(122, 0.69683325), (191, 0.24419755)]

Sumerian religion.txt
[(103, 0.18091334), (139, 0.72963995), (155, 0.05831726)]

Splatter film.txt
[(38, 0.054705333), (122, 0.116632774), (191, 0.72895277)]

Stargate (film).txt
[(38, 0.3803466), (75, 0.06680733), (84, 0.12895122), (169, 0.054164626), (173, 0.14008267)]

Outlaw biker film.txt
[(122, 0.24064109), (173, 0.25760078), (191, 0.41234705)]

Sheridan Le Fanu.txt
[(16, 0.10929928), (33, 0.06378965), (35, 0.060703155), (36, 0.19801065), (54, 0.104397185), (82, 0.0531833

Veneration of the dead.txt
[(13, 0.4194722), (139, 0.52048975)]

Öland.txt
[(68, 0.19427763), (118, 0.08855544), (152, 0.12790734), (230, 0.106974535), (237, 0.23412526)]

Subconscious.txt
[(5, 0.4927695), (93, 0.13268651), (100, 0.08139116), (199, 0.16049848)]

The Lost World (1992 film).txt
[(47, 0.09737294), (173, 0.46805188), (202, 0.05787278), (241, 0.20501463)]

Travel documentary.txt
[(20, 0.45978191), (122, 0.36554724)]

Resurrection of Jesus.txt
[(95, 0.11613129), (158, 0.8023016), (218, 0.05383523)]

Tiger Shark (film).txt
[(35, 0.19554794), (173, 0.4861758), (202, 0.12410729)]

Tom and Jerry and the Wizard of Oz.txt
[(21, 0.7194273), (49, 0.066455826), (111, 0.1623495)]

The Land Before Time IV-COLON- Journey Through the Mists.txt
[(21, 0.06995769), (111, 0.7416625), (173, 0.1558438)]

The Terminator.txt
[(32, 0.90155774), (38, 0.06370856)]

Ralph.txt
[(35, 0.2206335), (59, 0.07467584), (125, 0.10432107), (246, 0.32294703)]

The Real Ghostbusters (1993 video game).txt
[(8, 0

The Young Lord.txt
[(173, 0.110052176), (238, 0.612944)]

The Human Vapor.txt
[(123, 0.5390117), (173, 0.3040213)]

Tōkyō no hito sayōnara.txt
[(123, 0.7679102), (238, 0.07203858)]

Photios I of Constantinople.txt
[(95, 0.19552726), (98, 0.07416193), (152, 0.06391446), (158, 0.11362817), (187, 0.35318798), (218, 0.0636832), (232, 0.108357646)]

Thirteen Ghosts.txt
[(82, 0.15153737), (173, 0.4916805), (204, 0.29571518)]

Neville Page.txt
[(21, 0.054112263), (35, 0.1297629), (38, 0.10287442), (125, 0.113533914), (126, 0.090127975), (170, 0.38478675)]

Tiyanak.txt
[(47, 0.08426713), (157, 0.6130346), (246, 0.12720117)]

The Land Before Time (film).txt
[(108, 0.11819984), (111, 0.2947519), (173, 0.24053764), (220, 0.28162473)]

Paranormal activity.txt
[(5, 0.083792716), (13, 0.1547601), (100, 0.7128898)]

Rebar.txt
[(29, 0.95656604)]

William Morris.txt
[(11, 0.57621753), (98, 0.05530978), (218, 0.26640752)]

Roman d'Alexandre en prose.txt
[(98, 0.40703207), (103, 0.098460875), (161, 0.109

The Haunting of Hill House.txt
[(54, 0.35844177), (82, 0.30671126), (173, 0.09647407), (244, 0.09492973)]

Saint-Malo.txt
[(52, 0.1566165), (152, 0.16340584), (159, 0.050263412), (202, 0.35414755), (230, 0.16677563)]

Sword and sorcery.txt
[(97, 0.7374475), (122, 0.24139592)]

Practical joke.txt
[(5, 0.13035908), (21, 0.0554918), (108, 0.522296), (125, 0.13917024)]

Mythos.txt
[(5, 0.062213536), (98, 0.054245185), (139, 0.25902718), (153, 0.42500064), (206, 0.071495034)]

Zarkorr! The Invader.txt
[(21, 0.12921269), (35, 0.12781632), (88, 0.05785631), (173, 0.5278314)]

Wandlebury.txt
[(25, 0.07210716), (33, 0.06776479), (82, 0.06819077), (165, 0.06737435), (186, 0.06648448), (237, 0.095341355)]

Richard Rudgley.txt
[(5, 0.1806721), (65, 0.11502731), (98, 0.2191784), (146, 0.107444875), (239, 0.12333453)]

Omnipotent.txt
[(5, 0.3738481), (95, 0.07274439), (157, 0.060877874), (178, 0.07798619), (199, 0.35211962)]

Trick film.txt
[(20, 0.14327237), (122, 0.6670089), (191, 0.069320604)]

R

It (novel).txt
[(144, 0.34416878), (173, 0.32163128), (244, 0.062321007)]

Ghosts in Malay culture.txt
[(58, 0.46965852), (82, 0.25607717), (157, 0.20533903)]

Comedy.txt
[(5, 0.10806256), (20, 0.08825638), (94, 0.3072843), (98, 0.2927315)]

Felicity (TV series).txt
[(35, 0.070003964), (169, 0.731584)]

Eiji Funakoshi.txt
[(35, 0.080770254), (145, 0.55895823), (238, 0.14787047)]

Kaidan (parapsychology).txt
[(26, 0.065789096), (58, 0.08236354), (78, 0.06832844), (82, 0.5670608), (238, 0.08478505)]

Bollywood.txt
[(38, 0.058287602), (52, 0.08202674), (63, 0.056769166), (178, 0.12670109), (191, 0.5293917)]

Keye Luke.txt
[(10, 0.15238626), (35, 0.21332467), (44, 0.10176882), (169, 0.19056153), (173, 0.09916321), (202, 0.10690041)]

Kamen Rider Den-O & Kiva-COLON- Climax Deka.txt
[(21, 0.13577175), (77, 0.086590245), (90, 0.60697913), (238, 0.05082834)]

List of film noir titles.txt
[(20, 0.17167726), (173, 0.16344662), (191, 0.067611165), (202, 0.26452762), (213, 0.20561981)]

Beowulf-CO

Military science fiction.txt
[(97, 0.4486681), (122, 0.3787764)]

Ethan Haas Was Right.txt
[(5, 0.05362431), (60, 0.07944297), (110, 0.110862054), (112, 0.19804494), (134, 0.15962163), (169, 0.11643847), (170, 0.07008505)]

2008 in film.txt
[(35, 0.5861237), (38, 0.0530154), (202, 0.08887037)]

Ghost Whisperer.txt
[(169, 0.5778293)]

Etruria.txt
[(59, 0.060359694), (152, 0.7399841), (236, 0.10579623)]

List of Christian films.txt
[(35, 0.15714528), (158, 0.076926395), (188, 0.078268185), (213, 0.5925389)]

List of ninja films.txt
[(213, 0.95577437)]

Creature Feature (film).txt
[(20, 0.07734622)]

Funko.txt
[(60, 0.107686915), (105, 0.066455655), (110, 0.06003589), (166, 0.10943711), (176, 0.12231691), (223, 0.23337069), (230, 0.058241367)]

Deno's Wonder Wheel Amusement Park.txt
[(70, 0.1723252), (176, 0.7513953)]

Mexican sex comedy.txt
[(122, 0.7405193), (191, 0.19108772)]

Dallas Morning News.txt
[(61, 0.51400363), (129, 0.38981143), (174, 0.0545522)]

Freaked.txt
[(169, 0.12650035

Easter witch.txt
[(17, 0.5645992), (157, 0.2178625), (158, 0.088147014)]

Hauntings.txt
[(52, 0.06589638), (82, 0.62703574), (174, 0.050610878), (201, 0.14742792)]

Mission-COLON- Impossible III.txt
[(38, 0.2826576), (39, 0.089315), (173, 0.2156927), (245, 0.2509891)]

Good Girls (Elle King song).txt
[(8, 0.3854511), (21, 0.19022244), (51, 0.06287999), (111, 0.24211653)]

Making-of.txt
[(20, 0.11033584), (122, 0.83813524)]

Arthur Ochs Sulzberger, Jr..txt
[(61, 0.8056531), (125, 0.08784631), (212, 0.05060773)]

Lists of horror films.txt
[(118, 0.05790247), (122, 0.5030379), (191, 0.33532855)]

Greek underworld.txt
[(101, 0.69533294), (139, 0.105163), (157, 0.116038315)]

Lost Continent (1951 film).txt
[(173, 0.53651404), (202, 0.19186226), (237, 0.10511274)]

Jurassic World-COLON- Dominion.txt
[(138, 0.993043)]

Konungs skuggsjá.txt
[(3, 0.05149296), (95, 0.087640755), (98, 0.3667597), (118, 0.124664195), (161, 0.06132997)]

Commedia sexy all'italiana.txt
[(94, 0.16763923), (122, 0.448

Image Entertainment.txt
[(35, 0.076277986), (86, 0.07079601), (166, 0.69461447), (173, 0.07917179)]

Atari.txt
[(60, 0.14284094), (110, 0.21209903), (166, 0.18371132), (203, 0.45224687)]

Indian subcontinent.txt
[(52, 0.5126189), (178, 0.09353094), (237, 0.37448344)]

Dinosaurs.txt
[(153, 0.99144375)]

Into the Grizzly Maze.txt
[(138, 0.28666836), (173, 0.56885797), (215, 0.11181954)]

Epoch-COLON- Evolution.txt
[(21, 0.894895)]

Matt Czuchry.txt
[(35, 0.39571327), (125, 0.11201302), (169, 0.4077082)]

Ghosts 'n Goblins (series).txt
[(53, 0.83226466), (110, 0.15130527)]

Manticore_(Dungeons_%26_Dragons).txt
[]

James Berardinelli.txt
[(16, 0.076318525), (35, 0.12672), (38, 0.3820487), (125, 0.07552408), (134, 0.062595196), (146, 0.0538958)]

Islamic funeral.txt
[(13, 0.25762415), (29, 0.05055679), (95, 0.16732238), (139, 0.10821178), (161, 0.16090965)]

19-2000.txt
[(111, 0.6430444), (175, 0.13971025)]

List of science fiction action films.txt
[(88, 0.108750716), (97, 0.20832765), (105

Cannibal film.txt
[(122, 0.21798444), (173, 0.07757059), (191, 0.6419464)]

Ancient history.txt
[(44, 0.08281705), (52, 0.16770644), (152, 0.090213835), (155, 0.09527623), (236, 0.46684173)]

Mission-COLON- Impossible – Ghost Protocol.txt
[(38, 0.5005825), (243, 0.07769974), (245, 0.2614767)]

Doraemon-COLON- Nobita and the Knights of Dinosaurs.txt
[(77, 0.9504695)]

Budapest school.txt
[(122, 0.7531315), (191, 0.17326124)]

Keiko Kayama.txt
[]

Limited series (comics).txt
[(60, 0.091191866), (105, 0.39446414), (214, 0.20450757)]

American International Pictures.txt
[(166, 0.15045434), (173, 0.6822898)]

King Saul.txt
[(95, 0.16505076), (180, 0.803054)]

Category-COLON-Gamera films.txt
[]

Ecgþeow.txt
[(103, 0.15535824), (121, 0.7044599)]

Grímur Jónsson Thorkelin.txt
[(121, 0.86737984)]

List of anti-war films.txt
[(35, 0.19074127), (38, 0.12527663), (61, 0.05509081), (74, 0.065425366), (122, 0.22095756), (191, 0.1691347), (202, 0.12153616)]

Japanese New Wave.txt
[(18, 0.094001904), 

Apulia.txt
[(52, 0.32116503), (152, 0.098657794), (226, 0.31810278), (230, 0.056242783), (237, 0.10253479)]

In the Deep (film).txt
[(38, 0.38082603), (149, 0.14793514), (173, 0.3551648)]

Cinema of Transgression.txt
[(20, 0.07413993), (122, 0.84624374)]

Bad Robot Productions.txt
[(39, 0.10224328), (166, 0.7724458), (169, 0.064121895)]

JSDF.txt
[(74, 0.72572666), (133, 0.10272279), (238, 0.16163452)]

Dennis Muren.txt
[(145, 0.8750374), (188, 0.052917622)]

Cunnilingus.txt
[(149, 0.099734485), (195, 0.077224016), (223, 0.591269)]

Chunichi Dragons.txt
[(6, 0.10937173), (169, 0.094722755), (198, 0.2516446), (238, 0.25453)]

Chinese classic text.txt
[(44, 0.6753422), (98, 0.2478202)]

Computer animation.txt
[(5, 0.11948443), (60, 0.09813065), (71, 0.39643404), (122, 0.18113403)]

Manga.txt
[(19, 0.5230278), (218, 0.20157689)]

Iron Sky-COLON- The Coming Race.txt
[(38, 0.14156464), (166, 0.07161881), (173, 0.07688365), (221, 0.5414846)]

Amazon.com.txt
[(60, 0.15953308), (74, 0.10223899

Lego Dimensions.txt
[(8, 0.10554426), (75, 0.06695802), (90, 0.08214027), (110, 0.3060879), (189, 0.24140784)]

List of films featuring extraterrestrials.txt
[(75, 0.06168209), (122, 0.1527667), (160, 0.06622009), (167, 0.46454507), (220, 0.10190069)]

Dungeons & Dragons Game (1991 boxed set).txt
[(6, 0.13914351), (128, 0.849179)]

List of films featuring dinosaurs.txt
[(88, 0.15223274), (138, 0.055824496), (213, 0.63693666)]

Mockumentary.txt
[(20, 0.18961892), (94, 0.1228069), (122, 0.29059008), (191, 0.26591775)]

List of zombie films.txt
[(35, 0.1991005), (51, 0.26635826), (122, 0.11421071), (173, 0.066565715), (218, 0.19328882)]

Heart.txt
[(9, 0.4556114), (29, 0.08584189), (84, 0.1704248), (195, 0.10730947), (234, 0.1456617)]

Blink_dog.txt
[]

Himalayan mountains.txt
[(52, 0.13976309), (178, 0.054341685), (237, 0.78620446)]

Hugh Evelyn-White.txt
[]

Cinéma pur.txt
[(71, 0.15645751), (191, 0.81835204)]

Mega Shark Versus Giant Octopus.txt
[(37, 0.6919482), (38, 0.05429371), (173

Cartoon All-Stars to the Rescue.txt
[(21, 0.5592152), (86, 0.052389577), (102, 0.108922966), (108, 0.058971446)]

Junko Yashiro.txt
[]

Abaye.txt
[(95, 0.5138937), (98, 0.10939766), (111, 0.07206101), (164, 0.1055708)]

Lake Placid 2.txt
[(21, 0.8715493), (173, 0.07975229)]

Category-COLON-Articles with dead external links from July 2020.txt
[(14, 0.84908473)]

Category-COLON-2008 films.txt
[(8, 0.24122655), (14, 0.2705041), (20, 0.08032497), (218, 0.0552151)]

HMV.txt
[(46, 0.13957664), (60, 0.17059273), (80, 0.08350741), (86, 0.05608346), (166, 0.11569317), (176, 0.31731468)]

Joy Ride 3-COLON- Roadkill.txt
[(21, 0.058641307), (48, 0.1299863), (173, 0.73251075)]

Leech.txt
[(41, 0.058346435), (84, 0.37359938), (104, 0.0749394), (149, 0.1879902), (234, 0.19751221)]

Medium (TV show).txt
[(35, 0.16527042), (102, 0.13789152), (169, 0.5770073), (212, 0.056212574)]

Children's film.txt
[(108, 0.069873855), (122, 0.15622096), (191, 0.5068597), (218, 0.08432593), (247, 0.10897683)]

Lady in

Odette Annable.txt
[(144, 0.6831962), (169, 0.28180146)]

Pneuma.txt
[(5, 0.0694993), (63, 0.05449129), (95, 0.07257495), (199, 0.40886703), (232, 0.11197289), (234, 0.0602426), (236, 0.059690922)]

Yoshiyuki Kuroda.txt
[]

Robot Chicken (season 4).txt
[(35, 0.6205157), (189, 0.26417664)]

Red Beard.txt
[(35, 0.18265209), (103, 0.12737037), (173, 0.32560685), (217, 0.073473625)]

Tyrannosaurus rex.txt
[(25, 0.9911987)]

Osaka.txt
[(19, 0.12563121), (118, 0.06575957), (230, 0.43545523), (238, 0.19492853)]

Vampire films.txt
[(122, 0.21712035), (191, 0.19966005), (201, 0.54563016)]

New York Post.txt
[(61, 0.6779964), (74, 0.0619891), (176, 0.08952443)]

Triceratops.txt
[(25, 0.18820553), (153, 0.08558845), (165, 0.7204248)]

Snowman.txt
[(29, 0.15329553), (108, 0.050021727), (142, 0.33692035), (157, 0.16395298), (173, 0.054892305)]

Mumblecore.txt
[(35, 0.10568996), (38, 0.08256618), (122, 0.23575206), (173, 0.08174651), (191, 0.43445608)]

Winston Zeddemore.txt
[(8, 0.7888161), (169, 0

Pursuivant.txt
[(225, 0.87482196)]

U.S. Forest Service.txt
[(74, 0.074384615), (133, 0.4425242), (237, 0.38517025)]

Survivorman.txt
[(5, 0.07583685), (20, 0.07034379), (86, 0.12829998), (169, 0.3242512), (173, 0.091888964), (179, 0.14102836)]

Yuggoth.txt
[(126, 0.07145386), (231, 0.0745515), (244, 0.7531304)]

Psalms.txt
[(95, 0.71271384), (224, 0.15190734)]

Zelus.txt
[(57, 0.92199)]

The Transition of Juan Romero.txt
[(244, 0.9247948)]

Stone Tape.txt
[(5, 0.13401304), (60, 0.10052769), (82, 0.4047743), (100, 0.2541498)]

Ugarit.txt
[(103, 0.14282681), (155, 0.55435514), (236, 0.22253443)]

Worm of Linton.txt
[(33, 0.14717937), (43, 0.050850086), (157, 0.28250375), (168, 0.34837323), (206, 0.057045717)]

Variety (magazine).txt
[(38, 0.14566842), (61, 0.256158), (132, 0.36674428), (166, 0.11675845)]

S.H.I.E.L.D..txt
[(105, 0.8691243)]

Ural Mountains.txt
[(65, 0.25098455), (236, 0.10292922), (237, 0.47142357)]

Skip Williams.txt
[(6, 0.16254956), (83, 0.79864705)]

Yenghe hatam.tx

Ubisoft.txt
[(74, 0.37910977), (110, 0.40887678), (166, 0.13976587)]

Younger Avesta.txt
[(63, 0.054408867), (95, 0.11166506), (98, 0.08460387), (139, 0.100995615), (155, 0.5654215)]

Tom Atkins (actor).txt
[(35, 0.5012488), (169, 0.05342223), (173, 0.2981261)]

Zone Fighter.txt
[(88, 0.7732735), (169, 0.1354353)]

Sea Shepherd Conservation Society.txt
[(35, 0.06016023), (74, 0.62737083), (214, 0.103499696), (229, 0.0913902)]

Young Hollywood.txt
[(86, 0.38703877), (134, 0.17438188), (166, 0.29093072), (169, 0.07452037)]

You Can't Do That on Stage Anymore, Vol. 3.txt
[(142, 0.9334941)]

Shinjuku.txt
[(230, 0.781468), (238, 0.17149727)]

The Venture Bros..txt
[(86, 0.10334971), (169, 0.7626116)]

Trophonius.txt
[(57, 0.75384665), (101, 0.06647214), (112, 0.1220226)]

Shenlong.txt
[(19, 0.10277747), (44, 0.25795197), (157, 0.10368539), (168, 0.22061671)]

The Moon-Bog.txt
[(244, 0.8771567)]

Tokusatsu.txt
[(20, 0.053210266), (77, 0.090091445), (97, 0.37591735), (123, 0.15607028), (193, 

The Mound (novella).txt
[(244, 0.78197896)]

The Mighty Peking Man.txt
[(51, 0.57356447), (173, 0.2884798)]

Uther Pendragon.txt
[(103, 0.13099857), (161, 0.70834076)]

Scythe.txt
[(29, 0.25533727), (152, 0.058342505), (157, 0.059412807), (194, 0.50812817)]

Silla.txt
[(44, 0.37530878), (118, 0.14564739), (152, 0.14612314), (200, 0.16806425)]

Stater.txt
[(161, 0.06752512), (162, 0.21189494), (163, 0.1883513), (236, 0.37973022)]

S2CID (identifier).txt
[(5, 0.2752061), (60, 0.11318948), (61, 0.056614418), (107, 0.060933042), (125, 0.06593178), (127, 0.2856317)]

Richard Matheson.txt
[(35, 0.22749949), (97, 0.09137294), (132, 0.3615631), (146, 0.15350743), (173, 0.07983443)]

Thebes (Greece).txt
[(236, 0.9884511)]

Template talk-COLON-Greek mythology (deities).txt
[(126, 0.72928214)]

Satyr.txt
[(40, 0.11590046), (98, 0.15135185), (101, 0.24174768), (157, 0.1008518), (216, 0.20221238), (218, 0.113339424)]

Vert (heraldry).txt
[(0, 0.14212447), (161, 0.07590747), (225, 0.6898923)]

Razzi

Bandai.txt
[(60, 0.07570953), (77, 0.056070853), (110, 0.14754178), (166, 0.09561948), (177, 0.3779922)]

John Mitchell Kemble.txt
[(121, 0.6298984), (125, 0.09293156), (206, 0.09535423)]

Conjure Wife.txt
[(16, 0.20444551), (35, 0.06683012), (97, 0.384404), (173, 0.07318721), (244, 0.103125036)]

Arrow Films.txt
[(4, 0.0649085), (20, 0.08406933), (96, 0.07459609), (110, 0.10679168), (141, 0.083793685), (166, 0.20104355), (173, 0.19338703), (191, 0.06368386)]

Chiller (TV channel).txt
[(86, 0.34518728), (166, 0.13391124), (243, 0.40878314)]

Ghostbusters (2016 video game).txt
[(8, 0.4148814), (21, 0.15699038), (110, 0.38038808)]

Dragon Fighter.txt
[(21, 0.25152615), (168, 0.21275675), (173, 0.1849039), (221, 0.25334865)]

Lovetide.txt
[(123, 0.87925893)]

Category-COLON-Articles to be expanded from April 2018.txt
[(14, 0.84908485)]

Category-COLON-Articles with unsourced statements from December 2013.txt
[(14, 0.8490849)]

Minosuke Yamada.txt
[]

List of science fiction horror films.t

Barugon.txt
[(141, 0.9232107)]

Chico Roland.txt
[(212, 0.07736949)]

Counter-Earth.txt
[(29, 0.07112127), (157, 0.065721035), (186, 0.47754768), (199, 0.059099767), (231, 0.07959495)]

Corbel.txt
[(29, 0.10328673), (152, 0.52058905), (157, 0.104797885), (174, 0.07196167)]

Hentai.txt
[(19, 0.40327814), (122, 0.07384728), (191, 0.29318252), (223, 0.0674337)]

John Ferriar.txt
[(33, 0.082663506), (35, 0.13295692), (65, 0.07191105), (98, 0.114578426), (100, 0.050568137), (146, 0.22105718), (208, 0.05715085)]

Charles Leslie Wrenn.txt
[(121, 0.79748803)]

Hieroglyph.txt
[(29, 0.05566581), (59, 0.06181299), (63, 0.1937543), (98, 0.09821537), (139, 0.42078137)]

Minority Report (film).txt
[(35, 0.09509482), (38, 0.11233852), (116, 0.66247356)]

India.txt
[(52, 0.3478853), (74, 0.12841862), (178, 0.3801103), (218, 0.09514836)]

Gershom Scholem.txt
[(65, 0.061636806), (95, 0.15280056), (164, 0.6255351), (232, 0.11409332)]

Declan O'Brien.txt
[(21, 0.37704578), (173, 0.43146387)]

Golden Age o

Category-COLON-Japanese-language films.txt
[(59, 0.1102727), (166, 0.05581378)]

Kon Ohmura.txt
[]

Category-COLON-Cloverfield (franchise).txt
[]

Grand Central Terminal.txt
[(22, 0.110665806)]

Blaxploitation.txt
[(35, 0.12732103), (173, 0.13371354), (191, 0.7055093)]

Attack of the Show!.txt
[(86, 0.13260694), (127, 0.43416014), (134, 0.05685868), (169, 0.32875845)]

Crusades.txt
[(95, 0.17301491), (152, 0.5156946), (210, 0.11441016), (218, 0.1309099)]

Magic in the Greco-Roman world.txt
[(40, 0.22433914), (98, 0.118452445), (101, 0.057149068), (239, 0.4594562)]

Indian ghost movie.txt
[(20, 0.13241957), (59, 0.07335198), (82, 0.33648506), (108, 0.06545475), (178, 0.07540849)]

Austin American-Statesman.txt
[(60, 0.06783778), (61, 0.69045705), (125, 0.12686509), (237, 0.050008953)]

Chimera of Arezzo.txt
[(29, 0.05590388), (98, 0.058610562), (112, 0.07547867), (152, 0.6090262), (236, 0.09252537)]

Catholic Church.txt
[(95, 0.07775497)]

Digging up the Morrow.txt
[]

I Timothy.txt
[(9

BlackBook Magazine.txt
[(60, 0.084517874), (61, 0.19796775), (66, 0.061372608), (113, 0.26128173), (126, 0.133238)]

Mantis.txt
[(82, 0.23435245), (149, 0.4434784), (234, 0.22526519)]

Category-COLON-CS1 maint-COLON- multiple names-COLON- authors list.txt
[(14, 0.81503415), (59, 0.059675567)]

Bobblehead.txt
[(35, 0.11509166), (60, 0.069944575), (61, 0.057786405), (90, 0.09076681), (157, 0.07758113), (169, 0.070528775), (173, 0.056905303), (176, 0.06564995)]

Hyoscyamus niger.txt
[(29, 0.08662963), (43, 0.14518046), (149, 0.1451877), (178, 0.19286096), (239, 0.13663015)]

Goosebumps (film).txt
[(38, 0.31202844), (116, 0.32037628), (173, 0.21845002)]

Bavarian porn.txt
[(122, 0.6267203), (191, 0.26103592)]

Beast of Gévaudan.txt
[(99, 0.3788207), (151, 0.11250053), (157, 0.15966944), (173, 0.089760795)]

Diocese of Bjørgvin.txt
[(95, 0.08897408), (118, 0.5187102), (158, 0.22769614)]

Guinea.txt
[(52, 0.53217524), (74, 0.40518728)]

Anaconda.txt
[(42, 0.10129922), (52, 0.13345008), (104,

Gervase of Tilbury.txt
[(72, 0.5166487), (152, 0.17233303), (161, 0.102872126), (206, 0.09154647)]

Laurence M. Larson.txt
[(35, 0.34716704), (61, 0.16378863), (125, 0.06981917), (146, 0.26534513)]

J. R. Skelton.txt
[(80, 0.21800505), (98, 0.20229119), (152, 0.052061673), (188, 0.25601566)]

Gargoyle's Quest II.txt
[(53, 0.4049237), (110, 0.2720292), (157, 0.05625096), (182, 0.06220151)]

Deildegast.txt
[(21, 0.07182505), (82, 0.0748387), (157, 0.62340575)]

Berserker.txt
[(3, 0.11537794), (59, 0.06427133), (103, 0.11923755), (135, 0.094719514), (152, 0.10304972), (157, 0.068059385), (200, 0.21331458)]

Bourekas film.txt
[(122, 0.3921184), (191, 0.52979755)]

Gyaos.txt
[(123, 0.06119735), (141, 0.44121182), (173, 0.3015812), (206, 0.09082009)]

Lucasfilm.txt
[(45, 0.21083266), (166, 0.15633672), (220, 0.59457916)]

British Board of Film Classification.txt
[(20, 0.13776301), (74, 0.15206227), (80, 0.09130367), (86, 0.1189387), (108, 0.19041675), (134, 0.08873313), (191, 0.058738127)]



Abortion.txt
[(74, 0.18426302), (149, 0.56964713), (195, 0.10530615), (208, 0.052159943), (223, 0.052481536)]

ASIN (identifier).txt
[(60, 0.20235659), (165, 0.52284664)]

Leo Reinisch.txt
[]

Encephalitis.txt
[(195, 0.7190867), (217, 0.2192814)]

Halloween.txt
[(47, 0.0756276), (89, 0.50211763), (95, 0.101894006), (210, 0.08979792)]

Hallucinations.txt
[(29, 0.07974034), (100, 0.19046973), (137, 0.07927662), (195, 0.3792631), (205, 0.20745288)]

Chest.txt
[(84, 0.6941067), (195, 0.0795083), (234, 0.15761356)]

3D film.txt
[(20, 0.5531761), (29, 0.08827268), (38, 0.06367725), (60, 0.08547551), (122, 0.060012795), (147, 0.07727604)]

Drama (film and television).txt
[(122, 0.15605162), (191, 0.7349168)]

Kyōko Enami.txt
[(21, 0.17053694), (35, 0.16988266), (145, 0.07344807), (238, 0.33568472)]

Kevin Crossley-Holland.txt
[(121, 0.39572883), (146, 0.33460966)]

Helen Keller vs. Nightwolves.txt
[]

Joan As Policewoman.txt
[(35, 0.08933933), (94, 0.17733487), (111, 0.43932632), (173, 0.1529

Inverness-shire.txt
[(33, 0.19078486), (129, 0.6373078), (237, 0.14848074)]

Harrison Lake.txt
[(86, 0.055638544), (234, 0.065595955), (237, 0.7443316)]

Kydoimos.txt
[(66, 0.7566364)]

Illithid.txt
[(81, 0.098316774), (83, 0.41619813), (115, 0.108317435), (164, 0.097635366)]

Indo-European mythology.txt
[(139, 0.29455724), (216, 0.6226848), (218, 0.07807217)]

Hastur.txt
[(244, 0.9244797)]

Nuggle.txt
[(33, 0.20635818), (42, 0.12738596), (157, 0.32099637), (187, 0.10482318), (218, 0.11441014)]

Hidetaka Miyazaki.txt
[(110, 0.38674682), (169, 0.0567371), (180, 0.15960398), (199, 0.08627445)]

Kitana.txt
[(73, 0.18428893), (182, 0.7938129)]

Orphism (religion).txt
[(40, 0.4461742), (98, 0.121175244), (101, 0.20121478), (207, 0.12757367)]

Noob Saibot.txt
[(73, 0.8144277), (182, 0.15926099)]

Italy.txt
[(52, 0.18961829), (74, 0.078443654), (118, 0.075702325), (152, 0.21044798), (226, 0.41181964)]

Mortal Kombat-COLON- Rebirth.txt
[(38, 0.059578143), (73, 0.64800555), (116, 0.12374884), (

The Mothers of Invention.txt
[(142, 0.8570958), (218, 0.0742215)]

Rob Bottin.txt
[(35, 0.101788275), (145, 0.7995647)]

USAopoly.txt
[(60, 0.06664184), (110, 0.197134), (186, 0.607118)]

The Things (short story).txt
[(38, 0.15251908), (54, 0.07842155), (97, 0.26586133), (108, 0.061490607), (160, 0.07237268)]

Ray Patterson (animator).txt
[(35, 0.29446745), (45, 0.09830878), (71, 0.13591582), (126, 0.060999997), (166, 0.081700236), (192, 0.07763586)]

The_Blob.txt
[(37, 0.6867223), (38, 0.054955937), (173, 0.2119864)]

Vertebrae.txt
[(10, 0.072427824), (84, 0.80169404), (165, 0.06493862)]

Steven S. DeKnight.txt
[(102, 0.22870174), (144, 0.23902941), (169, 0.40322345)]

T. K. Carter.txt
[(21, 0.11714171), (35, 0.5375065), (169, 0.23279569)]

Times-Standard.txt
[(60, 0.056596547), (61, 0.5901417), (184, 0.07172455), (190, 0.056567784), (230, 0.11797939)]

Skunk ape.txt
[(35, 0.05927548), (64, 0.23967233), (104, 0.08844752), (157, 0.14267667), (177, 0.07693871), (202, 0.0736906), (215, 0

UC Berkeley.txt
[(125, 0.8009418), (189, 0.1430239)]

Wand.txt
[(97, 0.29597855), (157, 0.059070114), (239, 0.54943854)]

Slaad.txt
[(6, 0.06825094), (83, 0.393841), (115, 0.07346033), (128, 0.20164597), (157, 0.110895105)]

Scorpion (Mortal Kombat).txt
[(73, 0.71734947), (182, 0.25596532)]

Terror of Mechagodzilla.txt
[(88, 0.5921782), (123, 0.31740853), (173, 0.05147073)]

The Washington Post.txt
[(61, 0.86372316), (67, 0.09903134)]

Tim McIntire.txt
[(35, 0.41942316), (111, 0.11680115), (169, 0.24459141), (202, 0.05014256)]

Spelljammer.txt
[(29, 0.06421575), (81, 0.13717808), (83, 0.6313052)]

Sound.txt
[(5, 0.06311243), (29, 0.74412984), (205, 0.09271344)]

Yale (mythical creature).txt
[(125, 0.10124534), (225, 0.7831601)]

Thalassa (mythology).txt
[(57, 0.9695634)]

Sigurd.txt
[(3, 0.1001419), (59, 0.058063757), (168, 0.5977014), (218, 0.166438)]

Psychic.txt
[(5, 0.05233506), (100, 0.7274568)]

Sagami Province.txt
[(236, 0.12995407), (238, 0.7811188)]

Sindel.txt
[(73, 0.7610488


Susan Deacy.txt
[(65, 0.18351695), (98, 0.13884085), (125, 0.308976), (163, 0.12927225), (223, 0.06339404)]

Thailand.txt
[(52, 0.46480894), (58, 0.15546963), (74, 0.24345924)]

Psychokinesis.txt
[(5, 0.100212865), (100, 0.7135478)]

Witchcraft.txt
[(239, 0.9622704)]

Yazata.txt
[(139, 0.16900271), (155, 0.65687025), (218, 0.057328507)]

The Fog (soundtrack).txt
[(120, 0.8334929)]

River Ness.txt
[(30, 0.09505081), (33, 0.13116312), (47, 0.053068697), (104, 0.22288585), (129, 0.06480255), (176, 0.101970784), (237, 0.14702727)]

Tannin (monster).txt
[(95, 0.28893143), (103, 0.43985024), (218, 0.08633574)]

Zagreus.txt
[(185, 0.23498549), (207, 0.6788743)]

Skull art.txt
[(10, 0.060322836), (47, 0.22370361), (98, 0.15743828), (139, 0.067224205), (157, 0.09630703), (177, 0.054144334), (178, 0.078680515), (191, 0.07655956)]

Saturn Award for Best Horror Film.txt
[(4, 0.25923032), (35, 0.17400827), (92, 0.092806876), (102, 0.060721844), (173, 0.14956827)]

Reportedly haunted locations in t

The Vinyl Factory.txt
[(29, 0.09527145), (60, 0.3308464), (80, 0.1099897), (166, 0.10391915), (230, 0.12938608)]

The Curse of Yig.txt
[(244, 0.93723637)]

Scribblenauts.txt
[(5, 0.08935285), (21, 0.17667651), (110, 0.61357903)]

Time Warner.txt
[(86, 0.17216554), (166, 0.21898851), (247, 0.57776755)]

South Lanarkshire.txt
[(33, 0.18632087), (118, 0.06007106), (136, 0.3198794), (174, 0.05128836), (230, 0.15274782), (237, 0.10892538)]

The Alchemist (short story).txt
[(244, 0.91918)]

Psionics (Dungeons & Dragons).txt
[(83, 0.8816804)]

Zoroastrianism in the United States.txt
[(11, 0.24148667), (155, 0.22345884), (237, 0.36858514)]

The 7th Voyage of Sinbad.txt
[(36, 0.19711824), (89, 0.12790616), (173, 0.5044875)]

Zmeu.txt
[(157, 0.65878326), (168, 0.16499415)]

Woodcut.txt
[(18, 0.57926035), (29, 0.1022894), (152, 0.16261868)]

Styx.txt
[(40, 0.61780226), (57, 0.36190206)]

Providence (Avatar Press).txt
[(173, 0.17809948), (244, 0.7068115)]

Wrath of the Immortals.txt
[(128, 0.98408

Yoshimitsu Banno.txt
[(88, 0.3565578), (238, 0.46297756)]

The Howling (film).txt
[(35, 0.19735047), (92, 0.105044276), (173, 0.55109423)]

Simon & Schuster.txt
[(34, 0.5362648), (61, 0.061180394), (86, 0.25258538), (166, 0.14279042)]

Unidentified flying object.txt
[(13, 0.757118), (74, 0.053807024), (100, 0.086392865)]

Twelve Olympians.txt
[(40, 0.25443974), (57, 0.30570954), (101, 0.34274238), (185, 0.08597299)]

The Complete Psionics Handbook.txt
[(83, 0.07686642), (128, 0.9020899)]

Raytheon.txt
[(60, 0.13149562), (74, 0.11841524), (113, 0.30550113), (133, 0.12992817), (166, 0.17318031), (176, 0.08356749)]

Vendidad.txt
[(5, 0.057805657), (95, 0.11300877), (155, 0.7186514)]

Tabard.txt
[(13, 0.0758724), (206, 0.08299414), (225, 0.72710484)]

Zombie.txt
[(51, 0.09713188), (89, 0.33983856), (97, 0.056073107), (191, 0.10907208)]

Wired (magazine).txt
[(60, 0.069013774), (61, 0.28657788), (113, 0.060860775), (203, 0.5248569)]

Roy Tucker.txt
[(118, 0.7584015), (249, 0.20208822)]

The

Rape.txt
[(195, 0.09479025), (223, 0.8988062)]

Predation.txt
[(5, 0.14812799), (104, 0.119067185), (149, 0.45685834), (183, 0.056404058), (234, 0.08299348)]

Sweet Ermengarde.txt
[(244, 0.91276157)]

Selene.txt
[(40, 0.15183051), (57, 0.21009551), (101, 0.18452947), (185, 0.44550738)]

The Atlantic.txt
[(46, 0.23303676), (61, 0.5735045)]

Stop-motion.txt
[(20, 0.6330516), (71, 0.2161607), (122, 0.11165199)]

Variation of the field.txt
[(140, 0.5947585), (225, 0.32284054)]

Proteus.txt
[(57, 0.5501869), (98, 0.1641877), (112, 0.13480526)]

Sword-and-sandal.txt
[(97, 0.055588588), (99, 0.77489746), (122, 0.09864832), (191, 0.06414208)]

Socialist heraldry.txt
[(11, 0.5425381), (225, 0.38501182)]

World of Warcraft.txt
[(47, 0.25859073), (81, 0.057348356), (110, 0.598574)]

Tiamat.txt
[(103, 0.5705685), (139, 0.20859882)]

Rupert Gould.txt
[(80, 0.0843235), (104, 0.05502478), (133, 0.052994397), (146, 0.2563082), (214, 0.43820038)]

Skookum.txt
[(59, 0.117762975), (90, 0.088902794), (157

Robert Bloch.txt
[(146, 0.7632812), (244, 0.10951252)]

Shao Kahn.txt
[(73, 0.7464038), (182, 0.23775816)]

Shachihoko.txt
[(58, 0.1314659), (157, 0.321277), (238, 0.23297317)]

Zmey Gorynych.txt
[(168, 0.8517162), (218, 0.07778723)]

Who shot J.R.-QM-.txt
[(20, 0.061760884), (61, 0.055538785), (169, 0.4775795), (173, 0.1147775), (209, 0.22142056)]

Saltire.txt
[(0, 0.4609527), (52, 0.09626954), (225, 0.39967138)]

Thalia (Grace).txt
[(57, 0.9457236)]

Sable (heraldry).txt
[(225, 0.82210237)]

Zahhak.txt
[(103, 0.14660819), (155, 0.28745666), (157, 0.16903616), (168, 0.11589612), (249, 0.08551657)]

Sufism.txt
[(41, 0.3099086), (95, 0.13046075), (161, 0.2204109), (192, 0.18014342), (199, 0.057783253)]

Zoroastrian calendar.txt
[(155, 0.8486641), (203, 0.05105928)]

Template-COLON-Zoroastrianism.txt
[(155, 0.81551504)]

Thomas Tull.txt
[(21, 0.08958356), (125, 0.12802985), (166, 0.36586067), (200, 0.3261743)]

Wayback Machine.txt
[(60, 0.058539525), (74, 0.076887734), (107, 0.5519977), 

Sierra Nevada.txt
[(230, 0.12417178), (237, 0.7455059)]

Tiryns.txt
[(112, 0.18056747), (152, 0.34509367), (174, 0.09601761), (236, 0.26084983)]

Wiglaf.txt
[(103, 0.11961422), (121, 0.702021), (173, 0.06973237)]

Princeton University.txt
[(125, 0.89482284)]

Snorri Sturluson.txt
[(3, 0.30675563), (74, 0.0630538), (98, 0.0748576), (103, 0.05006061), (118, 0.06136151), (152, 0.06523778), (228, 0.29447585)]

Someone's Watching Me!.txt
[(35, 0.09587008), (120, 0.41400656), (173, 0.31864393)]

Savoy knot.txt
[(225, 0.9148859)]

Prospecting.txt
[(2, 0.37814206), (25, 0.1290646), (29, 0.29098162), (237, 0.11556137)]

Werewolf fiction.txt
[(92, 0.17134234), (97, 0.107642405), (122, 0.24546112), (157, 0.103788674), (191, 0.12938479), (193, 0.08350205)]

The Triumph of Death.txt
[(98, 0.16412383), (152, 0.06818615), (157, 0.6239032)]

Vishap.txt
[(139, 0.23118469), (157, 0.23195913), (168, 0.2268843)]

Yamata no Orochi.txt
[(103, 0.08069315), (139, 0.1495908), (157, 0.16922618), (168, 0.3801021

Idaho State Journal.txt
[(61, 0.5735096), (198, 0.16451664), (237, 0.09856246)]

Plane (Dungeons & Dragons).txt
[(81, 0.21140364), (83, 0.17675748), (157, 0.5608617)]

Hudson (Aliens).txt
[(35, 0.12757498), (160, 0.5941726), (170, 0.1995512)]

Momo the Monster.txt
[(35, 0.06464965), (64, 0.43020973), (82, 0.065945245), (115, 0.09583784), (230, 0.15097778)]

Pandemic (board game).txt
[(110, 0.08827938), (175, 0.8584389)]

M. A. Wetherell.txt
[(21, 0.061831914), (35, 0.34278348), (66, 0.067058675), (104, 0.054168522), (146, 0.08189782), (179, 0.0725571), (200, 0.12317993)]

Lithograph.txt
[(29, 0.37715814), (60, 0.08800427), (98, 0.1287664), (129, 0.26981035), (152, 0.05491521)]

Halloween (soundtrack).txt
[(120, 0.9366665)]

King Kong 2-COLON- Ikari no Megaton Punch.txt
[(51, 0.6816895), (110, 0.11497321), (126, 0.052577164)]

List of Advanced Dungeons & Dragons 2nd edition monsters.txt
[(115, 0.998586)]

Horkos.txt
[(66, 0.37738505), (98, 0.27410108), (112, 0.1379259)]

MonsterVerse.tx

Munchkin (card game).txt
[(6, 0.11088155), (21, 0.13096613), (110, 0.085037865), (175, 0.47474092)]

Hemera.txt
[(57, 0.9504204)]

New York Life Building.txt
[(22, 0.11762893), (174, 0.5762403), (176, 0.21987323), (218, 0.07666245)]

Mansfield News Journal.txt
[(60, 0.09244547), (61, 0.6324118), (125, 0.050090045), (184, 0.07824852), (237, 0.06032964)]

Jacko hoax.txt
[(21, 0.052684084), (61, 0.112365), (64, 0.3512786), (146, 0.1647448), (192, 0.08195534), (244, 0.05435081)]

Mighty Joe Young (1949 film).txt
[(36, 0.05665858), (51, 0.64653105), (173, 0.2443645)]

Plesiosauria.txt
[(165, 0.99565125)]

Pallas (daughter of Triton).txt
[(57, 0.14716597), (112, 0.20279658), (163, 0.2808708), (207, 0.2562783)]

Minos.txt
[(57, 0.36339918), (94, 0.07043148), (98, 0.10827627), (101, 0.11151185), (112, 0.23126134)]

James Cameron.txt
[(32, 0.069115825), (38, 0.10071419), (188, 0.73339456)]

Haruo Nakajima.txt
[(35, 0.15787826), (88, 0.6646622), (123, 0.07272621)]

Metaphor.txt
[(5, 0.481646), (

J. R. R. Tolkien.txt
[(18, 0.7537843), (121, 0.15242186)]

Mechagodzilla.txt
[(88, 0.92990667)]

Injustice 2.txt
[(73, 0.122407116), (110, 0.24467452), (156, 0.5666949)]

Jun Fukuda.txt
[(21, 0.065787375), (88, 0.35294285), (238, 0.38012722)]

Pitch (filmmaking).txt
[(20, 0.30950218), (38, 0.35079795), (108, 0.06989985), (169, 0.12010563)]

Pirene (mythology).txt
[(66, 0.12246431), (207, 0.69146717)]

Numicus.txt
[(30, 0.05225203), (66, 0.3056591), (112, 0.18420455), (158, 0.06066164), (171, 0.0514455), (226, 0.055368163)]

Oizys.txt
[(57, 0.089983545), (66, 0.48399317), (137, 0.08379178)]

Plouto (Oceanid).txt
[(185, 0.16973285), (207, 0.6081109)]

Halloween (1978 film).txt
[(20, 0.12060069), (38, 0.059138328), (120, 0.21138133), (173, 0.43517184), (218, 0.09049014)]

Patterson–Gimlin film.txt
[(192, 0.90115565)]

Pluto.txt
[(25, 0.08769455), (186, 0.23295121), (231, 0.5462255)]

Magic item (Dungeons & Dragons).txt
[(6, 0.062263552), (81, 0.09180064), (83, 0.21510555), (128, 0.2488722

Megaguirus.txt
[(88, 0.95139533)]

Larian Studios.txt
[(6, 0.1163214), (86, 0.07696281), (110, 0.4775347), (183, 0.183438)]

Keloid.txt
[(29, 0.44506434), (151, 0.0656087), (161, 0.05511118), (195, 0.2450206), (234, 0.06628112)]

Minilla.txt
[(88, 0.9597871)]

Invasion of Astro-Monster.txt
[(88, 0.15854682), (123, 0.7718239), (218, 0.0512601)]

Merriam-Webster.txt
[(59, 0.09575728), (60, 0.1808149), (63, 0.05958056), (206, 0.5400753)]

History of the Necronomicon.txt
[(244, 0.9084028)]

Larunda.txt
[(66, 0.08527622), (101, 0.35568497), (139, 0.07125247), (171, 0.24597152)]

Hal David.txt
[(35, 0.5760724), (111, 0.13230056), (204, 0.22210877)]

PMID (identifier).txt
[(5, 0.15520965), (60, 0.13178201), (107, 0.05095903), (127, 0.2704959), (134, 0.05955791), (195, 0.14954905)]

Order (distinction).txt
[(50, 0.90444267)]

Gotengo.txt
[(88, 0.89029014), (126, 0.06037433)]

John Heard (actor).txt
[(35, 0.72818977), (169, 0.1548455)]

Monkey Day.txt
[(12, 0.3917598), (19, 0.15899913), (61, 0.

List of Dungeons & Dragons deities.txt
[(27, 0.45161343), (83, 0.2828346), (128, 0.12093369), (157, 0.0712023)]

Kurt Russell.txt
[(35, 0.65864366), (169, 0.09870577), (202, 0.08846168)]

Hydraulics.txt
[(29, 0.5170067), (125, 0.07884274), (152, 0.13029294), (155, 0.050840214), (236, 0.059082802)]

Metope (mythology).txt
[(57, 0.06548794), (207, 0.7713415)]

Paul Du Chaillu.txt
[(12, 0.05975096), (52, 0.12718554), (65, 0.08029098), (98, 0.36109212), (146, 0.074308425), (151, 0.10176423)]

Lance Anderson.txt
[(21, 0.21455704), (66, 0.05290224), (145, 0.28914818), (188, 0.15992714)]

Ned Tanen.txt
[(35, 0.16952291), (38, 0.065000035), (61, 0.07259875), (131, 0.058983218), (166, 0.4513357), (173, 0.051985826)]

Loch_Ness_Monster.txt
[(100, 0.059592318), (104, 0.9046689)]

Matthew Robbins (screenwriter).txt
[(4, 0.05699708), (21, 0.07636315), (35, 0.32681036), (188, 0.26547933), (191, 0.078578)]

List of mountains in China.txt
[(23, 0.12292319), (44, 0.24349862), (52, 0.31188053), (74, 0.0

North America.txt
[(52, 0.7334305), (237, 0.2522636)]

MovieWeb.txt
[(28, 0.112848654), (60, 0.07447618), (86, 0.095464565), (134, 0.3535914), (166, 0.18058905)]

New York (state).txt
[(176, 0.5200662), (237, 0.41549587)]

Hartmann Schedel.txt
[(30, 0.26114738), (95, 0.06497931), (98, 0.3226264), (152, 0.05763955), (232, 0.05219202)]

Legendary Entertainment.txt
[(166, 0.954842)]

Godzilla March.txt
[(88, 0.9187525)]

Image stitching.txt
[(5, 0.13318416), (29, 0.31777203), (68, 0.3292809), (71, 0.13608612)]

In the Walls of Eryx.txt
[(244, 0.8238299)]

Plaster casts.txt
[(4, 0.35005018), (29, 0.17438543), (152, 0.3327491)]

Melinoë.txt
[(101, 0.66557676), (139, 0.0761011), (207, 0.14148557)]

List of Greyhawk deities.txt
[(81, 0.10188131), (83, 0.094614014), (157, 0.18771023), (197, 0.53879106)]

Lake Tianchi Monster.txt
[(21, 0.19894591), (44, 0.08197103), (61, 0.06443163), (104, 0.3611596), (157, 0.10871063)]

Nomos (mythology).txt
[(66, 0.41168234), (139, 0.081407964)]

Litae.txt
[(

Kickstarter.txt
[(38, 0.078294754), (80, 0.4495433), (110, 0.12335082), (134, 0.15047172)]

Gray_ooze.txt
[(115, 0.998586)]

Mondo (American company).txt
[(38, 0.2327974), (110, 0.0520149), (166, 0.13194534), (204, 0.38907006), (220, 0.06226251)]

Mystara.txt
[(81, 0.97250026)]

Meryl Streep.txt
[(35, 0.7535727), (211, 0.2107717)]

Peter Nicholls (writer).txt
[(97, 0.43922755), (146, 0.36930832)]

Parody candidate.txt
[(11, 0.11591585), (86, 0.12403598), (118, 0.21189524), (223, 0.23722805)]

Planescape.txt
[(6, 0.29131788), (7, 0.10211411), (81, 0.26079094), (83, 0.24936266)]

Jaws (film).txt
[(20, 0.082206555), (32, 0.5136419), (38, 0.15622157), (218, 0.09146564)]

Microsoft Windows.txt
[(60, 0.42049098), (110, 0.28184244), (211, 0.2727726)]

Mortal Kombat Gold.txt
[(73, 0.5723489), (110, 0.092721485), (182, 0.29802224)]

Ma (goddess).txt
[(101, 0.34716424), (139, 0.23404536), (236, 0.27914914)]

Nuclear power.txt
[(181, 0.9972022)]

Plastic Man (song).txt
[(111, 0.3045373), (162, 0.

Opossums.txt
[(104, 0.16141291), (149, 0.18718578), (205, 0.6116496)]

Hanna-Barbera Productions.txt
[(21, 0.31724748), (71, 0.06574213), (86, 0.10274619), (166, 0.28016946), (247, 0.052966986)]

Nerdist Industries.txt
[(19, 0.55142975), (86, 0.16265637), (166, 0.09318782), (169, 0.09753745)]

IMDb.txt
[(20, 0.06132998), (60, 0.06980654), (134, 0.5574471), (166, 0.0600261)]

Nomia (mythology).txt
[(66, 0.13689357), (112, 0.054762866), (207, 0.66708905)]

Plaster cast.txt
[(4, 0.3503677), (29, 0.17881931), (152, 0.33362573)]

Mortal Kombat-COLON- Armageddon.txt
[(73, 0.1912024), (110, 0.07164558), (182, 0.72298664)]

Great Lakes region.txt
[(52, 0.058667712), (237, 0.7434738)]

History Channel.txt
[(86, 0.51906157), (146, 0.43449178)]

Planet of the Apes.txt
[(21, 0.11163098), (38, 0.12659763), (85, 0.4561009), (130, 0.119223505)]

Haunted attraction (simulated).txt
[(70, 0.18512481), (82, 0.17011952), (89, 0.3671951), (176, 0.10878631)]

Museo archeologico regionale Paolo Orsi.txt
[(23

Gorgoneia.txt
[(0, 0.05814125), (112, 0.07122237), (139, 0.07457109), (152, 0.12339758), (163, 0.27230382), (218, 0.101761505), (239, 0.07458748)]

MeatEater.txt
[(35, 0.098157845), (169, 0.30032733), (172, 0.05377503), (173, 0.09951334), (215, 0.1404084), (237, 0.11169948)]

Moscow, Russia.txt
[(65, 0.6665177), (174, 0.054114215), (230, 0.24514738)]

Kim Newman.txt
[(97, 0.24319836), (105, 0.050095096), (146, 0.074157365), (172, 0.33165276), (173, 0.07094772), (244, 0.0869115)]

Larvae.txt
[(149, 0.09013304), (234, 0.6620961)]

Megafauna.txt
[(104, 0.7463327), (151, 0.060096506), (153, 0.07507338)]

Japan.txt
[(21, 0.07199215), (52, 0.25497544), (74, 0.19577484), (118, 0.1510934), (238, 0.18754883)]

Godzilla Game.txt
[(88, 0.90050554)]

Gravity Falls.txt
[(45, 0.050750133), (86, 0.0698489), (108, 0.13921665), (132, 0.3817626), (169, 0.30316904)]

Hibakusha.txt
[(24, 0.68889934), (74, 0.052737843), (238, 0.058645185)]

Nomen dubium.txt
[(5, 0.10532159), (117, 0.5160789), (165, 0.22679

Nilus (mythology).txt
[(57, 0.09004949), (185, 0.089628994), (207, 0.7319075)]

Patrick Tatopoulos.txt
[(35, 0.11471371), (75, 0.08956559), (145, 0.44745067)]

Mortal Kombat 11.txt
[(73, 0.3310612), (110, 0.18875696), (182, 0.36634)]

Michael Wolgemut.txt
[(30, 0.12343334), (98, 0.43823838), (152, 0.20918973), (212, 0.09561251)]

Hygieia.txt
[(57, 0.0849622), (101, 0.25100413), (112, 0.07488047), (163, 0.19202149), (207, 0.056283258), (208, 0.13992968), (236, 0.13115838)]

Monk (Dungeons & Dragons).txt
[(83, 0.9789507)]

Argos, Peloponnese.txt
[(152, 0.18479139), (230, 0.24335325), (236, 0.4765731)]

Dogora.txt
[(88, 0.71820503), (123, 0.1112888), (173, 0.114200465)]

Achelous.txt
[(57, 0.2331042), (185, 0.27943507), (207, 0.40369976)]

Aion (deity).txt
[(57, 0.7981126), (139, 0.092780665)]

Classic.txt
[(5, 0.090846434), (20, 0.20568705), (60, 0.091903634), (65, 0.06725485), (98, 0.22127765)]

Edward Mountain.txt
[(104, 0.088665836), (146, 0.14798062), (166, 0.15205899), (174, 0.06558

Amanda Ripley (character).txt
[(110, 0.0774255), (160, 0.730155), (221, 0.09868128)]

AFI Catalog of Feature Films.txt
[(20, 0.49398458), (125, 0.07511536), (166, 0.060428895), (206, 0.11631564)]

Bigfoot-COLON- The Life and Times of a Legend.txt
[(21, 0.13617554), (61, 0.09763548), (64, 0.32673112), (65, 0.053871088), (146, 0.12003739), (192, 0.052987896)]

Chrysopeleia.txt
[(66, 0.23567887), (207, 0.6031713)]

Category-COLON-Articles with dead external links from June 2016.txt
[(14, 0.8490849)]

Aura (paranormal).txt
[(100, 0.7715871), (218, 0.062709995)]

Category-COLON-Use mdy dates from May 2020.txt
[(14, 0.8490848)]

Fangoria.txt
[(61, 0.14096999), (86, 0.064495854), (132, 0.36561173), (147, 0.19421995), (166, 0.13372453), (173, 0.057047807)]

Americas.txt
[(52, 0.80796313), (237, 0.1519929)]

Dungeon Master.txt
[(5, 0.06774382), (6, 0.13325119), (83, 0.727562)]

BBC News.txt
[(61, 0.18446063), (74, 0.31607014), (80, 0.073647365), (86, 0.33515373)]

Binomial nomenclature.txt
[(59

Frank Edwards (writer and broadcaster).txt
[(13, 0.3815363), (86, 0.11980771), (100, 0.058384843), (240, 0.34303463)]

Cephisso.txt
[(66, 0.76054853)]

Artificer (Dungeons & Dragons).txt
[(83, 0.9209741)]

Extrasensory perception.txt
[(100, 0.9643706)]

Category-COLON-Fantasy creatures.txt
[(97, 0.92356044)]

Centauromachy.txt
[(101, 0.24431083), (112, 0.33143318), (200, 0.2740188)]

Empire (film magazine).txt
[(20, 0.11929911), (35, 0.07641949), (61, 0.091990374), (86, 0.057920806), (132, 0.23022494), (220, 0.26660722)]

Franklyn Ajaye.txt
[(35, 0.44789106), (94, 0.0866157), (169, 0.26224923)]

Daleks.txt
[(75, 0.78353703)]

Chuck Pfarrer.txt
[(16, 0.06734104), (35, 0.22978278), (61, 0.08397836), (133, 0.2796641), (214, 0.103541546)]

Diner.txt
[(20, 0.06254694), (29, 0.10459826), (60, 0.05186403), (152, 0.05601869), (156, 0.07594877), (162, 0.34217095), (176, 0.16962154)]

Among Us.txt
[(110, 0.6996103), (134, 0.0784394), (188, 0.12714896)]

Eridanos (river of Hades).txt
[(157, 0.075

Blue Öyster Cult.txt
[(111, 0.6351648), (183, 0.12727384)]

Bernie Knee.txt
[(35, 0.23487724), (37, 0.083596155), (111, 0.29248863), (176, 0.094353124)]

Acis and Galatea.txt
[(98, 0.08603151), (112, 0.11179951), (152, 0.12119585), (157, 0.060938023), (171, 0.44390917), (207, 0.1011073)]

American Indian Quarterly.txt
[(61, 0.16645971), (63, 0.16660227), (66, 0.11795295), (125, 0.16616909), (206, 0.06984731)]

Dinosaur_(Dungeons_%26_Dragons).txt
[]

Complete Psionic.txt
[(83, 0.21598908), (128, 0.7611471)]

Ebirah, Horror of the Deep.txt
[(88, 0.8184234), (123, 0.15700296)]

Fauna.txt
[(52, 0.5464405), (104, 0.24963903), (118, 0.072903514)]

American English.txt
[(52, 0.120592415), (59, 0.32741076), (63, 0.08947406), (184, 0.06120541), (224, 0.10737995), (237, 0.11143131)]

Defibrillate.txt
[(29, 0.28251195), (195, 0.14266421), (208, 0.056209356), (243, 0.40993026)]

Extended play.txt
[(60, 0.33033603), (96, 0.06315967), (111, 0.3437802)]

Gigan.txt
[(88, 0.966335)]

Category-COLON-Wik

Buddhist ethics.txt
[(5, 0.051753756), (135, 0.77120143), (178, 0.067752644)]

City gate.txt
[(44, 0.062804736), (152, 0.46781826), (174, 0.10605), (194, 0.18141292), (230, 0.073988244)]

Category-COLON-CS1 maint-COLON- discouraged parameter.txt
[(14, 0.86585057)]

3D computer graphics.txt
[(5, 0.12619041), (29, 0.08847591), (60, 0.10920513), (71, 0.6018738)]

Black and white.txt
[(20, 0.14706478), (29, 0.12393521), (60, 0.05829059), (122, 0.5081585)]

-zilla.txt
[(60, 0.09807136), (88, 0.63569576)]

Dungeons & Dragons in popular culture.txt
[(6, 0.42824915), (35, 0.18636642), (169, 0.12181727)]

Dutch language.txt
[(52, 0.13260107), (59, 0.673528), (218, 0.10111597)]

Astrophysicist.txt
[(5, 0.28178003), (25, 0.25206757), (29, 0.121591836), (186, 0.1795479)]

Folkloristics.txt
[(5, 0.08025427), (206, 0.81761926)]

Deep One.txt
[(244, 0.9241445)]

Biological life cycle.txt
[(41, 0.40980735), (104, 0.07994143), (234, 0.4327902)]

Chester Springs, Pennsylvania.txt
[(174, 0.07792106), (18

Donkey Kong (character).txt
[(49, 0.56720877), (51, 0.22779506), (110, 0.17539644)]

Echo (mythology).txt
[(90, 0.445253), (98, 0.11836899), (112, 0.11776016), (207, 0.24877578)]

Elder Sign (card game).txt
[(175, 0.71364015), (244, 0.25100622)]

Battlesystem.txt
[(83, 0.17071947), (128, 0.74755037)]

Alastor.txt
[(112, 0.1473489), (207, 0.71973664)]

Category-COLON-Wikipedia articles with LCCN identifiers.txt
[]

Ananke.txt
[(5, 0.079003036), (57, 0.60975784), (98, 0.12865767)]

Beithir.txt
[(33, 0.1405775), (42, 0.37411472), (157, 0.3013562)]

Alke.txt
[(57, 0.051402017), (207, 0.84474665)]

Amalgamated Dynamics.txt
[(21, 0.18840922), (88, 0.061618842), (126, 0.10074236), (145, 0.14980298), (160, 0.086486846), (173, 0.09628636)]

Aliens Versus Predator-COLON- Extinction.txt
[(21, 0.10488704), (110, 0.23665507), (160, 0.08336894), (183, 0.54870003)]

Batman.txt
[(49, 0.71982914), (75, 0.18418029), (105, 0.07325336)]

Alan Howarth (composer).txt
[(20, 0.13105324), (21, 0.09585343), (35

Giant_(Dungeons_%26_Dragons).txt
[]

Carrion.txt
[(149, 0.40648803), (214, 0.15241162)]

Donna Haraway.txt
[(5, 0.13308355), (25, 0.21553123), (35, 0.14327367), (65, 0.4196347)]

Arms and Equipment Guide.txt
[(128, 0.953345)]

2012 US presidential election.txt
[(61, 0.21563178), (74, 0.12360412), (184, 0.06774878), (223, 0.37089506), (237, 0.08666312)]

Anti-hero.txt
[(9, 0.3738437), (54, 0.2502634), (97, 0.08848854), (127, 0.1522673), (191, 0.07131218)]

Category-COLON-Wikipedia articles needing page number citations from August 2020.txt
[(14, 0.84908485)]

Corus (mythology).txt
[(66, 0.34888774), (185, 0.050239332), (232, 0.06294253)]

Forgotten Realms Campaign Setting.txt
[(81, 0.124620125), (128, 0.81380755)]

Cultural universal.txt
[(5, 0.08355433), (65, 0.7563407)]

Aergia.txt
[(57, 0.9410613)]

Dungeons & Dragons Rules Cyclopedia.txt
[(128, 0.98063236)]

Banquo.txt
[(7, 0.79103154), (85, 0.12881404)]

Christine (1983 film).txt
[(35, 0.11149788), (38, 0.072387986), (120, 0.242054

Dian Fossey.txt
[(12, 0.47688404), (61, 0.053235088), (74, 0.07997362), (173, 0.12543055)]

Edgar Wright.txt
[(35, 0.18195719), (38, 0.18711051), (169, 0.055866633), (173, 0.09142069), (188, 0.16228242), (197, 0.1879646)]

Georgia (U.S. state).txt
[(74, 0.053211503), (230, 0.061522588), (237, 0.5928981), (240, 0.23265447)]

Ed Greenwood.txt
[(6, 0.08162546), (81, 0.15960102), (83, 0.42471656), (97, 0.063910194), (132, 0.06428446)]

David M. Ewalt.txt
[(6, 0.15197891), (21, 0.41774553), (61, 0.26732096), (110, 0.09165603)]

Air pressure.txt
[(29, 0.08974012), (119, 0.82849526)]

Glaistig.txt
[(42, 0.6290247), (82, 0.08076328), (157, 0.19091466)]

Bernard Heuvelmans.txt
[(146, 0.88018423)]

Clio.txt
[(66, 0.5650651), (125, 0.07771778), (206, 0.051933404), (207, 0.2249475)]

Cathar.txt
[(95, 0.2204101), (152, 0.15126646), (158, 0.1345416), (187, 0.40145022)]

Electronic voice phenomenon.txt
[(5, 0.07584733), (60, 0.1670263), (82, 0.19493765), (100, 0.37021536)]

Deadline Hollywood.txt
[(3

Crystal Creek Reservoir.txt
[(29, 0.11927119), (189, 0.05989327), (237, 0.584088)]

Fox News.txt
[(61, 0.15599807), (69, 0.6271999), (86, 0.1998199)]

Australia (continent).txt
[(52, 0.80266625), (237, 0.120161034)]

Carolynne Cunningham.txt
[(4, 0.21805096), (35, 0.16482319), (38, 0.12400137), (51, 0.06980736), (188, 0.20553957)]

Charles Hallahan.txt
[(21, 0.15651108), (35, 0.6551071)]

Centaur.txt
[(40, 0.33754128), (101, 0.3626612), (157, 0.0845821)]

Atë.txt
[(66, 0.49866775), (85, 0.07146479), (112, 0.1710975), (157, 0.101973966)]

Baragon.txt
[(88, 0.95354307)]

Dungeons & Dragons-related products.txt
[(6, 0.553365), (83, 0.12113609), (110, 0.10865894)]

Akan people.txt
[(44, 0.07660076), (52, 0.27246454), (65, 0.058275383), (194, 0.38191685)]

Antarctica.txt
[(52, 0.6437181), (104, 0.16204467), (245, 0.064348795)]

Blood spurt.txt
[(158, 0.056948762), (173, 0.075094245), (234, 0.62812406)]

Clover (creature).txt
[(38, 0.070170276), (88, 0.08376895), (169, 0.053170465), (170, 0.

Natural history.txt
[(5, 0.13225627), (65, 0.17832786), (104, 0.18598703), (117, 0.10826637), (232, 0.3285)]

Occult.txt
[(5, 0.099709794), (199, 0.06365868), (218, 0.10442821), (239, 0.5551609)]

Logan Bonner.txt
[]

Morbid Angel.txt
[(111, 0.23278263), (119, 0.07786773), (120, 0.4102233), (150, 0.25348276)]

Godzilla vs. Kong.txt
[(38, 0.52784884), (51, 0.16490768), (88, 0.25819218)]

In medias res.txt
[(54, 0.59920096), (112, 0.27302444)]

Owl of Athena.txt
[(40, 0.60549974), (101, 0.15400763), (163, 0.15950286)]

Ioke (mythology).txt
[(40, 0.075487964), (66, 0.3074168), (112, 0.14737418), (206, 0.07995943)]

Pegasides.txt
[(57, 0.7875713), (112, 0.053838987), (207, 0.050217904)]

OCLC (identifier).txt
[(60, 0.20986088), (66, 0.3347902), (74, 0.12039539), (107, 0.05497877), (125, 0.18021402)]

Gord the Rogue.txt
[(81, 0.30983394), (83, 0.3534262), (197, 0.11425544), (241, 0.16917166)]

Nahuel Huapi Lake Monster.txt
[(52, 0.105776064), (104, 0.17434439), (126, 0.21677764), (157, 0.12

Chiang Mai.txt
[(52, 0.09992182), (58, 0.07988286), (119, 0.24427989), (230, 0.38054463)]

Chuvash dragon.txt
[(42, 0.050197005), (157, 0.43184948), (168, 0.25109518)]

Delphyne.txt
[(92, 0.07080804), (112, 0.08138203), (185, 0.6596673), (207, 0.12134287)]

Iliad.txt
[(112, 0.89585716)]

Ged (heraldry).txt
[(225, 0.91023064)]

Ahuna Vairya.txt
[(95, 0.09810192), (103, 0.0785108), (155, 0.6897454)]

Fairy godmother.txt
[(9, 0.10608138), (26, 0.07118933), (42, 0.19314566), (97, 0.3237177), (157, 0.06943531), (227, 0.20278555)]

A Book of Dragons.txt
[(26, 0.114971206), (146, 0.8017938)]

Feilong (mythology).txt
[(44, 0.117638454), (58, 0.6159424), (168, 0.23201641)]

Corvus (heraldry).txt
[(33, 0.07431943), (66, 0.07422518), (157, 0.14722891), (225, 0.45713353)]

Armiger.txt
[(225, 0.93246794)]

Aphrodite.txt
[(57, 0.10857847), (71, 0.08057456), (101, 0.51817274), (218, 0.17182325)]

Gothic fiction.txt
[(54, 0.93355644), (97, 0.063287586)]

Buddhist law.txt
[(135, 0.5111514), (178, 0.406

Elf.txt
[(3, 0.13441196), (59, 0.108611494), (157, 0.24289837), (187, 0.09502756), (218, 0.1170297), (239, 0.058729436)]

Hadad.txt
[(95, 0.10442512), (103, 0.66554284), (139, 0.18218972)]

Enchanted forest.txt
[(26, 0.124456495), (97, 0.44963795), (114, 0.083210275), (157, 0.19860506)]

Gnome.txt
[(26, 0.14589304), (42, 0.1712091), (97, 0.33917913), (157, 0.21343306)]

Dragon (Dungeons & Dragons).txt
[(6, 0.50390947), (115, 0.08304853), (149, 0.06214269), (157, 0.056268103), (168, 0.18381639)]

Flag of the Qing dynasty.txt
[(0, 0.09994611), (44, 0.67347854), (133, 0.10115491), (225, 0.06536748)]

Hebrew Bible.txt
[(95, 0.8971094)]

Deer.txt
[(104, 0.07308805), (149, 0.20536484), (151, 0.27706042), (172, 0.23219317), (225, 0.100518), (237, 0.08867765)]

Argent.txt
[(55, 0.12592138), (157, 0.07480308), (225, 0.6743389)]

Here be dragons.txt
[(52, 0.11557965), (98, 0.29841697), (104, 0.058439475), (152, 0.056157082), (157, 0.14390473), (168, 0.12131536), (209, 0.06664618)]

Feathered Ser

English language.txt
[(52, 0.091920145), (59, 0.5260598), (63, 0.09981175), (216, 0.052662533), (218, 0.097315006), (224, 0.09348002)]

Four Seas.txt
[(44, 0.6352902), (218, 0.09313612), (237, 0.10723965)]

Gremlin.txt
[(42, 0.14744568), (97, 0.19393131), (132, 0.117861226), (157, 0.12348095), (173, 0.13679177), (189, 0.093971744)]

Ancient Greek.txt
[(59, 0.47780636), (236, 0.51144993)]

Caulonia (ancient city).txt
[(218, 0.07614656), (236, 0.77065015)]

Dragonology.txt
[(6, 0.08448078), (97, 0.05943883), (110, 0.07253698), (126, 0.18519919), (128, 0.050694533), (168, 0.093230434), (214, 0.22558272)]

Agamemnon.txt
[(98, 0.05530206), (112, 0.88012147)]

Bee (mythology).txt
[(112, 0.16520762), (139, 0.127036), (149, 0.21578862), (225, 0.37365812)]

Chiyou.txt
[(44, 0.7018132), (103, 0.058788512), (157, 0.14965194)]

Fimbriation.txt
[(52, 0.06073748), (225, 0.83890784)]

Goryeo.txt
[(44, 0.4349204), (118, 0.23365393), (152, 0.100467764), (218, 0.08448509)]

Gnosticism.txt
[(95, 0.302323


Kaiju.txt
[(51, 0.07304211), (77, 0.14086202), (88, 0.27312115), (97, 0.10772986), (122, 0.27485657), (123, 0.053543486)]

Panlong (mythology).txt
[(44, 0.317937), (157, 0.084204495), (168, 0.4310858)]

Méliès d'Or.txt
[(97, 0.7769793), (118, 0.050848108), (213, 0.080534644)]

Mythopoeia.txt
[(97, 0.42732933), (122, 0.18965904), (157, 0.05890244)]

Ninazu.txt
[(103, 0.29630175), (139, 0.43628702)]

List of religious ideas in fantasy fiction.txt
[(95, 0.051625356), (97, 0.7269605), (139, 0.12141325)]

Mythlore.txt
[(61, 0.120335035), (121, 0.6969046), (132, 0.059503336)]

Pherecydes of Athens.txt
[(98, 0.4110708), (185, 0.37106052), (236, 0.06700013)]

Magic realism.txt
[(36, 0.8020367), (97, 0.090846255), (122, 0.10337284)]

Mokele-mbembe.txt
[(5, 0.052176278), (52, 0.06351183), (104, 0.22185358), (126, 0.059067164), (146, 0.14767496), (157, 0.17240062)]

Loeb Classical Library.txt
[(98, 0.5884134), (115, 0.13240172), (185, 0.113132514), (236, 0.058198508)]

Ljubi.txt
[(66, 0.07098058

Mesopotamia.txt
[(95, 0.17087765), (155, 0.39311293), (236, 0.10872683)]

Minamoto no Mitsunaka.txt
[(44, 0.08518109), (47, 0.18943615), (95, 0.05297844), (103, 0.08191046), (135, 0.062184684), (217, 0.24091554), (238, 0.10283924)]

Locus Award.txt
[(4, 0.1535785), (97, 0.79347223)]

Lantern Festival.txt
[(44, 0.39636663), (157, 0.14532952), (203, 0.3850208)]

List of fantasy authors.txt
[(35, 0.12947394), (97, 0.46293408), (102, 0.36374962)]

Mušḫuššu.txt
[(103, 0.83704793)]

Nebula Award.txt
[(4, 0.12495064), (97, 0.8104885)]

Lindworm.txt
[(103, 0.072462544), (157, 0.12742023), (168, 0.1075004), (225, 0.48474488)]

Magic ring.txt
[(9, 0.055654038), (16, 0.255642), (97, 0.10055911), (103, 0.051579345), (157, 0.112192124), (239, 0.26700073)]

Near East.txt
[(52, 0.45904887), (236, 0.07874977), (237, 0.27521306)]

Officer of arms.txt
[(74, 0.095210105), (225, 0.8127312)]

Khordeh Avesta.txt
[(155, 0.9359767)]

List of dragons in popular culture.txt
[(168, 0.26677626), (193, 0.68465257)

Extinction.txt
[(5, 0.08183829), (89, 0.06856987), (104, 0.34644854), (149, 0.1508957), (151, 0.07837798), (181, 0.061789133), (245, 0.07142216)]

Frank Zappa.txt
[(35, 0.08060739), (111, 0.07263241), (142, 0.80989224)]

Air raid siren.txt
[(194, 0.9822893)]

-SLASH-Film.txt
[(4, 0.16896577), (35, 0.10528928), (61, 0.062100958), (86, 0.112623245), (113, 0.06389611), (134, 0.17537636), (169, 0.100994706)]

Ape Canyon.txt
[(23, 0.058907155), (64, 0.23185647), (82, 0.05171443), (157, 0.05465847), (173, 0.12865774), (192, 0.1650528), (237, 0.101961344), (244, 0.11780005)]

Benny Andersson.txt
[(35, 0.111232005), (111, 0.8408674)]

Alien Resurrection.txt
[(38, 0.11387445), (160, 0.2521453), (221, 0.59732527)]

Doug Stewart (game designer).txt
[(6, 0.12906475)]

Category-COLON-CS1 maint-COLON- unfit URL.txt
[(14, 0.836905)]

Arethusa (mythology).txt
[(98, 0.18520257), (185, 0.15161547), (207, 0.25185144), (216, 0.31322804)]

A Nightmare on Elm Street (franchise).txt
[(102, 0.80050015), (173,


European eel.txt
[(12, 0.08935046), (52, 0.13825071), (104, 0.07706826), (149, 0.43513307), (234, 0.069280125)]

Aliens vs. Predator-COLON- Requiem.txt
[(38, 0.1967137), (160, 0.16394955), (173, 0.18087439), (183, 0.27829012), (221, 0.1451549)]

Elements of the Cthulhu Mythos.txt
[(137, 0.5087457), (157, 0.13350496), (244, 0.22098815)]

Ahead of Their Time.txt
[(120, 0.07138842), (142, 0.86727697)]

Category-COLON-Redirects from moves.txt
[(14, 0.8346518)]

Futurama.txt
[(86, 0.060235456), (108, 0.14176065), (132, 0.058076136), (169, 0.33499476), (170, 0.28826073)]

Fresno, California.txt
[(230, 0.9643556)]

Dan O'Bannon.txt
[(35, 0.54886204), (51, 0.061981432), (145, 0.20256561)]

Eleusinian Mysteries.txt
[(40, 0.053344302), (101, 0.77186894), (139, 0.16244693)]

Aurora Plastics Corporation.txt
[(21, 0.055925887), (29, 0.17976995), (36, 0.33799374), (60, 0.24068692)]

Blu-ray.txt
[(60, 0.15679216), (96, 0.8397261)]

Etsushi Toyokawa.txt
[(35, 0.051506158), (145, 0.42817566), (173, 0.

Fantastic Four.txt
[(105, 0.9050701)]

Euterpe.txt
[(66, 0.4194045), (98, 0.1393256), (101, 0.05870115), (112, 0.2207717)]

Aliens-COLON- The Computer Game (1986 video game).txt
[(110, 0.14158833), (160, 0.7819663)]

Cultural icon.txt
[(65, 0.9361929)]

Corin Hardy.txt
[(21, 0.3129928), (35, 0.17735551), (38, 0.14038548), (173, 0.09454954), (227, 0.058686428)]

Datura.txt
[(43, 0.16758467), (117, 0.07914258), (137, 0.051421683), (149, 0.18425617), (153, 0.30591115), (195, 0.054144092), (239, 0.06382139)]

Anthology-COLON- Movie Themes 1974–1998.txt
[(120, 0.9046276)]

Diffuse reflection.txt
[(29, 0.6126559), (124, 0.35471082)]

Gary Gygax.txt
[(6, 0.07764065), (69, 0.66959625), (81, 0.1796104), (83, 0.0657289)]

Androktasiai.txt
[(66, 0.69639254), (185, 0.18655859)]

Arthur Rankin Jr..txt
[(35, 0.17446245), (51, 0.054922275), (102, 0.6188642), (108, 0.056492265)]

Athens.txt
[(65, 0.052676328), (118, 0.11615293), (230, 0.3431921), (236, 0.37784192)]

Against the Giants.txt
[(81, 0.2386

Dog.txt
[(149, 0.10343615), (151, 0.71042997), (195, 0.051402006)]

Fouke, Arkansas.txt
[(136, 0.0518401), (162, 0.11813875), (184, 0.07762223), (230, 0.5613568), (237, 0.08915041)]

Escape from L.A..txt
[(102, 0.059764545), (120, 0.22890976), (173, 0.4995333)]

Ann Darrow.txt
[(51, 0.8246879), (88, 0.11168229), (110, 0.05199492)]

Beastwars (album).txt
[(21, 0.055955477), (111, 0.21567033), (120, 0.25185552), (143, 0.051677536), (175, 0.051339675)]

Azathoth (short story).txt
[(244, 0.9352149)]

Category-COLON-Articles with unsourced statements from September 2016.txt
[(14, 0.8490849)]

Aliens-COLON- Colonial Marines Technical Manual.txt
[(160, 0.8893945)]

Eternity.txt
[(5, 0.088028096), (90, 0.10761427), (139, 0.09451406), (199, 0.49096832)]

9-1-1.txt
[(60, 0.24811302), (74, 0.29509196), (134, 0.058298223), (159, 0.26059875)]

Eastern gorilla.txt
[(12, 0.77806836), (149, 0.11994465)]

Asian cinema.txt
[(20, 0.13431202), (52, 0.35133073), (191, 0.2794064)]

Foam rubber.txt
[(29, 0.7

### What topics have high weights in the manually-labeled monster documents?

In [39]:
def get_topics_for_text(text, threshold):
    """Infer the LDA topic distribution for one raw article text.

    The text is tokenized with ``tokenize``; tokens whose count in
    ``word_freqs`` is not greater than ``rare_word_threshold`` are dropped,
    and the remaining tokens are converted to a bag-of-words with
    ``id2word`` before being handed to ``lda_model``.

    Args:
        text: Raw article text.
        threshold: Forwarded as ``minimum_probability`` to
            ``lda_model.get_document_topics``.

    Returns:
        The result of ``lda_model.get_document_topics`` for this document
        (shown in this notebook's output as a list of
        ``(topic_id, weight)`` pairs).
    """
    # Apply the same rare-word cut-off to this text before building its bag-of-words.
    tokens = [
        word for word in tokenize(text)
        if word_freqs[word] > rare_word_threshold
    ]
    bow_doc = id2word.doc2bow(tokens)
    return lda_model.get_document_topics(bow = bow_doc, minimum_probability = threshold)

In [40]:
from os import listdir
from os.path import isfile, join

# Gather every positively-labeled (monster) article from the train and test splits.
monster_documents = [
    path
    for path in (join(train_pos_dir, name) for name in listdir(train_pos_dir))
    if isfile(path)
]
more_monster_documents = [
    path
    for path in (join(test_pos_dir, name) for name in listdir(test_pos_dir))
    if isfile(path)
]
monster_documents += more_monster_documents

# Ids of all topics that reach the 0.05 weight threshold in any monster article.
monster_topics = set()

for doc_path in monster_documents:
    with open(doc_path, encoding="utf-8") as file_object:
        text = file_object.read()

    doc_to_topic = get_topics_for_text(text, 0.05)
    print(doc_path)
    print(doc_to_topic)
    for topic_id, _weight in doc_to_topic:
        # Show the topic's top words next to its id so the output is readable.
        print(topic_id, ' ---- ', lda_model.print_topic(topic_id))
        monster_topics.add(topic_id)
    print()

print('total number of monster topics (so far): ', len(monster_topics))

./labeled/train/positive\Dragon.txt
[(97, 0.07325695), (168, 0.49136847), (218, 0.10297265), (225, 0.067408524)]
97  ----  0.036*"fantasy" + 0.034*"fiction" + 0.019*"science" + 0.009*"world" + 0.007*"series" + 0.007*"stories" + 0.006*"magic" + 0.006*"literature" + 0.005*"history" + 0.005*"works"
168  ----  0.119*"dragon" + 0.032*"dragons" + 0.009*"european" + 0.007*"help" + 0.007*"sigurd" + 0.006*"serpent" + 0.005*"error" + 0.005*"target" + 0.005*"also" + 0.004*"mythology"
218  ----  0.141*"target" + 0.138*"error" + 0.132*"help" + 0.019*"harvnb" + 0.005*"sfnm" + 0.005*"press" + 0.004*"university" + 0.003*"york" + 0.003*"also" + 0.002*"early"
225  ----  0.041*"arms" + 0.021*"heraldry" + 0.011*"heraldic" + 0.011*"coat" + 0.008*"also" + 0.007*"used" + 0.005*"black" + 0.005*"flag" + 0.005*"white" + 0.005*"shield"

./labeled/train/positive\Vampire.txt
[(100, 0.055113595), (157, 0.12010571), (201, 0.6147919), (239, 0.050385024)]
100  ----  0.014*"paranormal" + 0.010*"scientific" + 0.009*"sci

./labeled/train/positive\Godzilla.txt
[(51, 0.063701585), (88, 0.45474136), (123, 0.36543062)]
51  ----  0.076*"kong" + 0.052*"king" + 0.015*"film" + 0.013*"island" + 0.008*"skull" + 0.008*"films" + 0.006*"cooper" + 0.006*"zombie" + 0.006*"conan" + 0.005*"dead"
88  ----  0.108*"godzilla" + 0.019*"monsters" + 0.017*"king" + 0.015*"monster" + 0.012*"film" + 0.011*"mothra" + 0.010*"ghidorah" + 0.009*"toho" + 0.009*"mechagodzilla" + 0.007*"japan"
123  ----  0.036*"godzilla" + 0.029*"film" + 0.013*"japanese" + 0.013*"king" + 0.012*"toho" + 0.012*"films" + 0.010*"galbraith" + 0.009*"help" + 0.009*"ryfle" + 0.009*"monster"

./labeled/train/positive\Them!_(1954_film).txt
[(20, 0.065476105), (67, 0.06669602), (173, 0.6412305)]
20  ----  0.044*"film" + 0.020*"films" + 0.007*"movie" + 0.007*"movies" + 0.006*"first" + 0.005*"made" + 0.005*"also" + 0.005*"cinema" + 0.005*"motion" + 0.004*"feature"
67  ----  0.024*"stone" + 0.015*"rolling" + 0.008*"plane" + 0.007*"fiends" + 0.006*"kingston" + 0.006*

./labeled/train/positive\Goblin.txt
[(42, 0.20373516), (97, 0.3581739), (157, 0.29153678)]
42  ----  0.033*"snake" + 0.030*"snakes" + 0.011*"fairy" + 0.010*"fairies" + 0.009*"lilith" + 0.008*"venom" + 0.007*"water" + 0.006*"serpent" + 0.004*"kelpie" + 0.004*"also"
97  ----  0.036*"fantasy" + 0.034*"fiction" + 0.019*"science" + 0.009*"world" + 0.007*"series" + 0.007*"stories" + 0.006*"magic" + 0.006*"literature" + 0.005*"history" + 0.005*"works"
157  ----  0.011*"also" + 0.010*"mythology" + 0.010*"folklore" + 0.007*"death" + 0.006*"creatures" + 0.005*"creature" + 0.005*"often" + 0.004*"people" + 0.004*"legend" + 0.004*"known"

./labeled/train/positive\Harpy.txt
[(40, 0.3891952), (101, 0.2340671), (112, 0.063375786), (225, 0.19545531)]
40  ----  0.013*"greek" + 0.013*"deities" + 0.006*"cave" + 0.006*"games" + 0.005*"mysteries" + 0.005*"mythological" + 0.005*"mythology" + 0.004*"hermes" + 0.004*"ancient" + 0.003*"ploutonion"
101  ----  0.017*"greek" + 0.009*"zeus" + 0.005*"mythology" + 0.

./labeled/train/positive\Cloverfield.txt
[(38, 0.45952788), (39, 0.050764874), (116, 0.08462592), (169, 0.13737799), (173, 0.091453195)]
38  ----  0.038*"film" + 0.011*"million" + 0.010*"july" + 0.009*"office" + 0.008*"march" + 0.008*"april" + 0.007*"december" + 0.007*"october" + 0.007*"january" + 0.007*"november"
39  ----  0.039*"trek" + 0.036*"star" + 0.017*"abrams" + 0.012*"november" + 0.010*"gaiman" + 0.010*"film" + 0.007*"darkness" + 0.006*"neil" + 0.006*"series" + 0.006*"july"
116  ----  0.019*"film" + 0.009*"runner" + 0.009*"movie" + 0.009*"transformers" + 0.008*"blade" + 0.007*"december" + 0.006*"july" + 0.006*"martin" + 0.005*"april" + 0.005*"october"
169  ----  0.027*"season" + 0.026*"series" + 0.021*"episode" + 0.016*"show" + 0.013*"episodes" + 0.010*"television" + 0.008*"september" + 0.008*"october" + 0.006*"february" + 0.006*"march"
173  ----  0.030*"film" + 0.007*"films" + 0.006*"movie" + 0.005*"release" + 0.005*"directed" + 0.005*"time" + 0.005*"released" + 0.005*"horror

90  ----  0.022*"monster" + 0.020*"time" + 0.016*"doctor" + 0.013*"high" + 0.010*"november" + 0.008*"cybermen" + 0.007*"doll" + 0.006*"back" + 0.006*"dolls" + 0.006*"travel"

./labeled/train/positive\Succubus.txt
[(22, 0.056505464), (100, 0.09617669), (103, 0.050532848), (137, 0.080644675), (157, 0.2308906), (239, 0.1849465)]
22  ----  0.038*"park" + 0.024*"central" + 0.024*"york" + 0.020*"times" + 0.016*"street" + 0.016*"square" + 0.015*"avenue" + 0.012*"city" + 0.012*"circle" + 0.011*"october"
100  ----  0.014*"paranormal" + 0.010*"scientific" + 0.009*"science" + 0.008*"psychic" + 0.008*"research" + 0.007*"parapsychology" + 0.006*"skeptical" + 0.006*"phenomena" + 0.005*"pseudoscience" + 0.005*"evidence"
103  ----  0.010*"also" + 0.010*"king" + 0.005*"name" + 0.005*"form" + 0.005*"text" + 0.004*"translation" + 0.004*"gilgamesh" + 0.004*"demons" + 0.004*"marduk" + 0.004*"gods"
137  ----  0.030*"sleep" + 0.012*"entity" + 0.009*"great" + 0.008*"paralysis" + 0.008*"insomnia" + 0.007*"appe

168  ----  0.119*"dragon" + 0.032*"dragons" + 0.009*"european" + 0.007*"help" + 0.007*"sigurd" + 0.006*"serpent" + 0.005*"error" + 0.005*"target" + 0.005*"also" + 0.004*"mythology"
185  ----  0.017*"hesiod" + 0.012*"press" + 0.012*"university" + 0.011*"theogony" + 0.009*"zeus" + 0.009*"west" + 0.009*"library" + 0.008*"greek" + 0.008*"apollodorus" + 0.008*"harvard"
236  ----  0.026*"greek" + 0.022*"ancient" + 0.010*"greece" + 0.010*"athens" + 0.008*"century" + 0.008*"period" + 0.007*"history" + 0.006*"empire" + 0.006*"roman" + 0.005*"city"

./labeled/test/positive\Kraken.txt
[(59, 0.05564555), (104, 0.136167), (106, 0.06745992), (157, 0.30450037), (234, 0.054255363)]
59  ----  0.025*"english" + 0.023*"language" + 0.017*"german" + 0.015*"languages" + 0.012*"germanic" + 0.007*"dialects" + 0.007*"modern" + 0.007*"used" + 0.007*"words" + 0.007*"dutch"
104  ----  0.010*"loch" + 0.010*"species" + 0.009*"ness" + 0.008*"monster" + 0.006*"animals" + 0.006*"evolution" + 0.005*"pmid" + 0.005*"larg

### Collect the topics that have high weight in the negative-labeled documents (they are removed from the monster set below)

In [41]:
# Gather every negatively-labeled (non-monster) article from the train and test splits.
non_monster_documents = [
    path
    for path in (join(train_neg_dir, name) for name in listdir(train_neg_dir))
    if isfile(path)
]
non_more_monster_documents = [
    path
    for path in (join(test_neg_dir, name) for name in listdir(test_neg_dir))
    if isfile(path)
]
non_monster_documents += non_more_monster_documents

# Ids of all topics that reach the 0.05 weight threshold in any non-monster article.
non_monster_topics = set()

for doc_path in non_monster_documents:
    with open(doc_path, encoding="utf-8") as file_object:
        text = file_object.read()

    doc_to_topic = get_topics_for_text(text, 0.05)

    print(doc_path)
    print(doc_to_topic)
    for topic_id, _weight in doc_to_topic:
        # Show the topic's top words next to its id so the output is readable.
        print(topic_id, ' ---- ', lda_model.print_topic(topic_id))
        non_monster_topics.add(topic_id)
    print()

print('total number of non monster topics: ', len(non_monster_topics))

./labeled/train/negative\Clothing (1).txt
[(13, 0.874561), (29, 0.080023184)]
13  ----  0.005*"hypnosis" + 0.005*"also" + 0.004*"clothing" + 0.004*"american" + 0.003*"tattoos" + 0.003*"people" + 0.003*"press" + 0.003*"many" + 0.003*"body" + 0.003*"ufos"
29  ----  0.015*"used" + 0.008*"also" + 0.006*"water" + 0.005*"process" + 0.005*"steel" + 0.004*"system" + 0.004*"sound" + 0.004*"surface" + 0.004*"light" + 0.004*"material"

./labeled/train/negative\Flag of the Qing dynasty (1).txt
[(0, 0.09998488), (44, 0.67345035), (133, 0.10109782), (225, 0.06533236)]
0  ----  0.036*"cross" + 0.017*"temple" + 0.010*"memes" + 0.010*"temples" + 0.008*"columns" + 0.005*"meme" + 0.005*"used" + 0.005*"also" + 0.004*"force" + 0.004*"century"
44  ----  0.052*"chinese" + 0.026*"china" + 0.015*"dynasty" + 0.011*"emperor" + 0.007*"qing" + 0.006*"also" + 0.006*"history" + 0.005*"characters" + 0.005*"people" + 0.005*"traditional"
133  ----  0.016*"states" + 0.015*"united" + 0.014*"force" + 0.014*"navy" + 0.012*

./labeled/train/negative\Caveman.txt
[(0, 0.0708508), (9, 0.2691171), (97, 0.057435054), (151, 0.15274672), (153, 0.07076283), (157, 0.111763544), (173, 0.118378446)]
0  ----  0.036*"cross" + 0.017*"temple" + 0.010*"memes" + 0.010*"temples" + 0.008*"columns" + 0.005*"meme" + 0.005*"used" + 0.005*"also" + 0.004*"force" + 0.004*"century"
9  ----  0.035*"hercules" + 0.018*"heart" + 0.018*"heracles" + 0.014*"hero" + 0.014*"tolkien" + 0.011*"rings" + 0.010*"komodo" + 0.009*"lord" + 0.005*"blood" + 0.005*"hydra"
97  ----  0.036*"fantasy" + 0.034*"fiction" + 0.019*"science" + 0.009*"world" + 0.007*"series" + 0.007*"stories" + 0.006*"magic" + 0.006*"literature" + 0.005*"history" + 0.005*"works"
151  ----  0.017*"human" + 0.013*"humans" + 0.011*"pmid" + 0.009*"homo" + 0.008*"evolution" + 0.008*"years" + 0.007*"species" + 0.007*"dogs" + 0.007*"modern" + 0.006*"wolf"
153  ----  0.024*"dinosaurs" + 0.016*"dinosaur" + 0.009*"birds" + 0.006*"issn" + 0.006*"extinction" + 0.005*"early" + 0.005*"pmid" 

149  ----  0.015*"species" + 0.008*"shark" + 0.007*"pmid" + 0.006*"food" + 0.005*"ants" + 0.005*"sharks" + 0.005*"animal" + 0.005*"prey" + 0.005*"also" + 0.004*"white"
151  ----  0.017*"human" + 0.013*"humans" + 0.011*"pmid" + 0.009*"homo" + 0.008*"evolution" + 0.008*"years" + 0.007*"species" + 0.007*"dogs" + 0.007*"modern" + 0.006*"wolf"
199  ----  0.033*"soul" + 0.012*"body" + 0.009*"life" + 0.007*"souls" + 0.007*"spirit" + 0.007*"philosophy" + 0.007*"mind" + 0.007*"death" + 0.007*"human" + 0.005*"spiritual"

./labeled/train/negative\Komodo dragon.txt
[(9, 0.2684334), (52, 0.057448093), (104, 0.09078998), (149, 0.35415196), (168, 0.08798499), (234, 0.054744065)]
9  ----  0.035*"hercules" + 0.018*"heart" + 0.018*"heracles" + 0.014*"hero" + 0.014*"tolkien" + 0.011*"rings" + 0.010*"komodo" + 0.009*"lord" + 0.005*"blood" + 0.005*"hydra"
52  ----  0.009*"world" + 0.008*"islands" + 0.007*"south" + 0.006*"population" + 0.006*"asia" + 0.006*"east" + 0.006*"north" + 0.005*"island" + 0.005*"co

86  ----  0.028*"channel" + 0.020*"network" + 0.018*"news" + 0.018*"television" + 0.013*"radio" + 0.008*"august" + 0.008*"programming" + 0.007*"media" + 0.007*"networks" + 0.007*"cable"
205  ----  0.020*"mirror" + 0.016*"perception" + 0.014*"opossum" + 0.010*"neurons" + 0.008*"visual" + 0.008*"brain" + 0.007*"pmid" + 0.006*"cortex" + 0.005*"system" + 0.005*"tibetan"

./labeled/train/negative\Florida.txt
[(177, 0.32041138), (237, 0.56072056)]
177  ----  0.042*"florida" + 0.019*"aztec" + 0.018*"mexico" + 0.014*"spanish" + 0.010*"hasbro" + 0.007*"aztecs" + 0.006*"conquest" + 0.006*"commodus" + 0.006*"empire" + 0.006*"tenochtitlan"
237  ----  0.017*"state" + 0.010*"river" + 0.008*"states" + 0.008*"area" + 0.008*"north" + 0.007*"lake" + 0.006*"national" + 0.006*"population" + 0.006*"south" + 0.006*"west"

./labeled/train/negative\Geomagnetic.txt
[(25, 0.81325907), (29, 0.10752253)]
25  ----  0.015*"solar" + 0.012*"tyrannosaurus" + 0.010*"earth" + 0.008*"field" + 0.008*"magnetic" + 0.006*"pm

95  ----  0.015*"church" + 0.009*"christian" + 0.009*"bible" + 0.007*"hebrew" + 0.006*"jewish" + 0.006*"book" + 0.006*"catholic" + 0.006*"holy" + 0.005*"christianity" + 0.005*"john"
111  ----  0.027*"album" + 0.019*"music" + 0.017*"band" + 0.013*"tarzan" + 0.013*"song" + 0.012*"songs" + 0.012*"time" + 0.009*"released" + 0.009*"rock" + 0.008*"land"
125  ----  0.038*"university" + 0.013*"college" + 0.011*"texas" + 0.009*"school" + 0.007*"research" + 0.006*"students" + 0.006*"hall" + 0.006*"campus" + 0.006*"state" + 0.005*"education"
218  ----  0.141*"target" + 0.138*"error" + 0.132*"help" + 0.019*"harvnb" + 0.005*"sfnm" + 0.005*"press" + 0.004*"university" + 0.003*"york" + 0.003*"also" + 0.002*"early"
224  ----  0.016*"angola" + 0.012*"vietnamese" + 0.011*"bengali" + 0.011*"atlantic" + 0.009*"rock" + 0.006*"cappella" + 0.005*"help" + 0.005*"english" + 0.005*"language" + 0.005*"target"
233  ----  0.032*"bass" + 0.028*"music" + 0.015*"double" + 0.009*"string" + 0.009*"instruments" + 0.008*

169  ----  0.027*"season" + 0.026*"series" + 0.021*"episode" + 0.016*"show" + 0.013*"episodes" + 0.010*"television" + 0.008*"september" + 0.008*"october" + 0.006*"february" + 0.006*"march"
173  ----  0.030*"film" + 0.007*"films" + 0.006*"movie" + 0.005*"release" + 0.005*"directed" + 0.005*"time" + 0.005*"released" + 0.005*"horror" + 0.004*"also" + 0.004*"production"

./labeled/train/negative\Iroquois.txt
[(194, 0.81196946), (218, 0.07866302), (237, 0.055026572)]
218  ----  0.141*"target" + 0.138*"error" + 0.132*"help" + 0.019*"harvnb" + 0.005*"sfnm" + 0.005*"press" + 0.004*"university" + 0.003*"york" + 0.003*"also" + 0.002*"early"
237  ----  0.017*"state" + 0.010*"river" + 0.008*"states" + 0.008*"area" + 0.008*"north" + 0.007*"lake" + 0.006*"national" + 0.006*"population" + 0.006*"south" + 0.006*"west"

./labeled/train/negative\London.txt
[(80, 0.62201697), (230, 0.22162051)]
80  ----  0.053*"london" + 0.013*"british" + 0.007*"world" + 0.007*"jehovah" + 0.006*"royal" + 0.006*"reality" 

20  ----  0.044*"film" + 0.020*"films" + 0.007*"movie" + 0.007*"movies" + 0.006*"first" + 0.005*"made" + 0.005*"also" + 0.005*"cinema" + 0.005*"motion" + 0.004*"feature"
60  ----  0.010*"digital" + 0.009*"music" + 0.007*"audio" + 0.006*"media" + 0.006*"also" + 0.006*"company" + 0.006*"used" + 0.006*"format" + 0.006*"computer" + 0.005*"tape"
96  ----  0.035*"disc" + 0.017*"jordan" + 0.015*"video" + 0.013*"laser" + 0.012*"discs" + 0.009*"format" + 0.008*"players" + 0.008*"optical" + 0.007*"player" + 0.006*"sony"

./labeled/train/negative\Metaphor.txt
[(5, 0.4821504), (54, 0.22570686), (63, 0.08217875), (98, 0.09701021)]
5  ----  0.014*"science" + 0.009*"scientific" + 0.007*"theory" + 0.005*"example" + 0.005*"also" + 0.004*"social" + 0.004*"research" + 0.004*"used" + 0.003*"psychology" + 0.003*"philosophy"
54  ----  0.034*"gothic" + 0.009*"story" + 0.008*"novel" + 0.008*"character" + 0.008*"fiction" + 0.006*"also" + 0.005*"matrix" + 0.005*"literature" + 0.005*"film" + 0.004*"characters"
6

./labeled/train/negative\Idaho.txt
[(198, 0.14791045), (237, 0.763996)]
198  ----  0.028*"team" + 0.025*"idaho" + 0.020*"super" + 0.020*"game" + 0.019*"bowl" + 0.018*"league" + 0.013*"sports" + 0.012*"february" + 0.011*"football" + 0.010*"january"
237  ----  0.017*"state" + 0.010*"river" + 0.008*"states" + 0.008*"area" + 0.008*"north" + 0.007*"lake" + 0.006*"national" + 0.006*"population" + 0.006*"south" + 0.006*"west"

./labeled/train/negative\Lion.txt
[(104, 0.49975142), (149, 0.11796894), (151, 0.16916618)]
104  ----  0.010*"loch" + 0.010*"species" + 0.009*"ness" + 0.008*"monster" + 0.006*"animals" + 0.006*"evolution" + 0.005*"pmid" + 0.005*"large" + 0.005*"water" + 0.005*"mammals"
149  ----  0.015*"species" + 0.008*"shark" + 0.007*"pmid" + 0.006*"food" + 0.005*"ants" + 0.005*"sharks" + 0.005*"animal" + 0.005*"prey" + 0.005*"also" + 0.004*"white"
151  ----  0.017*"human" + 0.013*"humans" + 0.011*"pmid" + 0.009*"homo" + 0.008*"evolution" + 0.008*"years" + 0.007*"species" + 0.007*"dog

./labeled/train/negative\Wolf.txt
[(149, 0.20878488), (151, 0.52987826), (215, 0.054950483), (218, 0.066338755)]
149  ----  0.015*"species" + 0.008*"shark" + 0.007*"pmid" + 0.006*"food" + 0.005*"ants" + 0.005*"sharks" + 0.005*"animal" + 0.005*"prey" + 0.005*"also" + 0.004*"white"
151  ----  0.017*"human" + 0.013*"humans" + 0.011*"pmid" + 0.009*"homo" + 0.008*"evolution" + 0.008*"years" + 0.007*"species" + 0.007*"dogs" + 0.007*"modern" + 0.006*"wolf"
215  ----  0.057*"bear" + 0.045*"bears" + 0.021*"black" + 0.016*"american" + 0.014*"grizzly" + 0.009*"genome" + 0.009*"brown" + 0.007*"human" + 0.007*"goodall" + 0.006*"ursus"
218  ----  0.141*"target" + 0.138*"error" + 0.132*"help" + 0.019*"harvnb" + 0.005*"sfnm" + 0.005*"press" + 0.004*"university" + 0.003*"york" + 0.003*"also" + 0.002*"early"

./labeled/train/negative\World War II.txt
[(11, 0.7499227), (24, 0.08089215), (52, 0.052488837), (218, 0.06828479)]
11  ----  0.015*"soviet" + 0.014*"help" + 0.013*"target" + 0.013*"error" + 0.007*

52  ----  0.009*"world" + 0.008*"islands" + 0.007*"south" + 0.006*"population" + 0.006*"asia" + 0.006*"east" + 0.006*"north" + 0.005*"island" + 0.005*"country" + 0.004*"history"
104  ----  0.010*"loch" + 0.010*"species" + 0.009*"ness" + 0.008*"monster" + 0.006*"animals" + 0.006*"evolution" + 0.005*"pmid" + 0.005*"large" + 0.005*"water" + 0.005*"mammals"
147  ----  0.016*"hakluyt" + 0.014*"fangoria" + 0.013*"iguana" + 0.011*"effects" + 0.007*"stereoscopic" + 0.006*"pinus" + 0.006*"clam" + 0.006*"gwangi" + 0.006*"poltergeist" + 0.005*"special"
149  ----  0.015*"species" + 0.008*"shark" + 0.007*"pmid" + 0.006*"food" + 0.005*"ants" + 0.005*"sharks" + 0.005*"animal" + 0.005*"prey" + 0.005*"also" + 0.004*"white"
234  ----  0.013*"species" + 0.008*"cells" + 0.007*"pmid" + 0.007*"biology" + 0.006*"blood" + 0.006*"squid" + 0.005*"also" + 0.005*"lung" + 0.005*"cell" + 0.005*"body"
237  ----  0.017*"state" + 0.010*"river" + 0.008*"states" + 0.008*"area" + 0.008*"north" + 0.007*"lake" + 0.006*"nat

./labeled/test/negative\Gorilla.txt
[(12, 0.77211857), (149, 0.12376417)]
12  ----  0.013*"gorilla" + 0.012*"apes" + 0.012*"life" + 0.010*"orangutan" + 0.008*"chimpanzees" + 0.008*"human" + 0.008*"chimpanzee" + 0.007*"great" + 0.007*"orangutans" + 0.006*"jung"
149  ----  0.015*"species" + 0.008*"shark" + 0.007*"pmid" + 0.006*"food" + 0.005*"ants" + 0.005*"sharks" + 0.005*"animal" + 0.005*"prey" + 0.005*"also" + 0.004*"white"

./labeled/test/negative\Japan.txt
[(21, 0.071993224), (52, 0.25500092), (74, 0.1957737), (118, 0.15108566), (238, 0.18754937)]
21  ----  0.028*"parameter" + 0.028*"maint" + 0.027*"discouraged" + 0.027*"link" + 0.007*"film" + 0.006*"movie" + 0.006*"series" + 0.005*"voice" + 0.004*"jerry" + 0.004*"lake"
52  ----  0.009*"world" + 0.008*"islands" + 0.007*"south" + 0.006*"population" + 0.006*"asia" + 0.006*"east" + 0.006*"north" + 0.005*"island" + 0.005*"country" + 0.004*"history"
74  ----  0.010*"united" + 0.007*"women" + 0.007*"march" + 0.006*"september" + 0.006*"gov

### Manually add topics to the automatically generated list of monster topics

In [56]:
# Drop every topic that also shows up in a non-monster document, printing each
# intermediate set so the effect of the subtraction can be inspected.
print("Original set of monster topics", monster_topics, "", sep="\n")

print("Topics from non-monster documents:", non_monster_topics, "", sep="\n")

print("Intersection of monster and non-monster topics")
intersection = monster_topics & non_monster_topics
print(intersection)
for topic_id in intersection:
    # Top words of each shared topic, to judge whether dropping it is reasonable.
    print(topic_id, ' --- ', lda_model.print_topic(topic_id))

refined_monster_topics = monster_topics - non_monster_topics
print("Remaining monster topics:", refined_monster_topics, sep="\n")

Original set of monster topics
{0, 6, 7, 8, 12, 15, 20, 21, 22, 37, 38, 39, 40, 42, 47, 51, 57, 59, 64, 67, 70, 73, 80, 82, 83, 85, 88, 89, 90, 92, 94, 95, 97, 98, 100, 101, 102, 103, 104, 105, 106, 107, 110, 111, 112, 115, 116, 117, 120, 121, 122, 123, 125, 126, 137, 138, 139, 141, 146, 151, 152, 157, 158, 160, 163, 168, 169, 170, 173, 174, 185, 186, 191, 193, 198, 199, 201, 206, 213, 215, 218, 221, 225, 234, 235, 236, 237, 238, 239, 244, 249}

Topics from non-monster documents:
{0, 2, 5, 9, 10, 11, 12, 13, 20, 21, 23, 24, 25, 26, 28, 29, 33, 35, 38, 41, 44, 45, 52, 54, 56, 59, 60, 61, 62, 63, 64, 65, 66, 69, 70, 74, 80, 84, 86, 89, 91, 93, 95, 96, 97, 98, 102, 104, 110, 111, 112, 114, 117, 118, 120, 125, 126, 130, 132, 133, 134, 140, 147, 149, 151, 152, 153, 155, 157, 159, 162, 165, 166, 168, 169, 170, 172, 173, 174, 175, 176, 177, 179, 180, 181, 184, 186, 187, 190, 194, 195, 196, 198, 199, 201, 202, 203, 205, 206, 207, 208, 211, 212, 214, 215, 218, 220, 222, 224, 225, 226, 229, 230,

120  ---  0.051*"carpenter" + 0.038*"halloween" + 0.028*"john" + 0.015*"themes" + 0.012*"escape" + 0.011*"lost" + 0.009*"film" + 0.009*"soundtrack" + 0.009*"music" + 0.009*"mars"
125  ---  0.038*"university" + 0.013*"college" + 0.011*"texas" + 0.009*"school" + 0.007*"research" + 0.006*"students" + 0.006*"hall" + 0.006*"campus" + 0.006*"state" + 0.005*"education"
126  ---  0.032*"article" + 0.023*"template" + 0.019*"talk" + 0.016*"alien" + 0.015*"page" + 0.013*"please" + 0.013*"wikiproject" + 0.009*"would" + 0.008*"articles" + 0.008*"project"
Remaining monster topics:
{6, 7, 8, 137, 138, 139, 141, 15, 146, 22, 158, 160, 163, 37, 39, 40, 42, 47, 51, 57, 185, 191, 193, 67, 73, 82, 83, 85, 213, 88, 90, 92, 221, 94, 100, 101, 249, 103, 105, 106, 107, 235, 239, 115, 116, 244, 121, 122, 123}


In [50]:
# Topic ids selected by hand as monster-related (see the section heading above).
manual_monster_topics = {
    3, 6, 7, 8, 9, 15, 18, 26, 27, 30,
    32, 34, 37, 40, 42, 43, 51, 53, 57, 64,
    66, 67, 75, 79, 81, 82, 83, 88, 89, 90,
    92, 94, 97, 100, 101, 102, 103, 104, 105, 106,
    109, 115, 121, 122, 123, 128, 136, 137, 138, 139,
    141, 143, 147, 154, 157, 160, 168, 171, 172, 173,
    183, 193, 201, 211, 221, 228, 235, 239, 242, 244,
}

In [53]:
# Compare the hand-picked topic ids with the automatically refined set.
# Topics present in both sets.
intersection_manual_refined = manual_monster_topics & refined_monster_topics
print(intersection_manual_refined)

# Topics found in only one of the two sets: manual-only first, then refined-only.
manual_not_in_refined = manual_monster_topics - refined_monster_topics
refined_not_in_manual = refined_monster_topics - manual_monster_topics
print(manual_not_in_refined)
print(refined_not_in_manual)

{6, 7, 8, 137, 138, 139, 141, 15, 160, 37, 40, 42, 51, 57, 193, 67, 82, 83, 88, 90, 92, 221, 94, 100, 101, 103, 105, 106, 235, 239, 115, 244, 121, 122, 123}
{128, 3, 136, 9, 143, 18, 147, 154, 26, 27, 157, 30, 32, 34, 168, 43, 171, 172, 173, 53, 183, 64, 66, 201, 75, 79, 81, 211, 89, 97, 228, 102, 104, 109, 242}
{163, 249, 39, 73, 107, 47, 146, 116, 85, 22, 213, 185, 158, 191}


In [46]:
# Count the scraped articles that have at least one refined monster topic with
# weight >= 0.15, printing every matching (article, topic) pair.
# NOTE(review): this cell's recorded execution count (46) is lower than that of the
# cell defining `refined_monster_topics` (56), so the saved "0 articles" output may
# be stale -- re-run the notebook top to bottom to refresh it.
num_monster_articles = 0
for filename, text in zip(tokenized_docs_filenames, highly_filtered_tokenized_docs):
    bow_doc = id2word.doc2bow(text)
    doc_to_topic = lda_model.get_document_topics(bow = bow_doc, minimum_probability = 0.15)
    article_matched = False
    for topic in doc_to_topic:
        if topic[0] in refined_monster_topics:
            print(filename, ' - (', topic[0], ':', topic[1], ') ')
            print(lda_model.print_topic(topic[0]))
            print()
            article_matched = True
    # Count each article once, even when several of its topics are monster topics;
    # incrementing inside the topic loop would count topic matches, not articles.
    if article_matched:
        num_monster_articles += 1

print (num_monster_articles, ' articles related to monster topics')


0  articles related to monster topics
