In [115]:
import nltk
import json
import spacy
from nltk.corpus import stopwords
from math import log
from collections import defaultdict, Counter
from string import punctuation
from nltk.stem.wordnet import WordNetLemmatizer
import gensim
import re
import csv
from gensim.summarization import bm25

# Variables

In [116]:
OPEN_QUESTION_WORDS = ['what','who','whose','whom','where','when','why','how',
                       'which',"what's","who's","where's","how's"]
CLOSED_QUESTION_WORDS = ['is','are','am','was','were','do','does,','did','can',
                         'could','will','would','shall','should','have','has',
                         'had']

# Stop words
stop = set(stopwords.words('english'))

lmtz = WordNetLemmatizer()

with open('testing.json') as json_data:
    test = json.load(json_data)

with open('documents.json') as json_data:
    documents = json.load(json_data)

# Spacy toolkit
nlp = spacy.load('en_core_web_sm')

punc = set(punctuation)

In [117]:
def strip_punctuation(s):
    return ''.join(c for c in s if c not in punc)

In [118]:
def lemmatize(token):
    lemma = lmtz.lemmatize(token, 'v')
    if lemma == token:
        lemma = lmtz.lemmatize(token, 'n')
    return lemma

        
def extract_term_freqs(doc):
    tfs = {}
    for token in nltk.word_tokenize(doc):
        lemma = lemmatize(token.lower())
        if lemma not in stop and lemma.isalpha():
            tfs[lemma] = tfs.get(lemma, 0) + 1
    return tfs


def compute_doc_freqs(doc_term_freqs):
    dfs = Counter()
    for tfs in doc_term_freqs.values():
        for term in tfs.keys():
            dfs[term] += 1
    return dfs


def query_vsm(query, index, k=5):
    accumulator = Counter()
    for term in query:
        postings = index[term]
        for docid, weight in postings:
            accumulator[docid] += weight
    return accumulator.most_common(k)


# Find the question word
def get_qword(question):
    tokens = nltk.word_tokenize(question.lower())
    for token in tokens:
        if token in OPEN_QUESTION_WORDS:
            return token
    for token in tokens:
        if token in CLOSED_QUESTION_WORDS:
            return token
    return 'others'

In [119]:
# length of longest same sequences of keywords
def get_overlap(sent1, sent2):
    tokens1 = []
    tokens2 = []

    for token in nltk.word_tokenize(strip_punctuation(sent1.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens1.append(lemma)

    for token in nltk.word_tokenize(strip_punctuation(sent2.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens2.append(lemma)

    max = 0
    for i in range(len(tokens1)):
        for j in range(len(tokens2)):

            if tokens1[i] == tokens2[j]:
                length = 1

                ii = i + 1
                jj = j + 1
                while ii < len(tokens1) and jj < len(tokens2) and \
                        tokens1[ii] == tokens2[jj]:
                    ii += 1
                    jj += 1
                    length += 1

                if length > max:
                    max = length

    return max

# BM25

# Write to test file

In [114]:
csvFile = open("high.csv", "w")
writer = csv.writer(csvFile)
header = ['id','answer']
writer.writerow(header)


case_count = 0
# test = [test[17]]
for test_case in test:
    question = test_case['question']
    docid = test_case['docid']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)
    

    # TFIDF
    doc_term_freqs = {}
    for (id, raw_doc) in enumerate(raw_docs):
        term_freqs = extract_term_freqs(raw_doc)
        doc_term_freqs[id] = term_freqs
    M = len(doc_term_freqs)

    doc_freqs = compute_doc_freqs(doc_term_freqs)

    vsm_inverted_index = defaultdict(list)
    for docid, term_freqs in doc_term_freqs.items():
        N = sum(term_freqs.values())
        length = 0

        # find tf*idf values and accumulate sum of squares
        tfidf_values = []
        for term, count in term_freqs.items():
            tfidf = float(count) / N * log(M / float(doc_freqs[term]))
            tfidf_values.append((term, tfidf))
            length += tfidf ** 2

        # normalise documents by length and insert into index
        length = length ** 0.5
        for term, tfidf in tfidf_values:
            # inversion of the indexing, term -> (doc_id, score)
            vsm_inverted_index[term].append([docid, tfidf / length])

    for term, docids in vsm_inverted_index.items():
        docids.sort()

    terms = extract_term_freqs(question) 
    results = query_vsm(terms, vsm_inverted_index)
    
#     tokenized_sentence = []
#     for each_sentence in raw_docs:
#         sentence_as_words = nltk.word_tokenize(each_sentence)
#         tokenized_sentence.append(sentence_as_words)
        
#     bm25Model = bm25.BM25(tokenized_sentence)
#     average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
    
#     query = []
#     for word in nltk.word_tokenize(question):
#         query.append(word)
        
#     scores = bm25Model.get_scores(query,average_idf)
#     bm25_dic = Counter()
#     sentence_id = 0
#     for each_score in scores:
#         bm25_dic[sentence_id] = each_score
#         sentence_id += 1
#     results = bm25_dic.most_common(4)


    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    qtype = ''

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of sentence
    for token in doc:
        if 'nsubj' in token.dep_:
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            dobj = lemmatize(strip_punctuation(token.text))

    # for noun (phrase) questions, get answer dependency
    for chunk in doc.noun_chunks:
        if qword in chunk.text:
            dep = chunk.root.dep_
            head = lemmatize(strip_punctuation(chunk.root.head.text))
            head_dep = chunk.root.head.dep_

    # determine answer type
    if 'stand for' in question or 'abbreviat' in question:
        qtype = 'abrv'

    elif qword in ['who',"who's",'whom','whose']:
        qtype = 'who'

    elif qword == 'when':
        qtype = 'when'

    elif qword in ['where',"where's"]:
        qtype = 'where'

    elif qword in ['how',"how's"]:
        if next_token == 'much':
            qtype = 'MONEY'
        elif next_token == 'many':
            qtype = 'CARDINAL'
        elif next_token == 'long':
            qtype = 'DATE'
        elif next_token in ['far','big','wide','deep','tall','high','fast','heavy']:
            qtype = 'QUANTITY'
        elif next_token in ['old','young']:
            qtype = 'DATE'
        elif next_token in ['does','did','do','have','has','had','should',
                              'can','could','will','would','must']:
            if dobj != '':
                qtype = 'adj'
            else:
                qtype = 'verb'

    elif qword in ['what', "what's", 'which']:
        if 'year'in tokens or \
                'day' in tokens or \
                'month' in tokens or \
                'era' in tokens or \
                'age' in tokens or \
                'century' in tokens or \
                'week' in tokens or \
                'period' in tokens or \
                'dynasty' in tokens:
            qtype = 'DATE'

        elif 'company' in tokens or \
                'organization' in tokens or \
                'organisation' in tokens or \
                'corporation' in tokens or \
                'institution' in tokens or \
                'university' in tokens or \
                'corporation' in tokens or \
                'association' in tokens or \
                'union' in tokens or \
                'agency' in tokens:
            qtype = 'ORG'

        elif 'city' in tokens or \
                'country' in tokens or \
                'state' in tokens or \
                'province' in tokens or \
                'county' in tokens:
            qtype = 'GPE'

        elif 'place' in tokens or \
                'river' in tokens or \
                'mountain' in tokens or \
                'ocean' in tokens or \
                'region' in tokens or \
                'area' in tokens or \
                'sea' in tokens or \
                'lake' in tokens or \
                'continent' in tokens or \
                'location' in tokens or \
                'forest' in tokens or \
                'jungle' in tokens:
            qtype = 'LOC'

        elif 'nationality' in tokens:
            qtype = 'NORP'

        elif 'building' in tokens or \
            'airport' in tokens or \
            'highway' in tokens or \
            'bridge' in tokens or \
            'harbour' in tokens or \
            'harbor' in tokens or \
            'port' in tokens or \
            'dam' in tokens:
            qtype = 'FACILITY'

        elif 'hurricane' in tokens or \
            'battle' in tokens or \
            'war' in tokens:
            qtype = 'EVENT'

        elif 'book' in tokens or \
            'novel' in tokens or \
            'song' in tokens or \
            'music' in tokens or \
            'painting' in tokens:
            qtype = 'WORK_OF_ART'

        elif 'language' in tokens or \
                'speak' in tokens:
            qtype = 'LANGUAGE'

        elif 'percentage' in tokens or 'percent' in tokens:
            qtype = 'PERCENT'

        elif 'value' in tokens or \
                'distance' in tokens or \
                'size' in tokens or \
                'length' in tokens or \
                'depth' in tokens or \
                'height' in tokens or \
                'density' in tokens or \
                'speed' in tokens or \
                'weight' in tokens or \
                'area' in tokens or \
                'temperature' in tokens or \
                'volume' in tokens:
            qtype = 'QUANTITY'

        elif 'number' in tokens:
            qtype = 'CARDINAL'

        elif 'price' in tokens:
            qtype = 'MONEY'

        elif 'name' in tokens:
            qtype = 'NE'

        else:
            # what...do type question
            tokens.remove(next_token)
            if 'do' in tokens:
                qtype = 'verb'
            else:
                qtype = 'noun'

    elif qword == 'why':
        qtype = 'why'

    elif qword in CLOSED_QUESTION_WORDS:
        qtype = 'closed'

        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            prev1 = tokens[index - 1]
            next1 = tokens[index + 1]
            tag_tokens = nltk.pos_tag(tokens)

            tag = tag_tokens[index - 1][1]

            # if answer is a noun
            if tag in ['NN', 'NNP', 'NNS', 'NNPS']:
                for chunk in doc.noun_chunks:
                    if prev1 in chunk.text:
                        first = chunk.text
                    if next1 in chunk.text:
                        second = chunk.text
                closed_q_choices = (first, second)
            else:
                closed_q_choices = (prev1, next1)
        else:
            qtype = 'others'

    # re-rank the 20 sentences
    scores = {}
    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        score = get_overlap(sent, question)

        if qtype == 'who':
            for ent in doc.ents:
                if ent.label_ == 'PERSON':
                    score += 1

        elif qtype == 'when':
            for ent in doc.ents:
                if ent.label_ == 'TIME' or ent.label_ == "DATE":
                    score += 1

        elif qtype == 'where':
            for ent in doc.ents:
                if ent.label_ == 'GPE' or ent.label_ == "LOC":
                    score += 1

        elif qtype in ['LANGUAGE','WORK_OF_ART','EVENT','NORP','FACILITY',
                       'GPE','DATE','TIME','PERCENT','QUANTITY','CARDINAL',
                     'MONEY','PERSON','ORG','LOC']:
            for ent in doc.ents:
                if ent.label_ == qtype:
                    score += 1
                    
        elif qtype == 'NE':
            for ent in doc.ents:
                    score += 1

        elif qtype == 'adj':
            for token in doc:
                if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                    score += 1

        elif qtype == 'verb':
            for token in doc:
                if token.dep_ == 'ROOT':
                    score += 1

        elif qtype == 'closed':
            first = closed_q_choices[0]
            second = closed_q_choices[1]

            score += (first in sent) + (second in sent)

        elif qtype == 'why':
            if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:
                score += 1

        scores[id] = score

    rank = {}
    for id, sim in results:
        max_score = scores[max(scores, key=scores.get)]
        if max_score != 0:
            rank[id] = sim * 0.5 + (scores[id] / max_score * 0.5)
        else:
            rank[id] = sim
    
    # sentence with highest rank
    index = max(rank, key=rank.get)
    sent = raw_docs[index]
    doc = nlp(sent)

    # find sentence structure
    sent_nsubj = ''
    sent_ROOT = ''
    sent_dobj = ''
    for token in doc:
        if 'nsubj' in token.dep_:
            sent_nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            sent_ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            sent_dobj = lemmatize(strip_punctuation(token.text))
            
    # find answer with highest score
    max_score = -1
    answer = ''
    
    if qtype == 'who':
        for np in doc.noun_chunks:
            score = 0
            
            if np in doc.ents:
                for ent in doc.ents:
                    if np.text in ent.text and ent.label_ == 'PERSON':
                            score += 3

            # find NP dependency
            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1

            if np.text.lower() == 'it':
                score = -1
                
            if score > max_score:
                max_score = score
                answer = np.text

    elif qtype == 'when':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'TIME' or ent.label_ == "DATE":
                score += 3
                
            if ent.text not in question:
                score += 1
            
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'where':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'GPE' or ent.label_ == "LOC":
                score += 3

            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text
            
    elif qtype in ['LANGUAGE', 'WORK_OF_ART', 'EVENT', 'NORP', 'FACILITY',
                   'GPE', 'DATE', 'TIME', 'PERCENT', 'QUANTITY', 'CARDINAL',
                   'MONEY', 'PERSON', 'ORG', 'LOC']:
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == qtype:
                score += 3
            
            if ent.text not in question:
                score += 1
                
            if qtype in ['LOC','GPE'] and ent.root.tag_ not in ['NN','NNP','NNS','NNPS']:
                score -= 2
                
            if score > max_score:
                max_score = score
                answer = ent.text
                
                if qtype in ['MONEY']:
                    for token in doc:
                        if token.text == '$':
                            answer = '$ ' + answer
                            
                if qtype in ['PERCENT']:
                    if 'percent' in answer:
                        answer = answer[:answer.index('percent')-1]
                            
                if qtype in ['PERCENT','QUANTITY','CARDINAL','MONEY']:
                    tokens = nltk.word_tokenize(answer)
                    i = 0
                    answer = ''
                    while i < len(tokens):
                        if tokens[i].lower() in ['well','about','around','approximately', 'some']:
                            del tokens[i]
                        else:
                            if i+1 < len(tokens) and tokens[i+1] == "'s":
                                answer += tokens[i]
                            else:
                                answer += tokens[i] + ' '
                            i += 1
                    answer = answer.strip()
                    
    elif qtype == 'NE':
        for ent in doc.ents:
            score = 3
            
            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'abrv':
        abrv = ''
        qdoc = nlp(question)
        for token in qdoc:
            text = token.text
            if len(text) >= 2 and text.isupper() and text.isalpha():
                abrv = text.lower()

        if abrv == '' and 'stand for' in question:
            tokens = question.lower().split(' ')
            abrv = tokens[tokens.index('stand')-1]

        if abrv != '':
            tokens = nltk.word_tokenize(sent)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    phrase = token.lower()
                    initials = phrase[0]

                    while i+k < len(tokens) and tokens[i+k][0].isupper():
                        phrase = phrase + ' ' + tokens[i+k].lower()
                        initials += tokens[i+k][0].lower()
                        k += 1

                    phrase = phrase.strip()
                    if initials == abrv:
                        answer = phrase

        else:
            tokens = nltk.word_tokenize(question)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    initials = token[0].lower()

                    while i + k < len(tokens) and tokens[i + k][0].isupper():
                        initials += tokens[i + k][0].lower()
                        k += 1

                    if len(initials) >= 2:
                        answer = initials

    elif qtype == 'adj':
        for token in doc:
            score = 0
            
            if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                score += 3

            token_dep = token.dep_
            token_head = lemmatize(strip_punctuation(token.head.text))
            token_head_dep = token.head.dep_

            if token_dep == dep:
                score += 1
            if token_head == head:
                score += 1
            if token_head_dep == head_dep:
                score += 1

            if token.text not in question:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if token.text.lower() == 'it':
                score = -1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'verb':
        for token in doc:
            score = 0

            if token.dep_ == 'ROOT':
                score += 1

            if lemmatize(strip_punctuation(token.text)) not in \
                    [lemmatize(strip_punctuation(s)) for s in nltk.word_tokenize(question)]:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'closed':
        first = closed_q_choices[0]
        second = closed_q_choices[1]

        # whether each option appears (and is negates)
        appear1 = False
        appear2 = False
        negate1 = False
        negate2 = False
        neg_count = 0
        tokens = nltk.word_tokenize(raw_docs[id])

        for (index, token) in enumerate(tokens):
            if token == 'not' or "n't" in token:
                neg_count += 1

                if index+1 < len(tokens):
                    if tokens[index+1] == first:
                        negate1 = True
                    if tokens[index+1] == second:
                        negate2 = True

            if token == first:
                appear1 = True
            if token == second:
                appear2 = True

        possible_answer = ''
        if appear1 and not appear2:
            if neg_count % 2 == 1:
                possible_answer = second
            else:
                possible_answer = first

        elif appear2 and not appear1:
            if neg_count % 2 == 0:
                possible_answer = second
            else:
                possible_answer = first

        elif appear1 and appear2:
            if negate1 and not negate2:
                possible_answer = second
            elif negate2 and not negate1:
                possible_answer = first
            else:
                possible_answer = second

        if possible_answer != '':
            score += 5
            if score > max_score:
                max_score = score
                answer = possible_answer

    elif qtype == 'why':

        possible_answer = ''
        score = 0

        if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:

            if 'because of' in sent:
                score += 3
                index = sent.index('because of')
                substr = sent[index+11:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break

            elif 'because' in sent:
                score += 3
                index = sent.index('because')
                substr = sent[index + 8:]
                possible_answer = substr

            elif 'due to' in sent:
                score += 3
                index = sent.index('due to')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'reason' in sent:
                score += 2
                index = sent.index('reason')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    index = substr.find('is')
                    if index != -1:
                        possible_answer = substr[index+3]
                    else:
                        index = substr.find('was')
                        if index != -1:
                            possible_answer = substr[index+4]
                        else:
                            possible_answer = sent[sent.index('reason'):]

            elif 'for' in sent:
                score += 1
                index = sent.index('for')
                substr = sent[index + 4:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'since' in sent:
                score += 1
                index = sent.index('since')
                substr = sent[index + 6:]
                possible_answer = substr

            if possible_answer != '' and score > max_score:
                answer = possible_answer
                max_score = score

    # if answer not found, find noun phrases
    if answer == '':
        for np in doc.noun_chunks:
            score = 0

            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = np.text
                
    a = nltk.word_tokenize(answer)
    if len(a) > 0 and a[0].lower() in stop:
        del a[0]
        answer = ''
        for i in range(len(a)):
            if i+1 < len(a) and a[i+1] == "'s":
                answer += a[i]
            else:
                answer += a[i] + ' '
        
#     i = 0
#     ques_tokens = [lemmatize(token.lower()) for token in nltk.word_tokenize(question)]
#     while i < len(a):
#         if lemmatize(a[i].lower()) in ques_tokens and a[i].lower() not in stop and a[i] not in punc and len(a) > 1:
#             if i+1 < len(a) and a[i+1] == "'s":
#                 del a[i]
#                 del a[i]
#             else:
#                 del a[i]
#         else:
#             if i+1 < len(a) and a[i+1] == "'s":
#                 answer += a[i]
#             else:
#                 answer += a[i] + ' '
#             i += 1
        
    answer = answer.strip().lower()
    answer_result = []
    answer_result.append(case_count)
    answer_result.append(answer)
    writer.writerow(answer_result)
    print(case_count,' ',answer)
    case_count += 1
    
csvFile.close()

0   combination
1   addition
2   browser's layout engine
3   internet explorer
4   late 2004
5   windows
6   1990
7   browsers
8   marc andreessen
9   first web browser
10   competition
11   dominance
12   internet relay chat
13   january
14   every major web browser
15   january 2003
16   january 2009
17   file transfer protocol
18   google
19   case
20   rich user interfaces
21   2002
22   august 2011
23   major web browsers
24   chrome's user-base
25   development
26   december 2011
27   1993
28   netscape
29   apple's safari
30   rapid development
31   prefix
32   mozilla foundation
33   private networks
34   mac
35   comparison
36   1994
37   user interface
38   addition
39   major browsers
40   information resources
41   live bookmarks
42   more traditional feed reader
43   bookmarks
44   september 2008
45   prefix
46   browser software
47   file transfer protocol
48   microsoft corp
49   mobile safari
50   web browsers
51   windows
52   user's default e-mail application
53   bro

469   1339
470   1233
471   university hospital southampton nhs foundation trust
472   biggest operator
473   4.2
474   1959
475   354
476   government figures
477   m27
478   coast
479   hanover buildings
480   city
481   december 2007
482   12th century
483   traffic congestion
484   southampton
485   2004
486   1233
487   town
488   three fire stations
489   southampton docks
490   24
491   m27
492   southampton's largest retail centre
493   13th century
494   1066
495   clausentum
496   two
497   plans
498   southampton
499   port
500   over a quarter
501   main station
502   20–24
503   council estates
504   south west trains
505   16.2 percent
506   king henry's departure
507   route
508   trust
509   town
510   16.2 percent
511   two large live music venues
512   newport
513   university of southampton
514   london
515   river test and river itchen
516   1968
517   area
518   hampshire county council
519   duchess
520   world's largest cruise ships
521   1938
522   large shoppin

924   terror management theory
925   29
926   united states
927   germany
928   end of the last ice age
929   19th-century critics
930   contrasted
931   terror management theory
932   anthropologists
933   1970s
934   around 50,000 years ago
935   united states
936   diffusion
937   prussian linguist
938   1950s and 1960s
939   stuart hall
940   élite ideal
941   present legislation
942   jefferson's metaphor
943   1971
944   nonconformists
945   areas
946   pervasive secularism
947   wall
948   may 3, 2006
949   still other scholars
950   early as the mid-17th century
951   religious freedom
952   engel
953   madison
954   december 20, 2005
955   william penn
956   two
957   legal scholars
958   still other scholars
959   reynolds
960   1776
961   court's decision
962   opponents
963   december
964   robert s. wood
965   u.s.
966   1962
967   court
968   1994
969   kurtzman
970   argue
971   thomas jefferson's influential virginia statute
972   1947
973   lone dissenter
974   central

1338   black sea
1339   31 december
1340   cronstadt
1341   george hamilton-gordon
1342   5 september
1343   32–40
1344   alliance
1345   local commanders
1346   russians
1347   crimean war
1348   movement
1349   omar pasha
1350   nicholas
1351   parliament
1352   constantinople
1353   åland islands
1354   alexander ii
1355   nicholas
1356   cardigan
1357   treaty of paris
1358   austria
1359   start
1360   october 1853
1361   french
1362   sunday
1363   centuries-old
1364   public opinion
1365   second counterattack
1366   winter of 1854
1367   sinop
1368   russian cavalry movement
1369   catholic support
1370   peaceful settlement
1371   1830
1372   corps
1373   vidin
1374   reaction
1375   september 1853
1376   danube river
1377   ottoman forces
1378   william howard russell
1379   june 24, 1839
1380   local commanders
1381   105  the tsar
1382   28 march 1854
1383   432–33
1384   roger fenton
1385   russian troops
1386   far south wrangel
1387   august
1388   461 the resulting batt

1795   example
1796   dst
1797   november
1798   reduces
1799   1970s
1800   older form
1801   march 2011
1802   2005
1803   us
1804   winter
1805   north america
1806   1895
1807   ntfs
1808   
1809   northern summer
1810   farmers ' groups
1811   countries
1812   dst inherits
1813   dst
1814   filesystem
1815   2007
1816   merriam-webster
1817   farmers ' groups
1818   st
1819   people
1820   time zone differences
1821   early goal
1822   one reason
1823   daylight
1824   regions
1825   kingsford charcoal
1826   1999 study
1827   many enactments
1828   one
1829   year-independent way
1830   autumn and two hours
1831   least two
1832   us
1833   clocks
1834   iceland
1835   europe
1836   britain
1837   2000
1838   daily
1839   name
1840   clocks
1841   united kingdom
1842   franklin's 1784 satire
1843   daylight
1844   electricity use
1845   times
1846   1984
1847   adopt
1848   britain
1849   dst
1850   first three weekdays
1851   plan
1852   standardize
1853   14 april
1854   2008
1

2208   2000
2209   brazilian child
2210   racism
2211   suggest
2212   populations
2213   cladistics
2214   phylogenetic analysis
2215   initial hypotheses
2216   east asians
2217   populations
2218   practice
2219   significant number
2220   700.000
2221   clade
2222   diagnosis
2223   concept
2224   eduardo bonilla-silva
2225   physical anthropologists
2226   adversely
2227   mass incarceration
2228   many thousands
2229   france
2230   y chromosomes
2231   5 %
2232   roughly 28–37 %
2233   93 %
2234   uses
2235   international epidemiological data
2236   cranial measurements
2237   european concept
2238   arbitrary matter
2239   1964
2240   another way
2241   blumenbach
2242   good arguments
2243   word
2244   earlier work
2245   researchers
2246   last two decades
2247   eduardo bonilla-silva
2248   race
2249   many people
2250   2003 paper
2251   system
2252   five percent
2253   europeans
2254   kaplan
2255   many biological anthropologists
2256   amerindians
2257   identificatio

2620   residential
2621   end
2622   safety record
2623   residential
2624   300,000
2625   passenger elevators
2626   less expensive installations
2627   1
2628   gearless traction machines
2629   steam driven devices
2630   cab interiors
2631   another way
2632   dumbwaiters
2633   freight elevator
2634   first elevator shaft
2635   january
2636   construction
2637   single bulkhead cylinders
2638   machine-room-less elevators
2639   `` shaft
2640   neapolitan architect
2641   barrel
2642   40-50 %
2643   london
2644   method
2645   environmental concerns
2646   belt elevators
2647   team's earliest exit
2648   1189
2649   eight
2650   1953
2651   citation
2652   1872
2653   2001 and 2006
2654   february 2012
2655   1954 fifa world cup
2656   1974
2657   england
2658   2008–09 season
2659   england national football team
2660   
2661   charge
2662   england
2663   2002
2664   england
2665   john terry
2666   2002
2667   england's traditional away colours
2668   1923
2669   england
26

3036   1948
3037   muslims
3038   first five-year plan
3039   extreme variation
3040   others
3041   military settlement
3042   roman women
3043   imperial era
3044   human sacrifice
3045   security
3046   edict
3047   public festivals
3048   camp
3049   constantine
3050   roman women
3051   second edict
3052   famous tirade
3053   human sacrifice
3054   strong connections
3055   ordinary romans
3056   vergil
3057   valerian's first religious edict
3058   di immortales
3059   roman
3060   roman camps
3061   ruins
3062   abolition
3063   excessive devotion
3064   rome's hegemony
3065   end
3066   product
3067   customary offers
3068   product
3069   rome's hegemony
3070   relationship
3071   little or no civil authority
3072   edict
3073   wake
3074   most important camp-offering
3075   several days
3076   women
3077   dictator
3078   public festivals
3079   edict
3080   opportunities
3081   solution
3082   cult
3083   sporadic and sometimes brutal attempts
3084   punic crisis
3085   hu

KeyboardInterrupt: 

#  准确率测试