In [54]:
import nltk
import json
import spacy
from nltk.corpus import stopwords
from math import log
from collections import defaultdict, Counter
from string import punctuation
from nltk.stem.wordnet import WordNetLemmatizer
import gensim
import re
import csv

# Variables

In [46]:
OPEN_QUESTION_WORDS = ['what','who','whose','whom','where','when','why','how',
                       'which',"what's","who's","where's","how's"]
CLOSED_QUESTION_WORDS = ['is','are','am','was','were','do','does,','did','can',
                         'could','will','would','shall','should','have','has',
                         'had']

# Stop words
stop = set(stopwords.words('english'))

lmtz = WordNetLemmatizer()

with open('testing.json') as json_data:
    test = json.load(json_data)

with open('documents.json') as json_data:
    documents = json.load(json_data)

# Spacy toolkit
nlp = spacy.load('en_core_web_sm')

punc = set(punctuation)

In [47]:
def strip_punctuation(s):
    return ''.join(c for c in s if c not in punc)

In [48]:
def lemmatize(token):
    lemma = lmtz.lemmatize(token, 'v')
    if lemma == token:
        lemma = lmtz.lemmatize(token, 'n')
    return lemma

        
def extract_term_freqs(doc):
    tfs = {}
    for token in nltk.word_tokenize(doc):
        lemma = lemmatize(token.lower())
        if lemma not in stop and lemma.isalpha():
            tfs[lemma] = tfs.get(lemma, 0) + 1
    return tfs


def compute_doc_freqs(doc_term_freqs):
    dfs = Counter()
    for tfs in doc_term_freqs.values():
        for term in tfs.keys():
            dfs[term] += 1
    return dfs


def query_vsm(query, index, k=12):
    accumulator = Counter()
    for term in query:
        postings = index[term]
        for docid, weight in postings:
            accumulator[docid] += weight
    return accumulator.most_common(k)


# Find the question word
def get_qword(question):
    tokens = nltk.word_tokenize(question.lower())
    for token in tokens:
        if token in OPEN_QUESTION_WORDS:
            return token
    for token in tokens:
        if token in CLOSED_QUESTION_WORDS:
            return token
    return 'others'

In [49]:
# length of longest same sequences of keywords
def get_overlap(sent1, sent2):
    tokens1 = []
    tokens2 = []

    for token in nltk.word_tokenize(strip_punctuation(sent1.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens1.append(lemma)

    for token in nltk.word_tokenize(strip_punctuation(sent2.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens2.append(lemma)

    max = 0
    for i in range(len(tokens1)):
        for j in range(len(tokens2)):

            if tokens1[i] == tokens2[j]:
                length = 1

                ii = i + 1
                jj = j + 1
                while ii < len(tokens1) and jj < len(tokens2) and \
                        tokens1[ii] == tokens2[jj]:
                    ii += 1
                    jj += 1
                    length += 1

                if length > max:
                    max = length

    return max

# Write to test file

In [65]:
csvFile = open("high.csv", "w")
writer = csv.writer(csvFile)
header = ['id','answer']
writer.writerow(header)


case_count = 0
# test = [test[17]]
for test_case in test:
    question = test_case['question']
    docid = test_case['docid']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)
    
#     fn_docs = raw_docs
#     bm25 = BM25(fn_docs, delimiter=' ')
#     Query = question
#     Query = Query.split()
#     scores = bm25.BM25Score(Query)
#     tfidf = bm25.TFIDF()
#     print bm25.Items()
    
#     for i, tfidfscore in enumerate(tfidf):
#         print i, tfidfscore
    # TFIDF
    doc_term_freqs = {}
    for (id, raw_doc) in enumerate(raw_docs):
        term_freqs = extract_term_freqs(raw_doc)
        doc_term_freqs[id] = term_freqs
    M = len(doc_term_freqs)

    doc_freqs = compute_doc_freqs(doc_term_freqs)

    vsm_inverted_index = defaultdict(list)
    for docid, term_freqs in doc_term_freqs.items():
        N = sum(term_freqs.values())
        length = 0

        # find tf*idf values and accumulate sum of squares
        tfidf_values = []
        for term, count in term_freqs.items():
            tfidf = float(count) / N * log(M / float(doc_freqs[term]))
            tfidf_values.append((term, tfidf))
            length += tfidf ** 2

        # normalise documents by length and insert into index
        length = length ** 0.5
        for term, tfidf in tfidf_values:
            # inversion of the indexing, term -> (doc_id, score)
            vsm_inverted_index[term].append([docid, tfidf / length])

    for term, docids in vsm_inverted_index.items():
        docids.sort()

    terms = extract_term_freqs(question)
    results = query_vsm(terms, vsm_inverted_index)


    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    qtype = ''

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of sentence
    for token in doc:
        if 'nsubj' in token.dep_:
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            dobj = lemmatize(strip_punctuation(token.text))

    # for noun (phrase) questions, get answer dependency
    for chunk in doc.noun_chunks:
        if qword in chunk.text:
            dep = chunk.root.dep_
            head = lemmatize(strip_punctuation(chunk.root.head.text))
            head_dep = chunk.root.head.dep_

    # determine answer type
    if 'stand for' in question or 'abbreviat' in question:
        qtype = 'abrv'

    elif qword in ['who',"who's",'whom','whose']:
        qtype = 'who'

    elif qword == 'when':
        qtype = 'when'

    elif qword in ['where',"where's"]:
        qtype = 'where'

    elif qword in ['how',"how's"]:
        if next_token == 'much':
            qtype = 'MONEY'
        elif next_token == 'many':
            qtype = 'CARDINAL'
        elif next_token == 'long':
            qtype = 'DATE'
        elif next_token in ['far','big','wide','deep','tall','high','fast','heavy']:
            qtype = 'QUANTITY'
        elif next_token in ['old','young']:
            qtype = 'DATE'
        elif next_token in ['does','did','do','have','has','had','should',
                              'can','could','will','would','must']:
            if dobj != '':
                qtype = 'adj'
            else:
                qtype = 'verb'

    elif qword in ['what', "what's", 'which']:

        if 'year'in tokens or \
                'day' in tokens or \
                'month' in tokens or \
                'era' in tokens or \
                'age' in tokens or \
                'century' in tokens or \
                'week' in tokens or \
                'period' in tokens or \
                'dynasty' in tokens:
            qtype = 'DATE'

        elif 'company' in tokens or \
                'organization' in tokens or \
                'organisation' in tokens or \
                'corporation' in tokens or \
                'institution' in tokens or \
                'university' in tokens or \
                'corporation' in tokens or \
                'association' in tokens or \
                'union' in tokens or \
                'agency' in tokens:
            qtype = 'ORG'

        elif 'city' in tokens or \
                'country' in tokens or \
                'state' in tokens or \
                'province' in tokens or \
                'county' in tokens:
            qtype = 'GPE'

        elif 'place' in tokens or \
                'river' in tokens or \
                'mountain' in tokens or \
                'ocean' in tokens or \
                'region' in tokens or \
                'area' in tokens or \
                'sea' in tokens or \
                'lake' in tokens or \
                'continent' in tokens or \
                'location' in tokens or \
                'forest' in tokens or \
                'jungle' in tokens:
            qtype = 'LOC'

        elif 'nationality' in tokens:
            qtype = 'NORP'

        elif 'building' in tokens or \
            'airport' in tokens or \
            'highway' in tokens or \
            'bridge' in tokens or \
            'harbour' in tokens or \
            'harbor' in tokens or \
            'port' in tokens or \
            'dam' in tokens:
            qtype = 'FACILITY'

        elif 'hurricane' in tokens or \
            'battle' in tokens or \
            'war' in tokens:
            qtype = 'EVENT'

        elif 'book' in tokens or \
            'novel' in tokens or \
            'song' in tokens or \
            'music' in tokens or \
            'painting' in tokens:
            qtype = 'WORK_OF_ART'

        elif 'language' in tokens or \
                'speak' in tokens:
            qtype = 'LANGUAGE'

        elif 'percentage' in tokens or 'percent' in tokens:
            qtype = 'PERCENT'

        elif 'value' in tokens or \
                'distance' in tokens or \
                'size' in tokens or \
                'length' in tokens or \
                'depth' in tokens or \
                'height' in tokens or \
                'density' in tokens or \
                'speed' in tokens or \
                'weight' in tokens or \
                'area' in tokens or \
                'temperature' in tokens or \
                'volume' in tokens:
            qtype = 'QUANTITY'

        elif 'number' in tokens:
            qtype = 'CARDINAL'

        elif 'price' in tokens:
            qtype = 'MONEY'

        else:
            # what...do type question
            tokens.remove(next_token)
            if 'do' in tokens:
                qtype = 'verb'
            else:
                qtype = 'noun'

    elif qword == 'why':
        qtype = 'why'

    elif qword in CLOSED_QUESTION_WORDS:
        qtype = 'closed'

        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            prev1 = tokens[index - 1]
            next1 = tokens[index + 1]
            tag_tokens = nltk.pos_tag(tokens)

            tag = tag_tokens[index - 1][1]

            # if answer is a noun
            if tag in ['NN', 'NNP', 'NNS', 'NNPS']:
                for chunk in doc.noun_chunks:
                    if prev1 in chunk.text:
                        first = chunk.text
                    if next1 in chunk.text:
                        second = chunk.text
                closed_q_choices = (first, second)
            else:
                closed_q_choices = (prev1, next1)
        else:
            qtype = 'others'

    # re-rank the 20 sentences
    scores = {}
    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        score = get_overlap(sent, question)

        if qtype == 'who':
            for ent in doc.ents:
                if ent.label_ == 'PERSON':
                    score += 1

        elif qtype == 'when':
            for ent in doc.ents:
                if ent.label_ == 'TIME' or ent.label_ == "DATE":
                    score += 1

        elif qtype == 'where':
            for ent in doc.ents:
                if ent.label_ == 'GPE' or ent.label_ == "LOC":
                    score += 1

        elif qtype in ['LANGUAGE','WORK_OF_ART','EVENT','NORP','FACILITY',
                       'GPE','DATE','TIME','PERCENT','QUANTITY','CARDINAL',
                     'MONEY','PERSON','ORG','LOC']:
            for ent in doc.ents:
                if ent.label_ == qtype:
                    score += 1

        elif qtype == 'adj':
            for token in doc:
                if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                    score += 1

        elif qtype == 'verb':
            for token in doc:
                if token.dep_ == 'ROOT':
                    score += 1

        elif qtype == 'closed':
            first = closed_q_choices[0]
            second = closed_q_choices[1]

            score += (first in sent) + (second in sent)

        elif qtype == 'why':
            if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:
                score += 1

        scores[id] = score

    rank = {}
    for id, sim in results:
        max_score = scores[max(scores, key=scores.get)]
        if max_score != 0:
            rank[id] = sim * 0.5 + (scores[id] / max_score * 0.5)
        else:
            rank[id] = sim
    
    # sentence with highest rank
    index = max(rank, key=rank.get)
    sent = raw_docs[index]
    doc = nlp(sent)

    # find sentence structure
    sent_nsubj = ''
    sent_ROOT = ''
    sent_dobj = ''
    for token in doc:
        if 'nsubj' in token.dep_:
            sent_nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            sent_ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            sent_dobj = lemmatize(strip_punctuation(token.text))
            
    # find answer with highest score
    max_score = -1
    answer = ''
    
    if qtype == 'who':
        for np in doc.noun_chunks:
            score = 0
            
            if np in doc.ents:
                for ent in doc.ents:
                    if np.text in ent.text and ent.label_ == 'PERSON':
                            score += 3

            # find NP dependency
            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1

            if np.text.lower() == 'it':
                score = -1
                
            if score > max_score:
                max_score = score
                answer = np.text

    elif qtype == 'when':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'TIME' or ent.label_ == "DATE":
                score += 3
                
            if ent.text not in question:
                score += 1
            
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'where':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'GPE' or ent.label_ == "LOC":
                score += 3

            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text
            
    elif qtype in ['LANGUAGE', 'WORK_OF_ART', 'EVENT', 'NORP', 'FACILITY',
                   'GPE', 'DATE', 'TIME', 'PERCENT', 'QUANTITY', 'CARDINAL',
                   'MONEY', 'PERSON', 'ORG', 'LOC']:
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == qtype:
                score += 3
            
            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'abrv':
        abrv = ''
        qdoc = nlp(question)
        for token in qdoc:
            text = token.text
            if len(text) >= 2 and text.isupper() and text.isalpha():
                abrv = text.lower()

        if abrv == '' and 'stand for' in question:
            tokens = question.lower().split(' ')
            abrv = tokens[tokens.index('stand')-1]

        if abrv != '':
            tokens = nltk.word_tokenize(sent)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    phrase = token.lower()
                    initials = phrase[0]

                    while i+k < len(tokens) and tokens[i+k][0].isupper():
                        phrase = phrase + ' ' + tokens[i+k].lower()
                        initials += tokens[i+k][0].lower()
                        k += 1

                    phrase = phrase.strip()
                    if initials == abrv:
                        answer = phrase

        else:
            tokens = nltk.word_tokenize(question)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    initials = token[0].lower()

                    while i + k < len(tokens) and tokens[i + k][0].isupper():
                        initials += tokens[i + k][0].lower()
                        k += 1

                    if len(initials) >= 2:
                        answer = initials

    elif qtype == 'adj':
        for token in doc:
            score = 0
            
            if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                score += 3

            token_dep = token.dep_
            token_head = lemmatize(strip_punctuation(token.head.text))
            token_head_dep = token.head.dep_

            if token_dep == dep:
                score += 1
            if token_head == head:
                score += 1
            if token_head_dep == head_dep:
                score += 1

            if token.text not in question:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if token.text.lower() == 'it':
                score = -1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'verb':
        for token in doc:
            score = 0

            if token.dep_ == 'ROOT':
                score += 1

            if lemmatize(strip_punctuation(token.text)) not in \
                    [lemmatize(strip_punctuation(s)) for s in nltk.word_tokenize(question)]:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'closed':
        first = closed_q_choices[0]
        second = closed_q_choices[1]

        # whether each option appears (and is negates)
        appear1 = False
        appear2 = False
        negate1 = False
        negate2 = False
        neg_count = 0
        tokens = nltk.word_tokenize(raw_docs[id])

        for (index, token) in enumerate(tokens):
            if token == 'not' or "n't" in token:
                neg_count += 1

                if index+1 < len(tokens):
                    if tokens[index+1] == first:
                        negate1 = True
                    if tokens[index+1] == second:
                        negate2 = True

            if token == first:
                appear1 = True
            if token == second:
                appear2 = True

        possible_answer = ''
        if appear1 and not appear2:
            if neg_count % 2 == 1:
                possible_answer = second
            else:
                possible_answer = first

        elif appear2 and not appear1:
            if neg_count % 2 == 0:
                possible_answer = second
            else:
                possible_answer = first

        elif appear1 and appear2:
            if negate1 and not negate2:
                possible_answer = second
            elif negate2 and not negate1:
                possible_answer = first
            else:
                possible_answer = second

        if possible_answer != '':
            score += 5
            if score > max_score:
                max_score = score
                answer = possible_answer

    elif qtype == 'why':

        possible_answer = ''
        score = 0

        if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:

            if 'because of' in sent:
                score += 3
                index = sent.index('because of')
                substr = sent[index+11:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break

            elif 'because' in sent:
                score += 3
                index = sent.index('because')
                substr = sent[index + 8:]
                possible_answer = substr

            elif 'due to' in sent:
                score += 3
                index = sent.index('due to')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'reason' in sent:
                score += 2
                index = sent.index('reason')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    index = substr.find('is')
                    if index != -1:
                        possible_answer = substr[index+3]
                    else:
                        index = substr.find('was')
                        if index != -1:
                            possible_answer = substr[index+4]
                        else:
                            possible_answer = sent[sent.index('reason'):]

            elif 'for' in sent:
                score += 1
                index = sent.index('for')
                substr = sent[index + 4:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'since' in sent:
                score += 1
                index = sent.index('since')
                substr = sent[index + 6:]
                possible_answer = substr

            if possible_answer != '' and score > max_score:
                answer = possible_answer
                max_score = score

    # if answer not found, find noun phrases
    if answer == '':
        for np in doc.noun_chunks:
            score = 0

            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1

            if np.text.lower() == 'it':
                score = 0

            if score > max_score:
                max_score = score
                answer = np.text
                
                
    a = nltk.word_tokenize(answer)
    try:
        if a[0].lower() == 'the' or a[0].lower() == 'a' or a[0].lower() == 'an' or a[0].lower() == 'this':
            answer = ''
            del a[0]
            for word in a:
                answer += word + ' '
    except:
        answer = answer
        
    answer = answer.strip().lower()
    result = []
    result.append(case_count)
    result.append(answer)
    writer.writerow(result)
    print(case_count,' ',answer)
    case_count += 1
    
csvFile.close()

0   combination
1   addition
2   browser 's layout engine
3   its opera-mini version
4   late 2004
5   windows
6   1995
7   most browsers
8   marc andreessen
9   first web browser
10   competition
11   dominance
12   internet relay chat
13   january
14   every major web browser
15   january 2003
16   january 2009
17   file transfer protocol
18   google
19   case
20   rich user interfaces
21   2002
22   august 2011
23   all major web browsers
24   chrome's user-base
25   development
26   december 2011
27   january 2003
28   netscape
29   apple's safari
30   rapid development
31   prefix
32   mozilla foundation
33   private networks
34   mac
35   comparison
36   1994
37   user interface
38   addition
39   major browsers
40   information resources
41   live bookmarks
42   more traditional feed reader
43   bookmarks
44   year
45   prefix
46   browser software
47   file transfer protocol
48   microsoft corp
49   mobile safari
50   most web browsers
51   windows
52   user 's default e-mail a

459   city
460   additional police stations
461   local train services
462   two
463   uk
464   solent
465   p&o cruises
466   27 june 1640
467   college
468   840
469   centuries
470   1233
471   university hospital southampton nhs foundation trust
472   biggest operator
473   4.2
474   1959
475   354
476   government figures
477   m27
478   talking heads
479   hanover buildings
480   city
481   december 2007
482   12th century
483   traffic congestion
484   southampton
485   2004
486   1233
487   built-up area
488   three fire stations
489   southampton docks
490   24
491   western docks
492   southampton's largest retail centre
493   1920s
494   1976
495   clausentum
496   two
497   plans
498   southampton
499   port
500   just over a quarter
501   main station
502   20–24
503   council estates
504   south west trains
505   16.2 percent
506   king henry's departure
507   route
508   trust
509   town
510   16.2 percent
511   two large live music venues
512   three
513   university of

905   neptune's more varied weather
906   30.1
907   pluto
908   apparent backward motion
909   current models
910   1997
911   planetary science
912   400 m/s
913   2004
914   images
915   1017
916   other candidates
917   neptune's more varied weather
918   reaction
919   hobbes
920   modern term `` culture
921   non-material culture
922   citation
923   19th-century
924   terror management theory
925   29
926   united states
927   germany
928   end of the last ice age
929   other 19th-century critics
930   contrasted
931   terror management theory
932   anthropologists
933   1970s
934   around 50,000 years ago
935   united states
936   diffusion
937   prussian linguist
938   1950s and 1960s
939   stuart hall
940   élite ideal
941   present legislation
942   jefferson's metaphor
943   1971
944   nonconformists
945   all areas
946   pervasive secularism
947   that wall
948   may 3, 2006
949   still other scholars
950   as early as the mid-17th century
951   religious freedom
952   eng

1312   15,000
1313   four
1314   russians
1315   both sides
1316   william howard russell
1317   time
1318   1857
1319   25 september
1320   constantinople
1321   war
1322   february
1323   morning
1324   piedmont
1325   nicholas
1326   late september
1327   year
1328   three
1329   bulgaria
1330   alexander ii
1331   french
1332   cronstadt
1333   second
1334   walachia
1335   minor naval skirmishes
1336   ukraine
1337   19–31
1338   black sea
1339   31 december
1340   cronstadt
1341   george hamilton-gordon
1342   omar pasha
1343   32–40
1344   its alliance
1345   local commanders
1346   russians
1347   william howard russell
1348   constantinople
1349   omar pasha
1350   omar pasha
1351   parliament
1352   constantinople
1353   åland islands
1354   alexander ii
1355   nicholas
1356   cardigan
1357   treaty of paris
1358   austria
1359   ridge
1360   october 1853
1361   french
1362   united kingdom
1363   centuries-old
1364   public opinion
1365   second counterattack
1366   winter o

1774   regions
1775   united kingdom
1776   sunlight exposure
1777   unexpected adverse effect
1778   winston churchill
1779   people
1780   proposal
1781   1907
1782   1995
1783   microsoft windows
1784   negative dst
1785   britain
1786   standard time
1787   autumn and two hours
1788   merriam-webster's
1789   two hours
1790   dst
1791   correlation
1792   (cet
1793   all years
1794   fixed work schedules
1795   example
1796   dst
1797   november
1798   several studies
1799   1970s
1800   older form
1801   march 2011
1802   2007
1803   us
1804   winter
1805   north america
1806   1895
1807   ntfs
1808   is
1809   northern summer
1810   some farmers' groups
1811   some countries
1812   dst inherits
1813   dst
1814   filesystem
1815   2007
1816   merriam-webster
1817   some farmers' groups
1818   st
1819   people
1820   time zone differences
1821   early goal
1822   one reason
1823   daylight
1824   common agreement
1825   kingsford charcoal
1826   1999 study
1827   many enactments
18

2182   only seven
2183   whites
2184   categories
2185   biological race concept
2186   population
2187   existence
2188   scientific classification
2189   subspecies
2190   sex
2191   rigid descent rule
2192   forensic physical anthropologist
2193   1949
2194   skeletal remain
2195   at least two
2196   sense
2197   falsify
2198   recent work
2199   no generally accepted concept
2200   greatest number
2201   many
2202   survey
2203   races
2204   about 30%
2205   clustering
2206   today
2207   20th century
2208   2000
2209   brazilian child
2210   racism
2211   suggest
2212   populations
2213   cladistics
2214   phylogenetic analysis
2215   initial hypotheses
2216   east asians
2217   populations
2218   practice
2219   significant number
2220   700.000
2221   clade
2222   diagnosis
2223   concept
2224   eduardo bonilla-silva
2225   physical anthropologists
2226   adversely
2227   mass incarceration
2228   many thousands
2229   france
2230   y chromosomes
2231   about 5%
2232   roughly

2582   gabriel
2583   citation
2584   case
2585   complaint
2586   articles
2587   many treaties
2588   other treaties
2589   2008
2590   dates
2591   ruled
2592   treaties
2593   add
2594   articles
2595   states
2596   reservations
2597   supreme court
2598   passenger elevator
2599   40-50%
2600   1853
2601   weight
2602   counterweights
2603   opens
2604   berlin
2605   werner von siemens
2606   elevator cab
2607   germany
2608   low-volume hours
2609   double deck elevators
2610   make
2611   citation
2612   israeli
2613   hydraulic crane
2614   four
2615   relay controller
2616   city transport
2617   governor device
2618   passenger cabs
2619   unique design characteristics
2620   residential
2621   each end
2622   500 ft/min
2623   may
2624   approximately 300,000
2625   passenger elevators
2626   less expensive installations
2627   1
2628   gearless traction machines
2629   these steam driven devices
2630   cab interiors
2631   elevator cars
2632   dumbwaiters
2633   freight e

2988   2011
2989   its software business
2990   2.06 percent
2991   virbhadra singh
2992   state-owned television broadcaster
2993   governor
2994   himachal
2995   sikhs
2996   kangra miniature paintings
2997   state
2998   himachal
2999   state
3000   himachal pradesh
3001   economy
3002   10th century
3003   hill stations
3004   public radio station
3005   1 november 1956
3006   himachal
3007   handicrafts
3008   virbhadra singh
3009   21st
3010   anglo-gorkha war
3011   superintendent
3012   pit-loom
3013   virbhadra singh
3014   buddhism and sikhism
3015   himachal pradesh
3016   wool
3017   year 1768
3018   hima
3019   hydro electric resources
3020   dial-up access
3021   mahmud ghaznavi
3022   citation
3023   citation
3024   technological advancements
3025   30 december 2007
3026   first
3027   nepal
3028   h.p.
3029   private fm stations
3030   18 december 1970
3031   state
3032   1.16%
3033   radio
3034   2006
3035   handicrafts
3036   1948
3037   muslims
3038   first five-yea

3395   romanian
3396   ming dynasty
3397   latin
3398   , a more sociolinguistic notion
3399   multiple standard dialects
3400   latin
3401   latin
3402   italians
3403   swedish
3404   dialect
3405   serbia
3406   1996
3407   tok pisin
3408   public environment
3409   25.1, 1945
3410   low german varieties
3411   north african
3412   first
3413   spanish
3414   rest
3415   only 2.5%
3416   english
3417   tunisia
3418   ion bărbuţă
3419   risorgimento
3420   heinz kloss
3421   italian
3422   south
3423   world war i
3424   modern nationalism
3425   opposite example
3426   arabic
3427   arabic
3428   yiddish speaker
3429   northern germany
3430   terms `` language
3431   them
3432   example
3433   northern italy's
3434   hong kong
3435   standardized dialect
3436   various terms
3437   italy
3438   german empire
3439   german language
3440   that interpretation
3441   secondary sense
3442   dialect
3443   english
3444   dialect
3445   standard dialect
3446   german dialectology
3447   i

#  准确率测试

In [19]:
with open('training.json') as json_data:
    train = json.load(json_data)
    
case_count = 0

for train_case in train:
    question = train_case['question']
    docid = train_case['docid']
    correct_answer = train_case['text']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)

    # TFIDF
    doc_term_freqs = {}
    for (id, raw_doc) in enumerate(raw_docs):
        term_freqs = extract_term_freqs(raw_doc)
        doc_term_freqs[id] = term_freqs
    M = len(doc_term_freqs)

    doc_freqs = compute_doc_freqs(doc_term_freqs)

    vsm_inverted_index = defaultdict(list)
    for docid, term_freqs in doc_term_freqs.items():
        N = sum(term_freqs.values())
        length = 0

        # find tf*idf values and accumulate sum of squares
        tfidf_values = []
        for term, count in term_freqs.items():
            tfidf = float(count) / N * log(M / float(doc_freqs[term]))
            tfidf_values.append((term, tfidf))
            length += tfidf ** 2

        # normalise documents by length and insert into index
        length = length ** 0.5
        for term, tfidf in tfidf_values:
            # inversion of the indexing, term -> (doc_id, score)
            vsm_inverted_index[term].append([docid, tfidf / length])

    for term, docids in vsm_inverted_index.items():
        docids.sort()

    terms = extract_term_freqs(question)
    results = query_vsm(terms, vsm_inverted_index)


    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    qtype = ''

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of sentence
    for token in doc:
        if 'nsubj' in token.dep_:
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            dobj = lemmatize(strip_punctuation(token.text))

    # for noun (phrase) questions, get answer dependency
    for chunk in doc.noun_chunks:
        if qword in chunk.text:
            dep = chunk.root.dep_
            head = lemmatize(strip_punctuation(chunk.root.head.text))
            head_dep = chunk.root.head.dep_

    # determine answer type
    if 'stand for' in question or 'abbreviat' in question:
        qtype = 'abrv'

    elif qword in ['who',"who's",'whom','whose']:
        qtype = 'who'

    elif qword == 'when':
        qtype = 'when'

    elif qword in ['where',"where's"]:
        qtype = 'where'

    elif qword in ['how',"how's"]:
        if next_token == 'much':
            qtype = 'MONEY'
        elif next_token == 'many':
            qtype = 'CARDINAL'
        elif next_token == 'long':
            qtype = 'DATE'
        elif next_token in ['far','big','wide','deep','tall','high','fast','heavy']:
            qtype = 'QUANTITY'
        elif next_token in ['old','young']:
            qtype = 'DATE'
        elif next_token in ['does','did','do','have','has','had','should',
                              'can','could','will','would','must']:
            if dobj != '':
                qtype = 'adj'
            else:
                qtype = 'verb'

    elif qword in ['what', "what's", 'which']:

        if 'year'in tokens or \
                'day' in tokens or \
                'month' in tokens or \
                'era' in tokens or \
                'age' in tokens or \
                'century' in tokens or \
                'week' in tokens or \
                'period' in tokens or \
                'dynasty' in tokens:
            qtype = 'DATE'

        elif 'company' in tokens or \
                'organization' in tokens or \
                'organisation' in tokens or \
                'corporation' in tokens or \
                'institution' in tokens or \
                'university' in tokens or \
                'corporation' in tokens or \
                'association' in tokens or \
                'union' in tokens or \
                'agency' in tokens:
            qtype = 'ORG'

        elif 'city' in tokens or \
                'country' in tokens or \
                'state' in tokens or \
                'province' in tokens or \
                'county' in tokens:
            qtype = 'GPE'

        elif 'place' in tokens or \
                'river' in tokens or \
                'mountain' in tokens or \
                'ocean' in tokens or \
                'region' in tokens or \
                'area' in tokens or \
                'sea' in tokens or \
                'lake' in tokens or \
                'continent' in tokens or \
                'location' in tokens or \
                'forest' in tokens or \
                'jungle' in tokens:
            qtype = 'LOC'

        elif 'nationality' in tokens:
            qtype = 'NORP'

        elif 'building' in tokens or \
            'airport' in tokens or \
            'highway' in tokens or \
            'bridge' in tokens or \
            'harbour' in tokens or \
            'harbor' in tokens or \
            'port' in tokens or \
            'dam' in tokens:
            qtype = 'FACILITY'

        elif 'hurricane' in tokens or \
            'battle' in tokens or \
            'war' in tokens:
            qtype = 'EVENT'

        elif 'book' in tokens or \
            'novel' in tokens or \
            'song' in tokens or \
            'music' in tokens or \
            'painting' in tokens:
            qtype = 'WORK_OF_ART'

        elif 'language' in tokens or \
                'speak' in tokens:
            qtype = 'LANGUAGE'

        elif 'percentage' in tokens or 'percent' in tokens:
            qtype = 'PERCENT'

        elif 'value' in tokens or \
                'distance' in tokens or \
                'size' in tokens or \
                'length' in tokens or \
                'depth' in tokens or \
                'height' in tokens or \
                'density' in tokens or \
                'speed' in tokens or \
                'weight' in tokens or \
                'area' in tokens or \
                'temperature' in tokens or \
                'volume' in tokens:
            qtype = 'QUANTITY'

        elif 'number' in tokens:
            qtype = 'CARDINAL'

        elif 'price' in tokens:
            qtype = 'MONEY'

        else:
            # what...do type question
            tokens.remove(next_token)
            if 'do' in tokens:
                qtype = 'verb'
            else:
                qtype = 'noun'

    elif qword == 'why':
        qtype = 'why'

    elif qword in CLOSED_QUESTION_WORDS:
        qtype = 'closed'

        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            prev1 = tokens[index - 1]
            next1 = tokens[index + 1]
            tag_tokens = nltk.pos_tag(tokens)

            tag = tag_tokens[index - 1][1]

            # if answer is a noun
            if tag in ['NN', 'NNP', 'NNS', 'NNPS']:
                for chunk in doc.noun_chunks:
                    if prev1 in chunk.text:
                        first = chunk.text
                    if next1 in chunk.text:
                        second = chunk.text
                closed_q_choices = (first, second)
            else:
                closed_q_choices = (prev1, next1)
        else:
            qtype = 'others'

    # re-rank the 20 sentences
    scores = {}
    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        score = get_overlap(sent, question)

        if qtype == 'who':
            for ent in doc.ents:
                if ent.label_ == 'PERSON':
                    score += 1

        elif qtype == 'when':
            for ent in doc.ents:
                if ent.label_ == 'TIME' or ent.label_ == "DATE":
                    score += 1

        elif qtype == 'where':
            for ent in doc.ents:
                if ent.label_ == 'GPE' or ent.label_ == "LOC":
                    score += 1

        elif qtype in ['LANGUAGE','WORK_OF_ART','EVENT','NORP','FACILITY',
                       'GPE','DATE','TIME','PERCENT','QUANTITY','CARDINAL',
                     'MONEY','PERSON','ORG','LOC']:
            for ent in doc.ents:
                if ent.label_ == qtype:
                    score += 1

        elif qtype == 'adj':
            for token in doc:
                if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                    score += 1

        elif qtype == 'verb':
            for token in doc:
                if token.dep_ == 'ROOT':
                    score += 1

        elif qtype == 'closed':
            first = closed_q_choices[0]
            second = closed_q_choices[1]

            score += (first in sent) + (second in sent)

        elif qtype == 'why':
            if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:
                score += 1

        scores[id] = score

    rank = {}
    for id, sim in results:
        max_score = scores[max(scores, key=scores.get)]
        if max_score != 0:
            rank[id] = sim * 0.5 + (scores[id] / max_score * 0.5)
        else:
            rank[id] = sim
    
    # sentence with highest rank
    index = max(rank, key=rank.get,default=0)
    sent = raw_docs[index]
    doc = nlp(sent)

    
    # find sentence structure
    sent_nsubj = ''
    sent_ROOT = ''
    sent_dobj = ''
    for token in doc:
        if 'nsubj' in token.dep_:
            sent_nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            sent_ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            sent_dobj = lemmatize(strip_punctuation(token.text))
            
    # find answer with highest score
    max_score = -1
    answer = ''
    
    if qtype == 'who':
        for np in doc.noun_chunks:
            score = 0
            
            if np in doc.ents:
                for ent in doc.ents:
                    if np.text in ent.text and ent.label_ == 'PERSON':
                            score += 3

            # find NP dependency
            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1

            if np.text.lower() == 'it':
                score = -1
                
            if score > max_score:
                max_score = score
                answer = np.text

    elif qtype == 'when':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'TIME' or ent.label_ == "DATE":
                score += 3
                
            if ent.text not in question:
                score += 1
            
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'where':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'GPE' or ent.label_ == "LOC":
                score += 3

            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text
            
    elif qtype in ['LANGUAGE', 'WORK_OF_ART', 'EVENT', 'NORP', 'FACILITY',
                   'GPE', 'DATE', 'TIME', 'PERCENT', 'QUANTITY', 'CARDINAL',
                   'MONEY', 'PERSON', 'ORG', 'LOC']:
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == qtype:
                score += 3
            
            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'abrv':
        abrv = ''
        qdoc = nlp(question)
        for token in qdoc:
            text = token.text
            if len(text) >= 2 and text.isupper() and text.isalpha():
                abrv = text.lower()

        if abrv == '' and 'stand for' in question:
            tokens = question.lower().split(' ')
            abrv = tokens[tokens.index('stand')-1]

        if abrv != '':
            tokens = nltk.word_tokenize(sent)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    phrase = token.lower()
                    initials = phrase[0]

                    while i+k < len(tokens) and tokens[i+k][0].isupper():
                        phrase = phrase + ' ' + tokens[i+k].lower()
                        initials += tokens[i+k][0].lower()
                        k += 1

                    phrase = phrase.strip()
                    if initials == abrv:
                        answer = phrase

        else:
            tokens = nltk.word_tokenize(question)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    initials = token[0].lower()

                    while i + k < len(tokens) and tokens[i + k][0].isupper():
                        initials += tokens[i + k][0].lower()
                        k += 1

                    if len(initials) >= 2:
                        answer = initials

    elif qtype == 'adj':
        for token in doc:
            score = 0
            
            if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                score += 3

            token_dep = token.dep_
            token_head = lemmatize(strip_punctuation(token.head.text))
            token_head_dep = token.head.dep_

            if token_dep == dep:
                score += 1
            if token_head == head:
                score += 1
            if token_head_dep == head_dep:
                score += 1

            if token.text not in question:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if token.text.lower() == 'it':
                score = -1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'verb':
        for token in doc:
            score = 0

            if token.dep_ == 'ROOT':
                score += 1

            if lemmatize(strip_punctuation(token.text)) not in \
                    [lemmatize(strip_punctuation(s)) for s in nltk.word_tokenize(question)]:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'closed':
        first = closed_q_choices[0]
        second = closed_q_choices[1]

        # whether each option appears (and is negates)
        appear1 = False
        appear2 = False
        negate1 = False
        negate2 = False
        neg_count = 0
        tokens = nltk.word_tokenize(raw_docs[id])

        for (index, token) in enumerate(tokens):
            if token == 'not' or "n't" in token:
                neg_count += 1

                if index+1 < len(tokens):
                    if tokens[index+1] == first:
                        negate1 = True
                    if tokens[index+1] == second:
                        negate2 = True

            if token == first:
                appear1 = True
            if token == second:
                appear2 = True

        possible_answer = ''
        if appear1 and not appear2:
            if neg_count % 2 == 1:
                possible_answer = second
            else:
                possible_answer = first

        elif appear2 and not appear1:
            if neg_count % 2 == 0:
                possible_answer = second
            else:
                possible_answer = first

        elif appear1 and appear2:
            if negate1 and not negate2:
                possible_answer = second
            elif negate2 and not negate1:
                possible_answer = first
            else:
                possible_answer = second

        if possible_answer != '':
            score += 5
            if score > max_score:
                max_score = score
                answer = possible_answer

    elif qtype == 'why':

        possible_answer = ''
        score = 0

        if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:

            if 'because of' in sent:
                score += 3
                index = sent.index('because of')
                substr = sent[index+11:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break

            elif 'because' in sent:
                score += 3
                index = sent.index('because')
                substr = sent[index + 8:]
                possible_answer = substr

            elif 'due to' in sent:
                score += 3
                index = sent.index('due to')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'reason' in sent:
                score += 2
                index = sent.index('reason')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    index = substr.find('is')
                    if index != -1:
                        possible_answer = substr[index+3]
                    else:
                        index = substr.find('was')
                        if index != -1:
                            possible_answer = substr[index+4]
                        else:
                            possible_answer = sent[sent.index('reason'):]

            elif 'for' in sent:
                score += 1
                index = sent.index('for')
                substr = sent[index + 4:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'since' in sent:
                score += 1
                index = sent.index('since')
                substr = sent[index + 6:]
                possible_answer = substr

            if possible_answer != '' and score > max_score:
                answer = possible_answer
                max_score = score

    # if answer not found, find noun phrases
    if answer == '':
        for np in doc.noun_chunks:
            score = 0

            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1

            if np.text.lower() == 'it':
                score = 0

            if score > max_score:
                max_score = score
                answer = np.text
                
    if answer != correct_answer:
        print(case_count)
        print('Question: ',question)
        print('Predicted: ',strip_punctuation(answer).strip().lower())
        print('Correct Answer: ',correct_answer)
#         print('Support sentence: ')
#         for id, _ in results:
#             sent = raw_docs[id]   
#             print(sent+'\n')
        print('\n')
    
    case_count += 1


0
Question:  A kilogram could be definined as having a Planck constant of what value?
Predicted:  several tens
Correct Answer:  6966662606895999999♠6.62606896×10−34 j⋅s


1
Question:  What is the shape of the object that establishes the base unit of the kilogram?
Predicted:  the planck constant
Correct Answer:  cylinder


2
Question:  What example is given as another paired relationship of uncertainly related to standard deviation?
Predicted:  modern terms
Correct Answer:  time vs. energy


3
Question:  What does the Planck Constant refer to?
Predicted:  r2
Correct Answer:  quantum of action


5
Question:  What scientist first studied black body radiation?
Predicted:  max planck
Correct Answer:  kirchhoff


6
Question:  Who helped to give the correct quantization rules for electrons in 1926?
Predicted:  the energy
Correct Answer:  schrödinger


8
Question:  What is required to exist in classical statistics mechanics?
Predicted:  classical statistical mechanics
Correct Answer:  h


9
Qu

63
Question:  What alloy is the base unit of the kilogram made from?
Predicted:  the planck constant
Correct Answer:  platinum–iridium


65
Question:  Who modeled the atom in 1913, challenging Rutherford's model?
Predicted:  niels bohr
Correct Answer:  niels bohr


67
Question:  What is maximized as a result of a black object absorbing all the light that hits it?
Predicted:  the object
Correct Answer:  thermal light emission


68
Question:  What is the energy of a photon?
Predicted:  possible new definitions
Correct Answer:  6981358000000000000♠3.58×10−19 j


69
Question:  What color is hotter than "red hot"?
Predicted:  the colour
Correct Answer:  white hot


70
Question:  Planck studied what problem posed originally by Kirchhoff?
Predicted:  the last years
Correct Answer:  black-body radiation


71
Question:  What is the name for the amount of energy transfered by a wave in a given time?
Predicted:  the energy
Correct Answer:  intensity


72
Question:  Einstein's paper on the photoel

132
Question:  The PV industry has seen drops in module prices since what year?
Predicted:  since 2008
Correct Answer:  2008


133
Question:  As of 2012, what accounts for about half of new nameplate electrical capacity?
Predicted:  renewable energy accounts
Correct Answer:  renewable energy


134
Question:  What is the most widely used form of renewable energy?
Predicted:  16 percent
Correct Answer:  hydroelectricity


135
Question:  In Spain, wind power accounts for what percentage of electricity generated?
Predicted:  approximately 19
Correct Answer:  9 %


136
Question:  Ban Ki-moon states that renewable energy has the ability to lift the poorest nations to new levels of prosperity?
Predicted:  united nations secretarygeneral ban kimoon
Correct Answer:  renewable energy


138
Question:  How much was the total investment in renewable energy in 2012?
Predicted:  244 billion
Correct Answer:  $ 244 billion


139
Question:  What states that EU Member States must ensure that the origin o

210
Question:  What rating did 'Temple of Doom' receive?
Predicted:  spite
Correct Answer:  pg


211
Question:  When will 'The BFG' be in most theaters?
Predicted:  may 2016
Correct Answer:  july 1 , 2016


212
Question:  Who made Spielberg a knight?
Predicted:  spielberg
Correct Answer:  jacques chirac


213
Question:  Who is naive in 'Catch Me if You Can'?
Predicted:  a career
Correct Answer:  frank


214
Question:  What was the first video game Spielberg played?
Predicted:  lucasarts
Correct Answer:  pong


216
Question:  How many Oscar nominations did 'Bridge of Spies' get?
Predicted:  one
Correct Answer:  six


217
Question:  Who wrote 'The BFG'?
Predicted:  et
Correct Answer:  roald dahl


218
Question:  Which film did Kahn first work with Spielberg on?
Predicted:  the first nonpixar film
Correct Answer:  close encounters


219
Question:  Where did Spielberg's paternal grandparents immigrate from?
Predicted:  ukraine
Correct Answer:  ukraine


220
Question:  When did the 'War Hor

289
Question:  What book was 'Munich' based on?
Predicted:  2005
Correct Answer:  vengeance


290
Question:  Who wrote the 'War of the Worlds' book?
Predicted:  spielberg
Correct Answer:  h. g. wells


291
Question:  Who wrote the 'Interstellar' script?
Predicted:  tom hanks
Correct Answer:  jonathan nolan


292
Question:  Whose son disappeared in 'Minority Report'?
Predicted:  spielberg
Correct Answer:  john anderton


293
Question:  How many Oscars did Close Encounters get nominated for, besides Best Director?
Predicted:  one
Correct Answer:  six


294
Question:  Who wrote 'The Color Purple'?
Predicted:  spielberg
Correct Answer:  alice walker


295
Question:  When was Boombots released?
Predicted:  1996
Correct Answer:  1999


296
Question:  What film festival will first air 'The BFG'?
Predicted:  competition
Correct Answer:  cannes film festival


297
Question:  When is 'Ready Player One' planned to be released?
Predicted:  early 2017
Correct Answer:  2018


298
Question:  What cha

363
Question:  What do armies tend to have in depth?
Predicted:  most countries
Correct Answer:  air defence


364
Question:  What was one of the quickest areas to evolve in military technology in the 20th century?
Predicted:  the 1930s
Correct Answer:  air defence


365
Question:  Deflection settings were also called what?
Predicted:  the problem
Correct Answer:  aim-off


367
Question:  Why was Britain mainly interested in solid fuel rockets?
Predicted:  development
Correct Answer:  for anti-aircraft fire


368
Question:  What size rocket was being developed at the end of the war?
Predicted:  world war i
Correct Answer:  3-inch


369
Question:  What was the primary method for HAA fire?
Predicted:  two assumptions
Correct Answer:  aimed fire


370
Question:  What agency stated that the arrangements were an air defence ground environment?
Predicted:  nato
Correct Answer:  nato


371
Question:  What were hydrogen filled balloons called?
Predicted:  zeppelins
Correct Answer:  zeppelins



435
Question:  This system was a direct answer to what?
Predicted:  the flakvierlingwhich
Correct Answer:  the flakvierling


436
Question:  Testing is being done on weapons to create as much damage as what missile at a much lower cost?
Predicted:  a tomahawk missile
Correct Answer:  a tomahawk


437
Question:  What was used as targets in training practices?
Predicted:  smoke shells
Correct Answer:  smoke shells


438
Question:  In addition to high explosive, what else was used?
Predicted:  both high explosive
Correct Answer:  shrapnel


439
Question:  What city got attacked by more V-1 and V-2 missiles than any other?
Predicted:  antwerp
Correct Answer:  antwerp


440
Question:  Smaller .50 caliber and 8 millimeter guns have been used in what?
Predicted:  the smallest mounts
Correct Answer:  smallest mounts


441
Question:  What showed that aircraft could be a significant contribution to the battlefield?
Predicted:  an important part
Correct Answer:  world war i


442
Question:  Where

501
Question:  What country created an integrated system for ADGB?
Predicted:  britain
Correct Answer:  britain


502
Question:  Contemporary air defence systems are usually what?
Predicted:  most modern air defence systems
Correct Answer:  mobile


503
Question:  When is the railgun expected to be ready?
Predicted:  2020
Correct Answer:  2020 to 2025


504
Question:  What are the AA systems called in Russian?
Predicted:  russian
Correct Answer:  zenitnye


505
Question:  What did Germany add to the 20 mm to make it more effective?
Predicted:  rheinmetall
Correct Answer:  a 3.7 cm


506
Question:  How are targets aquired for non-ManPAD SAMs?
Predicted:  targets
Correct Answer:  air-search radar


507
Question:  Which country's air defence and aircraft has been under integrated command and control?
Predicted:  britain
Correct Answer:  soviet union


508
Question:  What systems are really geared toward mobility?
Predicted:  the diversification
Correct Answer:  soviet


509
Question:  Sho

570
Question:  What does GBAD stand for?
Predicted:  the allies most advanced technologies
Correct Answer:  ground based ad


571
Question:  Some airfield runways were painted green during what war?
Predicted:  the cold war
Correct Answer:  the cold war


572
Question:  How long is the screening period of an individual's or organization's petition to add a species to the list
Predicted:  90day
Correct Answer:  90-day screening period


573
Question:  Who wrote the article "Searching for Consensus and Predictability: Habitat Conservation Planning under the Endangered Species Act of 1973."
Predicted:  robert dthornton
Correct Answer:  robert d.thornton


574
Question:  Costs over what balance lead to the Secretary of the Treasury depositing money into the cooperative endangered species conservation fund?
Predicted:  an amount
Correct Answer:  $ 500,000


575
Question:  How many years after the noted absence of the whooping crane from its breeding range did the first law regulating wildli

631
Question:  What is the per capita GDP of Swaziland?
Predicted:  its gdp
Correct Answer:  $ 9,714


632
Question:  The University of Swaziland, Southern Nazarene University, offer higher learning in Swaziland, what is one other University?
Predicted:  swaziland christian university
Correct Answer:  swaziland christian university


633
Question:  What age attends pre-school in Swaziland?
Predicted:  1903
Correct Answer:  5-year or younger


634
Question:  For how many years is a Swazi student in junior secondary school?
Predicted:  swaziland
Correct Answer:  three


635
Question:  How many members of the Swazi House of Assembly are chosen by the king?
Predicted:  30
Correct Answer:  10


636
Question:  What quantity of imports does Swaziland get from South Africa?
Predicted:  swazilands currency
Correct Answer:  over 90 %


637
Question:  What is the chairman of the bucopho also called?
Predicted:  the chairman
Correct Answer:  indvuna ye nkhundla


638
Question:  In what year was Sw

697
Question:  Which country did Iran's leadership was quoted as being a possible target of its atomic weapons?  
Predicted:  israel
Correct Answer:  israel


698
Question:  What other branch of Islam is recognized by the Iranian government?
Predicted:  christianity
Correct Answer:  sunni branch


699
Question:  What is Iran's volunteer militia force named?
Predicted:  a paramilitary volunteer militia force
Correct Answer:  the basij


700
Question:  What religious minority is not officially recognized by the Iranian government?
Predicted:  the baháí faith
Correct Answer:  the bahá'í faith


701
Question:  The Iranian Parliament passes legislation without the involvement of what department to the detriment of wildlife?
Predicted:  industries
Correct Answer:  department of environment


702
Question:  Which Iranian scientist co-invented the first gas laser?
Predicted:  fuzzy set theory
Correct Answer:  ali javan


703
Question:  What resource does Iran have the fourth largest supply of 

757
Question:  Who won the 1980 US Presidential Elections after President Carter's repeated failed attempts to resolve the US Iranian Embassy situation? 
Predicted:  carter
Correct Answer:  ronald reagan


758
Question:  The Achaemenid Empire controlled all of the significant settlements of what ancient country during its greatest extent?
Predicted:  iran
Correct Answer:  ancient egypt


759
Question:  Which newspaper reported the official name request in 1935?
Predicted:  the new york times
Correct Answer:  new york times


760
Question:  How much did Iran spend in oil imports in 2005?
Predicted:  us4 billion
Correct Answer:  us $ 4 billion


761
Question:  How many civilians in Iran were killed during the Iran-Iraq War?
Predicted:  123220–160000
Correct Answer:  11,000–16,000


762
Question:  What city was the ritual center of the Archaemenids?
Predicted:  iran
Correct Answer:  persepolis


763
Question:  During what Age was Iran the site of several of these ancient civilizations?
Pr

825
Question:  Where does Iran border the Persian Gulf and the Gulf of Oman?
Predicted:  armenia
Correct Answer:  to the south


826
Question:  Who drove out the Pashtuns from Iran in 1729?
Predicted:  nader shah
Correct Answer:  nader shah


827
Question:  Which ancient civilization in Iran during the 4th Millenium was the most prominent?
Predicted:  elam
Correct Answer:  elam


828
Question:  In what era did Iranian visual art reach its peak?
Predicted:  the sassanid era
Correct Answer:  the sassanid era


829
Question:  How much of Iran's 33k megawatt installed capacity was based on natural gas? 
Predicted:  about 75
Correct Answer:  75 %


830
Question:  Which religious minority since the 1979 Revolution has been persecuted and in some cases executed by the Iranian government?
Predicted:  the baháí faith
Correct Answer:  the bahá'í faith


831
Question:  Who invaded Iran in 1980?
Predicted:  darius iii
Correct Answer:  the iraqi army


832
Question:  In Iranian cuisine, what is hab

883
Question:  What category of race do Italian Americans normally fall under?
Predicted:  asked
Correct Answer:  white


884
Question:  Who's authority did Truman want to increase throughout the states?
Predicted:  truman
Correct Answer:  federal


885
Question:  In relation to whites, what was the poverty rate for Native Americans in 1990?
Predicted:  the poverty rate
Correct Answer:  more than triple


886
Question:  Who followed in Johnson's footsteps in terms of ensuring equal opportunity?
Predicted:  78–80 the strides
Correct Answer:  his successor nixon


887
Question:  Where was the issue of White and Hispanic firefighters heard in the case based out of?
Predicted:  new haven
Correct Answer:  new haven , connecticut


888
Question:  Who did Lockheed sign an agreement with to seek out more minority workers?
Predicted:  vice president johnson
Correct Answer:  vice president johnson


890
Question:  Other than Texas and Florida, which other state replaced racial quotas?
Predicted:

939
Question:  Where do half of the black college students rank in terms of their performance relative to the rest of their class?
Predicted:  about half
Correct Answer:  bottom 20 percent


940
Question:  Which President declared that discrimination is contrary to the Constitutional principles of the United States?
Predicted:  president kennedy
Correct Answer:  kennedy


941
Question:  What was the main reason that "affirmative action" was chosen over "positive action"?
Predicted:  the words
Correct Answer:  alliterative quality


942
Question:  What position of importance other than Senator did Everett Dirksen hold?
Predicted:  many probusiness republicans
Correct Answer:  senate minority leader


943
Question:  Which case concerning White and Hispanic firefighters was heard by the Supreme Court in 2009?
Predicted:  the case
Correct Answer:  ricci v. destefano


944
Question:  Which administration allowed affirmative action to take a backseat to other issues?
Predicted:  the brief fo

998
Question:  Veterans of the French Army had prviously fought in which war in Italy?
Predicted:  the crimean war
Correct Answer:  the franco-austrian war


999
Question:  What was the name of the famous telelgram?
Predicted:  the deficiencies
Correct Answer:  ems dispatch


1000
Question:  At Metz, what was the approximate number of Prussian troops?
Predicted:  about 210
Correct Answer:  over 150,000


1001
Question:  On which dates did La Semaine Sanglante occur in Paris?
Predicted:  the red flag
Correct Answer:  21–28 may


1002
Question:  On which date did Frossard's and Bazaines's troops cross the German border?
Predicted:  2 august
Correct Answer:  2 august


1003
Question:  On which date did the French parliament vote to declare war on Prussia?
Predicted:  16 july 1870
Correct Answer:  16 july 1870


1004
Question:  Which side was initially successful at the Battle of Coulmiers?
Predicted:  the battle of coulmiers on
Correct Answer:  the germans


1005
Question:  In which direc

1062
Question:  Niel's plan was cast aside in favour of a plan by General Frossard and what other general??
Predicted:  this plan
Correct Answer:  bartélemy lebrun


1063
Question:  What commander of the General Staff held the expectaion of independent thinking in his officers?
Predicted:  addition
Correct Answer:  moltke


1066
Question:  In which type of warfare did the French best utilize the Chassepot?
Predicted:  a range
Correct Answer:  trench-warfare


1067
Question:  What was there strong support of in Posen?
Predicted:  the prussian province
Correct Answer:  the french


1068
Question:  British involvement in European matters in the late 19th century was considered what?
Predicted:  the late 19th century
Correct Answer:  very limited


1069
Question:  What proved its worth against the Dreyse rifle?
Predicted:  the chassepot rifle
Correct Answer:  the chassepot rifle


1070
Question:  Which Prussian general commanded the attack against the French at St. Privat?
Predicted:  the 

1133
Question:  Who struck his parents with an arrow?
Predicted:  zeus
Correct Answer:  ismenius


1134
Question:  Where was Apollo's first temple at Rome established?
Predicted:  rome
Correct Answer:  flaminian fields


1135
Question:  What did Piraeus Apollo hold in his left hand?
Predicted:  the statue
Correct Answer:  the bow


1136
Question:  Who asserted that a divine reason gave order to the seeds of the universe?
Predicted:  plato
Correct Answer:  anaxagoras


1137
Question:  Who was the mountain-god?
Predicted:  apellōn
Correct Answer:  tmolus


1138
Question:  Who was the immortal god of ideal balance?
Predicted:  apollo
Correct Answer:  apollo


1139
Question:  Who was represented as a dragon?
Predicted:  zeus
Correct Answer:  python


1140
Question:  Who was accused of matricide?
Predicted:  apollo
Correct Answer:  orestes


1141
Question:  Who did Leucothea fall in love with?
Predicted:  clytia
Correct Answer:  leucothea


1142
Question:  Who holds that the bond of marriag

1203
Question:  Who was the son of Elatus?
Predicted:  smintheus
Correct Answer:  ischys


1204
Question:  which two cult sites had widespread infuence?
Predicted:  the olympic deities
Correct Answer:  delos and delphi


1205
Question:  The inspiration oracular-cult was probably introduced from where?
Predicted:  anatolia
Correct Answer:  anatolia


1206
Question:  Which group created the legalism, the supervision of the orders of the gods, and the demand for moderation and harmony?
Predicted:  the greeks
Correct Answer:  the greeks


1207
Question:  It was believed that this woman could bring death with her arrows.
Predicted:  apollo
Correct Answer:  artemis


1208
Question:  Who was the daughter of Phlegyas?
Predicted:  coronis
Correct Answer:  coronis


1209
Question:  What is the Greek word for "to destroy?"
Predicted:  the greek words oida
Correct Answer:  apollymi


1210
Question:  What item did Apollo create and name after his lover?
Predicted:  grief
Correct Answer:  flower


1

1266
Question:  What tiny object's source plays a role in determining wood density?
Predicted:  xylophaga
Correct Answer:  seed


1267
Question:  What kind of dried wood retains a small quantity of water but is considered absolutely dry?
Predicted:  ovendried wood
Correct Answer:  oven-dried


1268
Question:  What pieces of furniture that most people use every night can be made out of wood?
Predicted:  people
Correct Answer:  beds


1269
Question:  What might wood be used for in a building made from brick or other materials?
Predicted:  buildings
Correct Answer:  supporting material


1270
Question:  Other than the base of a side branch, what tree feature causes a knot?
Predicted:  a knot
Correct Answer:  a dormant bud


1271
Question:  What's the abbreviation for medium-density fiberboard?
Predicted:  wood
Correct Answer:  mdf


1272
Question:  What approximate percentage of carbon does wood have?
Predicted:  approximately 50
Correct Answer:  50 %


1273
Question:  What kind of strong

1327
Question:  What animals cause the reddish-brown streaks of color in hickory wood?
Predicted:  the reddishbrown streaks
Correct Answer:  birds


1328
Question:  What causes the black checked pattern in the wood of western hemlocks?
Predicted:  the black check
Correct Answer:  insect attacks


1329
Question:  Wood fibers from wood strands, lumber, and what other source can be glued together to make larger units?
Predicted:  glued engineered wood products
Correct Answer:  veneers


1331
Question:  When wood is described as "green," what does "green" mean?
Predicted:  the late 19th century
Correct Answer:  water-saturated


1333
Question:  What could be made out of wood that could be used to fight off attackers?
Predicted:  nearly all boats
Correct Answer:  weapons


1334
Question:  What do broken limbs and deep wounds open a door for in a tree?
Predicted:  every broken limb
Correct Answer:  decay


1335
Question:  Which property of a tree's wood that gives clues about its strength an

1388
Question:  Which kind of wood contains lignin derived from two main alcohol sources?
Predicted:  softwood lignin
Correct Answer:  hardwood


1389
Question:  Is the color difference between heartwood and sapwood usually very subtle or conspicuous?
Predicted:  species
Correct Answer:  conspicuous


1390
Question:  How many cubic meters of growing stock forest are there on the planet?
Predicted:  earth
Correct Answer:  434 billion


1391
Question:  What category of items often constructed from wood does a chair belong to?
Predicted:  many types
Correct Answer:  furniture


1392
Question:  What's the common name for Picea abies?
Predicted:  the choice
Correct Answer:  european spruce


1393
Question:  What direction of compression in relation to its grain won't weaken wood with sound knots in it?
Predicted:  sound knots
Correct Answer:  parallel


1394
Question:  What other modern material has joined composites, carbon fiber, titanium, and aluminum to replace wood in the manufacture o

1446
Question:  How is wood broken down into chips and fibers?
Predicted:  wood
Correct Answer:  mechanically


1447
Question:  What's the name for the crystalline polymer derived from glucose?
Predicted:  cellulose
Correct Answer:  cellulose


1448
Question:  What makes up 40-50% of the cell walls in wood?
Predicted:  25
Correct Answer:  cellulose


1449
Question:  What is the living wood in a tree called?
Predicted:  all wood
Correct Answer:  sapwood


1450
Question:  What do people outside of North America call the sawn boards used for building?
Predicted:  north america
Correct Answer:  timber


1451
Question:  What holds together an engineered wood product?
Predicted:  engineered wood products
Correct Answer:  glue


1452
Question:  About how many cubic meters of wood was used in 1991 to make products like glulam, LVL, and structural composite lumber?
Predicted:  scl
Correct Answer:  100 million


1453
Question:  Besides ring-porous, what's the other class hardwoods are often divi

1507
Question:  What is the captain a part of before the start of the game?
Predicted:  each team
Correct Answer:  the coin toss


1509
Question:  What year did the Sheffield FA form?
Predicted:  1857
Correct Answer:  1867


1510
Question:  How many yellow cards lead to a red card?
Predicted:  second
Correct Answer:  a second


1511
Question:  What city did a peaceful match between two armies take place in 2007?
Predicted:  scotland
Correct Answer:  bouaké


1512
Question:  Players are usually strategically placed by who?
Predicted:  a coach
Correct Answer:  coach


1513
Question:   A team players layout is a what?
Predicted:  the layout
Correct Answer:  formation


1515
Question:  what is allowed but restricted?
Predicted:  the olympics
Correct Answer:  physical contact


1516
Question:  What city was FIFA formed?
Predicted:  paris
Correct Answer:  paris


1517
Question:  What unit of measurement is now expressed by the Laws?
Predicted:  england
Correct Answer:  metric


1518
Question

1583
Question:  Who was one of the five inductees of the Rock and Roll Hall of Fame?
Predicted:  madonna
Correct Answer:  madonna


1584
Question:  When was Madonna inducted into the UK Hall of Fame?
Predicted:  her first year
Correct Answer:  november 2004


1585
Question:  How long did Madonna spent learning to play the violin?
Predicted:  1999
Correct Answer:  three months


1586
Question:  How much did Rebel Heart Tour grossed?
Predicted:  rocco
Correct Answer:  $ 169.8 million


1587
Question:  When did Madonna's style become a fashion trend?
Predicted:  the 1980s
Correct Answer:  1980s


1588
Question:  How many submissions does the initiative have?
Predicted:  number one
Correct Answer:  over 3,000


1589
Question:  Who is Madonna's second husband?
Predicted:  rocco john ritchie
Correct Answer:  guy ritchie


1590
Question:  When was the adoption finalized?
Predicted:  may 2008
Correct Answer:  may 2008


1591
Question:  What was Madonna dressed in for the tour?
Predicted:  sept

1649
Question:  Who is first to have reenactment of her music videos in concerts?
Predicted:  madonna
Correct Answer:  madonna


1650
Question:  Which disease did her mother passed away from?
Predicted:  madonna
Correct Answer:  breast cancer


1651
Question:  Which award did Madonna win for the single "Beautiful Stranger?"
Predicted:  the 1999 film austin powers
Correct Answer:  grammy award


1652
Question:  How much did the Tour grossed?
Predicted:  131 billion
Correct Answer:  $ 305.2 million


1653
Question:  When was Confessions on a Dance Floor released?
Predicted:  november 2005
Correct Answer:  november 2005


1654
Question:  Who remixed most of Madonna's album tracks?
Predicted:  madonna
Correct Answer:  benitez


1655
Question:  When did Madonna audition for a dance role in the show "A Chorus Line?" 
Predicted:  february 1984
Correct Answer:  february 1984


1656
Question:  Who is the most influential recording artist of all time?
Predicted:  madonna
Correct Answer:  madonna

1720
Question:  What makes pure metals impure metals?
Predicted:  a pure metal
Correct Answer:  unwanted impurities


1721
Question:  What is the name of steel when it is being formed of two phases?
Predicted:  the steel
Correct Answer:  heterogeneous


1722
Question:  When did iron start to become melted by people?
Predicted:  bc
Correct Answer:  middle ages


1723
Question:  What alloy is formed naturally? 
Predicted:  a molten metal
Correct Answer:  electrum


1726
Question:  Copper and zinc combine to make what?
Predicted:  brass
Correct Answer:  brass


1727
Question:  What did ancient Romans use to gild their armor?
Predicted:  the ancient romans
Correct Answer:  mercury-tin amalgams


1728
Question:  What is the name of one interstitial alloy?
Predicted:  an interstitial alloy
Correct Answer:  steel


1729
Question:  Since when have Amalgams been used?
Predicted:  200
Correct Answer:  200 bc


1730
Question:  One of the first "age hardening" alloys used were called?
Predicted:  

1797
Question:  What is the most popular football club in Paris?
Predicted:  paris most popular sport clubs
Correct Answer:  paris saint-germain f.c .


1798
Question:  Who built the Marais-quarter Agodudas Hakehilos Synagogue?
Predicted:  83 synagogues
Correct Answer:  hector guimard


1800
Question:  What is the name of the rugby union in Paris?
Predicted:  stade français
Correct Answer:  stade français


1801
Question:  Near what were most restaurants opened?
Predicted:  the moulin rouge
Correct Answer:  theatres


1802
Question:  How many seats are in the State de France?
Predicted:  29
Correct Answer:  80,000


1803
Question:  How large is the metropolitan area of Paris?
Predicted:  the metropolitan area
Correct Answer:  2,300 km2


1804
Question:  Where was Saint Denis beheaded?
Predicted:  paris
Correct Answer:  mountain of martyrs


1805
Question:  What town was Patrick Ollier a mayor of?
Predicted:  the first president
Correct Answer:  rueil-malmaison


1806
Question:  How man

1874
Question:  When did THe Contemporary Art museum of the Louis Vuitton Foundation open?
Predicted:  october 2014
Correct Answer:  october 2014


1875
Question:  where does T5 run?
Predicted:  paris
Correct Answer:  saint-denis to garges-sarcelles


1876
Question:  Who funds these homeless shelters?
Predicted:  the city
Correct Answer:  the city


1877
Question:  What is the largest cinema room today in Paris?
Predicted:  paris largest cinema room
Correct Answer:  le grand rex


1878
Question:  What type of eating place is defined as a neighborhood restaurant?
Predicted:  a type
Correct Answer:  bistro


1879
Question:  In what century did Collete, ANdre Gide and Francois Mauriac dominate the literary community?
Predicted:  the 20th century
Correct Answer:  20th


1880
Question:  What helped establish a successful defense in the Siege of Paris?
Predicted:  fortification
Correct Answer:  bridges


1881
Question:  Who was Prefect Nicholas Frochot under?
Predicted:  napoleon bonaparte
C

1952
Question:  How many people marched on January 11 against terrorism?
Predicted:  an estimated 15 million
Correct Answer:  1.5 million


1953
Question:  Who is the second largest business district employer?
Predicted:  levalloisperret
Correct Answer:  la défense


1955
Question:  Where are France's highest courts located?
Predicted:  paris
Correct Answer:  paris


1956
Question:  In what century was Cafe Anglais, Cafe de Paris and Rocher de Cancale opened?
Predicted:  the 19th century
Correct Answer:  19th


1957
Question:  What is France's public radio broadcaster?
Predicted:  radio france
Correct Answer:  radio france


1958
Question:  Where is CESLA located?
Predicted:  strasbourg
Correct Answer:  neuilly-sur-seine


1961
Question:  Where was the Eiffel Tower revealed?
Predicted:  paris
Correct Answer:  1889 universal exposition


1962
Question:  Where is the head quarters for the International Federation for Human Rights?
Predicted:  paris
Correct Answer:  paris


1965
Question:

2030
Question:  When did CBC discontinue its late-night broadcast of The National?
Predicted:  10 pm
Correct Answer:  october 2006


2031
Question:  What is CBC Country Canada now called?
Predicted:  cottage life television
Correct Answer:  cottage life television


2032
Question:  Which title is CBC's flagship newscast?
Predicted:  the cbcs flagship newscast
Correct Answer:  the national


2033
Question:  Which broadcast do affiliates generally carry?
Predicted:  private
Correct Answer:  10 p.m


2034
Question:  When do most other stations view their local newscasts?
Predicted:  500 pm
Correct Answer:  weekend evenings


2036
Question:  Which CBC affiliate joined E! in February 2006?
Predicted:  one private cbc affiliate
Correct Answer:  chbc-tv in kelowna


2037
Question:  CBC was eclusive carrier of what other sport during the 2004-2005 season?
Predicted:  canadian curling association events
Correct Answer:  curling


2038
Question:  Where could CBC's channels need to be relocated i

2093
Question:  How many species of moss is there?
Predicted:  around 12000
Correct Answer:  12,000


2094
Question:  What is the name of a branch system?
Predicted:  the takhtajan system
Correct Answer:  inflorescence


2095
Question:  What group of now extinct seed plants had many of the traits of what are now flowering plants?
Predicted:  gigantopterids
Correct Answer:  gigantopterids


2096
Question:  Beech and maple had already appeared by what period?
Predicted:  the late cretaceous
Correct Answer:  cretaceous


2097
Question:  What shape are the bundles in the young stem of dicotyledons arranged in?
Predicted:  the dicotyledons
Correct Answer:  an open ring


2098
Question:  What formal subclass classification were flowering plants given in 2009?
Predicted:  a formal classification
Correct Answer:  magnoliidae


2099
Question:  What feature helped flowers not get plucked?
Predicted:  human crops
Correct Answer:  prettiest


2100
Question:  How are some male and female parts sepa

2159
Question:  How many hours of coursework are required to obtain a JD at the Washington University School of Law?
Predicted:  3
Correct Answer:  85 hours


2160
Question:  When was the cornerstone laid for Busch Hall?
Predicted:  october 20 1900
Correct Answer:  october 20 , 1900


2161
Question:  Who provided the donation that enabled the opening  Simon Hall?
Predicted:  john e simon
Correct Answer:  john e. simon


2163
Question:  How many schools does Washington University have?
Predicted:  washington university school of law
Correct Answer:  seven


2164
Question:  Where was an Executive MBA program established by Washington University in 2002?
Predicted:  shanghai
Correct Answer:  shanghai


2165
Question:  At what division level does the Washington University sports teams compete?
Predicted:  washington universitys
Correct Answer:  division iii


2166
Question:  What percentage of the sequencing did the Washington University Medical School contribute to the Human Genome Projec

2232
Question:  How many divisions make up Arts & Sciences at Washington University?
Predicted:  three
Correct Answer:  three divisions


2234
Question:  How many Nobel laureates have been affiliated with Washington University?
Predicted:  twentyfive
Correct Answer:  twenty-five


2235
Question:  What name was selected by the Board of Trustees in 1854 for Washington University?
Predicted:  washington institute
Correct Answer:  washington institute


2237
Question:  How many research centers are housed by the Brown School? 
Predicted:  12
Correct Answer:  12 research centers .


2238
Question:  Who designed the new location of  the Mildred Lane Kemper Art Museum?
Predicted:  the mildred lane kemper art museum
Correct Answer:  fumihiko maki


2239
Question:  What building provides the location of the Washington University School of Law?
Predicted:  the olin business school
Correct Answer:  anheuser-busch hall


2240
Question:  When did Washington University begin to expand west?
Predicte

2303
Question:  Self-image, self-esteem, and individuality relate to what?
Predicted:  selfimage
Correct Answer:  a psychological identity


2304
Question:  The Neo-Eriksonian identity status paradigm focuses on what twin concepts?
Predicted:  the twin concepts
Correct Answer:  exploration and commitment


2305
Question:  What distinction is sometimes referred to as the self?
Predicted:  cognitive psychology
Correct Answer:  ego identity


2306
Question:  What term have Anthropologists employed to refer to the Eriksonian idea of selfhood?
Predicted:  the term identity
Correct Answer:  identity


2307
Question:  The personal idiosyncrasies that separate individuals are called what?
Predicted:  the eriksonian framework
Correct Answer:  the personal identity


2308
Question:  The strategic manipulator, pastiche personality, and relational self are linked to the rise of what culture?
Predicted:  kenneth gergen
Correct Answer:  postmodern culture


2310
Question:  Dissociating from the char

2364
Question:  Despite some traditions and rights, no see has the right to what?
Predicted:  the principal clergy
Correct Answer:  the cardinalate


2365
Question:  Who separated them?
Predicted:  velletri
Correct Answer:  pope pius x


2366
Question:  How do Oriental Patriarchs sign?
Predicted:  use
Correct Answer:  sanctae ecclesiae cardinalis


2367
Question:  In what year was the letter sent?
Predicted:  the 16th century
Correct Answer:  747


2368
Question:  Which cardinals are ranked lowest?
Predicted:  three eastern patriarchs
Correct Answer:  cardinal deacons


2371
Question:  At one time, Priest permanently were assigned to a church were referred to as?
Predicted:  citation
Correct Answer:  cardinal


2372
Question:  What is the name of the hat that the pope used to wear when naming a new Cardinal?
Predicted:  citation
Correct Answer:  a galero .


2373
Question:  What should the new pople have been before becoming a pope?
Predicted:  decree
Correct Answer:  cardinal


2374
Q

2435
Question:  Plato and Aristotle wrote what type of literature?
Predicted:  some writings
Correct Answer:  philosophical texts


2436
Question:  What two elements have reduced the literary nature of scientific journals?
Predicted:  advances
Correct Answer:  advances and specialization


2437
Question:  Serious studies in logic tend to resemble what discipline, moreso than literature?
Predicted:  philosophy
Correct Answer:  mathematics


2438
Question:  What author espoused the theory of the "pristine unconscious?"
Predicted:  his ‘‘third force psychology
Correct Answer:  d.h lawrence


2439
Question:  Drama is sometimes blended with what other elements?
Predicted:  drama
Correct Answer:  music and dance


2440
Question:  What writer's entry in "The English Journal" was concerned with young adult fiction?
Predicted:  an entry
Correct Answer:  d. mitchell


2441
Question:  Besides this major division, what are two other sub-divisions to describe literature?
Predicted:  major literary 

2495
Question:  Comparing brain sizes among different creatures is used most commonly by what?
Predicted:  species
Correct Answer:  encephalization quotient ( eq )


2496
Question:  Chemicals called neurotransmitters are released at what part of the brain?
Predicted:  a synapse
Correct Answer:  synapses


2497
Question:  What scientific field tries to understand the mind and behavior?
Predicted:  psychology
Correct Answer:  psychology


2498
Question:  Glial cells are also referred to as what?
Predicted:  glia
Correct Answer:  glia or neuroglia


2499
Question:  Clusters of small nuclei comprise what parts of the brain?
Predicted:  other parts
Correct Answer:  thalamus and hypothalamus


2500
Question:  The RHT is an abbreviation for what?
Predicted:  the brain
Correct Answer:  retinohypothalamic tract


2501
Question:  Creatures that have a diffuse nerve net are called what?
Predicted:  a few primitive organisms
Correct Answer:  cnidarians


2502
Question:  Mammals have a pallium that

2561
Question:  The suprachiasmatic nucleus is a small part of what part of the brain?
Predicted:  the arousal system
Correct Answer:  the hypothalamus


2562
Question:  The thalamus and hypothalamus comprise what region of the brain?
Predicted:  other parts
Correct Answer:  diencephalon


2563
Question:  The brain is surrounded by what type of fluid?
Predicted:  a system
Correct Answer:  cerebrospinal fluid


2564
Question:  How many synapses does the human brain supposedly contain?
Predicted:  several million
Correct Answer:  100 trillion synapses ;


2565
Question:  What type of responsiveness can be used without a brain?
Predicted:  some basic types
Correct Answer:  reflexes


2566
Question:  Homeostasis is Greek for what phrase?
Predicted:  an animal
Correct Answer:  `` standing still ''


2567
Question:  Which part of the arousal system controls the body's biological clock?
Predicted:  the scn
Correct Answer:  the suprachiasmatic nucleus


2568
Question:  Computers were invented 

2626
Question:  Besides English, what other language is spoken in Malta?
Predicted:  spanish
Correct Answer:  maltese


2627
Question:  Who led the Nazis?
Predicted:  adolf hitler
Correct Answer:  adolf hitler


2628
Question:  What was the name of the different governments forming in Ancient Greece?
Predicted:  the period
Correct Answer:  city-states


2629
Question:  Between which two nations was the Treaty of the Pyrenees signed?
Predicted:  the spanish crown
Correct Answer:  spain and france


2630
Question:  What is the most common group of languages spoken in Mediterranean Europe?
Predicted:  other language groupings
Correct Answer:  romance languages


2632
Question:  Where was the headquarters of the Western Roman Empire?
Predicted:  the roman empire
Correct Answer:  rome


2633
Question:  What was the military partnership between countries aligned with the Soviet Union called?
Predicted:  the warsaw pact
Correct Answer:  the warsaw pact


2634
Question:  Which new regime appea

2697
Question:  When was the overbudget amount discovered?
Predicted:  the ministry of defence
Correct Answer:  2013


2699
Question:  Which person in the Joint Forces Command supports the Chief of the Defence Staff?
Predicted:  the vice chief
Correct Answer:  the commander


2700
Question:  Who created the position of Minister for Coordination of Defence?
Predicted:  stanley baldwin
Correct Answer:  stanley baldwin


2701
Question:  Who in the MoD was convicted of fraud?
Predicted:  gordon foxley
Correct Answer:  gordon foxley


2702
Question:  How much land is owned by the MoD?
Predicted:  as much as £500m
Correct Answer:  227,300 hectares


2704
Question:  In what concept is the MoD considered a leader?
Predicted:  a changing world white paper
Correct Answer:  defence diplomacy


2705
Question:  What years was Foxley employed by the MoD?
Predicted:  the most notable fraud conviction
Correct Answer:  1981 to 1984


2706
Question:  Where was Britain involved in a land war when some th

2765
Question:  Of what was Sanskrit once thought to be a divider?
Predicted:  sanskrit linguist madhav deshpande
Correct Answer:  social class


2766
Question:  Which Sanskrit linguist describes Sanskrit as a "particularly refined or perfected manner of speaking?"
Predicted:  sanskrit linguist madhav deshpande
Correct Answer:  madhav deshpande


2767
Question:  How long was Sanskrit a language of culture?
Predicted:  the 18th century
Correct Answer:  2000 years


2769
Question:  Which Indian state has Sanskrit as its official language?
Predicted:  uttarakhand
Correct Answer:  uttarakhand


2770
Question:  Who sent missionaries to China?
Predicted:  buddhism
Correct Answer:  ashoka


2771
Question:  Who wrote an opera in Sanskrit?
Predicted:  philip glass
Correct Answer:  philip glass


2772
Question:  What language has influenced the languages of India?
Predicted:  hindi
Correct Answer:  sanskrit


2773
Question:  What is the theory called dealing with the transfer of Sanskrit to Indi

2833
Question:  Who ranked BYU as No. 11 of institutions whose grads were top-rated by recruiters in a 2010 article?
Predicted:  whose graduates
Correct Answer:  wall street journal


2834
Question:  What cannot be violated because the expression is dishonest according to the new Statement on Academic Freedom?
Predicted:  the university
Correct Answer:  honor code


2835
Question:  When was the first property for what would become BYU acquired?
Predicted:  1909
Correct Answer:  october 16 , 1875


2836
Question:  Who is responsible for proposing BYU's current name?
Predicted:  matthew s holland
Correct Answer:  benjamin cluff , jr


2837
Question:  Where does CES sponsor BYU's sister schools?
Predicted:  jerusalem
Correct Answer:  hawaii and idaho


2838
Question:  Who ranked BYU the vest value for college in 2007?
Predicted:  the princeton review
Correct Answer:  the princeton review


2839
Question:  How is BYU's collegiate ballroom dance program rated worldwide?
Predicted:  the ndca

2894
Question:  Where did best selling author Stephenie Meyer graduate from in 1995?
Predicted:  byu
Correct Answer:  byu


2895
Question:  What percentage of graduates had taken a hiatus from their BYU studies to serve as an LDS missionary?
Predicted:  some 97 percent
Correct Answer:  97


2897
Question:  Who are both LDS and Non-LDS students required to provide an endorsement from upon submitting their application?
Predicted:  both lds
Correct Answer:  an ecclesiastic leader


2898
Question:  What is the national marriage average among college graduates?
Predicted:  a national marriage average
Correct Answer:  11 percent


2899
Question:  What is former alumnus Paul D. Boyer known for being?
Predicted:  the university
Correct Answer:  nobel prize winner


2900
Question:  Which BYU student produced algorithm is found in Adobe Photoshop?
Predicted:  student achievements
Correct Answer:  magnetic lasso


2901
Question:  At what age, since 2012, are men allowed to serve a mission after h

2957
Question:  PAL, SECAM, and NTSC frame rates apply to what definition televisions?
Predicted:  digital or high definition broadcasts
Correct Answer:  analogue standard


2958
Question:  Which standard were ATSC and DVB based on?
Predicted:  both atsc
Correct Answer:  mpeg-2


2959
Question:  When a parameter is dropped, the remaining numeric parameter is specified first, followed by what?
Predicted:  first
Correct Answer:  the scanning system


2960
Question:  What were 1920x1080i and 1280x720p defined by?
Predicted:  some reports
Correct Answer:  us smpte standards


2962
Question:  How was the Freeview HD service rolled out in the UK?
Predicted:  the freeview hd service
Correct Answer:  region by region


2963
Question:  What is usually dropped for the commercial naming of an HDTV product?
Predicted:  a product
Correct Answer:  the frame rate


2964
Question:  What signal did the first European HDTV broadcast use?
Predicted:  these first european hdtv broadcasts
Correct Answer:  

3029
Question:  How does Oklahoma's per-capita arts spending rank?
Predicted:  is
Correct Answer:  17th


3030
Question:  Which one is more accurately placed, the TX/NM border or the OK/NM border?
Predicted:  oklahoma
Correct Answer:  oklahoma/new mexico border


3031
Question:  What is Oklahoma's rank among states producing crude oil?
Predicted:  the nations thirdlargest producer
Correct Answer:  fifth


3032
Question:  When was McVeigh executed?
Predicted:  june 11 2001
Correct Answer:  june 11 , 2001


3033
Question:  What is the third-most-prevalent language in Oklahoma?
Predicted:  arabic
Correct Answer:  cherokee


3034
Question:  What is the busiest highway in Oklahoma?
Predicted:  2008
Correct Answer:  interstate 44


3036
Question:  Where has the Medieval Fair of Norman been held since 2003?
Predicted:  oklahoma
Correct Answer:  reaves park


3037
Question:  What are Oklahoma's Native American ballerinas called?
Predicted:  the state
Correct Answer:  the five moons


3038
Ques

3103
Question:  Who commands the Oklahoma National Guard?
Predicted:  the principal head
Correct Answer:  the governor


3104
Question:  What was Spiro, Oklahoma named for?
Predicted:  the westernmost center
Correct Answer:  spiro mounds


3105
Question:  What percent of Oklahomans speak only English at home, as of 2000?
Predicted:  english
Correct Answer:  92.6 %


3106
Question:  How many square miles is Oklahoma?
Predicted:  181035
Correct Answer:  69,898


3107
Question:  How many ports does Oklahoma have?
Predicted:  two
Correct Answer:  two


3108
Question:  Where is 'Discoveryland!'?
Predicted:  oklahoma
Correct Answer:  sand springs


3109
Question:  What parts of Oklahoma rarely see below-zero temperatures?
Predicted:  the state
Correct Answer:  south-central and southeastern


3110
Question:  How many tornadoes hit Oklahoma each year?
Predicted:  about one
Correct Answer:  average 62


3112
Question:  What does Oklahoma spend per student in public schools as of 2008?
Predicte

3188
Question:  An activist in the Arab spring said they were using youtube to do what?
Predicted:  quoted
Correct Answer:  tell the world


3189
Question:  What was the highest earning youtube partner video producer?
Predicted:  preroll advertisements
Correct Answer:  $ 12 million


3190
Question:  What was the original title of youtube red?
Predicted:  the announcement
Correct Answer:  music key


3191
Question:  How much content on youtube comes from outside the US?
Predicted:  2014
Correct Answer:  around three quarters


3192
Question:  As of 2010 what is the maximum length of a video a user can upload given the proper authority?
Predicted:  youtube
Correct Answer:  unlimited


3193
Question:  How many independent labels did the Merlin Network represent?
Predicted:  over 20000
Correct Answer:  20,000


3194
Question:  How many copyright infringements must an account have before the account is deleted?
Predicted:  three
Correct Answer:  three


3195
Question:  What type of videos d

ValueError: list.remove(x): x not in list