In [12]:
import nltk
import json
import spacy
from nltk.corpus import stopwords
from math import log
from collections import defaultdict, Counter
from string import punctuation
from nltk.stem.wordnet import WordNetLemmatizer
import gensim
import re
import csv
from gensim.summarization import bm25

# Variables

In [13]:
OPEN_QUESTION_WORDS = ['what','who','whose','whom','where','when','why','how',
                       'which',"what's","who's","where's","how's"]
CLOSED_QUESTION_WORDS = ['is','are','am','was','were','do','does,','did','can',
                         'could','will','would','shall','should','have','has',
                         'had']

# Stop words
stop = set(stopwords.words('english'))

lmtz = WordNetLemmatizer()

with open('testing.json') as json_data:
    test = json.load(json_data)

with open('documents.json') as json_data:
    documents = json.load(json_data)

# Spacy toolkit
nlp = spacy.load('en_core_web_sm')

punc = set(punctuation)

In [14]:
def strip_punctuation(s):
    return ''.join(c for c in s if c not in punc)

In [42]:
def lemmatize(token):
    lemma = lmtz.lemmatize(token, 'v')
    if lemma == token:
        lemma = lmtz.lemmatize(token, 'n')
    return lemma

        
def extract_term_freqs(doc):
    tfs = {}
    for token in nltk.word_tokenize(doc):
        lemma = lemmatize(token.lower())
        if lemma not in stop and lemma.isalpha():
            tfs[lemma] = tfs.get(lemma, 0) + 1
    return tfs


def compute_doc_freqs(doc_term_freqs):
    dfs = Counter()
    for tfs in doc_term_freqs.values():
        for term in tfs.keys():
            dfs[term] += 1
    return dfs


def query_vsm(query, index, k=4):
    accumulator = Counter()
    for term in query:
        postings = index[term]
        for docid, weight in postings:
            accumulator[docid] += weight
    return accumulator.most_common(k)


# Find the question word
def get_qword(question):
    tokens = nltk.word_tokenize(question.lower())
    for token in tokens:
        if token in OPEN_QUESTION_WORDS:
            return token
    for token in tokens:
        if token in CLOSED_QUESTION_WORDS:
            return token
    return 'others'

In [43]:
# length of longest same sequences of keywords
def get_overlap(sent1, sent2):
    tokens1 = []
    tokens2 = []

    for token in nltk.word_tokenize(strip_punctuation(sent1.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens1.append(lemma)

    for token in nltk.word_tokenize(strip_punctuation(sent2.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens2.append(lemma)

    max = 0
    for i in range(len(tokens1)):
        for j in range(len(tokens2)):

            if tokens1[i] == tokens2[j]:
                length = 1

                ii = i + 1
                jj = j + 1
                while ii < len(tokens1) and jj < len(tokens2) and \
                        tokens1[ii] == tokens2[jj]:
                    ii += 1
                    jj += 1
                    length += 1

                if length > max:
                    max = length

    return max

# Write to test file

In [44]:
csvFile = open("high.csv", "w")
writer = csv.writer(csvFile)
header = ['id','answer']
writer.writerow(header)


case_count = 0
empty_count = 0
# test = [test[17]]
for test_case in test:
    question = test_case['question']
    docid = test_case['docid']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)
    

    # TFIDF
    doc_term_freqs = {}
    for (id, raw_doc) in enumerate(raw_docs):
        term_freqs = extract_term_freqs(raw_doc)
        doc_term_freqs[id] = term_freqs
    M = len(doc_term_freqs)

    doc_freqs = compute_doc_freqs(doc_term_freqs)

    vsm_inverted_index = defaultdict(list)
    for docid, term_freqs in doc_term_freqs.items():
        N = sum(term_freqs.values())
        length = 0

        # find tf*idf values and accumulate sum of squares
        tfidf_values = []
        for term, count in term_freqs.items():
            tfidf = float(count) / N * log(M / float(doc_freqs[term]))
            tfidf_values.append((term, tfidf))
            length += tfidf ** 2

        # normalise documents by length and insert into index
        length = length ** 0.5
        for term, tfidf in tfidf_values:
            # inversion of the indexing, term -> (doc_id, score)
            vsm_inverted_index[term].append([docid, tfidf / length])

    for term, docids in vsm_inverted_index.items():
        docids.sort()

    terms = extract_term_freqs(question) 
    results = query_vsm(terms, vsm_inverted_index)
    
    
#     tokenized_sentence = []
#     for each_sentence in raw_docs:
#         filter_stop_word = []
#         sentence_as_words = nltk.word_tokenize(each_sentence)
#         for each_word in sentence_as_words:
#             if each_word not in stop:
#                 filter_stop_word.append(each_word)
        
#         tokenized_sentence.append(filter_stop_word)
        
#     bm25Model = bm25.BM25(tokenized_sentence)
#     average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
    
#     query = []
#     for word in nltk.word_tokenize(question):
#         if word not in stop:
#             query.append(word)
        
#     scores = bm25Model.get_scores(query,average_idf)
#     bm25_dic = Counter()
    
#     sentence_id = 0
#     for each_score in scores:
#         bm25_dic[sentence_id] = each_score
#         sentence_id += 1
#     results = bm25_dic.most_common(4)


    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    qtype = ''

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of sentence
    for token in doc:
        if 'nsubj' in token.dep_:
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            dobj = lemmatize(strip_punctuation(token.text))

    # for noun (phrase) questions, get answer dependency
    for chunk in doc.noun_chunks:
        if qword in chunk.text:
            dep = chunk.root.dep_
            head = lemmatize(strip_punctuation(chunk.root.head.text))
            head_dep = chunk.root.head.dep_

    # determine answer type
    if 'stand for' in question or 'abbreviat' in question:
        qtype = 'abrv'

    elif qword in ['who',"who's",'whom','whose']:
        qtype = 'who'

    elif qword == 'when':
        qtype = 'when'

    elif qword in ['where',"where's"]:
        qtype = 'where'

    elif qword in ['how',"how's"]:
        if next_token == 'much':
            qtype = 'MONEY'
        elif next_token == 'many':
            qtype = 'CARDINAL'
        elif next_token == 'long':
            qtype = 'DATE'
        elif next_token in ['far','big','wide','deep','tall','high','fast','heavy']:
            qtype = 'QUANTITY'
        elif next_token in ['old','young']:
            qtype = 'DATE'
        elif next_token in ['does','did','do','have','has','had','should',
                              'can','could','will','would','must']:
            if dobj != '':
                qtype = 'adj'
            else:
                qtype = 'verb'

    elif qword in ['what', "what's", 'which']:
        if 'year'in tokens or \
                'day' in tokens or \
                'month' in tokens or \
                'era' in tokens or \
                'age' in tokens or \
                'century' in tokens or \
                'week' in tokens or \
                'period' in tokens or \
                'dynasty' in tokens:
            qtype = 'DATE'

        elif 'company' in tokens or \
                'organization' in tokens or \
                'organisation' in tokens or \
                'corporation' in tokens or \
                'institution' in tokens or \
                'university' in tokens or \
                'corporation' in tokens or \
                'association' in tokens or \
                'union' in tokens or \
                'agency' in tokens:
            qtype = 'ORG'

        elif 'city' in tokens or \
                'country' in tokens or \
                'state' in tokens or \
                'province' in tokens or \
                'county' in tokens:
            qtype = 'GPE'

        elif 'place' in tokens or \
                'river' in tokens or \
                'mountain' in tokens or \
                'ocean' in tokens or \
                'region' in tokens or \
                'area' in tokens or \
                'sea' in tokens or \
                'lake' in tokens or \
                'continent' in tokens or \
                'location' in tokens or \
                'forest' in tokens or \
                'jungle' in tokens:
            qtype = 'LOC'

        elif 'nationality' in tokens:
            qtype = 'NORP'

        elif 'building' in tokens or \
            'airport' in tokens or \
            'highway' in tokens or \
            'bridge' in tokens or \
            'harbour' in tokens or \
            'harbor' in tokens or \
            'port' in tokens or \
            'dam' in tokens:
            qtype = 'FACILITY'

        elif 'hurricane' in tokens or \
            'battle' in tokens or \
            'war' in tokens:
            qtype = 'EVENT'

        elif 'book' in tokens or \
            'novel' in tokens or \
            'song' in tokens or \
            'music' in tokens or \
            'painting' in tokens:
            qtype = 'WORK_OF_ART'

        elif 'language' in tokens or \
                'speak' in tokens:
            qtype = 'LANGUAGE'

        elif 'percentage' in tokens or 'percent' in tokens:
            qtype = 'PERCENT'

        elif 'value' in tokens or \
                'distance' in tokens or \
                'size' in tokens or \
                'length' in tokens or \
                'depth' in tokens or \
                'height' in tokens or \
                'density' in tokens or \
                'speed' in tokens or \
                'weight' in tokens or \
                'area' in tokens or \
                'temperature' in tokens or \
                'volume' in tokens:
            qtype = 'QUANTITY'

        elif 'number' in tokens:
            qtype = 'CARDINAL'

        elif 'price' in tokens:
            qtype = 'MONEY'

        elif 'name' in tokens:
            qtype = 'NE'

        else:
            # what...do type question
            tokens.remove(next_token)
            if 'do' in tokens:
                qtype = 'verb'
            else:
                qtype = 'noun'

    elif qword == 'why':
        qtype = 'why'

    elif qword in CLOSED_QUESTION_WORDS:
        qtype = 'closed'

        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            prev1 = tokens[index - 1]
            next1 = tokens[index + 1]
            tag_tokens = nltk.pos_tag(tokens)

            tag = tag_tokens[index - 1][1]

            # if answer is a noun
            if tag in ['NN', 'NNP', 'NNS', 'NNPS']:
                for chunk in doc.noun_chunks:
                    if prev1 in chunk.text:
                        first = chunk.text
                    if next1 in chunk.text:
                        second = chunk.text
                closed_q_choices = (first, second)
            else:
                closed_q_choices = (prev1, next1)
        else:
            qtype = 'others'

    # re-rank the 20 sentences
    scores = {}
    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        score = get_overlap(sent, question)

        if qtype == 'who':
            for ent in doc.ents:
                if ent.label_ == 'PERSON':
                    score += 1

        elif qtype == 'when':
            for ent in doc.ents:
                if ent.label_ == 'TIME' or ent.label_ == "DATE":
                    score += 1

        elif qtype == 'where':
            for ent in doc.ents:
                if ent.label_ == 'GPE' or ent.label_ == "LOC":
                    score += 1

        elif qtype in ['LANGUAGE','WORK_OF_ART','EVENT','NORP','FACILITY',
                       'GPE','DATE','TIME','PERCENT','QUANTITY','CARDINAL',
                     'MONEY','PERSON','ORG','LOC']:
            for ent in doc.ents:
                if ent.label_ == qtype:
                    score += 1
                    
        elif qtype == 'NE':
            for ent in doc.ents:
                    score += 1

        elif qtype == 'adj':
            for token in doc:
                if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                    score += 1

        elif qtype == 'verb':
            for token in doc:
                if token.dep_ == 'ROOT':
                    score += 1

        elif qtype == 'closed':
            first = closed_q_choices[0]
            second = closed_q_choices[1]

            score += (first in sent) + (second in sent)

        elif qtype == 'why':
            if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:
                score += 1

        scores[id] = score

    rank = {}
    for id, sim in results:
        max_score = scores[max(scores, key=scores.get)]
        if max_score != 0:
            rank[id] = sim * 0.5 + (scores[id] / max_score * 0.5)
        else:
            rank[id] = sim
    
    # sentence with highest rank
    index = max(rank, key=rank.get)
    sent = raw_docs[index]
    doc = nlp(sent)

    # find sentence structure
    sent_nsubj = ''
    sent_ROOT = ''
    sent_dobj = ''
    for token in doc:
        if 'nsubj' in token.dep_:
            sent_nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            sent_ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            sent_dobj = lemmatize(strip_punctuation(token.text))
            
    # find answer with highest score
    max_score = -1
    answer = ''
    
    if qtype == 'who':
        for np in doc.noun_chunks:
            score = 0
            
            if np in doc.ents:
                for ent in doc.ents:
                    if np.text in ent.text and ent.label_ == 'PERSON':
                            score += 3

            # find NP dependency
            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1

            if np.text.lower() == 'it':
                score = -1
                
            if score > max_score:
                max_score = score
                answer = np.text

    elif qtype == 'when':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'TIME' or ent.label_ == "DATE":
                score += 3
                
            if ent.text not in question:
                score += 1
            
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'where':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'GPE' or ent.label_ == "LOC":
                score += 3

            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text
            
    elif qtype in ['LANGUAGE', 'WORK_OF_ART', 'EVENT', 'NORP', 'FACILITY',
                   'GPE', 'DATE', 'TIME', 'PERCENT', 'QUANTITY', 'CARDINAL',
                   'MONEY', 'PERSON', 'ORG', 'LOC']:
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == qtype:
                score += 3
            
            if ent.text not in question:
                score += 1
                
            if qtype in ['LOC','GPE'] and ent.root.tag_ not in ['NN','NNP','NNS','NNPS']:
                score -= 2
                
            if score > max_score:
                max_score = score
                answer = ent.text
                
#                 if qtype in ['MONEY']:
#                     for token in doc:
#                         if token.text == '$':
#                             answer = '$ ' + answer
                            
                if qtype in ['PERCENT']:
                    if 'percent' in answer:
                        answer = answer[:answer.index('percent')-1]
                            
                if qtype in ['PERCENT','QUANTITY','CARDINAL','MONEY']:
                    tokens = nltk.word_tokenize(answer)
                    i = 0
                    answer = ''
                    while i < len(tokens):
                        if tokens[i].lower() in ['well','about','around','approximately', 'some']:
                            del tokens[i]
                        else:
                            if i+1 < len(tokens) and tokens[i+1] == "'s":
                                answer += tokens[i]
                            else:
                                answer += tokens[i] + ' '
                            i += 1
                    answer = answer.strip()
                    
    elif qtype == 'NE':
        for ent in doc.ents:
            score = 3
            
            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'abrv':
        abrv = ''
        qdoc = nlp(question)
        for token in qdoc:
            text = token.text
            if len(text) >= 2 and text.isupper() and text.isalpha():
                abrv = text.lower()

        if abrv == '' and 'stand for' in question:
            tokens = question.lower().split(' ')
            abrv = tokens[tokens.index('stand')-1]

        if abrv != '':
            tokens = nltk.word_tokenize(sent)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    phrase = token.lower()
                    initials = phrase[0]

                    while i+k < len(tokens) and tokens[i+k][0].isupper():
                        phrase = phrase + ' ' + tokens[i+k].lower()
                        initials += tokens[i+k][0].lower()
                        k += 1

                    phrase = phrase.strip()
                    if initials == abrv:
                        answer = phrase

        else:
            tokens = nltk.word_tokenize(question)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    initials = token[0].lower()

                    while i + k < len(tokens) and tokens[i + k][0].isupper():
                        initials += tokens[i + k][0].lower()
                        k += 1

                    if len(initials) >= 2:
                        answer = initials

    elif qtype == 'adj':
        for token in doc:
            score = 0
            
            if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                score += 3

            token_dep = token.dep_
            token_head = lemmatize(strip_punctuation(token.head.text))
            token_head_dep = token.head.dep_

            if token_dep == dep:
                score += 1
            if token_head == head:
                score += 1
            if token_head_dep == head_dep:
                score += 1

            if token.text not in question:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if token.text.lower() == 'it':
                score = -1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'verb':
        for token in doc:
            score = 0

            if token.dep_ == 'ROOT':
                score += 1

            if lemmatize(strip_punctuation(token.text)) not in \
                    [lemmatize(strip_punctuation(s)) for s in nltk.word_tokenize(question)]:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'closed':
        first = closed_q_choices[0]
        second = closed_q_choices[1]

        # whether each option appears (and is negates)
        appear1 = False
        appear2 = False
        negate1 = False
        negate2 = False
        neg_count = 0
        tokens = nltk.word_tokenize(raw_docs[id])

        for (index, token) in enumerate(tokens):
            if token == 'not' or "n't" in token:
                neg_count += 1

                if index+1 < len(tokens):
                    if tokens[index+1] == first:
                        negate1 = True
                    if tokens[index+1] == second:
                        negate2 = True

            if token == first:
                appear1 = True
            if token == second:
                appear2 = True

        possible_answer = ''
        if appear1 and not appear2:
            if neg_count % 2 == 1:
                possible_answer = second
            else:
                possible_answer = first

        elif appear2 and not appear1:
            if neg_count % 2 == 0:
                possible_answer = second
            else:
                possible_answer = first

        elif appear1 and appear2:
            if negate1 and not negate2:
                possible_answer = second
            elif negate2 and not negate1:
                possible_answer = first
            else:
                possible_answer = second

        if possible_answer != '':
            score += 5
            if score > max_score:
                max_score = score
                answer = possible_answer

    elif qtype == 'why':

        possible_answer = ''
        score = 0

        if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:

            if 'because of' in sent:
                score += 3
                index = sent.index('because of')
                substr = sent[index+11:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break

            elif 'because' in sent:
                score += 3
                index = sent.index('because')
                substr = sent[index + 8:]
                possible_answer = substr

            elif 'due to' in sent:
                score += 3
                index = sent.index('due to')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'reason' in sent:
                score += 2
                index = sent.index('reason')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    index = substr.find('is')
                    if index != -1:
                        possible_answer = substr[index+3]
                    else:
                        index = substr.find('was')
                        if index != -1:
                            possible_answer = substr[index+4]
                        else:
                            possible_answer = sent[sent.index('reason'):]

            elif 'for' in sent:
                score += 1
                index = sent.index('for')
                substr = sent[index + 4:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'since' in sent:
                score += 1
                index = sent.index('since')
                substr = sent[index + 6:]
                possible_answer = substr

            if possible_answer != '' and score > max_score:
                answer = possible_answer
                max_score = score

    # if answer not found, find noun phrases
    if answer == '':
        for np in doc.noun_chunks:
            score = 0

            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = np.text
                
    a = nltk.word_tokenize(answer)
    if len(a) > 0 and a[0].lower() in stop:
        del a[0]
        answer = ''
        for i in range(len(a)):
            if i+1 < len(a) and a[i+1] == "'s":
                answer += a[i]
            else:
                answer += a[i] + ' '
        
    answer = answer.strip().lower()
    answer_result = []
    answer_result.append(case_count)
    answer_result.append(answer)
    writer.writerow(answer_result)
    print(case_count,' ',answer)
    case_count += 1
    if answer == '':
        empty_count += 1
    
csvFile.close()
print(empty_count)

0   combination
1   addition
2   browser's layout engine
3   internet explorer
4   late 2004
5   windows
6   1995
7   browsers
8   marc andreessen
9   first web browser
10   competition
11   dominance
12   internet relay chat
13   january
14   every major web browser
15   january 2003
16   january 2009
17   file transfer protocol
18   google
19   case
20   rich user interfaces
21   2002
22   august 2011
23   major web browsers
24   chrome's user-base
25   development
26   december 2011
27   1993
28   netscape
29   apple's safari
30   rapid development
31   prefix
32   mozilla foundation
33   private networks
34   mac
35   comparison
36   1994
37   user interface
38   addition
39   major browsers
40   information resources
41   live bookmarks
42   more traditional feed reader
43   bookmarks
44   january 2003
45   prefix
46   browser software
47   file transfer protocol
48   microsoft corp
49   mobile safari
50   web browsers
51   windows
52   user's default e-mail application
53   brows

469   1339
470   1233
471   university hospital southampton nhs foundation trust
472   biggest operator
473   4.2
474   1959
475   354
476   government figures
477   m27
478   coast
479   hanover buildings
480   city
481   december 2007
482   12th century
483   traffic congestion
484   southampton
485   2004
486   1233
487   1959
488   three fire stations
489   southampton docks
490   24
491   m27
492   southampton's largest retail centre
493   13th century
494   1066
495   clausentum
496   two
497   plans
498   southampton
499   port
500   over a quarter
501   south west trains
502   20–24
503   council estates
504   south west trains
505   16.2
506   king henry's departure
507   route
508   trust
509   queen victoria
510   16.2
511   two large live music venues
512   three
513   university of southampton
514   france
515   river test and river itchen
516   1968
517   area
518   hampshire county council
519   duchess
520   world's largest cruise ships
521   1938
522   large shopping c

923   19th-century
924   terror management theory
925   29
926   united states
927   germany
928   end of the last ice age
929   19th-century
930   contrasted
931   terror management theory
932   anthropologists
933   1970s
934   around 50,000 years ago
935   united states
936   diffusion
937   prussian linguist
938   1950s and 1960s
939   stuart hall
940   élite ideal
941   present legislation
942   jefferson's metaphor
943   1971
944   nonconformists
945   areas
946   pervasive secularism
947   wall
948   may 3, 2006
949   still other scholars
950   early as the mid-17th century
951   religious freedom
952   engel
953   madison
954   december 20, 2005
955   william penn
956   two
957   legal scholars
958   still other scholars
959   reynolds
960   1776
961   court's decision
962   opponents
963   december
964   robert s. wood
965   u.s.
966   1962
967   court
968   1994
969   kurtzman
970   argue
971   thomas jefferson's influential virginia statute
972   1947
973   lone dissenter
97

1335   12 july
1336   339
1337   august 1855
1338   black sea
1339   31 december 1853
1340   cronstadt
1341   george hamilton-gordon
1342   5 september
1343   32–40
1344   alliance
1345   local commanders
1346   russians
1347   crimean war
1348   movement
1349   omar pasha
1350   nicholas
1351   parliament
1352   constantinople
1353   åland islands
1354   alexander ii
1355   nicholas
1356   cardigan
1357   treaty of paris
1358   austria
1359   start
1360   october 1853
1361   french
1362   sunday
1363   centuries-old
1364   public opinion
1365   second counterattack
1366   winter of 1854
1367   sinop
1368   russian cavalry movement
1369   catholic support
1370   peaceful settlement
1371   1830
1372   alfred nobel
1373   vidin
1374   reaction
1375   september 1853
1376   danube river
1377   ottoman forces
1378   william howard russell
1379   june 24, 1839
1380   local commanders
1381   105  the tsar
1382   28 march 1854
1383   432–33
1384   roger fenton
1385   russian troops
1386   far 

1798   several studies
1799   1970s
1800   older form
1801   march 2011
1802   2005
1803   us
1804   winter
1805   north america
1806   1895
1807   ntfs
1808   
1809   northern summer
1810   farmers ' groups
1811   countries
1812   dst inherits
1813   dst
1814   filesystem
1815   2007
1816   merriam-webster
1817   farmers ' groups
1818   st
1819   people
1820   time zone differences
1821   early goal
1822   
1823   daylight
1824   regions
1825   kingsford charcoal
1826   1999 study
1827   many enactments
1828   one
1829   year-independent way
1830   autumn and two hours
1831   least two
1832   us
1833   clocks
1834   iceland
1835   europe
1836   britain
1837   2000
1838   daily
1839   name
1840   clocks
1841   united kingdom
1842   1784 satire
1843   daylight
1844   electricity use
1845   times
1846   1984
1847   adopt
1848   britain
1849   us
1850   first three weekdays
1851   plan
1852   standardize
1853   14 april
1854   2008
1855   people
1856   us
1857   summer hours
1858   little

2210   racism
2211   suggest
2212   populations
2213   cladistics
2214   phylogenetic analysis
2215   initial hypotheses
2216   east asians
2217   populations
2218   practice
2219   significant number
2220   700.000
2221   clade
2222   diagnosis
2223   concept
2224   eduardo bonilla-silva
2225   physical anthropologists
2226   adversely
2227   mass incarceration
2228   many thousands
2229   france
2230   y chromosomes
2231   5 %
2232   roughly 28–37 %
2233   93 %
2234   uses
2235   international epidemiological data
2236   cranial measurements
2237   european concept
2238   arbitrary matter
2239   1964
2240   another way
2241   blumenbach
2242   good arguments
2243   word
2244   earlier work
2245   researchers
2246   last two decades
2247   eduardo bonilla-silva
2248   race
2249   many people
2250   2003 paper
2251   system
2252   five
2253   europeans
2254   kaplan
2255   many biological anthropologists
2256   amerindians
2257   identification code
2258   system
2259   78 percent
2260

2625   passenger elevators
2626   less expensive installations
2627   1
2628   gearless traction machines
2629   steam driven devices
2630   cab interiors
2631   one
2632   dumbwaiters
2633   freight elevator
2634   first elevator shaft
2635   january
2636   construction
2637   single bulkhead cylinders
2638   machine-room-less elevators
2639   `` shaft
2640   neapolitan architect
2641   barrel
2642   40-50 %
2643   london
2644   method
2645   environmental concerns
2646   belt elevators
2647   team's earliest exit
2648   1189
2649   eight
2650   1953
2651   citation
2652   1872
2653   2001 and 2006
2654   february 2012
2655   1954 fifa world cup
2656   1974
2657   england
2658   2008–09 season
2659   england national football team
2660   
2661   charge
2662   england
2663   2002
2664   england
2665   john terry
2666   2002
2667   england's traditional away colours
2668   1923
2669   england
2670   game
2671   setanta sports's
2672   away kit
2673   roy hodgson
2674   2002
2675   motif

3042   roman women
3043   imperial era
3044   human sacrifice
3045   security
3046   edict
3047   public festivals
3048   camp
3049   constantine
3050   roman women
3051   second edict
3052   famous tirade
3053   human sacrifice
3054   strong connections
3055   ordinary romans
3056   vergil
3057   valerian's first religious edict
3058   di immortales
3059   roman
3060   roman camps
3061   ruins
3062   (509–27 bc
3063   excessive devotion
3064   rome's hegemony
3065   end
3066   product
3067   customary offers
3068   product
3069   rome's hegemony
3070   relationship
3071   little or no civil authority
3072   edict
3073   edict
3074   most important camp-offering
3075   several days
3076   women
3077   dictator
3078   public festivals
3079   edict
3080   opportunities
3081   solution
3082   cult
3083   sporadic and sometimes brutal attempts
3084   punic crisis
3085   human sacrifice
3086   roman oaths
3087   least
3088   office
3089   jupiter latiaris
3090   religious dimensions
3091   

3462   world war i
3463   contrast
3464   latin
3465   yiddish
3466   1967
3467   example
3468   republic of china
3469   italians
3470   ion bărbuţă
3471   english
3472   english
3473   german
3474   medieval painting red
3475   christian countries
3476   red
3477   1921
3478   seven
3479   karl marx
3480   red ochre
3481   british flag
3482   16th century
3483   one to two percent
3484   red flag
3485   danger
3486   red ochre
3487   most common colors
3488   boston
3489   ancient rome
3490   roman general
3491   red
3492   1960
3493   ray
3494   harvard university
3495   communist party
3496   1950s and 1960s
3497   central african cultures
3498   painters
3499   mexico
3500   spirit
3501   1790
3502   increased performance
3503   1523
3504   gathered
3505   ancient rome
3506   demonstration
3507   color
3508   banner
3509   red hair
3510   harvard university
3511   blood pressure
3512   jars
3513   scarlet academic gowns
3514   february 1848
3515   international football
3516   vir

#  准确率测试

In [46]:
with open('training.json') as json_data:
    train = json.load(json_data)
    
case_count = 0
empty_count = 0
# test = [test[17]]
for test_case in train:
    question = test_case['question']
    docid = test_case['docid']
    correct_answer = test_case['text']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)
    

    # TFIDF
    doc_term_freqs = {}
    for (id, raw_doc) in enumerate(raw_docs):
        term_freqs = extract_term_freqs(raw_doc)
        doc_term_freqs[id] = term_freqs
    M = len(doc_term_freqs)

    doc_freqs = compute_doc_freqs(doc_term_freqs)

    vsm_inverted_index = defaultdict(list)
    for docid, term_freqs in doc_term_freqs.items():
        N = sum(term_freqs.values())
        length = 0

        # find tf*idf values and accumulate sum of squares
        tfidf_values = []
        for term, count in term_freqs.items():
            tfidf = float(count) / N * log(M / float(doc_freqs[term]))
            tfidf_values.append((term, tfidf))
            length += tfidf ** 2

        # normalise documents by length and insert into index
        length = length ** 0.5
        for term, tfidf in tfidf_values:
            # inversion of the indexing, term -> (doc_id, score)
            vsm_inverted_index[term].append([docid, tfidf / length])

    for term, docids in vsm_inverted_index.items():
        docids.sort()

    terms = extract_term_freqs(question) 
    results = query_vsm(terms, vsm_inverted_index)
    
    
#     tokenized_sentence = []
#     for each_sentence in raw_docs:
#         filter_stop_word = []
#         sentence_as_words = nltk.word_tokenize(each_sentence)
#         for each_word in sentence_as_words:
#             if each_word not in stop:
#                 filter_stop_word.append(each_word)
        
#         tokenized_sentence.append(filter_stop_word)
        
#     bm25Model = bm25.BM25(tokenized_sentence)
#     average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
    
#     query = []
#     for word in nltk.word_tokenize(question):
#         if word not in stop:
#             query.append(word)
        
#     scores = bm25Model.get_scores(query,average_idf)
#     bm25_dic = Counter()
    
#     sentence_id = 0
#     for each_score in scores:
#         bm25_dic[sentence_id] = each_score
#         sentence_id += 1
#     results = bm25_dic.most_common(4)


    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    qtype = ''

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of sentence
    for token in doc:
        if 'nsubj' in token.dep_:
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            dobj = lemmatize(strip_punctuation(token.text))

    # for noun (phrase) questions, get answer dependency
    for chunk in doc.noun_chunks:
        if qword in chunk.text:
            dep = chunk.root.dep_
            head = lemmatize(strip_punctuation(chunk.root.head.text))
            head_dep = chunk.root.head.dep_

    # determine answer type
    if 'stand for' in question or 'abbreviat' in question:
        qtype = 'abrv'

    elif qword in ['who',"who's",'whom','whose']:
        qtype = 'who'

    elif qword == 'when':
        qtype = 'when'

    elif qword in ['where',"where's"]:
        qtype = 'where'

    elif qword in ['how',"how's"]:
        if next_token == 'much':
            qtype = 'MONEY'
        elif next_token == 'many':
            qtype = 'CARDINAL'
        elif next_token == 'long':
            qtype = 'DATE'
        elif next_token in ['far','big','wide','deep','tall','high','fast','heavy']:
            qtype = 'QUANTITY'
        elif next_token in ['old','young']:
            qtype = 'DATE'
        elif next_token in ['does','did','do','have','has','had','should',
                              'can','could','will','would','must']:
            if dobj != '':
                qtype = 'adj'
            else:
                qtype = 'verb'

    elif qword in ['what', "what's", 'which']:
        if 'year'in tokens or \
                'day' in tokens or \
                'month' in tokens or \
                'era' in tokens or \
                'age' in tokens or \
                'century' in tokens or \
                'week' in tokens or \
                'period' in tokens or \
                'dynasty' in tokens:
            qtype = 'DATE'

        elif 'company' in tokens or \
                'organization' in tokens or \
                'organisation' in tokens or \
                'corporation' in tokens or \
                'institution' in tokens or \
                'university' in tokens or \
                'corporation' in tokens or \
                'association' in tokens or \
                'union' in tokens or \
                'agency' in tokens:
            qtype = 'ORG'

        elif 'city' in tokens or \
                'country' in tokens or \
                'state' in tokens or \
                'province' in tokens or \
                'county' in tokens:
            qtype = 'GPE'

        elif 'place' in tokens or \
                'river' in tokens or \
                'mountain' in tokens or \
                'ocean' in tokens or \
                'region' in tokens or \
                'area' in tokens or \
                'sea' in tokens or \
                'lake' in tokens or \
                'continent' in tokens or \
                'location' in tokens or \
                'forest' in tokens or \
                'jungle' in tokens:
            qtype = 'LOC'

        elif 'nationality' in tokens:
            qtype = 'NORP'

        elif 'building' in tokens or \
            'airport' in tokens or \
            'highway' in tokens or \
            'bridge' in tokens or \
            'harbour' in tokens or \
            'harbor' in tokens or \
            'port' in tokens or \
            'dam' in tokens:
            qtype = 'FACILITY'

        elif 'hurricane' in tokens or \
            'battle' in tokens or \
            'war' in tokens:
            qtype = 'EVENT'

        elif 'book' in tokens or \
            'novel' in tokens or \
            'song' in tokens or \
            'music' in tokens or \
            'painting' in tokens:
            qtype = 'WORK_OF_ART'

        elif 'language' in tokens or \
                'speak' in tokens:
            qtype = 'LANGUAGE'

        elif 'percentage' in tokens or 'percent' in tokens:
            qtype = 'PERCENT'

        elif 'value' in tokens or \
                'distance' in tokens or \
                'size' in tokens or \
                'length' in tokens or \
                'depth' in tokens or \
                'height' in tokens or \
                'density' in tokens or \
                'speed' in tokens or \
                'weight' in tokens or \
                'area' in tokens or \
                'temperature' in tokens or \
                'volume' in tokens:
            qtype = 'QUANTITY'

        elif 'number' in tokens:
            qtype = 'CARDINAL'

        elif 'price' in tokens:
            qtype = 'MONEY'

        elif 'name' in tokens:
            qtype = 'NE'

        else:
            # what...do type question
            tokens.remove(next_token)
            if 'do' in tokens:
                qtype = 'verb'
            else:
                qtype = 'noun'

    elif qword == 'why':
        qtype = 'why'

    elif qword in CLOSED_QUESTION_WORDS:
        qtype = 'closed'

        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            prev1 = tokens[index - 1]
            next1 = tokens[index + 1]
            tag_tokens = nltk.pos_tag(tokens)

            tag = tag_tokens[index - 1][1]

            # if answer is a noun
            if tag in ['NN', 'NNP', 'NNS', 'NNPS']:
                for chunk in doc.noun_chunks:
                    if prev1 in chunk.text:
                        first = chunk.text
                    if next1 in chunk.text:
                        second = chunk.text
                closed_q_choices = (first, second)
            else:
                closed_q_choices = (prev1, next1)
        else:
            qtype = 'others'

    # re-rank the 20 sentences
    scores = {}
    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        score = get_overlap(sent, question)

        if qtype == 'who':
            for ent in doc.ents:
                if ent.label_ == 'PERSON':
                    score += 1

        elif qtype == 'when':
            for ent in doc.ents:
                if ent.label_ == 'TIME' or ent.label_ == "DATE":
                    score += 1

        elif qtype == 'where':
            for ent in doc.ents:
                if ent.label_ == 'GPE' or ent.label_ == "LOC":
                    score += 1

        elif qtype in ['LANGUAGE','WORK_OF_ART','EVENT','NORP','FACILITY',
                       'GPE','DATE','TIME','PERCENT','QUANTITY','CARDINAL',
                     'MONEY','PERSON','ORG','LOC']:
            for ent in doc.ents:
                if ent.label_ == qtype:
                    score += 1
                    
        elif qtype == 'NE':
            for ent in doc.ents:
                    score += 1

        elif qtype == 'adj':
            for token in doc:
                if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                    score += 1

        elif qtype == 'verb':
            for token in doc:
                if token.dep_ == 'ROOT':
                    score += 1

        elif qtype == 'closed':
            first = closed_q_choices[0]
            second = closed_q_choices[1]

            score += (first in sent) + (second in sent)

        elif qtype == 'why':
            if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:
                score += 1

        scores[id] = score

    rank = {}
    for id, sim in results:
        max_score = scores[max(scores, key=scores.get)]
        if max_score != 0:
            rank[id] = sim * 0.5 + (scores[id] / max_score * 0.5)
        else:
            rank[id] = sim
    
    # sentence with highest rank
    index = max(rank, key=rank.get,default=0)
    sent = raw_docs[index]
    doc = nlp(sent)

    # find sentence structure
    sent_nsubj = ''
    sent_ROOT = ''
    sent_dobj = ''
    for token in doc:
        if 'nsubj' in token.dep_:
            sent_nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            sent_ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_:
            sent_dobj = lemmatize(strip_punctuation(token.text))
            
    # find answer with highest score
    max_score = -1
    answer = ''
    
    if qtype == 'who':
        for np in doc.noun_chunks:
            score = 0
            
            if np in doc.ents:
                for ent in doc.ents:
                    if np.text in ent.text and ent.label_ == 'PERSON':
                            score += 3

            # find NP dependency
            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1

            if np.text.lower() == 'it':
                score = -1
                
            if score > max_score:
                max_score = score
                answer = np.text

    elif qtype == 'when':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'TIME' or ent.label_ == "DATE":
                score += 3
                
            if ent.text not in question:
                score += 1
            
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'where':
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == 'GPE' or ent.label_ == "LOC":
                score += 3

            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text
            
    elif qtype in ['LANGUAGE', 'WORK_OF_ART', 'EVENT', 'NORP', 'FACILITY',
                   'GPE', 'DATE', 'TIME', 'PERCENT', 'QUANTITY', 'CARDINAL',
                   'MONEY', 'PERSON', 'ORG', 'LOC']:
        for ent in doc.ents:
            score = 0
            
            if ent.label_ == qtype:
                score += 3
            
            if ent.text not in question:
                score += 1
                
            if qtype in ['LOC','GPE'] and ent.root.tag_ not in ['NN','NNP','NNS','NNPS']:
                score -= 2
                
            if score > max_score:
                max_score = score
                answer = ent.text
                
#                 if qtype in ['MONEY']:
#                     for token in doc:
#                         if token.text == '$':
#                             answer = '$ ' + answer
                            
                if qtype in ['PERCENT']:
                    if 'percent' in answer:
                        answer = answer[:answer.index('percent')-1]
                            
                if qtype in ['PERCENT','QUANTITY','CARDINAL','MONEY']:
                    tokens = nltk.word_tokenize(answer)
                    i = 0
                    answer = ''
                    while i < len(tokens):
                        if tokens[i].lower() in ['well','about','around','approximately', 'some']:
                            del tokens[i]
                        else:
                            if i+1 < len(tokens) and tokens[i+1] == "'s":
                                answer += tokens[i]
                            else:
                                answer += tokens[i] + ' '
                            i += 1
                    answer = answer.strip()
                    
    elif qtype == 'NE':
        for ent in doc.ents:
            score = 3
            
            if ent.text not in question:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = ent.text

    elif qtype == 'abrv':
        abrv = ''
        qdoc = nlp(question)
        for token in qdoc:
            text = token.text
            if len(text) >= 2 and text.isupper() and text.isalpha():
                abrv = text.lower()

        if abrv == '' and 'stand for' in question:
            tokens = question.lower().split(' ')
            abrv = tokens[tokens.index('stand')-1]

        if abrv != '':
            tokens = nltk.word_tokenize(sent)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    phrase = token.lower()
                    initials = phrase[0]

                    while i+k < len(tokens) and tokens[i+k][0].isupper():
                        phrase = phrase + ' ' + tokens[i+k].lower()
                        initials += tokens[i+k][0].lower()
                        k += 1

                    phrase = phrase.strip()
                    if initials == abrv:
                        answer = phrase

        else:
            tokens = nltk.word_tokenize(question)
            for (i, token) in enumerate(tokens):
                if token[0].isupper():
                    k = 1
                    initials = token[0].lower()

                    while i + k < len(tokens) and tokens[i + k][0].isupper():
                        initials += tokens[i + k][0].lower()
                        k += 1

                    if len(initials) >= 2:
                        answer = initials

    elif qtype == 'adj':
        for token in doc:
            score = 0
            
            if 'advmod' in token.dep_ or 'acomp' in token.dep_:
                score += 3

            token_dep = token.dep_
            token_head = lemmatize(strip_punctuation(token.head.text))
            token_head_dep = token.head.dep_

            if token_dep == dep:
                score += 1
            if token_head == head:
                score += 1
            if token_head_dep == head_dep:
                score += 1

            if token.text not in question:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if token.text.lower() == 'it':
                score = -1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'verb':
        for token in doc:
            score = 0

            if token.dep_ == 'ROOT':
                score += 1

            if lemmatize(strip_punctuation(token.text)) not in \
                    [lemmatize(strip_punctuation(s)) for s in nltk.word_tokenize(question)]:
                score += 1

            if strip_punctuation(token.text).strip().lower() not in stop:
                score += 1

            if score > max_score:
                max_score = score
                answer = token.text

    elif qtype == 'closed':
        first = closed_q_choices[0]
        second = closed_q_choices[1]

        # whether each option appears (and is negates)
        appear1 = False
        appear2 = False
        negate1 = False
        negate2 = False
        neg_count = 0
        tokens = nltk.word_tokenize(raw_docs[id])

        for (index, token) in enumerate(tokens):
            if token == 'not' or "n't" in token:
                neg_count += 1

                if index+1 < len(tokens):
                    if tokens[index+1] == first:
                        negate1 = True
                    if tokens[index+1] == second:
                        negate2 = True

            if token == first:
                appear1 = True
            if token == second:
                appear2 = True

        possible_answer = ''
        if appear1 and not appear2:
            if neg_count % 2 == 1:
                possible_answer = second
            else:
                possible_answer = first

        elif appear2 and not appear1:
            if neg_count % 2 == 0:
                possible_answer = second
            else:
                possible_answer = first

        elif appear1 and appear2:
            if negate1 and not negate2:
                possible_answer = second
            elif negate2 and not negate1:
                possible_answer = first
            else:
                possible_answer = second

        if possible_answer != '':
            score += 5
            if score > max_score:
                max_score = score
                answer = possible_answer

    elif qtype == 'why':

        possible_answer = ''
        score = 0

        if 'reason' in sent or 'because' in sent or 'due to' in sent or 'since' in sent or 'for' in sent:

            if 'because of' in sent:
                score += 3
                index = sent.index('because of')
                substr = sent[index+11:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break

            elif 'because' in sent:
                score += 3
                index = sent.index('because')
                substr = sent[index + 8:]
                possible_answer = substr

            elif 'due to' in sent:
                score += 3
                index = sent.index('due to')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'reason' in sent:
                score += 2
                index = sent.index('reason')
                substr = sent[index+7:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    index = substr.find('is')
                    if index != -1:
                        possible_answer = substr[index+3]
                    else:
                        index = substr.find('was')
                        if index != -1:
                            possible_answer = substr[index+4]
                        else:
                            possible_answer = sent[sent.index('reason'):]

            elif 'for' in sent:
                score += 1
                index = sent.index('for')
                substr = sent[index + 4:]
                span = nlp(substr)
                for chunk in span.noun_chunks:
                    possible_answer = chunk.text
                    break
                if possible_answer == '':
                    possible_answer = substr

            elif 'since' in sent:
                score += 1
                index = sent.index('since')
                substr = sent[index + 6:]
                possible_answer = substr

            if possible_answer != '' and score > max_score:
                answer = possible_answer
                max_score = score

    # if answer not found, find noun phrases
    if answer == '':
        for np in doc.noun_chunks:
            score = 0

            np_dep = np.root.dep_
            np_head = lemmatize(strip_punctuation(np.root.head.text))
            np_head_dep = np.root.head.dep_

            if np_dep == dep:
                score += 1
            if np_head == head:
                score += 1
            if np_head_dep == head_dep:
                score += 1

            if np.text not in question:
                score += 1

            if strip_punctuation(np.text).strip().lower() not in stop:
                score += 1
                
            if score > max_score:
                max_score = score
                answer = np.text
                
    a = nltk.word_tokenize(answer)
    if len(a) > 0 and a[0].lower() in stop:
        del a[0]
        answer = ''
        for i in range(len(a)):
            if i+1 < len(a) and a[i+1] == "'s":
                answer += a[i]
            else:
                answer += a[i] + ' '
        
    if correct_answer != answer.strip().lower():
        print('Support: ',sent)
        print(question)
        print('Correct: ',correct_answer)
        print('predicted: ',answer)
        print('\n')

    case_count += 1
    if answer == '':
        empty_count += 1
    
csvFile.close()
print(empty_count)

Support:  A change of several tens of micrograms in one kilogram is equivalent to the current uncertainty in the value of the Planck constant in SI units.
A kilogram could be definined as having a Planck constant of what value?
Correct:  6966662606895999999♠6.62606896×10−34 j⋅s
predicted:  several tens


Support:  The Planck constant is given by There are a number of proposals to redefine certain of the SI base units in terms of fundamental physical constants.
What is the shape of the object that establishes the base unit of the kilogram?
Correct:  cylinder
predicted:  Planck constant 


Support:  In modern terms, if J is the total angular momentum of a system with rotational invariance, and Jz the angular momentum measured along any given direction, these quantities can only take on the values where the uncertainty is given as the standard deviation of the measured value from its expected value.
What example is given as another paired relationship of uncertainly related to standard de

Support:  For example, green light with a wavelength of 555 nanometres (the approximate wavelength to which human eyes are most sensitive) has a frequency of 7014540000000000000♠540 THz (7014540000000000000♠540×1012 Hz).
What is the frequency of the light to which the human eye is most sensitive?
Correct:  7014540000000000000♠540 thz
predicted:  example


Support:  These proofs are commonly known as the "ultraviolet catastrophe", a name coined by Paul Ehrenfest in 1911.
What name did Paul Ehrenfest give to the proofs from Einstein and Rayleigh & Jeans?
Correct:  the `` ultraviolet catastrophe ''
predicted:  1911


Support:  The Planck constant is given by There are a number of proposals to redefine certain of the SI base units in terms of fundamental physical constants.
What measurement is deemed most important to redefine in terms of physical constants?
Correct:  the kilogram
predicted:  Planck constant 


Support:  The energy transferred by a wave in a given time is called its intens

Support:  The Rayleigh–Jeans law makes close predictions for a narrow range of values at one limit of temperatures, but the results diverge more and more strongly as temperatures increase.
What rule predicted narrow range of energy values at lower temperatures?
Correct:  the rayleigh–jeans
predicted:  Rayleigh–Jeans law 


Support:  The Planck constant is given by There are a number of proposals to redefine certain of the SI base units in terms of fundamental physical constants.
What alloy is the base unit of the kilogram made from?
Correct:  platinum–iridium
predicted:  Planck constant 


Support:  If the object is black, meaning it absorbs all the light that hits it, then its thermal light emission is maximized.
What is maximized as a result of a black object absorbing all the light that hits it?
Correct:  thermal light emission
predicted:  object 


Support:  An amount of light compatible with everyday experience is the energy of one mole of photons; its energy can be computed by mu

Support:  Use of wind power in 2012 increased by 18.1 percent, to 521.3 TWh.
Use of wind power in 2012 increased by what percentage?
Correct:  18.1 percent
predicted:  18.1


Support:  Based on REN21's 2014 report, renewables contributed 19 percent to our energy consumption and 22 percent to our electricity generation in 2012 and 2013, respectively.
Renewables contributed what percentage to our electricity generation?
Correct:  22 percent
predicted:  19


Support:  The group is to be co-chaired by Kandeh Yumkella, the chair of UN Energy and director general of the UN Industrial Development Organisation, and Charles Holliday, chairman of Bank of America".
Who was the UN under-secretary in 2011?
Correct:  achim steiner
predicted:  Kandeh Yumkella


Support:  In 2011, UN under-secretary general Achim Steiner said: "The continuing growth in this core segment of the green economy is not happening by chance.
Who is Achim Steiner?
Correct:  un under-secretary general
predicted:  UN under-secr

Support:  As of July 2012, the largest photovoltaic (PV) power plants in the world are the Agua Caliente Solar Project (USA, 247 MW), Charanka Solar Park (India, 214 MW), Golmud Solar Park (China, 200 MW), Perovo Solar Park (Russia 100 MW), Sarnia Photovoltaic Power Plant (Canada, 97 MW), Brandenburg-Briest Solarpark (Germany 91 MW), Solarpark Finow Tower (Germany 84.7 MW), Montalto di Castro Photovoltaic Power Station (Italy, 84.2 MW), Eggebek Solar Park (Germany 83.6 MW), Senftenberg Solarpark (Germany 82 MW), Finsterwalde Solar Park (Germany, 80.7 MW), Okhotnykovo Solar Park (Russia, 80 MW), Lopburi Solar Farm (Thailand 73.16 MW), Rovigo Photovoltaic Power Plant (Italy, 72 MW), and the Lieberose Photovoltaic Park (Germany, 71.8 MW).
Where is the Agua Caliente Solar Project located?
Correct:  usa
predicted:  India


Support:  The group is to be co-chaired by Kandeh Yumkella, the chair of UN Energy and director general of the UN Industrial Development Organisation, and Charles Hollida

Support:  Spielberg also produced the Don Bluth animated features, An American Tail and The Land Before Time, which were released by Universal Studios.
Which studio produced 'Super 8'?
Correct:  paramount pictures
predicted:  Spielberg


Support:  It was on this project that Spielberg also met his future wife, actress Kate Capshaw.
What was Spielberg's future wife's career?
Correct:  actress
predicted:  project 


Support:  For cinematography, Allen Daviau, a childhood friend and cinematographer, shot the early Spielberg film Amblin and most of his films up to Empire of the Sun; Janusz Kamiński who has shot every Spielberg film since Schindler's List (see List of film director and cinematographer collaborations); and the film editor Michael Kahn who has edited every film directed by Spielberg from Close Encounters to Munich (except E.T.
Which childhood friend worked on Spielberg's films?
Correct:  allen daviau
predicted:  cinematography


Support:  Spielberg first met actress Amy Irvin

Support:  Spielberg won the Academy Award for Best Director for Schindler's List (1993) and Saving Private Ryan (1998).
Which film beat 'Saving Private Ryan' worldwide?
Correct:  armageddon
predicted:  Spielberg


Support:  While still a student, he was offered a small unpaid intern job at Universal Studios with the editing department.
What department did Spielberg have an unpaid internship with?
Correct:  editing
predicted:  small unpaid intern job 


Support:  It is the first non-Pixar film to win the award since the category was first introduced.
Which film did Daviau first work with Spielberg on?
Correct:  amblin
predicted:  first non-Pixar film 


Support:  He has been associated with composer John Williams since 1974, who composed music for all save five of Spielberg's feature films.
Who composes music for most of Spielberg's movies?
Correct:  john williams
predicted:  composer John Williams


Support:  In later years, his films began addressing humanistic issues such as the Holo

Support:  He bought Orson Welles's own directorial copy of the script for the radio broadcast The War of the Worlds (1938) in 1994.
When did Spielberg buy Orson Welles' personal script copy from 'The War of the Worlds'?
Correct:  1994
predicted:  1938


Support:  Also in 2005, Spielberg directed a modern adaptation of War of the Worlds (a co-production of Paramount and DreamWorks), based on the H. G. Wells book of the same name (Spielberg had been a huge fan of the book and the original 1953 film).
Who wrote the book 'Memoirs of a Geisha' is based on?
Correct:  arthur golden
predicted:  Spielberg


Support:  Studio vice president Sidney Sheinberg was impressed by the film, which had won a number of awards, and offered Spielberg a seven-year directing contract.
Who offered Spielberg a contract at Universal Studios?
Correct:  sidney sheinberg
predicted:  Spielberg


Support:  It will be filmed in early 2017 for release at the end of that year, before Ready Player One is completed and rel

Support:  In 1958, he became a Boy Scout and fulfilled a requirement for the photography merit badge by making a nine-minute 8 mm film entitled The Last Gunfight.
What genre was Spielberg's first film "The Last Gunfight"
Correct:  western
predicted:  Boy Scout 


Support:  In 1996, Spielberg worked on and shot original footage for a movie-making simulation game called Steven Spielberg's Director's Chair.
How did Steven Spielberg feel about being an Orhtodox Jew?
Correct:  embarrassed
predicted:  worked


Support:  It will be filmed in early 2017 for release at the end of that year, before Ready Player One is completed and released in 2018.
When was 'The Pacific' released?
Correct:  2010
predicted:  early 2017


Support:  Spielberg described himself as feeling like an alien during childhood, and his interest came from his father, a science fiction fan, and his opinion that aliens would not travel light years for conquest, but instead curiosity and sharing of knowledge.
When did Spielber

Support:  During the 1920s there was some work on a 4.7-inch which lapsed, but revived in 1937, leading to a new gun in 1944.
What is being lead to specialty roles?
Correct:  guns
predicted:  1920s 


Support:  The Germans developed massive reinforced concrete blockhouses, some more than six stories high, which were known as Hochbunker "High Bunkers" or "Flaktürme" flak towers, on which they placed anti-aircraft artillery.
What was placed on the blockhouses by the Germans?
Correct:  anti-aircraft artillery
predicted:  Germans 


Support:  The future of projectile based weapons may be found in the railgun.
Where can the future of projectile based weapons possibly be found?
Correct:  the railgun
predicted:  future 


Support:  A number of jet interceptors such as the F-102 Delta Dagger, the F-106 Delta Dart, and the MiG-25 were built in the period starting after the end of World War II and ending in the late 1960s, when they became less important due to the shifting of the strategic bomb

Support:  Rheinmetall in Germany developed an automatic 20 mm in the 1920s and Oerlikon in Switzerland had acquired the patent to an automatic 20 mm gun designed in Germany during World War I. Germany introduced the rapid-fire 2 cm FlaK 30 and later in the decade it was redesigned by Mauser-Werke and became the 2 cm FlaK 38.
In the 1920s, which company developed the automatic 20 mm?
Correct:  rheinmetall
predicted:  Oerlikon


Support:  The cannon Ljutovac used was not designed as an anti-aircraft gun, it was a slightly modified Turkish cannon captured during the First Balkan War in 1912.
What war was the FlaK first used in?
Correct:  spanish civil war
predicted:  First Balkan War 


Support:  The solution was automation, in the form of a mechanical computer, the Kerrison Predictor.
What was the name of the mechanical computer that used automation?
Correct:  the kerrison predictor
predicted:  solution 


Support:  The interceptor aircraft (or simply interceptor) is a type of fighter ai

Support:  The gun became so important to the British war effort that they even produced a movie, The Gun, that encouraged workers on the assembly line to work harder.
What was the movie made to inspire assembly line workers called?
Correct:  the gun
predicted:  gun 


Support:  The Army's Anti-aircraft command, which was under command of the Air Defence UK organisation, grew to 12 AA divisions in 3 AA corps.
How many corps was the UK's Anti-Aircraft Command?
Correct:  three aa corps
predicted:  12


Support:  In Somalia, militia members sometimes welded a steel plate in the exhaust end of an RPG's tube to deflect pressure away from the shooter when shooting up at US helicopters.
What did some militia members in Somalia weld in the exhaust of the RPG tube to protect the shooter?
Correct:  a steel plate
predicted:  steel plate 


Support:  Another potential weapon system for anti-aircraft use is the laser.
What weapon's system uses guns and missiles?
Correct:  kashtan ciws
predicted:  An

Support:  In particular the U.S. Army set up a huge air defence network around its larger cities based on radar-guided 90 mm and 120 mm guns.
Which military set up a large air defence network surrounding its larger cities?
Correct:  the u.s. army
predicted:  U.S. Army 


Support:  One term is 'ceiling', maximum ceiling being the height a projectile would reach if fired vertically, not practically useful in itself as few AA guns are able to fire vertically, and maximum fuse duration may be too short, but potentially useful as a standard to compare different weapons.
What is the term used to describe the height that a projectile would go to if it was fired vertically?
Correct:  maximum ceiling
predicted:  One


Support:  This was the first occasion in military history that a military aircraft was shot down with ground-to-air fire.
The military was not allowed to use anything that was bigger than what?
Correct:  .50-inch
predicted:  military history


Support:  Similar systems were adopte

Support:  Targets for non-ManPAD SAMs will usually be acquired by air-search radar, then tracked before/while a SAM is "locked-on" and then fired.
How are targets aquired for non-ManPAD SAMs?
Correct:  air-search radar
predicted:  Targets


Support:  In some countries, such as Britain and Germany during the Second World War, the Soviet Union and NATO's Allied Command Europe, ground based air defence and air defence aircraft have been under integrated command and control.
Which country's air defence and aircraft has been under integrated command and control?
Correct:  soviet union
predicted:  Britain


Support:  With the diversification of air defence there has been much more emphasis on mobility.
What systems are really geared toward mobility?
Correct:  soviet
predicted:  diversification 


Support:  At short range, the apparent target area is relatively large, the trajectory is flat and the time of flight is short, allowing to correct lead by watching the tracers.
Short range missiles

Support:  Multiple transmitter radars such as those from bistatic radars and low-frequency radars are said to have the capabilities to detect stealth aircraft.
Radar was supplemented by what in the 1980s?
Correct:  optronics
predicted:  bistatic radars


Support:  However, during the Second World War the RAF Regiment was formed to protect airfields everywhere, and this included light air defences.
Who protected airfields in WWII?
Correct:  raf regiment
predicted:  Second World War 


Support:  In the later decades of the Cold War this included the United States Air Force's operating bases in UK.
The United States Air Force helped protect the UK during what 'war'?
Correct:  cold war
predicted:  later decades 


Support:  MANPADS of the former Soviet Union have been exported around the World, and can be found in use by many armed forces.
This separate service  was known as what in the Soviet Union?
Correct:  voyska pvo
predicted:  World


Support:  One term is 'ceiling', maximum ceiling 

Support:  A species can be listed in two ways.
How many different ways can a species be added to the endangered list?
Correct:  two ways
predicted:  two


Support:  The policy was developed by the Clinton Administration in 1999.
Which presidential administration developed Safe Harbor policy?
Correct:  the clinton administration
predicted:  Clinton Administration 


Support:  The whooping crane population by 1941 was estimated at about only 16 birds still in the wild.
What was the estimated population of the whooping crane in 1941?
Correct:  16 birds
predicted:  whooping crane population 


Support:  The Endangered Species Act of 1973 (ESA; 16 U.S.C.
What section of the Endangered Species Act establishes critical habitat regulations?
Correct:  section 4
predicted:  Endangered Species Act 


Support:  The consultation can be informal, to determine if harm may occur; and then formal if the harm is believed to be likely.
Given that a violator caused wildlife harm, who may provide financial

Support:  Low agricultural productivity in the SNLs, repeated droughts, the devastating effect of HIV/AIDS and an overly large and inefficient government sector are likely contributing factors.
How large in square kilometers is Swaziland?
Correct:  17,364 km2
predicted:  Low agricultural productivity


Support:  In 2004, the Swaziland government acknowledged for the first time that it suffered an AIDS crisis, with 38.8% of tested pregnant women infected with HIV (see AIDS in Africa).
Of pregnant women, how many are believed to be infected with HIV?
Correct:  38.8 %
predicted:  2004


Support:  The constitution for independent Swaziland was promulgated by Britain in November 1963 under the terms of which legislative and executive councils were established.
In terms of Swaziland, what does SNL refer to?
Correct:  swazi nation land
predicted:  independent Swaziland


Support:  Mswati III, the son of Ntfombi, was crowned king on 25 April 1986 as King and Ingwenyama of Swaziland.
In terms o

Support:  Anglican, Protestant and indigenous African churches, including African Zionist, constitute the majority of the Christians (40%), followed by Roman Catholicism at 20% of the population.
What percentage of the Swazi population are Christian?
Correct:  83 %
predicted:  40 %


Support:  Swaziland's most well-known cultural event is the annual Umhlanga Reed Dance.
What individuals can take part in the Umhlanga Reed Dance?
Correct:  childless , unmarried girls
predicted:  Swaziland's most well-known cultural event


Support:  Swaziland has a wide variety of landscapes, from the mountains along the Mozambican border to savannas in the east and rain forest in the northwest.
How wide is Swaziland in miles??
Correct:  81 mi
predicted:  Mozambican


Support:  On the positive side, the external debt burden has declined markedly over the last 20 years, and domestic debt is almost negligible; external debt as a percent of GDP was less than 20% in 2006.
What has happened to debt external o

Support:  The empire collapsed in 330 BC following the conquests of Alexander the Great.
Who ended the Archaemenid Empire by conquest in 330 BC?
Correct:  alexander the great
predicted:  empire 


Support:  Christianity, Judaism, Zoroastrianism, and the Sunni branch of Islam are officially recognized by the government, and have reserved seats in the Iranian Parliament.
What other branch of Islam is recognized by the Iranian government?
Correct:  sunni branch
predicted:  Christianity


Support:  Iran has a paramilitary, volunteer militia force within the IRGC, called the Basij, which includes about 90,000 full-time, active-duty uniformed members.
What is Iran's volunteer militia force named?
Correct:  the basij
predicted:  paramilitary , volunteer militia force 


Support:  But the Bahá'í Faith, which is said to be the largest non-Muslim religious minority in Iran, is not officially recognized, and has been persecuted during its existence in Iran since the 19th century.
What religious m

Support:  The President is responsible for the implementation of the Constitution and for the exercise of executive powers, except for matters directly related to the Supreme Leader, who has the final say in all matters.
Who exercises exective powers in Iran?
Correct:  the president
predicted:  President 


Support:  Turkic tribesmen were first used in the Abbasid army as mamluks (slave-warriors), replacing Iranian and Arab elements within the army.
The Abbasid army replaced Iranian and Arabic men with Turkic tribesmen as what element in their army?
Correct:  mamluks ( slave-warriors )
predicted:  Abbasid army 


Support:  According to FAO, Iran has been a top five producer of the following agricultural products in the world in 2012: apricots, cherries, sour cherries, cucumbers and gherkins, dates, eggplants, figs, pistachios, quinces, walnuts, and watermelons.
What melon is Iran a Top 5 producer in the world in 2012?
Correct:  watermelons
predicted:  FAO


Support:  The political syst

Support:  The total Iranian casualties in the war were estimated to be 123,220–160,000 KIA, 60,711 MIA, and 11,000–16,000 civilians killed.
How many civilians in Iran were killed during the Iran-Iraq War?
Correct:  11,000–16,000
predicted:  123,220–160,000


Support:  Tehran is the country's capital and largest city, as well as its leading cultural and economic center.
What city was the ritual center of the Archaemenids?
Correct:  persepolis
predicted:  Tehran


Support:  In Iranian languages, the gentilic is attested as a self-identifier included in ancient inscriptions and the literature of Avesta,[a] and remains also in other Iranian ethnic names such as Alans (Ossetic: Ир – Ir) and Iron (Ossetic: Ирон – Iron).
What is attested as a self-identifier in Iranian languages?
Correct:  the gentilic
predicted:  gentilic 


Support:  Another civil war ensued after the death of Karim Khan in 1779, out of which Aqa Mohammad Khan emerged, founding the Qajar Dynasty in 1794.
Who founded the fol

Support:  It is estimated that in 480 BC, 50 million people lived in the Achaemenid Empire.
How many people live in Iran?
Correct:  78.4 million
predicted:  480


Support:  According to UNESCO and the deputy head of research for Iran Travel and Tourism Organization (ITTO), Iran is rated 4th among the top 10 destinations in the Middle East.
What was Iran's rank in the top 10 Middle East destinations according to UNESCO?
Correct:  rated 4th
predicted:  deputy head 


Support:  The unification of the Median tribes under a single ruler in 728 BC led to the foundation of the Median Empire which, by 612 BC, controlled the whole Iran and the eastern Anatolia.
When did the Median tribes unify under a single ruler to form the Median Empire?
Correct:  728 bc
predicted:  728


Support:  It is estimated that in 480 BC, 50 million people lived in the Achaemenid Empire.
The Achaemenid Empire expanded into what part of Asia?
Correct:  central asia
predicted:  480 BC


Support:  The presence of so man

predicted:  Florida


Support:  In 1961, President John F. Kennedy became the first to utilize the term "affirmative action" in Executive Order 10925 to ensure that government contractors "take affirmative action to ensure that applicants are employed, and employees are treated during employment, without regard to their race, creed, color, or national origin."
When was there a vote regarding affirmative action in Michigan?
Correct:  2006
predicted:  1961


Support:  The Supreme Court agreed in June 2015 to hear the case a second time.
When will the Supreme Court likely hear the case for the second time?
Correct:  june 2016
predicted:  June 2015


Support:  Opponents of racial affirmative action argue that the program actually benefits middle- and upper-class African Americans and Hispanic Americans at the expense of lower-class European Americans and Asian Americans.
At who's expense other than Asian Americans are upper-class African Americans and Hispanic Americans supposedly benefiti

Support:  Furthermore, those in favor of affirmative action see it as an effort towards inclusion rather than a discriminatory practice.
What is a typical argument in favor of affirmative action?
Correct:  compensation argument
predicted:  effort 


Support:  At the same time, affirmative action itself is both morally and materially costly: 52 percent of white populace (compared to 14 percent of black) thought it should be abolished, implying white distaste of using racial identity, and full-file review is expected to cost the universities an additional $1.5 million to $2 million per year, excluding possible cost of litigation.
What percentage of the black population thought affirmative action should be abolished?
Correct:  14
predicted:  52


Support:  ":40 On July 26, Truman mandated the end of hiring and employment discrimination in the federal government, reaffirming FDR's order of 1941.:40 He issued two executive orders on July 26, 1948: Executive Order 9980 and Executive Order 99

Support:  In 1990, they constituted 9% of the population, but only received 3.1% of the bachelors's degrees awarded.
How many of the total bachelor degrees awarded in 1990 went to Latinos?
Correct:  3.1 %
predicted:  9 %


Support:  Some policies adopted as affirmative action, such as racial quotas or gender quotas for collegiate admission, have been criticized as a form of reverse discrimination, and such implementation of affirmative action has been ruled unconstitutional by the majority opinion of Gratz v. Bollinger.
Having quotas regarding admissions or employment has been criticized and said to be what kind of discrimination?
Correct:  reverse
predicted:  policies 


Support:  Early Asian immigrants experienced prejudice and discrimination in the forms of not having the ability to become naturalized citizens.
What law type did Asian immigrants struggle with?
Correct:  school segregation
predicted:  Early Asian immigrants


Support:  Princeton Dean of Admissions Janet Rapelye respo

Support:  Although 23% of those taking the test were African American, none scored high enough to qualify.
What percentage of those people taking the test were African American?
Correct:  23
predicted:  23 %


Support:  Proponents of affirmative action argue that by nature the system is not only race based, but also class and gender based.
What do some people believe should be the basis for affirmative action instead of race based legislation?
Correct:  class-based affirmative action
predicted:  Proponents


Support:  Philadelphia was selected as the test case because, as Assistant Secretary of Labor Arthur Fletcher explained, "The craft unions and the construction industry are among the most egregious offenders against equal opportunity laws .
Who held the position of Assistant Secretary of Labor?
Correct:  arthur fletcher
predicted:  Philadelphia


Support:  Many corporations in the South, still afflicted with Jim Crow laws, largely ignored the federal recommendations.
Which laws did

Support:  At the outset of the Franco-Prussian War, 462,000 German soldiers concentrated on the French frontier while only 270,000 French soldiers could be moved to face them, the French army having lost 100,000 stragglers before a shot was fired through poor planning and administration.
In peacetime, what the approximate number of French soldiers?
Correct:  400,000
predicted:  462,000


Support:  During the war, the Paris National Guard, particularly in the working-class neighbourhoods of Paris, had become highly politicised and units elected officers; many refused to wear uniforms or obey commands from the national government.
What did the National Guard refuse to wear?
Correct:  uniforms
predicted:  war 


Support:  Napoleon's new prime minister, Emile Ollivier, declared that France had done all that it could humanly and honorably do to prevent the war, and that he accepted the responsibility "with a light heart."
Which French prime minister believed he had done all that he could to

Support:  Rail sidings and marshalling yards became choked with loaded wagons, with nobody responsible for unloading them or directing them to the destination.
Who was responsible for dealing with chaotic train yards and unloaded wagons?
Correct:  nobody
predicted:  Rail sidings


Support:  The French press and parliament demanded a war, which the generals of Napoleon III assured him that France would win.
Napoleon III belived he would win the Astro-Prussian war and win a conflict with what country?
Correct:  prussia
predicted:  France


Support:  However the Prussians resisted strongly, and the French suffered 86 casualties to the Prussian 83 casualties.
How many casualties did Moltke suffer at Beaumont?
Correct:  5,000 men
predicted:  86


Support:  However, planning for the next encounter was more based upon the reality of unfolding events rather than emotion or pride, as Intendant General Wolff told him and his staff that supply beyond the Saar would be impossible.
Who told LeBoeuf

Support:  Firing a contact-detonated shell, the Krupp gun had a longer range and a higher rate of fire than the French bronze muzzle loading cannon, which relied on faulty time fuses.
What type of shell did the Krupp weapon fire?
Correct:  a contact-detonated shell
predicted:  contact-detonated shell 


Support:  A pre-war plan laid out by the late Marshal Niel called for a strong French offensive from Thionville towards Trier and into the Prussian Rhineland.
From Thionville towards Trier, what was the final destination of the offensive?
Correct:  the prussian rhineland
predicted:  pre-war plan 


Support:  General Frossard's II Corps and Marshal Bazaine's III Corps crossed the German border on 2 August, and began to force the Prussian 40th Regiment of the 16th Infantry Division from the town of Saarbrücken with a series of direct attacks.
What town were the III Corps able to capture?
Correct:  vionville
predicted:  General Frossard's II Corps


Support:  A second French army which ope

Support:  Other countries quickly discerned the advantages given to the Germans by their military system, and adopted many of their innovations, particularly the General Staff, universal conscription and highly detailed mobilization systems.
What German advantages were discerned by other countries in the aftermath? 
Correct:  their military system
predicted:  countries 


Support:  This plan was discarded in favour of a defensive plan by Generals Charles Frossard and Bartélemy Lebrun, which called for the Army of the Rhine to remain in a defensive posture near the German border and repel any Prussian offensive.
Niel's plan was cast aside in favour of a plan by General Frossard and what other general??
Correct:  bartélemy lebrun
predicted:  plan 


Support:  In addition, the Prussian military education system was superior to the French model; Prussian staff officers were trained to exhibit initiative and independent thinking.
What commander of the General Staff held the expectaion of in

Support:  Hymns sung to Apollo were called paeans.
What was the term for hymns sung to Apollo?
Correct:  paeans
predicted:  Hymns


Support:  The pair was practicing throwing the discus when a discus thrown by Apollo was blown off course by the jealous Zephyrus and struck Hyacinthus in the head, killing him instantly.
Who blew the discus off course, killing Hyacinthus?
Correct:  zephyrus
predicted:  Hyacinthus


Support:  In the first large-scale depictions during the early archaic period (640–580 BC), the artists tried to draw one's attention to look into the interior of the face and the body which were not represented as lifeless masses, but as being full of life.
The period between 640-580 BC was known as what?
Correct:  early archaic period
predicted:  first


Support:  The name of Apollo's mother Leto has Lydian origin, and she was worshipped on the coasts of Asia Minor.
What origin was Leto?
Correct:  lydian
predicted:  name 


Support:  Coronis, was daughter of Phlegyas, King of

Support:  Apollo and the Furies argue about whether the matricide was justified; Apollo holds that the bond of marriage is sacred and Orestes was avenging his father, whereas the Erinyes say that the bond of blood between mother and son is more meaningful than the bond of marriage.
Who was accused of matricide?
Correct:  orestes
predicted:  Apollo


Support:  Leucothea was daughter of Orchamus and sister of Clytia.
Who did Leucothea fall in love with?
Correct:  leucothea
predicted:  Clytia


Support:  As sun-god and god of light, Apollo was also known by the epithets Aegletes (/əˈɡliːtiːz/ ə-GLEE-teez; Αἰγλήτης, Aiglētēs, from αἴγλη, "light of the sun"), Helius (/ˈhiːliəs/ HEE-lee-əs; Ἥλιος, Helios, literally "sun"), Phanaeus (/fəˈniːəs/ fə-NEE-əs; Φαναῖος, Phanaios, literally "giving or bringing light"), and Lyceus (/laɪˈsiːəs/ ly-SEE-əs; Λύκειος, Lykeios, from Proto-Greek *λύκη, "light").
Since Apollo was known as god of the sun, what was another name for Apollo?
Correct:  sol
predic

Support:  These free-standing statues were usually marble, but also the form rendered in limestone, bronze, ivory and terracotta.
These free-standing statues were sometimes rendered from immestone, bronze, ivory and terracotta but were usually made from what material?
Correct:  marble
predicted:  limestone


Support:  Some common epithets of Apollo as a healer are "paion" (παιών, literally "healer" or "helper") "epikourios" (ἐπικουρώ, "help"), "oulios" (οὐλή, "healed wound", also a "scar" ) and "loimios" (λοιμός, "plague").
What is the literal meaning of "healer?"
Correct:  paean
predicted:  common epithets 


Support:  During the Second Punic War in 212 BCE, the Ludi Apollinares ("Apollonian Games") were instituted in his honor, on the instructions of a prophecy attributed to one Marcius.
What is another name for the Ludi Apollinares?
Correct:  apollonian games
predicted:  Second Punic War 


Support:  The god seems to be related to Appaliunas, a tutelary god of Wilusa (Troy) in Asia 

Support:  As god of colonization, Apollo gave oracular guidance on colonies, especially during the height of colonization, 750–550 BCE.
When was the height of colonization?
Correct:  750–550 bce
predicted:  Apollo


Support:  The "Homeric hymn" represents Apollo as a Northern intruder.
What represents Apollo as a Northern intruder?
Correct:  homeric hymn
predicted:  `` Homeric hymn 


Support:  [citation needed] Apollo's sister Artemis, who was the Greek goddess of hunting, is identified with Britomartis (Diktynna), the Minoan "Mistress of the animals".
Who was the Minoan "Mistres of the animals?"
Correct:  britomartis
predicted:  Artemis


Support:  The epithet "Loxias" has historically been associated with λοξός, "ambiguous".
Which epithet has historically been associated with ambiguous?
Correct:  loxias
predicted:  epithet 


Support:  When they tried to depict the most abiding qualities of men, it was because men had common roots with the unchanging gods.
What word literally mens "

Support:  The chemical composition of wood varies from species to species, but is approximately 50% carbon, 42% oxygen, 6% hydrogen, 1% nitrogen, and 1% other elements (mainly calcium, potassium, sodium, magnesium, iron, and manganese) by weight.
What element joins potassium, calcium, manganese, iron, and sodium to make a combined 1% of the chemical composition of wood?
Correct:  magnesium
predicted:  chemical composition 


Support:  One noteworthy example of this trend is the golf club commonly known as the wood, the head of which was traditionally made of persimmon wood in the early days of the game of golf, but is now generally made of synthetic materials.
When the golf club called the "wood" was actually made out of it, what type of wood was used?
Correct:  persimmon
predicted:  early days 


Support:  Some species begin to form heartwood very early in life, so having only a thin layer of live sapwood, while in others the change comes slowly.
Where does wood come from?
Correct:  t

Support:  There is a strong relationship between the properties of wood and the properties of the particular tree that yielded it.
Ring-porous hardwoods have a clear relationship between their properties and what other factor?
Correct:  rate of growth
predicted:  properties 


Support:  Aside from water, wood has three main components.
Often the water in wood makes it more pliable and what else?
Correct:  softer
predicted:  three main components


Support:  People have used wood for millennia for many purposes, primarily as a fuel or as a construction material for making houses, tools, weapons, furniture, packaging, artworks, and paper.
What has been the primary purpose of wood for millennia other than fuel?
Correct:  construction material
predicted:  People


Support:  Examples include the totem poles carved by North American indigenous people from conifer trunks, often Western Red Cedar (Thuja plicata), and the Millennium clock tower, now housed in the National Museum of Scotland in 

Support:  The reddish-brown streaks so common in hickory and certain other woods are mostly the result of injury by birds.
What animals cause the reddish-brown streaks of color in hickory wood?
Correct:  birds
predicted:  reddish-brown streaks 


Support:  The black check in western hemlock is the result of insect attacks.
What causes the black checked pattern in the wood of western hemlocks?
Correct:  insect attacks
predicted:  black check 


Support:  Glued engineered wood products are manufactured by bonding together wood strands, veneers, lumber or other forms of wood fiber with glue to form a larger, more efficient composite structural unit.
Wood fibers from wood strands, lumber, and what other source can be glued together to make larger units?
Correct:  veneers
predicted:  Glued engineered wood products


Support:  Minimum values are associated with green (water-saturated) wood and are referred to as basic specific gravity (Timell 1986).
When wood is described as "green," what do

Support:  In white pines there is not much contrast between the different parts of the ring, and as a result the wood is very uniform in texture and is easy to work.
Is white pine easy or difficult to work with because of its texture?
Correct:  easy
predicted:  white pines


Support:  Wood can be dated by carbon dating and in some species by dendrochronology to make inferences about when a wooden object was created.
If dendrochronology can't be used, what method would scientists employ to date wood?
Correct:  carbon dating
predicted:  Wood


Support:  If a heavy piece of pine is compared with a lightweight piece it will be seen at once that the heavier one contains a larger proportion of latewood than the other, and is therefore showing more clearly demarcated growth rings.
What feature of a piece of pine with more latewood would be more clear and pronounced?
Correct:  growth rings
predicted:  heavy piece 


Support:  However, there are major differences, depending on the kind of wood 

Support:  The choice of wood may make a significant difference to the tone and resonant qualities of the instrument, and tonewoods have widely differing properties, ranging from the hard and dense african blackwood (used for the bodies of clarinets) to the light but resonant European spruce (Picea abies), which is traditionally used for the soundboards of violins.
What's the common name for Picea abies?
Correct:  european spruce
predicted:  European


Support:  Sound knots do not weaken wood when subject to compression parallel to the grain.
What direction of compression in relation to its grain won't weaken wood with sound knots in it?
Correct:  parallel
predicted:  Sound knots


Support:  Many other types of sports and recreation equipment, such as skis, ice hockey sticks, lacrosse sticks and archery bows, were commonly made of wood in the past, but have since been replaced with more modern materials such as aluminium, fiberglass, carbon fiber, titanium, and composite materials.
What

Support:  The trends suggest that particle board and fiber board will overtake plywood.
If current building trends continue, what material will be replaced by particle and fiber board?
Correct:  plywood
predicted:  trends 


Support:  As the tree gets larger, the sapwood must necessarily become thinner or increase materially in volume.
As a tree grows bigger, if the sapwood layer doesn't increase in volume, what will it become?
Correct:  thinner
predicted:  sapwood 


Support:  Wood to be used for construction work is commonly known as lumber in North America.
What term is used in North America for wood used in construction?
Correct:  lumber
predicted:  Wood


Support:  Upon drying, wood shrinks and its density increases.
What will drying often increase in wood?
Correct:  strength
predicted:  density increases 


Support:  Wood to be used for construction work is commonly known as lumber in North America.
Outside of North America, if someone said "lumber," to what would they be referri

Support:  Elm in particular was used for this purpose as it resisted decay as long as it was kept wet (it also served for water pipe before the advent of more modern plumbing).
What wood is decay-resistant when wet?
Correct:  elm
predicted:  purpose 


Support:  Today a wider variety of woods is used: solid wood doors are often made from poplar, small-knotted pine, and Douglas fir.
For modern solid wood doors, what wood is often used in addition to small-knotted pine or poplar?
Correct:  douglas fir
predicted:  wider variety 


Support:  NBA courts have been traditionally made out of parquetry.
What is commonly used for the basketball courts the NBA plays on?
Correct:  parquetry
predicted:  NBA courts


Support:  A knot (when the base of a side branch) is conical in shape (hence the roughly circular cross-section) with the inner tip at the point in stem diameter at which the plant's vascular cambium was located when the branch formed as a bud.
What measurement of a stem determines a kn

Support:  The Olympic men's tournament is played at Under-23 level.
What is the age limit on the Olympic men's tournament? 
Correct:  under-23
predicted:  Olympic men's tournament 


Support:  There has been a football tournament at every Summer Olympic Games since 1900, except at the 1932 games in Los Angeles.
What country is the game wogabaliri from?
Correct:  australia
predicted:  Los Angeles


Support:  The FA's ban was rescinded in December 1969 with UEFA voting to officially recognise women's football in 1971.
In 1971, who officially recognized women's football?
Correct:  uefa
predicted:  FA's ban 


Support:  The length of the pitch for international adult matches is in the range of 100–110 m (110–120 yd) and the width is in the range of 64–75 m (70–80 yd).
What is the range for the width for international matches in meters?
Correct:  64–75 m
predicted:  length 


Support:  England is also home to the world's first football league, which was founded in Birmingham in 1888 by Asto

Support:  The primary law is that players other than goalkeepers may not deliberately handle the ball with their hands or arms during play, though they do use their hands during a throw-in restart.
Who can only handle the ball with their hands or arms during play?
Correct:  goalkeepers
predicted:  primary law 


Support:  The Cambridge Rules were written at Trinity College, Cambridge, at a meeting attended by representatives from Eton, Harrow, Rugby, Winchester and Shrewsbury schools.
Which college where the Cambridge Rules written at?
Correct:  trinity college
predicted:  Cambridge Rules 


Support:  By contrast, football is widely considered to have been the final proximate cause for the Football War in June 1969 between El Salvador and Honduras.
Football made which wars more tense in the 1990's?
Correct:  yugoslav wars
predicted:  contrast


Support:  The team that has scored more goals at the end of the game is the winner; if both teams have scored an equal number of goals then the

Support:  MDNA debuted at number one on the Billboard 200 and many other countries worldwide.
How many submissions does the initiative have?
Correct:  over 3,000
predicted:  number one


Support:  She met director Guy Ritchie, who would become her second husband, in November 1998 and gave birth to their son Rocco John Ritchie on August 11, 2000 in Los Angeles.
Who is Madonna's second husband?
Correct:  guy ritchie
predicted:  Rocco John Ritchie


Support:  In September 1993, Madonna embarked on The Girlie Show World Tour, in which she dressed as a whip-cracking dominatrix surrounded by topless dancers.
What was Madonna dressed in for the tour?
Correct:  whip-cracking dominatrix
predicted:  September


Support:  Madonna was later presented with the Artist Achievement Award by Tony Bennett at the 1996 Billboard Music Awards.
When was Madonna awarded the accolade?
Correct:  2003
predicted:  1996


Support:  The Recording Industry Association of America (RIAA) listed her as the best-sellin

Support:  It was Madonna's 37th top-ten hit on the chart—it pushed Madonna past Elvis Presley as the artist with the most top-ten hits.
Madonna surpassed which artist with the most top-ten hits?
Correct:  elvis presley
predicted:  Madonna's 37th top-ten hit


Support:  Rolling Stone listed Madonna at number 56 on the "100 Greatest Songwriters of All Time".
Who listed Ray of Light as "The 500 Greatest Albums of All Time?"
Correct:  rolling stone
predicted:  Madonna


Support:  Madonna's major influences include Karen Carpenter, The Supremes and Led Zeppelin, as well as dancers Martha Graham and Rudolf Nureyev.
Name one of Madonna's major influence?
Correct:  karen carpenter
predicted:  Madonna's major influences


Support:  In True Blue, she incorporated classical music in order to engage an older audience who had been skeptical of her music.
True Blue has what type of music Incorporated in it?
Correct:  classical
predicted:  


Support:  Madonna was named the top-earning celebrity of t

Support:  By the end of the 1980s, Madonna was named as the "Artist of the Decade" by MTV, Billboard and Musician magazine.
What is Madonna's real name?
Correct:  madonna louise ciccone
predicted:  end of the 1980s 


Support:  Beginning in April 1985, Madonna embarked on her first concert tour in North America, The Virgin Tour, with the Beastie Boys as her opening act.
Who were the opening act for Madonna's concert The Virgin tour?
Correct:  beastie boys
predicted:  Madonna


Support:  She recorded the film's theme song, "This Used to Be My Playground", which became a Hot 100 number one hit.
The song "I'll Remember" was recorded for which film?
Correct:  with honors
predicted:  Hot 100


Support:  Morton wrote that "Madonna is opportunistic, manipulative, and ruthless—somebody who won't stop until she gets what she wants—and that's something you can get at the expense of maybe losing your close ones.
Who wrote that Madonna is opportunistic, manipulative and ruthless?
Correct:  morton


Support:  Precipitation hardening alloys, such as certain alloys of aluminium, titanium, and copper, are heat-treatable alloys that soften when quenched (cooled quickly), and then harden over time.
Precipitation hardening alloys are sometimes also?
Correct:  heat-treatable alloys
predicted:  certain alloys


Support:  Great care is often taken during the alloying process to remove excess impurities, using fluxes, chemical additives, or other methods of extractive metallurgy.
Using fluxes and chemical additives during the alloying process does what?
Correct:  remove excess impurities
predicted:  Great care


Support:  Because pig iron could be melted, people began to develop processes of reducing the carbon in the liquid pig iron to create steel.
By reducing carbon in liquid pig iron, what was created?
Correct:  steel
predicted:  people


Support:  Meteoric iron could be forged from a red heat to make objects such as tools, weapons, and nails.
Red gold is made by combining gold with?
Co

Support:  The administrative school such as ENA has been relocated to Strasbourg, the political science school Sciences-Po is still located in Paris' 7th arrondissement and the most prestigious university of economics and finance, Paris-Dauphine, is located in Paris' 16th.
Where was the University of Paris located?
Correct:  the left bank
predicted:  Strasbourg


Support:  [citation needed] At its origin, before the Middle Ages, the city was composed around several islands and sandbanks in a bend of the Seine; of those, two remain today: the île Saint-Louis, the île de la Cité; a third one is the 1827 artificially created île aux Cygnes.
When was ile aux Cygnes created?
Correct:  1827
predicted:  Middle Ages 


Support:  Each year, however, there are a few days where the temperature rises above 32 °C (90 °F).
How many years was Francois Mitterrand in power?
Correct:  14
predicted:  32


Support:  The average net household income (after social, pension and health insurance contributions

Support:  In 1163, during the reign of Louis VII, Maurice de Sully, bishop of Paris, undertook the construction of the Notre Dame Cathedral at its eastern extremity.
Under who's reign was the construction of Notre Dame Cathedral?
Correct:  louis vii
predicted:  Maurice de Sully


Support:  Other famous Paris music halls include Le Lido, on the Champs-Élysées, opened in 1946; and the Crazy Horse Saloon, featuring strip-tease, dance and magic, opened in 1951.
In what year did the Crazy Horse Saloon open?
Correct:  1951
predicted:  1946


Support:  In 2010 it was the workplace of 144,600 employees, of whom 38 percent worked in finance and insurance, 16 percent in business support services.
What percentage of people worked in finance and insurance?
Correct:  5.9
predicted:  38


Support:  Paris region manufacturing specialises in transportation, mainly automobiles, aircraft and trains, but this is in a sharp decline: Paris proper manufacturing jobs dropped by 64 percent between 1990 and 20

Support:  The budget of the city for 2013 was €7.6 billion, of which 5.4 billion went for city administration, while €2.2 billion went for investment.
What was the budget of the city in 2013
Correct:  €7.6 billion
predicted:  budget 


Support:  Almost all Protestant denominations are represented in Paris, with 74 evangelical churches from various denominations, including 21 parishes of the United Protestant Church of France and two parishes of the Church of Jesus Christ of the Latter-Day Saints.
How many parishes of the LDS church are in Paris?
Correct:  two
predicted:  74


Support:  In 2011, while only 56,927 construction workers worked in Paris itself, its metropolitan area employed 246,639, in an activity centred largely around the Seine-Saint-Denis (41,378) and Hauts-de-Seine (37,303) departments and the new business-park centres appearing there.
How many centres of higher education are in Paris?
Correct:  55
predicted:  56,927 


Support:  One of the most popular of all French w

Support:  Most of the postwar's presidents of the Fifth Republic wanted to leave their own monuments in Paris; President Georges Pompidou started the Centre Georges Pompidou (1977), Valéry Giscard d'Estaing began the Musée d'Orsay (1986); President François Mitterrand, in power for 14 years, built the Opéra Bastille (1985-1989), the Bibliothèque nationale de France (1996), the Arche de la Défense (1985-1989), and the Louvre Pyramid with its underground courtyard (1983-1989); Jacques Chirac (2006), the Musée du quai Branly.
Who built the Louvre Pyramid?
Correct:  i.m . pei
predicted:  Jacques Chirac


Support:  Two other important districts, Neuilly-sur-Seine and Levallois-Perret, are extensions of the Paris business district and of La Defense.
How many peole work in the La Defense district?
Correct:  144,600
predicted:  Two


Support:  By the end of the Western Roman Empire, the town was known simply as Parisius in Latin and Paris in French.
What was Paris known as in Latin?
Correct:  

Support:  The city's population loss came to an end in the 21st century; the population estimate of July 2004 showed a population increase for the first time since 1954, and the population reached 2,234,000 by 2009.
What year was the first population increase since 1954?
Correct:  2004
predicted:  21st century 


Support:  In addition there are eighty male religious orders and 140 female religious orders in the city, as well as 110 Catholic schools with 75,000 students.
How many students attend catholic schools?
Correct:  75,000
predicted:  eighty


Support:  Paul Delouvrier promised to resolve the Paris-suburbs mésentente when he became head of the Paris region in 1961: two of his most ambitious projects for the Region were the construction of five suburban villes nouvelles ("new cities") and the RER commuter train network.
How many suburban villes nouvelles did Paul Delouvrier constuct?
Correct:  five
predicted:  two


Support:  According to the 2012 census, 135,853 residents of the 

Support:  Most of the postwar's presidents of the Fifth Republic wanted to leave their own monuments in Paris; President Georges Pompidou started the Centre Georges Pompidou (1977), Valéry Giscard d'Estaing began the Musée d'Orsay (1986); President François Mitterrand, in power for 14 years, built the Opéra Bastille (1985-1989), the Bibliothèque nationale de France (1996), the Arche de la Défense (1985-1989), and the Louvre Pyramid with its underground courtyard (1983-1989); Jacques Chirac (2006), the Musée du quai Branly.
In what year was the Musee de quai Branly built?
Correct:  2006
predicted:  1977


Support:  More recently, the average temperature for July 2011 was 17.6 °C (63.7 °F), with an average minimum temperature of 12.9 °C (55.2 °F) and an average maximum temperature of 23.7 °C (74.7 °F).
What was the average temperature for July 2011?
Correct:  17.6 °c
predicted:  12.9


Support:  In 1648, the Académie royale de peinture et de sculpture (Royal Academy of Painting and Sculp

Support:  Montmartre and Montparnasse became centres for artistic production.
What two towns were pinnacle for artistic production?
Correct:  montmartre and montparnasse
predicted:  Montmartre


Support:  Paris' metropolitan area spans most of the Paris region and has a population of 12,341,418 (Jan. 2012 census), or one-fifth of the population of France.
What is the population of Paris' metropolitan area?
Correct:  12,341,418
predicted:  Jan. 2012


Support:  Victor Hugo's The Hunchback of Notre Dame inspired the renovation of its setting, the Notre-Dame de Paris.
Who wrote The Hunchback of Notre Dame?
Correct:  victor hugo
predicted:  Victor Hugo's The Hunchback


Support:  Paris played host to the 1900 and 1924 Summer Olympics, the 1938 and 1998 FIFA World Cups, and the 2007 Rugby World Cup.
In what year did Paris host the World Cup?
Correct:  2007
predicted:  1900


Support:  The Hôtel Ritz on Place Vendôme opened in 1898, followed by the Hôtel Crillon in an 18th-century building o

Support:  On July 17, 2012, the CRTC approved the shut down of CBC's analogue transmitters, noting that "while the Commission has the discretion to refuse to revoke broadcasting licences, even on application from a licensee, it cannot direct the CBC or any other broadcaster to continue to operate its stations and transmitters."
On what date did the CTRC approve the shut down of CBC's analogue transmitters?
Correct:  july 17 , 2012
predicted:  July


Support:  At the transition deadline, Barrie, Ontario lost both CBC and Radio-Canada signals as the CBC did not request that the CRTC allow these transmitters to continue operating.
Did Barrie, Ontario lose signal for CBC, Radio-Canada, or both?
Correct:  both
predicted:  


Support:  Under the CBC's current arrangement with Rogers Communications for National Hockey League broadcast rights, Hockey Night in Canada broadcasts on CBC-owned stations and affiliates are not technically aired over the CBC Television network, but over a separate CR

Support:  Island genetics is believed to be a common source of speciation in general, especially when it comes to radical adaptations that seem to have required inferior transitional forms.
What did radical adaptations seem to have required?
Correct:  inferior transitional forms
predicted:  Island genetics


Support:  Another possible whole genome duplication event at 160 million years ago perhaps created the ancestral line that led to all modern flowering plants.
What type of event perhaps created the line which led to modern flowering plants?
Correct:  whole genome duplication
predicted:  Another possible whole genome duplication event


Support:  Island genetics is believed to be a common source of speciation in general, especially when it comes to radical adaptations that seem to have required inferior transitional forms.
What is island genetics thought to be a default source of?
Correct:  speciation
predicted:  Island genetics


Support:  The number of families in APG (1998) was 4

Support:  Flowers show remarkable variation in form and elaboration, and provide the most trustworthy external characteristics for establishing relationships among angiosperm species.
What characteristic flowers show variation in?
Correct:  form and elaboration
predicted:  Flowers


Support:  The apparently sudden appearance of nearly modern flowers in the fossil record initially posed such a problem for the theory of evolution that it was called an "abominable mystery" by Charles Darwin.
What did Charles Darwin call the sudden appearance of nearly modern flowers in the fossil record?
Correct:  abominable mystery
predicted:  apparently sudden appearance 


Support:  [citation needed] From that time onward, as long as these Gymnosperms were, as was usual, reckoned as dicotyledonous flowering plants, the term Angiosperm was used antithetically by botanical writers, with varying scope, as a group-name for other dicotyledonous plants.
What is the group-name angiosperm has been used for by 

Support:  Washington University has over 300 undergraduate student organizations on campus.
What organization sought to strip Washington University of its tax-exempt status in the 1940's?
Correct:  the naacp
predicted:  Washington University


Support:  Among its many recent initiatives, The Genome Center at Washington University (directed by Richard K. Wilson) played a leading role in the Human Genome Project, having contributed 25% of the finished sequence.
How much of a contribution did the Genome Center at Washington University make in sequencing the Human Genome Project?
Correct:  25 %
predicted:  Genome Center 


Support:  The law school offers 3 semesters of courses in the Spring, Summer, and Fall, and requires at least 85 hours of coursework for the JD.
How many hours of coursework are required to obtain a JD at the Washington University School of Law?
Correct:  85 hours
predicted:  3


Support:  The cornerstone of the first building, Busch Hall, was laid on October 20, 1900.
W

Support:  The university's first chancellor was Joseph Gibson Hoyt.
Who was the first chancellor of Washington University?
Correct:  joseph gibson hoyt .
predicted:  Joseph Gibson Hoyt


Support:  Washington University's sports teams are called the Bears.
What is the name of the Washington University sports teams?
Correct:  the bears
predicted:  Washington University's


Support:  The Greenleafs, an all-female group is the oldest (and only) female group on campus.
What is the oldest female a cappella group at Washington University?
Correct:  the greenleafs
predicted:  Greenleafs


Support:  Most are funded by the Washington University Student Union, which has a $2 million plus annual budget that is completely student-controlled and is one of the largest student government budgets in the country.
What is the amount of the Washington University Student Union annual budget?
Correct:  $ 2 million
predicted:  Washington University Student Union 


Support:  Washington University School of L

Support:  Dance also plays a large role in Malian culture.
Aside from music what other activity plays a significant role in culture?
Correct:  dance
predicted:  Malian


Support:  In the far north, there is a division between Berber-descendent Tuareg nomad populations and the darker-skinned Bella or Tamasheq people, due the historical spread of slavery in the region.
What region of the country is historical slavery well known?
Correct:  far north
predicted:  Tuareg


Support:  The coup is remembered as Mali's March Revolution of 1991.
In March of 2012 whom gained control of Mali?
Correct:  amadou sanogo
predicted:  coup 


Support:  Some famous Malian influences in music are kora virtuoso musician Toumani Diabaté, the late roots and blues guitarist Ali Farka Touré, the Tuareg band Tinariwen, and several Afro-pop artists such as Salif Keita, the duo Amadou et Mariam, Oumou Sangare, and Habib Koité.
What is the name of the musician that was a part of the roots and was also a blues guitar

Support:  An exclusive boundary arises, for example, when a person adopts a marker that imposes restrictions on the behaviour of others.
A marker that imposes restriction on the behavior of others is what kind of boundary?
Correct:  exclusive
predicted:  person 


Support:  In the same way as Barth, in his approach to ethnicity, advocated the critical focus for investigation as being "the ethnic boundary that defines the group rather than the cultural stuff that it encloses" (1969:15), social anthropologists such as Cohen and Bray have shifted the focus of analytical study from identity to the boundaries that are used for purposes of identification.
What researcher advocated for focus on the boundaries of ethnic groups rather than the cultural aspects of ethnic groups?
Correct:  barth
predicted:  same way 


Support:  The strategic manipulator is a person who begins to regard all senses of identity merely as role-playing exercises, and who gradually becomes alienated from his or her so

Support:  The "Neo-Eriksonian" identity status paradigm emerged in later years[when?
What identity status paradigm emerged due to the work of James Marcia?
Correct:  neo-eriksonian
predicted:  `` Neo-Eriksonian '' identity status paradigm 


Support:  At the same time, the Eriksonian approach to identity remained in force, with the result that identity has continued until recently to be used in a largely socio-historical way to refer to qualities of sameness in relation to a person's connection to others and to a particular group of people.
Until recently, what approach was used to refer to qualities of sameness in relation to a person's connection to others?
Correct:  the eriksonian approach
predicted:  Eriksonian approach 


Support:  Finally, achievement is when a person makes identity choices and commits to them.
What is the permutation when a person commits to identity choices?
Correct:  identity achievement
predicted:  achievement


Support:  Diffusion is when a person lacks both

Support:  [citation needed] Eastern Catholic cardinals continue to wear the normal dress appropriate to their liturgical tradition, though some may line their cassocks with scarlet and wear scarlet fascias, or in some cases, wear Eastern-style cassocks entirely of scarlet.
Which Cardinals still wear traditional clothing?
Correct:  eastern catholic cardinals
predicted:  citation


Support:  Cardinal deacons are given title to one of these deaconries.
What does the pope give to a new Cardinal?
Correct:  a gold ring
predicted:  Cardinal deacons


Support:  In Latin, on the other hand, the [First name] Cardinal [Surname] order is used in the proclamation of the election of a new pope by the cardinal protodeacon: "Annuntio vobis gaudium magnum; habemus Papam: Eminentissimum ac Reverendissimum Dominum, Dominum (first name) Sanctae Romanae Ecclesiae Cardinalem (last name), ..." (Meaning: "I announce to you a great joy; we have a Pope: The Most Eminent and Most Reverend Lord, Lord (first name)

Support:  It is because of the scarlet color of cardinals' vesture that the bird of the same name has become known as such.
What color is the hat?
Correct:  white
predicted:  scarlet color 


Support:  Though in modern times most cardinals are also bishops, the term "cardinal bishop" only refers to the cardinals who are titular bishops of one of the "suburbicarian" sees.
In what year was it declared that Cardinals had to be bishops?
Correct:  1962
predicted:  one


Support:  The right to enter the conclave of cardinals where the pope is elected is limited to those who have not reached the age of 80 years by the day the vacancy occurs.
In what year did the pope make the declaration?
Correct:  1567
predicted:  age of 80 years 


Support:  While in 1939 about half were Italian by 1994 the number was reduced to one third.
In 1994, how many Cardinals were Italian?
Correct:  third
predicted:  half


Support:  If a pope dies before revealing the identity of an in pectore cardinal, the cardina

Support:  In ancient Greece, the epics of Homer, who wrote the Iliad and the Odyssey, and Hesiod, who wrote Works and Days and Theogony, are some of the earliest, and most influential, of Ancient Greek literature.
Who wrote Works and Days and Theogony?
Correct:  hesiod
predicted:  Works


Support:  Authors choose literary device according to what psychological emotion he or she is attempting to describe, thus certain literary devices are more emotionally effective than others.
Reporting that attempts a creative or literary bent is sometimes called what?
Correct:  literary journalism
predicted:  Authors


Support:  Authors choose literary device according to what psychological emotion he or she is attempting to describe, thus certain literary devices are more emotionally effective than others.
What are the levers an author uses in literature to describe a psychological emotion?
Correct:  literary devices
predicted:  Authors


Support:  They may indicate to a reader that there is a famil

Support:  Much of the output of naturalism was implicitly polemical, and influenced social and political change, but 20th century fiction and drama moved back towards the subjective, emphasising unconscious motivations and social and environmental pressures on the individual.
20th century literature reacted to the objectivity of naturalism in the 19th century to what?
Correct:  the subjective
predicted:  20th century


Support:  William Burroughs, in his early works, and Hunter S. Thompson expanded documentary reporting into strong subjective statements after the second World War, and post-modern critics have disparaged the idea of objective realism in general.
Critics based in what movement find fault in objective realism?
Correct:  post-modern
predicted:  William Burroughs


Support:  In Western Europe prior to the eighteenth century, literature as a term indicated all books and writing.
What French term for value-based literature literally translates as "fine writing?"
Correct:  bel

Support:  The brain is an organ that serves as the center of the nervous system in all vertebrate and most invertebrate animals.
What is the center of the nervous system in all creatures?
Correct:  the brain
predicted:  brain 


Support:  A myelinated axon is wrapped in a fatty insulating sheath of myelin, which serves to greatly increase the speed of signal propagation.
What period in history was anatomical  studies of nerves greatly increased?
Correct:  the renaissance
predicted:  myelinated axon 


Support:  Psychology seeks to understand mind and behavior, and neurology is the medical discipline that diagnoses and treats diseases of the nervous system.
What field of science strives to diagnose and treat diseases of the nervous system?
Correct:  neurology
predicted:  Psychology


Support:  Vertebrate brains are surrounded by a system of connective tissue membranes called meninges that separate the skull from the brain.
The brain is separated from the bloodstream by what feature?
Cor

Support:  Because the brain does not contain pain receptors, it is possible using these techniques to record brain activity from animals that are awake and behaving without causing distress.
Animals without a spine are called what?
Correct:  invertebrates
predicted:  brain 


Support:  Democritus, the inventor of the atomic theory of matter, argued for a three-part soul, with intellect in the head, emotion in the heart, and lust near the liver.
Who invented the atomic theory of matter?
Correct:  democritus ,
predicted:  Democritus


Support:  The most important is brain disease and the effects of brain damage, covered in the human brain article because the most common diseases of the human brain either do not show up in other species, or else manifest themselves in different ways.
What type of disease if often studied to understand damage to the brain?
Correct:  strokes
predicted:  brain disease


Support:  On average, a mammal has a brain roughly twice as large as that of a bird of th

Support:  Humans have an average EQ in the 7-to-8 range, while most other primates have an EQ in the 2-to-3 range.
Primates have an EQ in what range?
Correct:  2-to-3
predicted:  7-to-8 range 


Support:  A myelinated axon is wrapped in a fatty insulating sheath of myelin, which serves to greatly increase the speed of signal propagation.
An axon that can greatly increase speed of signals is wrapped in what?
Correct:  sheath of myelin
predicted:  fatty insulating sheath 


Support:  The brain is an organ that serves as the center of the nervous system in all vertebrate and most invertebrate animals.
Which brains are easier to work on, vertebrates or invertebrates?
Correct:  invertebrate
predicted:  brain 


Support:  A key component of the arousal system is the suprachiasmatic nucleus (SCN), a tiny part of the hypothalamus located directly above the point at which the optic nerves from the two eyes cross.
The suprachiasmatic nucleus is a small part of what part of the brain?
Correct:  t

Support:  Neuroanatomists usually divide the vertebrate brain into six main regions: the telencephalon (cerebral hemispheres), diencephalon (thalamus and hypothalamus), mesencephalon (midbrain), cerebellum, pons, and medulla oblongata.
The cerebral hemispheres of the brain are called what?
Correct:  telencephalon
predicted:  Neuroanatomists


Support:  The reward mechanism is better understood than the punishment mechanism, because its role in drug abuse has caused it to be studied very intensively.
Which neurotransmitter plays a large role in drug abuse?
Correct:  dopamine
predicted:  reward mechanism 


Support:  These distortions can make it difficult to match brain components from one species with those of another species.
Which part of the brain has led to many distortions among different species?
Correct:  forebrain area
predicted:  distortions 


Support:  For example, primates have brains 5 to 10 times larger than the formula predicts.
Which group of animals have brains 5-10 ti

Support:  The Spanish crown maintained its hegemony in Europe and was the leading power on the continent until the signing of the Treaty of the Pyrenees, which ended a conflict between Spain and France that had begun during the Thirty Years' War.
Between which two nations was the Treaty of the Pyrenees signed?
Correct:  spain and france
predicted:  Spanish crown 


Support:  There are other language groupings in Southern Europe.
What is the most common group of languages spoken in Mediterranean Europe?
Correct:  romance languages
predicted:  language groupings 


Support:  By 300 AD the Roman Empire was divided into the Western Roman Empire based in Rome, and the Eastern Roman Empire based in Constantinople.
Where was the headquarters of the Western Roman Empire?
Correct:  rome
predicted:  Roman Empire 


Support:  The countries in the Soviet sphere of influence joined the military alliance known as the Warsaw Pact and the economic bloc called Comecon.
What was the military partnership

Support:  The MoD has been criticised for an ongoing fiasco, having spent £240m on eight Chinook HC3 helicopters which only started to enter service in 2010, years after they were ordered in 1995 and delivered in 2001.
When were the helicopters ordered?
Correct:  1995
predicted:  2010


Support:  The Surgeon General, represents the Defence Medical Services on the Defence Staff, and is the clinical head of that service.
Who leads the Defence Medical Services?
Correct:  the surgeon general
predicted:  Surgeon General 


Support:  The Ministry of Defence (MoD) is the British government department responsible for implementing the defence policy set by Her Majesty's Government, and is the headquarters of the British Armed Forces.
For which part of the government is the MoD the headquarters?
Correct:  british armed forces
predicted:  Defence


Support:  The headquarters of the MoD are in Whitehall and are now known as Main Building.
Who designed the headquarters of the MoD?
Correct:  vincent

Support:  The most notable fraud conviction was that of Gordon Foxley, head of defence procurement at the Ministry of Defence from 1981 to 1984.
What years was Foxley employed by the MoD?
Correct:  1981 to 1984
predicted:  most notable fraud conviction 


Support:  Dannatt criticised a remnant "Cold War mentality", with military expenditures based on retaining a capability against a direct conventional strategic threat; He said currently only 10% of the MoD's equipment programme budget between 2003 and 2018 was to be invested in the "land environment"—at a time when Britain was engaged in land-based wars in Afghanistan and Iraq.
Where was Britain involved in a land war when some thought that land wars were basically a thing of the past?
Correct:  afghanistan and iraq
predicted:  Afghanistan


Support:  A significant form of post-Vedic Sanskrit is found in the Sanskrit of Indian epic poetry—the Ramayana and Mahabharata.
What organization is trying to revive Sanskrit?
Correct:  samskrita

Support:  According to Thomas Trautmann, after this period of "Indomania", a certain hostility to Sanskrit and to Indian culture in general began to assert itself in early 19th century Britain, manifested by a neglect of Sanskrit in British academia.
Besides assimilation of British culture, what else did the British consider Indians to be?
Correct:  inferior
predicted:  Thomas Trautmann


Support:  In the United States, since September 2009, high school students have been able to receive credits as Independent Study or toward Foreign Language requirements by studying Sanskrit, as part of the "SAFL: Samskritam as a Foreign Language" program coordinated by Samskrita Bharati.
What is the credit towards in study in the U.S.?
Correct:  foreign language
predicted:  United States 


Support:  Scholars often distinguish Vedic Sanskrit and Classical or "Pāṇinian" Sanskrit as separate dialects.
As what are Classical and Vedic Sanskrit often viewed?
Correct:  separate dialects
predicted:  Scholar

Support:  According to Thomas Trautmann, after this period of "Indomania", a certain hostility to Sanskrit and to Indian culture in general began to assert itself in early 19th century Britain, manifested by a neglect of Sanskrit in British academia.
According to Panini, from where did Sanskrit evolve?
Correct:  earlier vedic
predicted:  Britain


Support:  Sometime between the fourth and eighth centuries, the Gupta script, derived from Brahmi, became prevalent.
From what was Gupta derived?
Correct:  brahmi
predicted:  fourth and eighth centuries 


Support:  One opponent to the decision, Anthon H. Lund, later said, "I hope their head will grow big enough for their hat."
Who was chosen to head BYU?
Correct:  george h. brimhall
predicted:  Anthon H. Lund


Support:  The main campus in Provo, Utah, United States sits on approximately 560 acres (2.3 km2) nestled at the base of the Wasatch Mountains and includes 295 buildings.
Where is BYU's main campus?
Correct:  provo , utah
predicted:  

Support:  Lee was responsible for the Benson Science Building and the Museum of Art on campus.
Which president was responsible for the most amount of growth to the campus?
Correct:  ernest l. wilkinson
predicted:  Lee


Support:  BYU's Museum of Art, for example, is one of the largest and most attended art museums in the Mountain West.
Which BYU building is one of the most attended art museus in the Mountain West?
Correct:  museum of art
predicted:  Mountain West 


Support:  The high rate of enrollment at the university by members of The Church of Jesus Christ of Latter-day Saints (more than 98 percent) results in an amplification of LDS cultural norms; BYU was ranked by The Princeton Review in 2008 as 14th in the nation for having the happiest students and highest quality of life.
What does BYU's high rate of enrollment by LDS members result in regarding LDS cultural norms?
Correct:  amplification of
predicted:  high rate 


Support:  Jeffrey R. Holland followed as president in 1980,

Support:  The BYU Jerusalem Center, which was closed in 2000 due to student security concerns related to the Second Intifada and, more recently, the 2006 Israel-Lebanon conflict, was reopened to students in the Winter 2007 semester.
When was the BYU Jerusalem Center reopened for students following it's closure in 2000?
Correct:  winter 2007 semester
predicted:  2006


Support:  Franklin S. Harris was appointed the university's president in 1921.
What type of degree did BYU's appointed president of 1921, Franklin S. Harris have that no former BYU president had?
Correct:  doctoral
predicted:  Franklin S. Harris


Support:  Brigham Young University (often referred to as BYU or, colloquially, The Y) is a private research university located in Provo, Utah, United States.
Where was Brigham  Young's school originally believed to be located?
Correct:  draper , utah
predicted:  Provo


Support:  Approximately 51 percent of the graduates in BYU's class of 2005 were married.
What percentage of BY

Support:  In 1903, Brigham Young Academy was dissolved, and was replaced by two institutions: Brigham Young High School, and Brigham Young University.
Where is Brigham Young University?
Correct:  provo , utah
predicted:  1903


Support:  An education at BYU is also less expensive than at similar private universities, since "a significant portion" of the cost of operating the university is subsidized by the church's tithing funds.
How does the cost of attending BYU compare to the cost of attending similar universities?
Correct:  less expensive
predicted:  also


Support:  BYU is also ranked No.
What is BYU's ranking among private universities?
Correct:  third
predicted:  BYU


Support:  BYU is also ranked No.
What is BYU's ranking among private universities?
Correct:  third
predicted:  BYU


Support:  '96, Heisman Trophy winner Ty Detmer '90, and two-time Super Bowl winner Jim McMahon.
Which two-time Super Bowl winner graduated from BYU?
Correct:  jim mcmahon
predicted:  two-time Super 

Support:  Colour broadcasts started at similarly higher resolutions, first with the US NTSC color system in 1953, which was compatible with the earlier monochrome systems and therefore had the same 525 lines of resolution.
How many lines of resolution could an image using the Transformator system have?
Correct:  1,125
predicted:  525


Support:  The 1080i30 or 1080i60 notation identifies interlaced scanning format with 30 frames (60 fields) per second, each frame being 1,920 pixels wide and 1,080 pixels high.
The 1080i30 or 1080i60 notion identifies interlaced scanning format with how many frames per second?
Correct:  30
predicted:  1,920


Support:  There were four major HDTV systems tested by SMPTE in the late 1970s, and in 1979 an SMPTE study group released A Study of High Definition Television Systems: Since the formal adoption of digital video broadcasting's (DVB) widescreen HDTV transmission modes in the early 2000s; the 525-line NTSC (and PAL-M) systems, as well as the European 

Support:  Analog tape recorders with bandwidth capable of recording analog HD signals, such as W-VHS recorders, are no longer produced for the consumer market and are both expensive and scarce in the secondary market.
In the secondary market, analog tape recorders are considered what?
Correct:  expensive and scarce
predicted:  Analog tape recorders


Support:  Additionally, cable-ready TV sets can display HD content without using an external box.
What kind of built-in tuner will allow a cable-ready TV to display HD content without an external box?
Correct:  qam
predicted:  cable-ready TV sets


Support:  The limited standardization of analog HDTV in the 1990s did not lead to global HDTV adoption as technical and economic constraints at the time did not permit HDTV to use bandwidths greater than normal television.
What constraints kept HDTV from being widely adopted in the 90's?
Correct:  technical and economic
predicted:  limited standardization 


Support:  This new system, NHK Color,

Support:  Satellite test broadcasts started in 1989, with regular testing starting in 1991 and regular broadcasting of BS-9ch commencing on November 25, 1994, which featured commercial and NHK programming.
What year did color broadcasts start in the US?
Correct:  1953 ,
predicted:  1989


Support:  The first public HDTV broadcast in the United States occurred on July 23, 1996 when the Raleigh, North Carolina television station WRAL-HD began broadcasting from the existing tower of WRAL-TV southeast of Raleigh, winning a race to be first with the HD Model Station in Washington, D.C., which began broadcasting July 31, 1996 with the callsign WHD-TV, based out of the facilities of NBC owned and operated station WRC-TV.
When did the first public HDTV broadcast happen in the US?
Correct:  july 23 , 1996
predicted:  July 23, 1996


Support:  In 1958, the Soviet Union developed Тransformator (Russian: Трансформатор, meaning Transformer), the first high-resolution (definition) television system 

Support:  It is also known informally by its nickname, The Sooner State, in reference to the non-Native settlers who staked their claims on the choicest pieces of land before the official opening date, and the Indian Appropriations Act of 1889, which opened the door for white settlement in America's Indian Territory.
What is Oklahoma's nickname?
Correct:  the sooner state
predicted:  nickname 


Support:  Oklahoma holds eleven public regional universities, including Northeastern State University, the second-oldest institution of higher education west of the Mississippi River, also containing the only College of Optometry in Oklahoma and the largest enrollment of Native American students in the nation by percentage and amount.
What is the oldest community theater group west of the Mississippi River?
Correct:  theatre tulsa
predicted:  Mississippi River 


Support:  Oklahoma is the 20th largest state in the United States, covering an area of 69,898 square miles (181,035 km2), with 68,667

Support:  Oklahoma's centennial celebration was named the top event in the United States for 2007 by the American Bus Association, and consisted of multiple celebrations saving with the 100th anniversary of statehood on November 16, 2007.
When was the exact hundredth anniversary of Oklahoma's statehood?
Correct:  november 16 , 2007
predicted:  2007


Support:  Of the state's ten largest cities, three are outside the metropolitan areas of Oklahoma City and Tulsa, and only Lawton has a metropolitan statistical area of its own as designated by the United States Census Bureau, though the metropolitan statistical area of Fort Smith, Arkansas extends into the state.
What Arkansas city's metro area extends into Oklahoma?
Correct:  fort smith
predicted:  Oklahoma City


Support:  Precipitation and temperatures decline from east to west accordingly, with areas in the southeast averaging an annual temperature of 62 °F (17 °C) and an annual rainfall of generally over 40 inches (1,020 mm) and up t

Support:  Other languages include French with 8,258 speakers (0.3%), Chinese with 6,413 (0.2%), Korean with 3,948 (0.1%), Arabic with 3,265 (0.1%), other Asian languages with 3,134 (0.1%), Tagalog with 2,888 (0.1%), Japanese with 2,546 (0.1%), and African languages with 2,546 (0.1%).
What is Oklahoma's fifth most popular language?
Correct:  vietnamese
predicted:  Arabic


Support:  Oklahoma had 598 incorporated places in 2010, including four cities over 100,000 in population and 43 over 10,000.
Where does Oklahoma rank by population?
Correct:  28th
predicted:  Oklahoma


Support:  The Philbrook Museum of Tulsa is considered one of the top 50 fine art museums in the United States, and the Sam Noble Oklahoma Museum of Natural History in Norman, one of the largest university-based art and history museums in the country, documents the natural history of the region.
How many museums are in Oklahoma?
Correct:  more than 300
predicted:  one


Support:  In descending order of population, Oklah

Support:  Although registered Republicans were a minority in the state until 2015, starting in 1952, Oklahoma has been carried by Republican presidential candidates in all but one election (1964).
What is the one election since 1952 where a Republican presidential candidate didn't win in Oklahoma?
Correct:  1964
predicted:  registered Republicans


Support:  Prominent theatre companies in Oklahoma include, in the capital city, Oklahoma City Theatre Company, Carpenter Square Theatre, Oklahoma Shakespeare in the Park, and CityRep.
What is the 2nd-largest company in Oklahoma?
Correct:  williams companies
predicted:  Oklahoma City Theatre Company


Support:  With 39,000 acres (158 km2), the Tallgrass Prairie Preserve in north-central Oklahoma is the largest protected area of tallgrass prairie in the world and is part of an ecosystem that encompasses only 10 percent of its former land area, once covering 14 states.
What part of Oklahoma is the Tallgrass Prairie Preserve in?
Correct:  north-

Support:  In 2010, the state had the nation's third highest number of bridges classified as structurally deficient, with nearly 5,212 bridges in disrepair, including 235 National Highway System Bridges.
How many national highway bridges in Oklahoma were found to be deficient in 2010?
Correct:  235
predicted:  nearly 5,212


Support:  In descending order of population, Oklahoma's largest cities in 2010 were: Oklahoma City (579,999, +14.6%), Tulsa (391,906, −0.3%), Norman (110,925, +15.9%), Broken Arrow (98,850, +32.0%), Lawton (96,867, +4.4%), Edmond (81,405, +19.2%), Moore (55,081, +33.9%), Midwest City (54,371, +0.5%), Enid (49,379, +5.0%), and Stillwater (45,688, +17.0%).
What is Oklahoma's 2nd-largest city?
Correct:  tulsa
predicted:  Oklahoma City


Support:  Mid-summer (July and August) represents a secondary dry season over much of Oklahoma, with long stretches of hot weather with only sporadic thunderstorm activity not uncommon many years.
How much of Oklahoma's electricity come

ValueError: list.remove(x): x not in list