In [6]:
import nltk
import json
import spacy
from nltk.corpus import stopwords
from math import log
from collections import defaultdict, Counter
from string import punctuation
from nltk.stem.wordnet import WordNetLemmatizer
import csv
from gensim.summarization import bm25

OPEN_QUESTION_WORDS = ['what', 'who', 'whose', 'whom', 'where', 'when', 'why', 'how',
                       'which', "what's", "who's", "where's", "how's"]
CLOSED_QUESTION_WORDS = ['is', 'are', 'am', 'was', 'were', 'do', 'does,', 'did', 'can',
                         'could', 'will', 'would', 'shall', 'should', 'have', 'has',
                         'had']

# Stop words
stop = set(stopwords.words('english'))

lmtz = WordNetLemmatizer()

with open('testing.json') as json_data:
    test = json.load(json_data)

with open('documents.json') as json_data:
    documents = json.load(json_data)

# Spacy toolkit
nlp = spacy.load('en_core_web_sm')

punc = set(punctuation)


def strip_punctuation(s):
    return ''.join(c for c in s if c not in punc)


def lemmatize(token):
    lemma = lmtz.lemmatize(token, 'v')
    if lemma == token:
        lemma = lmtz.lemmatize(token, 'n')
    return lemma


def extract_term_freqs(doc):
    tfs = {}
    for token in nltk.word_tokenize(doc):
        lemma = lemmatize(token.lower())
        if lemma not in stop and lemma.isalpha():
            tfs[lemma] = tfs.get(lemma, 0) + 1
    return tfs


def compute_doc_freqs(doc_term_freqs):
    dfs = Counter()
    for tfs in doc_term_freqs.values():
        for term in tfs.keys():
            dfs[term] += tfs[term]
    return dfs


def query_vsm(query, index, k=10):
    accumulator = Counter()
    for term in query:
        postings = index[term]
        for docid, weight in postings:
            accumulator[docid] += weight
    return accumulator.most_common(k)


# Find the question word
def get_qword(question):
    tokens = nltk.word_tokenize(question.lower())
    for token in tokens:
        if token in OPEN_QUESTION_WORDS:
            return token
    for token in tokens:
        if token in CLOSED_QUESTION_WORDS:
            return token
    return 'others'


# length of longest same sequences of keywords
def get_overlap(sent1, sent2):
    tokens1 = []
    tokens2 = []

    for token in nltk.word_tokenize(strip_punctuation(sent1.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens1.append(lemma)

    for token in nltk.word_tokenize(strip_punctuation(sent2.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens2.append(lemma)

    max = 0
    for i in range(len(tokens1)):
        for j in range(len(tokens2)):

            if tokens1[i] == tokens2[j]:
                length = 1

                ii = i + 1
                jj = j + 1
                while ii < len(tokens1) and jj < len(tokens2) and \
                        tokens1[ii] == tokens2[jj]:
                    ii += 1
                    jj += 1
                    length += 1

                if length > max:
                    max = length

    return max


csvFile = open("low.csv", "w")
writer = csv.writer(csvFile)
header = ['id','answer']
writer.writerow(header)

case_count = 0
# test = [test[17]]
for test_case in test:
    question = test_case['question']
    docid = test_case['docid']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)

    # TFIDF
#     doc_term_freqs = {}
#     for (id, raw_doc) in enumerate(raw_docs):
#         term_freqs = extract_term_freqs(raw_doc)
#         doc_term_freqs[id] = term_freqs
#     M = len(doc_term_freqs)

#     doc_freqs = compute_doc_freqs(doc_term_freqs)

#     vsm_inverted_index = defaultdict(list)
#     for docid, term_freqs in doc_term_freqs.items():
#         N = sum(term_freqs.values())
#         length = 0

#         # find tf*idf values and accumulate sum of squares
#         tfidf_values = []
#         for term, count in term_freqs.items():
#             tfidf = float(count) / N * log(M / float(doc_freqs[term]))
#             tfidf_values.append((term, tfidf))
#             length += tfidf ** 2

#         # normalise documents by length and insert into index
#         length = length ** 0.5
#         for term, tfidf in tfidf_values:
#             # inversion of the indexing, term -> (doc_id, score)
#             vsm_inverted_index[term].append([docid, tfidf / length])

#     for term, docids in vsm_inverted_index.items():
#         docids.sort()

#     terms = extract_term_freqs(question)
#     results = query_vsm(terms, vsm_inverted_index)



    tokenized_sentence = []
    for each_sentence in raw_docs:
        filter_stop_word = []
        sentence_as_words = nltk.word_tokenize(each_sentence)
        for each_word in sentence_as_words:
            if each_word not in stop:
                filter_stop_word.append(each_word)
        
        tokenized_sentence.append(filter_stop_word)
        
    bm25Model = bm25.BM25(tokenized_sentence)
    average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
    
    query = []
    for word in nltk.word_tokenize(question):
        if word not in stop:
            query.append(word)
        
    scores = bm25Model.get_scores(query,average_idf)
    bm25_dic = Counter()
    
    sentence_id = 0
    for each_score in scores:
        bm25_dic[sentence_id] = each_score
        sentence_id += 1
    results = bm25_dic.most_common(3)



    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    target = [] # target dep

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''
    attr = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of question
    for token in doc:
        if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
            dobj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'attr' and token.head.dep_ == 'ROOT':
            attr = lemmatize(strip_punctuation(token.text))

    # determine answer dependency
    if qword in ['who', "who's", 'whom', 'whose']:
        for chunk in doc.noun_chunks:
            if qword in chunk.text:
                target = [chunk.root.dep_]

    elif qword in ['where', "where's", 'when']:
        target = ['advmod','prep']

    elif qword in ['how', "how's"]:
        if next_token in ['much', 'many','long','far', 'big', 'wide', 'deep', 'tall', 'high', 'fast', 'heavy','old', 'young']:
            target = ['attr']
        
        elif next_token in ['does', 'did', 'do', 'have', 'has', 'had', 'should',
                            'can', 'could', 'will', 'would', 'must']:
            if dobj != '':
                target = ['advmod', 'acomp']
            else:
                target = ['ROOT']

    elif qword in ['what', "what's", 'which']:
        # what...do type question
        tokens.remove(next_token)
        if 'do' in tokens:
            target = ['ROOT']
        else:
            for chunk in doc.noun_chunks:
                if qword in chunk.text:
                    target = [chunk.root.dep_]

    elif qword == 'why':
        target = ['mark','prep']

    elif qword in CLOSED_QUESTION_WORDS:
        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            
            # dep of phrase before and after 'or'
            prev = tokens[index - 1]
            for chunk in doc.noun_chunks:
                if prev in chunk.text:
                    target = [chunk.root.dep_]
            
            if target == []:
                for token in doc:
                    if prev in token.text:
                        target = [token.dep_]

     # re-rank the sentences
    scores = {}
    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        score = get_overlap(sent, question)

        for token in doc:
            if token.dep_ in target:
                score += 1

        scores[id] = score

    rank = {}
    for id, sim in results:
        max_score = scores[max(scores, key=scores.get)]
        if max_score != 0:
            rank[id] = sim * 0.3 + (scores[id] / max_score * 0.7)
        else:
            rank[id] = sim
    
    # sentence with highest rank
    index = max(rank, key=rank.get)
    sent = raw_docs[index]
    doc = nlp(sent)
                   
    # find answer with highest score from the highest ranking sentence
    max_score = -1
    answer = ''

    # find sentence structure
    sent_nsubj = ''
    sent_ROOT = ''
    sent_dobj = ''
    sent_attr = ''
    for token in doc:
        if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
            sent_nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            sent_ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
            sent_dobj = lemmatize(strip_punctuation(token.text))
        if 'attr' in token.dep_ and token.head.dep_ == 'ROOT':
            sent_attr = lemmatize(strip_punctuation(token.text))

    score = 0
    if nsubj == sent_nsubj:
        score += 1
        if ROOT == sent_ROOT:
            score += 1
            if dobj == sent_dobj:
                score += 1
            if attr == sent_attr:
                score += 1

    max_score = -1
    answer = ''
    if target == ['ROOT'] or target == ['attr'] or target == ['advmod','acomp']:
        for token in doc:
            if token.dep_ in target:
                score += 1

                if token.text not in question:
                    score += 1

                if strip_punctuation(token.text).lower() in stop:
                    score = -1

                if token.text.lower() == 'it':
                    score = -1

                if score > max_score:
                    max_score = score
                    answer = token.text

    elif target == ['mark','prep'] or target == ['advmod','prep']:
        for token in doc:
            if token.dep_ in target:
                score += 1

                if token.text not in question:
                    score += 1

                if strip_punctuation(token.text).lower() in stop:
                    score = -1

                if token.text.lower() == 'it':
                    score = -1

                if score > max_score:
                    max_score = score
                    substr = sent[sent.index(token.text):]
                    tokens = nltk.word_tokenize(substr)
                    del tokens[0]
                    for token in tokens:
                        if token in punc:
                            break
                        answer += token+' '

    else:
        for chunk in doc.noun_chunks:
            if chunk.root.dep_ in target:
                score += 1

                if chunk.text not in question:
                    score += 1

                if strip_punctuation(chunk.text).lower() in stop:
                    score = -1

                if chunk.text.lower() == 'it':
                    score = -1

                if score > max_score:
                    max_score = score
                    answer = chunk.text
        
    # default answer
    if answer == '':
        for chunk in doc.noun_chunks:
            if chunk.text not in stop and chunk.text not in question:
                answer = chunk.text
        
    a = nltk.word_tokenize(answer)
    if len(a) > 0 and a[0].lower() in stop:
        del a[0]
        answer = ''
        for i in range(len(a)):
            if i+1 < len(a) and a[i+1] == "'s":
                answer += a[i]
            else:
                answer += a[i] + ' '
        
    answer = answer.strip().lower()
    answer_result = []
    answer_result.append(case_count)
    answer_result.append(answer)
    writer.writerow(answer_result)
    print(case_count,' ',answer)
    case_count += 1
    
csvFile.close()

0   combination
1   capabilities
2   process
3   mac
4   released in september 2008
5   95 %
6   user
7   history
8   september
9   sir tim berners-lee
10   consumer choice
11   industry's first browser war
12   merely "web browsers
13   consumer choice
14   browser extensions
15   entire browser market
16   reduces consumer choice
17   local files
18   google search
19   web browser
20   file formats
21   reduces consumer choice
22   august
23   average person
24   internet explorer
25   form
26   used web browser but still had lower usage than all versions of internet explorer combined had lower usage than all versions of internet explorer combined
27   started his own company
28   open source software model
29   40 million phones
30   rapid development
31   url
32   open source software model
33   file systems
34   95 %
35   web browsers
36   started his own company
37   user interface
38   google chrome
39   user's consent
40   information resources
41   feed
42   feed
43   bookmar

406   hampton
407   bath house
408   pre-war era
409   handsome width
410   city
411   first purpose-built artillery fortification
412   galleries
413   melody
414   large ships
415   uk
416   changed to hampshire in 1959 although the county had been commonly known as hampshire or hantscire for centuries
417   worst behaved secondary schools
418   boys
419   sotonians
420   south western railway company
421   22,000 students
422   westquay park
423   programme
424   contemporary art installations
425   england
426   programme
427   town
428   202 crimes
429   96.1 °
430   aviation
431   depth
432   city's buildings
433   southampton container terminals
434   29 june
435   new county council
436   normandy
437   united kingdom
438   uk
439   city
440   city
441   area
442   flights
443   marina
444   city
445   southampton
446   queen mary
447   guildhall
448   four-year term
449   road
450   16.2 percent
451   southampton water
452   southampton
453   dates
454   provide
455   eight ye

800   local christian rulers
801   nations
802   east european ashkenazim
803   sephardic and mizrahi jews
804   percentage
805   money lending
806   nations
807   origin
808   levant
809   jewishness
810   ashkenazi jewish ancestry
811   yiddish speakers
812   cimmerians
813   siddur ( prayer book
814   distinct geographic locales
815   host community
816   canaan
817   vladimir ashkenazy
818   invading persians
819   free peoples
820   germany
821   lubavitch hasidim
822   holocaust
823   german lands
824   minhagim
825   data
826   at its exact discovery position in relation to the sun
827   neptune,[e
828   pluto
829   deeper atmospheric processes
830   north pole
831   french bureau des longitudes
832   result
833   position
834   jupiter
835   (נפטון
836   upper cloud
837   warmer
838   methane
839   solar system
840   sun
841   urbain le verrier
842   neptune's gravity
843   neptune
844   late 2020s
845   discovery in 1846
846   jupiter
847   french bureau des longitudes
848   2

1118   mortuary chapel
1119   laterano
1120   rebuilt abbey church
1121   christian interpretations
1122   beautiful mosaic pavement
1123   libyan town
1124   apse
1125   arab caliphate
1126   jewish month
1127   high artistic quality mosaics
1128   early 4th century
1129   sant'apollinare nuovo
1130   higher accuracy
1131   18th century
1132   robot
1133   high-ranking officials
1134   constantine monomachos
1135   holy land
1136   rams
1137   constantine monomachos
1138   misleading impression
1139   umm ar-rasas
1140   santa pudenziana
1141   rams
1142   6th century
1143   figures
1144   5th century
1145   seems that it was not until the christian era that figural wall mosaics became a major form of artistic expression
1146   bowl
1147   jerusalem
1148   monreale
1149   st. catherine's monastery
1150   either side
1151   constantine monomachos
1152   coarser work
1153   important place of pilgrimage
1154   late roman mosaic art
1155   idolatry
1156   3rd millennium bc
1157   period


1484   russian resistance
1485   french emperor
1486   saint petersburg
1487   duplicitous french and ottoman diplomacy
1488   danube
1489   treaty
1490   nitroglycerin
1491   french
1492   forgotten theatre
1493   village
1494   , re-appointed stratford canning
1495   rapid communications
1496   village
1497   imperial examination
1498   other `` mean '' people
1499   father
1500   ming military to get them to defect to the qing
1501   manchukuo
1502   a revolt by han in liaodong in 1623
1503   gapsin coup
1504   chinese zodiacal system
1505   kangxi emperor
1506   power
1507   time
1508   power
1509   manchu homeland
1510   viceroys
1511   early 17th century
1512   foreign legation quarter
1513   wore it in the form of a top-knot
1514   campaign
1515   fujian
1516   jahangir khoja
1517   instead
1518   chinese zodiacal system
1519   prior military experience
1520   chinese military
1521   population
1522   dorgon
1523   collection
1524   compensation for their expenses in invading ch

1830   peacetime
1831   feature
1832   many countries
1833   dst
1834   clock
1835   result
1836   draft legislation
1837   methodology
1838   ordinary system maintenance
1839   24:00 local time
1840   workday activities
1841   northern regions
1842   sunrise
1843   winter
1844   single studies
1845   typical `` nine-to-five '' workday
1846   golf industry revenues
1847   adopt
1848   uses
1849   evening dinner
1850   autumn transition
1851   single studies
1852   standardize
1853   dual-time zone arrangement
1854   dst
1855   complexity
1856   average
1857   favor
1858   little impact
1859   problems
1860   utc-based unix time
1861   ordinary system maintenance
1862   increase
1863   use
1864   committee
1865   older timestamps
1866   end
1867   confusion
1868   seasonal vaccinations
1869   30 april
1870   single studies
1871   daylight
1872   winter solstice
1873   rules
1874   file
1875   canadian location
1876   energy-consumption study
1877   term summer time
1878   help
1879   re

2188   progressively undesirable attributes
2189   clade
2190   sex
2191   united states
2192   climatic zones
2193   characters
2194   united states
2195   crime
2196   social constructs
2197   falsify
2198   neighbouring non-jewish populations
2199   
2200   female slaves
2201   use
2202   education
2203   discussion
2204   to 10 % african ancestry
2205   clustering
2206   natural sciences
2207   used as a synonym for `` hispanic ''
2208   mapping
2209   different racial groups
2210   tragedy
2211   suggest
2212   difference
2213   fst
2214   phylogenetic analysis
2215   researcher
2216   different mutations
2217   populations
2218   races
2219   invalid genetic or biological designation
2220   time
2221   two different populations
2222   medical conditions
2223   concept
2224   racial status quo
2225   united states
2226   adversely
2227   united states
2228   outgroup
2229   second world war
2230   father's line
2231   genetic variation
2232   demographics
2233   textbooks
2234   e

2542   proper authority
2543   treaty breach
2544   european economic area agreement
2545   acts
2546   treaty
2547   first agreement
2548   protocol
2549   complaint
2550   amend
2551   notification
2552   subject
2553   modern treaty law
2554   respective state legislature
2555   treaty obligations
2556   indigenous peoples
2557   execution
2558   forms
2559   international agreement
2560   qaynuqa
2561   same reservations
2562   treaty
2563   state
2564   different languages
2565   recognition
2566   war
2567   european economic area agreement
2568   certain defined conditions
2569   internal law
2570   verb
2571   two state parties
2572   multilateral treaty
2573   contracting parties ' full names
2574   treaties
2575   understanding
2576   specific provisions
2577   confederation
2578   treaty
2579   use
2580   occur
2581   clause
2582   gabriel
2583   muslim man
2584   colonization
2585   complaint
2586   treaties
2587   many treaties
2588   treaty obligations
2589   constitution

2896   least expensive mac
2897   mac os platform
2898   discontinued power macintosh g3
2899   dell
2900   memory issue
2901   ball
2902   higher-quality , all-aluminum unibody construction
2903   battery life
2904   replaceable batteries
2905   apple's margins
2906   16 mhz motorola 68020 processor
2907   boston software 's macpublisher and aldus pagemaker enabled users to design
2908   costlier macintosh
2909   6 percent
2910   open firmware in most powerpc-based macs or efi in all intel-based macs
2911   never-to-appear copland os
2912   3.36 million macs
2913   macintosh ii
2914   apple
2915   year
2916   9.3%
2917   aqua user interface
2918   original macintosh computer
2919   windows vista
2920   c++
2921   macbook air
2922   july
2923   seven years
2924   board
2925   14 %
2926   system
2927   apple
2928   compaq
2929   iici
2930   first macbook pro
2931   frogdesign
2932   microsoft's formerly separate ms-dos and windows products
2933   mac os gui
2934   a bad condition
2935  

3271   incomplete in annelids that are semi-sessile or that do not move by peristalsis or by movements of parapodia – for example some move by whipping movements of the body
3272   locomotion
3273   hatchlings
3274   anatomical structures
3275   segmentation
3276   lives
3277   scientists' current knowledge
3278   the prostomium the peristomium
3279   nemerteans
3280   hatchlings
3281   another pair
3282   annelid
3283   cocoon
3284   ecological diversity
3285   europe
3286   septa
3287   muscle fibers
3288   ganglia
3289   worms ' total weight
3290   enables
3291   polychaetes
3292   dubious practise of blood-letting have come from china around 30 ad 30 ad 30 ad throughout europe
3293   situations
3294   jurassic
3295   whose best-known members
3296   soil fertility
3297   cocoon
3298   deuterostomes
3299   gap
3300   adopted a slightly different version of the greenman rule in section 402a of the restatement different version of the greenman rule in section 402a of the restatement
33

3602   additive primary colors
3603   important role
3604   event
3605   vincent van gogh
3606   relatively low levels
3607   magnificent red
3608   mansions
3609   money
3610   alizarin crimson
3611   coccus
3612   oldest professional baseball team
3613   bright red
3614   highways
3615   power
3616   rouge d'adrinople
3617   rouge d'adrinople


# Train set test

In [8]:
import nltk
import json
import spacy
from nltk.corpus import stopwords
from math import log
from collections import defaultdict, Counter
from string import punctuation
from nltk.stem.wordnet import WordNetLemmatizer
import csv
from gensim.summarization import bm25

OPEN_QUESTION_WORDS = ['what', 'who', 'whose', 'whom', 'where', 'when', 'why', 'how',
                       'which', "what's", "who's", "where's", "how's"]
CLOSED_QUESTION_WORDS = ['is', 'are', 'am', 'was', 'were', 'do', 'does,', 'did', 'can',
                         'could', 'will', 'would', 'shall', 'should', 'have', 'has',
                         'had']

# Stop words
stop = set(stopwords.words('english'))

lmtz = WordNetLemmatizer()

with open('training.json') as json_data:
    train = json.load(json_data)

with open('documents.json') as json_data:
    documents = json.load(json_data)

# Spacy toolkit
nlp = spacy.load('en_core_web_sm')

punc = set(punctuation)


def strip_punctuation(s):
    return ''.join(c for c in s if c not in punc)


def lemmatize(token):
    lemma = lmtz.lemmatize(token, 'v')
    if lemma == token:
        lemma = lmtz.lemmatize(token, 'n')
    return lemma


def extract_term_freqs(doc):
    tfs = {}
    for token in nltk.word_tokenize(doc):
        lemma = lemmatize(token.lower())
        if lemma not in stop and lemma.isalpha():
            tfs[lemma] = tfs.get(lemma, 0) + 1
    return tfs


def compute_doc_freqs(doc_term_freqs):
    dfs = Counter()
    for tfs in doc_term_freqs.values():
        for term in tfs.keys():
            dfs[term] += tfs[term]
    return dfs


def query_vsm(query, index, k=10):
    accumulator = Counter()
    for term in query:
        postings = index[term]
        for docid, weight in postings:
            accumulator[docid] += weight
    return accumulator.most_common(k)


# Find the question word
def get_qword(question):
    tokens = nltk.word_tokenize(question.lower())
    for token in tokens:
        if token in OPEN_QUESTION_WORDS:
            return token
    for token in tokens:
        if token in CLOSED_QUESTION_WORDS:
            return token
    return 'others'


# length of longest same sequences of keywords
def get_overlap(sent1, sent2):
    tokens1 = []
    tokens2 = []

    for token in nltk.word_tokenize(strip_punctuation(sent1.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens1.append(lemma)

    for token in nltk.word_tokenize(strip_punctuation(sent2.lower())):
        lemma = lemmatize(token)
        if lemma not in stop:
            tokens2.append(lemma)

    max = 0
    for i in range(len(tokens1)):
        for j in range(len(tokens2)):

            if tokens1[i] == tokens2[j]:
                length = 1

                ii = i + 1
                jj = j + 1
                while ii < len(tokens1) and jj < len(tokens2) and \
                        tokens1[ii] == tokens2[jj]:
                    ii += 1
                    jj += 1
                    length += 1

                if length > max:
                    max = length

    return max


case_count = 0
# test = [test[17]]
for test_case in train:
    question = test_case['question']
    docid = test_case['docid']
    correct_answer = test_case['text']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)

    # TFIDF
#     doc_term_freqs = {}
#     for (id, raw_doc) in enumerate(raw_docs):
#         term_freqs = extract_term_freqs(raw_doc)
#         doc_term_freqs[id] = term_freqs
#     M = len(doc_term_freqs)

#     doc_freqs = compute_doc_freqs(doc_term_freqs)

#     vsm_inverted_index = defaultdict(list)
#     for docid, term_freqs in doc_term_freqs.items():
#         N = sum(term_freqs.values())
#         length = 0

#         # find tf*idf values and accumulate sum of squares
#         tfidf_values = []
#         for term, count in term_freqs.items():
#             tfidf = float(count) / N * log(M / float(doc_freqs[term]))
#             tfidf_values.append((term, tfidf))
#             length += tfidf ** 2

#         # normalise documents by length and insert into index
#         length = length ** 0.5
#         for term, tfidf in tfidf_values:
#             # inversion of the indexing, term -> (doc_id, score)
#             vsm_inverted_index[term].append([docid, tfidf / length])

#     for term, docids in vsm_inverted_index.items():
#         docids.sort()

#     terms = extract_term_freqs(question)
#     results = query_vsm(terms, vsm_inverted_index)



    tokenized_sentence = []
    for each_sentence in raw_docs:
        filter_stop_word = []
        sentence_as_words = nltk.word_tokenize(each_sentence)
        for each_word in sentence_as_words:
            if each_word not in stop:
                filter_stop_word.append(each_word)
        
        tokenized_sentence.append(filter_stop_word)
        
    bm25Model = bm25.BM25(tokenized_sentence)
    average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
    
    query = []
    for word in nltk.word_tokenize(question):
        if word not in stop:
            query.append(word)
        
    scores = bm25Model.get_scores(query,average_idf)
    bm25_dic = Counter()
    
    sentence_id = 0
    for each_score in scores:
        bm25_dic[sentence_id] = each_score
        sentence_id += 1
    results = bm25_dic.most_common(3)



    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    target = [] # target dep

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''
    attr = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of question
    for token in doc:
        if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
            dobj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'attr' and token.head.dep_ == 'ROOT':
            attr = lemmatize(strip_punctuation(token.text))

    # determine answer dependency
    if qword in ['who', "who's", 'whom', 'whose']:
        for chunk in doc.noun_chunks:
            if qword in chunk.text:
                target = [chunk.root.dep_]

    elif qword in ['where', "where's", 'when']:
        target = ['advmod','prep']

    elif qword in ['how', "how's"]:
        if next_token in ['much', 'many','long','far', 'big', 'wide', 'deep', 'tall', 'high', 'fast', 'heavy','old', 'young']:
            target = ['attr']
        
        elif next_token in ['does', 'did', 'do', 'have', 'has', 'had', 'should',
                            'can', 'could', 'will', 'would', 'must']:
            if dobj != '':
                target = ['advmod', 'acomp']
            else:
                target = ['ROOT']

    elif qword in ['what', "what's", 'which']:
        # what...do type question
        tokens.remove(next_token)
        if 'do' in tokens:
            target = ['ROOT']
        else:
            for chunk in doc.noun_chunks:
                if qword in chunk.text:
                    target = [chunk.root.dep_]

    elif qword == 'why':
        target = ['mark','prep']

    elif qword in CLOSED_QUESTION_WORDS:
        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            
            # dep of phrase before and after 'or'
            prev = tokens[index - 1]
            for chunk in doc.noun_chunks:
                if prev in chunk.text:
                    target = [chunk.root.dep_]
            
            if target == []:
                for token in doc:
                    if prev in token.text:
                        target = [token.dep_]

     # re-rank the sentences
    scores = {}
    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        score = get_overlap(sent, question)

        for token in doc:
            if token.dep_ in target:
                score += 1

        scores[id] = score

    rank = {}
    for id, sim in results:
        max_score = scores[max(scores, key=scores.get)]
        if max_score != 0:
            rank[id] = sim * 0.3 + (scores[id] / max_score * 0.7)
        else:
            rank[id] = sim
    
    # sentence with highest rank
    index = max(rank, key=rank.get)
    sent = raw_docs[index]
    doc = nlp(sent)
                   
    # find answer with highest score from the highest ranking sentence
    max_score = -1
    answer = ''

    # find sentence structure
    sent_nsubj = ''
    sent_ROOT = ''
    sent_dobj = ''
    sent_attr = ''
    for token in doc:
        if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
            sent_nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            sent_ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
            sent_dobj = lemmatize(strip_punctuation(token.text))
        if 'attr' in token.dep_ and token.head.dep_ == 'ROOT':
            sent_attr = lemmatize(strip_punctuation(token.text))

    score = 0
    if nsubj == sent_nsubj:
        score += 1
        if ROOT == sent_ROOT:
            score += 1
            if dobj == sent_dobj:
                score += 1
            if attr == sent_attr:
                score += 1

    max_score = -1
    answer = ''
    if target == ['ROOT'] or target == ['attr'] or target == ['advmod','acomp']:
        for token in doc:
            if token.dep_ in target:
                score += 1

                if token.text not in question:
                    score += 1

                if strip_punctuation(token.text).lower() in stop:
                    score = -1

                if token.text.lower() == 'it':
                    score = -1

                if score > max_score:
                    max_score = score
                    answer = token.text

    elif target == ['mark','prep'] or target == ['advmod','prep']:
        for token in doc:
            if token.dep_ in target:
                score += 1

                if token.text not in question:
                    score += 1

                if strip_punctuation(token.text).lower() in stop:
                    score = -1

                if token.text.lower() == 'it':
                    score = -1

                if score > max_score:
                    max_score = score
                    substr = sent[sent.index(token.text):]
                    tokens = nltk.word_tokenize(substr)
                    del tokens[0]
                    for token in tokens:
                        if token in punc:
                            break
                        answer += token+' '

    else:
        for chunk in doc.noun_chunks:
            if chunk.root.dep_ in target:
                score += 1

                if chunk.text not in question:
                    score += 1

                if strip_punctuation(chunk.text).lower() in stop:
                    score = -1

                if chunk.text.lower() == 'it':
                    score = -1

                if score > max_score:
                    max_score = score
                    answer = chunk.text
        
    # default answer
    if answer == '':
        for chunk in doc.noun_chunks:
            if chunk.text not in stop and chunk.text not in question:
                answer = chunk.text
        
    a = nltk.word_tokenize(answer)
    if len(a) > 0 and a[0].lower() in stop:
        del a[0]
        answer = ''
        for i in range(len(a)):
            if i+1 < len(a) and a[i+1] == "'s":
                answer += a[i]
            else:
                answer += a[i] + ' '
        
    if correct_answer != answer.strip().lower():
        print('Support: ',sent)
        print('***')
        print('Question: ',question)
        print('***')
        print('Correct: ',correct_answer)
        print('***')
        print('predicted: ',answer.strip().lower())
        print('\n')

    case_count += 1
    

Support:  A change of several tens of micrograms in one kilogram is equivalent to the current uncertainty in the value of the Planck constant in SI units.
***
Question:  A kilogram could be definined as having a Planck constant of what value?
***
Correct:  6966662606895999999♠6.62606896×10−34 j⋅s
***
predicted:  si units


Support:  The Planck constant is given by There are a number of proposals to redefine certain of the SI base units in terms of fundamental physical constants.
***
Question:  What is the shape of the object that establishes the base unit of the kilogram?
***
Correct:  cylinder
***
predicted:  fundamental physical constants


Support:  In modern terms, if J is the total angular momentum of a system with rotational invariance, and Jz the angular momentum measured along any given direction, these quantities can only take on the values where the uncertainty is given as the standard deviation of the measured value from its expected value.
***
Question:  What example is giv

Support:  For example, green light with a wavelength of 555 nanometres (the approximate wavelength to which human eyes are most sensitive) has a frequency of 7014540000000000000♠540 THz (7014540000000000000♠540×1012 Hz).
***
Question:  What color of light is the human eye most sensitive to?
***
Correct:  green
***
predicted:  hz


Support:  The black-body problem was revisited in 1905, when Rayleigh and Jeans (on the one hand) and Einstein (on the other hand) independently proved that classical electromagnetism could never account for the observed spectrum.
***
Question:  Einstein and what other two individuals revisited the black-body problem in 1905?
***
Correct:  rayleigh and jeans
***
predicted:  einstein


Support:  Bohr solved this paradox with explicit reference to Planck's work: an electron in a Bohr atom could only have certain defined energies En Bohr also introduced the quantity , now known as the reduced Planck constant, as the quantum of angular momentum.
***
Question:  Wh

Support:  Bohr solved this paradox with explicit reference to Planck's work: an electron in a Bohr atom could only have certain defined energies En Bohr also introduced the quantity , now known as the reduced Planck constant, as the quantum of angular momentum.
***
Question:  What is the reduced Planck constant also known as?
***
Correct:  the quantity
***
predicted:  angular momentum


Support:  Max Planck received the 1918 Nobel Prize in Physics "in recognition of the services he rendered to the advancement of Physics by his discovery of energy quanta".
***
Question:  In what year did Planck receive the Nobel Prize in Physics for his discovery of energy quanta?
***
Correct:  1918
***
predicted:  energy


Support:  In the last years of the nineteenth century, Planck was investigating the problem of black-body radiation first posed by Kirchhoff some forty years earlier.
***
Question:  What type of radiation was Planck studying in the late 19th century?
***
Correct:  black-body
***
pred

Support:  In the last years of the nineteenth century, Planck was investigating the problem of black-body radiation first posed by Kirchhoff some forty years earlier.
***
Question:  How many years prior to Planck's study had the scientific community first discussed black body radiation?
***
Correct:  forty
***
predicted:  kirchhoff


Support:  The very first Solvay Conference in 1911 was devoted to "the theory of radiation and quanta".
***
Question:  When was the first Solvay Conference held?
***
Correct:  1911
***
predicted:  quanta


Support:  The hot object in equilibrium with light absorbs just as much light as it emits.
***
Question:  What does a hot object in equilibrium absorb as much as it emits?
***
Correct:  light
***
predicted:  as much light


Support:  It is well known that hot objects glow, and that hotter objects glow brighter than cooler ones.
***
Question:  Cooler objects glow less than objects that are what?
***
Correct:  hotter
***
predicted:  cooler ones


Support: 

Support:  Wind power accounts for approximately 19% of electricity generated in Denmark, 9% in Spain and Portugal, and 6% in Germany and the Republic of Ireland.
***
Question:  In Denmark, wind power accounts for what percentage of electricity generated?
***
Correct:  19 %
***
predicted:  ireland


Support:  Other developed countries with defined national or regional targets include Australia, Canada, Israel, Japan, Korea, New Zealand, Norway, Singapore, Switzerland, and some US States.
***
Question:  Name one outher country with defined national or regional target?
***
Correct:  australia
***
predicted:  us states


Support:  Barriers to implementing the renewable energy plan are seen to be "primarily social and political, not technological or economic".
***
Question:  What is a barrier to implementing the renewable energy plan?
***
Correct:  social and political
***
predicted:  barriers


Support:  The 230 MW Antelope Valley Solar Ranch is a First Solar photovoltaic project which is 

Support:  Hydropower is produced in 150 countries, with the Asia-Pacific region generating 32 percent of global hydropower in 2010.
***
Question:  Hydropower is produced in how many countries?
***
Correct:  150
***
predicted:  global hydropower


Support:  Crop residues (such as corn stalks, wheat straw and rice straw), wood waste, and municipal solid waste are potential sources of cellulosic biomass.
***
Question:  What are potential sources of cellulosic biomass?
***
Correct:  crop residues
***
predicted:  municipal solid waste


Support:  The $1.00/W installed cost, is often regarded in the PV industry as marking the achievement of grid parity for PV.
***
Question:  What cost is often regarded as marking the achievment of grid parity for PV?
***
Correct:  $ 1.00/w installed cost
***
predicted:  achievement


Support:  Mark Z. Jacobson, professor of civil and environmental engineering at Stanford University and director of its Atmosphere and Energy Program says producing all new ener

Support:  Drawing from his own experiences in Scouting, Spielberg helped the Boy Scouts of America develop a merit badge in cinematography in order to help promote filmmaking as a marketable skill.
***
Question:  What was Indy's rank in Boy Scouts when shown in a movie?
***
Correct:  life scout
***
predicted:  marketable skill


Support:  Spielberg also said he suffered from acts of anti-Semitic prejudice and bullying: "In high school, I got smacked and kicked around.
***
Question:  What kind of prejudice did Spielberg have to deal with in High school?
***
Correct:  anti-semitic
***
predicted:  


Support:  Spielberg then revisited his Close Encounters project and, with financial backing from Columbia Pictures, released Close Encounters: The Special Edition in 1980.
***
Question:  Who did Spielberg get money from to film Close Encounters?
***
Correct:  his father
***
predicted:  special edition


Support:  This theme is arguably the most autobiographical aspect of Spielberg's films, si

Support:  Similarly, in Catch Me If You Can, Frank naively and foolishly believes that he can reclaim his shattered family if he accumulates enough money to support them.
***
Question:  Who is naive in 'Catch Me if You Can'?
***
Correct:  frank
***
predicted:  enough money


Support:  He is credited in the special thanks section of the 1998 video game Trespasser.
***
Question:  What was the first video game Spielberg played?
***
Correct:  pong
***
predicted:  1998 video game trespasser


Support:  Spielberg then revisited his Close Encounters project and, with financial backing from Columbia Pictures, released Close Encounters: The Special Edition in 1980.
***
Question:  When did Spielberg re-release Close Encounters?
***
Correct:  1980
***
predicted:  special edition


Support:  However, much to the surprise of many, Spielberg did not get a Best Director nomination.
***
Question:  How many Oscar nominations did 'Bridge of Spies' get?
***
Correct:  six
***
predicted:  best director nom

Support:  The segment, "Eyes," starred Joan Crawford; she and Spielberg were reportedly close friends until her death.
***
Question:  Who starred in 'Savage'/
***
Correct:  martin landau
***
predicted:  death


Support:  In November 2007, he was chosen for a Lifetime Achievement Award to be presented at the sixth annual Visual Effects Society Awards in February 2009.
***
Question:  When did the Visual Effects Society give Spielberg a Lifetime Achievement Award?
***
Correct:  february 2009
***
predicted:  february


Support:  :403 Their divorce was recorded as the third most costly celebrity divorce in history.
***
Question:  When did Spielberg and Irving divorce?
***
Correct:  1989
***
predicted:  history


Support:  He is credited in the special thanks section of the 1998 video game Trespasser.
***
Question:  When did Spielberg first play a video game?
***
Correct:  1974
***
predicted:  1998 video game trespasser


Support:  Spielberg is also producing the Fox TV series Terra Nova.
**

Support:  According to Forbes' Most Influential Celebrities 2014 list, Spielberg was listed as the most influential celebrity in America.
***
Question:  When was Spielberg named the 'most influential celebrity in America'?
***
Correct:  2014
***
predicted:  forbes


Support:  He was later given the opportunity to make a short film for theatrical release, the 26-minute, 35mm, Amblin', which he wrote and directed.
***
Question:  Who wrote 'Vengeance'?
***
Correct:  george jonas
***
predicted:  amblin


Support:  After completing filming on Ready Player One, while it is in its lengthy, effects-heavy post-production, he will film his long-planned adaptation of David Kertzer's acclaimed The Kidnapping of Edgardo Mortara.
***
Question:  Who wrote 'The Kidnapping of Edgardo Mortara'?
***
Correct:  david kertzer
***
predicted:  david kertzer's acclaimed the kidnapping


Support:  Terra Nova begins in the year 2149 when all life on the planet Earth is threatened with extinction resulting in sci

Support:  Spielberg stated he made Indiana Jones a Boy Scout in honor of his experience in Scouting.
***
Question:  Which major Spielberg movie character was a Boy Scout?
***
Correct:  indiana jones
***
predicted:  scouting


Support:  For his 2010 miniseries The Pacific he teamed up once again with co-producer Tom Hanks, with Gary Goetzman also co-producing'.
***
Question:  When was 'The Pacific' released?
***
Correct:  2010
***
predicted:  co-producing


Support:  Spielberg described himself as feeling like an alien during childhood, and his interest came from his father, a science fiction fan, and his opinion that aliens would not travel light years for conquest, but instead curiosity and sharing of knowledge.
***
Question:  When did Spielberg give an interview saying he had 'felt like an alien'?
***
Correct:  august 2000
***
predicted:  alien during childhood curiosity and sharing of knowledge


Support:  He was later given the opportunity to make a short film for theatrical releas

Support:  The world premiere took place on October 22, 2011 in Brussels, Belgium.
***
Question:  Who took over directing 'Interstellar'?
***
Correct:  christopher nolan
***
predicted:  belgium


Support:  As a child, Spielberg faced difficulty reconciling being an Orthodox Jew with the perception of him by other children he played with.
***
Question:  When did Steven Spielberg have trouble dealing with being an Orthodox Jew?
***
Correct:  as a child
***
predicted:  children


Support:  He is credited in the special thanks section of the 1998 video game Trespasser.
***
Question:  What video game did Spielberg come up with the concept for?
***
Correct:  the dig
***
predicted:  1998 video game trespasser


Support:  Furthermore, to this theme, protagonists in his films often come from families with divorced parents, most notably E.T.
***
Question:  When did 'War Horse' come out?
***
Correct:  december 25 , 2011
***
predicted:  divorced parents


Support:  A collector of film memorabilia, 

Support:  By December 1916 there were 183 AA Sections defending Britain (most with the 3-inch), 74 with the BEF in France and 10 in the Middle East.
***
Question:  How many AA Sections were defending Britain by December 1916?
***
Correct:  183
***
predicted:  sections


Support:  Further research started during the war.
***
Question:  Who started making an unlicensed version of the 40mm at the beginning of the war?
***
Correct:  americans
***
predicted:  research


Support:  The Germans developed massive reinforced concrete blockhouses, some more than six stories high, which were known as Hochbunker "High Bunkers" or "Flaktürme" flak towers, on which they placed anti-aircraft artillery.
***
Question:  What were the large blockhouses called that the Germans developed?
***
Correct:  hochbunker `` high bunkers ''
***
predicted:  anti-aircraft artillery


Support:  Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mounted M2 .50 caliber machine g

Support:  Although air planners have imagined lasers in combat since the late 1960s, only the most modern laser systems are currently reaching what could be considered "experimental usefulness".
***
Question:  The use of lasers in anti-aircraft warfare is currently considered what?
***
Correct:  experimental usefulness
***
predicted:  the most modern laser systems


Support:  Carrier battle groups are especially well defended, as not only do they typically consist of many vessels with heavy air defence armament but they are also able to launch fighter jets for combat air patrol overhead to intercept incoming airborne threats.
***
Question:  What type of ships are particularly well defended?
***
Correct:  carrier battle groups
***
predicted:  incoming airborne threats


Support:  The USSR also had a separate strategic rocket force in charge of nuclear intercontinental ballistic missiles.
***
Question:  Who had a separate military force for controlling nuclear ICBMs?
***
Correct:  ussr
*

Support:  Passive air defence is defined by NATO as "Passive measures taken for the physical defence and protection of personnel, essential installations and equipment in order to minimize the effectiveness of air and/or missile attack".
***
Question:  What does NATO define as passive measures to protect people, buildings and equipment from air or missile attacks?
***
Correct:  passive air defence
***
predicted:  air and/or missile attack


Support:  However, in 1924 work started on a new 105 mm static mounting AA gun, but only a few were produced by the mid-1930s because by this time work had started on the 90 mm AA gun, with mobile carriages and static mountings able to engage air, sea and ground targets.
***
Question:  When did work begin on the 105 mm static mounting AA gun?
***
Correct:  1924
***
predicted:  air, sea and ground targets


Support:  Since most attacks were at night, searchlights were soon used, and acoustic methods of detection and locating were developed.
***
Quest

Support:  In 1925 the British adopted a new instrument developed by Vickers.
***
Question:  What year did the British adopt the new Vickers instrument?
***
Correct:  1925
***
predicted:  new instrument


Support:  Another potential weapon system for anti-aircraft use is the laser.
***
Question:  What is another possible weapon for anti-aircraft use?
***
Correct:  the laser
***
predicted:  laser


Support:  In the United States Army for instance, air defence is part of the artillery arm, while in the Pakistan Army, it was split off from Artillery to form a separate arm of its own in 1990.
***
Question:  Air defence in Pakistan was separated from the Army in what year?
***
Correct:  1990
***
predicted:  artillery


Support:  Soon the forces were adding various machine-gun based weapons mounted on poles.
***
Question:  What was mounted on poles by forces?
***
Correct:  machine-gun based weapons
***
predicted:  various machine-gun based weapons


Support:  From the early 1930s eight countr

Support:  In some countries, such as Britain and Germany during the Second World War, the Soviet Union and NATO's Allied Command Europe, ground based air defence and air defence aircraft have been under integrated command and control.
***
Question:  Which country's air defence and aircraft has been under integrated command and control?
***
Correct:  soviet union
***
predicted:  ground based air defence and air defence aircraft


Support:  Soviet systems especially concentrate on mobility, after the lessons learnt in the Vietnam war between the USA and Vietnam.
***
Question:  What systems are really geared toward mobility?
***
Correct:  soviet
***
predicted:  vietnam


Support:  British naval missiles used included Sea Dart and the older Sea Slug longer range systems, Sea Cat and the new Sea Wolf short range systems.
***
Question:  Short range missiles are replacing what weapons?
***
Correct:  autocannons
***
predicted:  new sea wolf short range systems


Support:  Man-portable missiles

Support:  In the later decades of the Cold War this included the United States Air Force's operating bases in UK.
***
Question:  When did the United States Navy test a railgun?
***
Correct:  february 2008
***
predicted:  uk


Support:  The 419th and 601st Antiaircraft Gun Battalions of the US Army were first allocated to the Folkestone-Dover coast to defend London, and then moved to Belgium to become part of the "Antwerp X" project.
***
Question:  Where did the US Army's 419th and 601st locate at to defend London?
***
Correct:  folkestone-dover coast
***
predicted:  allocated to the folkestone-dover coast to defend london


Support:  Before the war it was recognised that ammunition needed to explode in the air.
***
Question:  It was understood that ammunition needed to explode where?
***
Correct:  in the air
***
predicted:  air


Support:  Soldiers shot at them with shotguns and machine-guns but failed to prevent them from dropping 45 bombs over the city, hitting military installations

Support:  The Army's Anti-aircraft command, which was under command of the Air Defence UK organisation, grew to 12 AA divisions in 3 AA corps.
***
Question:  What was under command of the Air Defence UK orgnisation?
***
Correct:  army 's anti-aircraft command
***
predicted:  3 aa corps


Support:  NATO later called these arrangements an "air defence ground environment", defined as "the network of ground radar sites and command and control centres within a specific theatre of operations which are used for the tactical control of air defence operations".
***
Question:  What is the air defence of a certain area called?
***
Correct:  area air defence
***
predicted:  air defence operations


Support:  British naval missiles used included Sea Dart and the older Sea Slug longer range systems, Sea Cat and the new Sea Wolf short range systems.
***
Question:  What new short range systems did the British naval use?
***
Correct:  sea wolf
***
predicted:  new sea wolf short range systems


Support:

Support:  The United States Fish and Wildlife Service (FWS) or NOAA Fisheries (also called the National Marine Fisheries Service) can directly list a species through its candidate assessment program, or an individual or organizational petition may request that the FWS or NMFS list a species.
***
Question:  What federal program is used to list a species?
***
Correct:  candidate assessment program
***
predicted:  nmfs


Support:  It would be another eight years before the first national law regulating wildlife commerce was signed, and another two years before the first version of the endangered species act was passed.
***
Question:  What is the nickname given to the first listing of endangered species?
***
Correct:  `` class of '67 ''
***
predicted:  endangered species act


Support:  This first list is referred to as the "Class of '67" in The Endangered Species Act at Thirty, Volume 1, which concludes that habitat destruction, the biggest threat to those 78 species, is still the same th

Support:  The population is primarily ethnic Swazis whose language is siSwati.
***
Question:  When did the ethnic Swazis establish a kingdom?
***
Correct:  mid-18th century
***
predicted:  ethnic swazis whose language is siswati


Support:  Sobhuza's official coronation was in December 1921 after the regency of Labotsibeni after which he led an unsuccessful deputation to the Privy council in London in 1922 regarding the issue of the land.
***
Question:  When was Sobhuza coronated?
***
Correct:  december 1921
***
predicted:  issue of the land


Support:  The chairman of the bucopho is elected at the inkhundla and is called indvuna ye nkhundla.
***
Question:  Where is the Bucopho chairman elected?
***
Correct:  at the inkhundla
***
predicted:  inkhundla


Support:  There are 14 Jewish families.
***
Question:  How many Jewish families are there in Swaziland?
***
Correct:  14
***
predicted:  families


Support:  At the end of the training, a graduation ceremony takes place where all the lo

Support:  Swaziland's most well-known cultural event is the annual Umhlanga Reed Dance.
***
Question:  What individuals can take part in the Umhlanga Reed Dance?
***
Correct:  childless , unmarried girls
***
predicted:  annual umhlanga reed dance


Support:  Swaziland has a wide variety of landscapes, from the mountains along the Mozambican border to savannas in the east and rain forest in the northwest.
***
Question:  How wide is Swaziland in miles??
***
Correct:  81 mi
***
predicted:  northwest


Support:  On the positive side, the external debt burden has declined markedly over the last 20 years, and domestic debt is almost negligible; external debt as a percent of GDP was less than 20% in 2006.
***
Question:  What has happened to debt external onus  in Swaziland in the past two decades?
***
Correct:  declined markedly
***
predicted:  less than 20%


Support:  The Good Shepherd Hospital in Siteki is home to the College for Nursing Assistants.
***
Question:  Where in Swaziland is The

Support:  Following the premature death of Alexander, Iran came under the control of the Hellenistic Seleucid Empire.
***
Question:  What sea did the Achaemenid Empire control the majority of the coastal regions of?
***
Correct:  the black sea
***
predicted:  hellenistic seleucid empire


Support:  The Assembly of Experts elects and dismisses the Supreme Leader on the basis of qualifications and popular esteem.
***
Question:  How often does the Assembly of Experts meet?
***
Correct:  one week annually
***
predicted:  popular esteem


Support:  In 2006, six Iranian films, of six different styles, represented Iranian cinema at the Berlin International Film Festival.
***
Question:  What year did 6 different Iranian films of six different styles represent at the Berlin International Film Festival?
***
Correct:  2006
***
predicted:  iranian cinema


Support:  [page needed] The emergence of Susa as a city, as determined by radiocarbon dating, dates back to early 4,395 BC.
***
Question:  What

Support:  It also ranks fourth in oil reserves with an estimated 153,600,000,000 barrels.
***
Question:  How much oil reserves does Iran have?
***
Correct:  153,600,000,000 barrels
***
predicted:  estimated 153,600,000,000 barrels


Support:  The heads of the judiciary, state radio and television networks, the commanders of the police and military forces and six of the twelve members of the Guardian Council are appointed by the Supreme Leader.
***
Question:  The Supreme Leader appoints how many members of the Guardian Council?
***
Correct:  six
***
predicted:  supreme leader


Support:  In 2010, the economic reform plan was approved by parliament to cut subsidies gradually and replace them with targeted social assistance.
***
Question:  What year did Iran pass an economic reform plan that would replace subsidies with targeted social assistance programs?
***
Correct:  2010
***
predicted:  parliament


Support:  The 1960s was a significant decade for Iranian cinema, with 25 commercial fi

Support:  The addition of new hydroelectric stations and the streamlining of conventional coal and oil-fired stations increased installed capacity to 33,000 megawatts.
***
Question:  How much of Iran's 33k megawatt installed capacity was based on oil? 
***
Correct:  18 %
***
predicted:  33,000 megawatts


Support:  The Guardian Council comprises twelve jurists including six appointed by the Supreme Leader.
***
Question:  The Council of Ministers is appointed and supervised by who?
***
Correct:  the president
***
predicted:  supreme leader


Support:  In 2004, Iran opened its first wind-powered and geothermal plants, and the first solar thermal plant is to come online in 2009.
***
Question:  When did Iran open its first wind-powered plants?
***
Correct:  2004
***
predicted:  2009


Support:  Iran is a major regional and middle power, exerting considerable influence in international energy security and the world economy through its large reserves of fossil fuels, which include the larges

Support:  On the pretext of restoring order, the Russians occupied Northern Iran in 1911, and maintained a military presence in the region for years to come.
***
Question:  In 1911, Russians occupied Northern Iran under what pretext?
***
Correct:  restoring order
***
predicted:  years


Support:  According to 2014 census, around 40% of the population of Iran are Internet users.
***
Question:  What percentage of Iran's population were internet users in 2014?
***
Correct:  around 40 %
***
predicted:  internet users


Support:  Shiraz, with a population of around 1.4 million (2011 census), is the sixth major city of Iran.
***
Question:  What is Shiraz' population by the 2011 Census?
***
Correct:  1.4 million
***
predicted:  iran


Support:  Under king Cyaxares, the Medes and Persians entered into an alliance with Nabopolassar of Babylon, as well as the Scythians and the Cimmerians, and together they attacked the Assyrian Empire.
***
Question:  Who lead the Persians into a coalition with t

Support:  According to UNESCO and the deputy head of research for Iran Travel and Tourism Organization (ITTO), Iran is rated 4th among the top 10 destinations in the Middle East.
***
Question:  What was Iran's rank in the top 10 Middle East destinations according to UNESCO?
***
Correct:  rated 4th
***
predicted:  middle east


Support:  The unification of the Median tribes under a single ruler in 728 BC led to the foundation of the Median Empire which, by 612 BC, controlled the whole Iran and the eastern Anatolia.
***
Question:  When did the Median tribes unify under a single ruler to form the Median Empire?
***
Correct:  728 bc
***
predicted:  eastern anatolia


Support:  The ruins of Persepolis and Pasargadae, two of the four capitals of the Achaemenid Empire, are located around the modern-day city of Shiraz.
***
Question:  The Achaemenid Empire expanded into what part of Asia?
***
Correct:  central asia
***
predicted:  shiraz


Support:  The presence of so many foreign troops in the

Support:  Of that amount, about 75% was based on natural gas, 18% on oil, and 7% on hydroelectric power.
***
Question:  How much of Iran's 33k megawatt installed capacity was based on natural gas? 
***
Correct:  75 %
***
predicted:  hydroelectric power


Support:  But the Bahá'í Faith, which is said to be the largest non-Muslim religious minority in Iran, is not officially recognized, and has been persecuted during its existence in Iran since the 19th century.
***
Question:  Which religious minority since the 1979 Revolution has been persecuted and in some cases executed by the Iranian government?
***
Correct:  the bahá'í faith
***
predicted:  19th century


Support:  On September 22, 1980, the Iraqi army invaded the Iranian Khuzestan, and the Iran–Iraq War began.
***
Question:  Who invaded Iran in 1980?
***
Correct:  the iraqi army
***
predicted:  iran–iraq war


Support:  Due to the 1973 spike in oil prices, the economy of Iran was flooded with foreign currency, which caused inflatio

Support:  When Eisenhower was elected President in 1952, he believed hiring practices and anti-discrimination laws should be decided by the states, although the administration gradually continued to desegregate the Armed Forces and the federal government.
***
Question:  Which year was Eisenhower elected President?
***
Correct:  1952
***
predicted:  federal government


Support:  The Reagan administration was opposed to the affirmative action requirements of Executive Order 11246, but these contemplated changes[which?]
***
Question:  Which Executive Order first contained the phrase "affirmative action"?
***
Correct:  executive order 10925
***
predicted:  affirmative action requirements


Support:  One argument for reverse discrimination is the idea that affirmative action encourages mediocrity and incompetence.
***
Question:  An argument against affirmative action is that it encourages what?
***
Correct:  mediocrity and incompetence
***
predicted:  mediocrity


Support:  Ideas for affir

Support:  The case concerns White and Hispanic firefighters in New Haven, Connecticut, who upon passing their test for promotions to management were denied the promotions, allegedly because of a discriminatory or at least questionable test.
***
Question:  Where was the issue of White and Hispanic firefighters heard in the case based out of?
***
Correct:  new haven , connecticut
***
predicted:  passing their test for promotions to management were denied the promotions


Support:  Lockheed signed an agreement with Vice President Johnson that pledged an "aggressive seeking out for more qualified minority candidates for technical and skill positions.
***
Question:  Who did Lockheed sign an agreement with to seek out more minority workers?
***
Correct:  vice president johnson
***
predicted:  technical and skill positions


Support:  In 1947 the committee published its findings, To Secure These Rights.
***
Question:  In which year were the "To Secure These Rights" findings published?
***
Cor

Support:  The Reagan administration was opposed to the affirmative action requirements of Executive Order 11246, but these contemplated changes[which?]
***
Question:  Which other Executive Order did the Civil Right's Act work closely with?
***
Correct:  11114
***
predicted:  affirmative action requirements


Support:  The Reagan administration was opposed to the affirmative action requirements of Executive Order 11246, but these contemplated changes[which?]
***
Question:  What is one of the issues with affirmative action?
***
Correct:  inherently unequal
***
predicted:  executive order


Support:  [citation needed] FDR's New Deal programs often contained equal opportunity clauses stating "no discrimination shall be made on account of race, color or creed",:11 but the true forerunner to affirmative action was the Interior Secretary of the time, Harold L. Ickes.
***
Question:  What position of power did Harold L. Ickes hold?
***
Correct:  interior secretary
***
predicted:  time


Support

Support:  It was regarded as the most forceful plan thus far to guarantee fair hiring practices in construction jobs.
***
Question:  Which company was targeted by the NAACP for not having fair practices?
***
Correct:  lockheed aircraft corporation
***
predicted:  construction jobs


Support:  The article presents a study that shows that half of all black law students rank near the bottom of their class after the first year of law school and that black law students are more likely to drop out of law school and to fail the bar exam.
***
Question:  Where do half of the black college students rank in terms of their performance relative to the rest of their class?
***
Correct:  bottom 20 percent
***
predicted:  bottom of their class after the first year of law school and that black law students are more likely to drop out of law school and to fail the bar exam


Support:  President Kennedy stated in Executive Order 10925 that "discrimination because of race, creed, color, or national origin

Support:  The French railway system, with multiple competing companies, had developed purely from commercial pressures and many journeys to the front in Alsace and Lorraine involved long diversions and frequent changes between trains.
***
Question:  What was one factor behind the inefficiency of the French railway system?
***
Correct:  multiple competing companies
***
predicted:  trains


Support:  German tactics emphasised encirclement battles like Cannae and using artillery offensively whenever possible.
***
Question:  What newer artillery were Prussians using?
***
Correct:  steel breech-loading guns
***
predicted:  cannae


Support:  The immediate cause of the war resided in the candidacy of a Leopold of Hohenzollern-Sigmaringen, a Prussian prince, to the throne of Spain.
***
Question:  What did this cause the Republic to renew?
***
Correct:  declaration of war
***
predicted:  spain


Support:  What made a bad situation much worse was the conduct of General Auguste-Alexandre Ducrot,

Support:  The republic then renewed the declaration of war, called for recruits in all parts of the country and pledged to drive the German troops out of France by a guerre à outrance.
***
Question:  In which country was a war against France condsidered desirable?
***
Correct:  prussia
***
predicted:  guerre à outrance


Support:  The quick German victory over the French stunned neutral observers, many of whom had expected a French victory and most of whom had expected a long war.
***
Question:  What outcome had most people expected from the war?
***
Correct:  french victory
***
predicted:  long war


Support:  An alarming report on the Posen situation, sent to Bismarck on 16 August 1870, led to the quartering of reserve troop contingents in the restive province.
***
Question:  On what date did Bismarkck receive the disturbing report on the Posen situation?
***
Correct:  16 august 1870
***
predicted:  restive province


Support:  A Government of National Defence declared the Third Repu

Support:  Firing a contact-detonated shell, the Krupp gun had a longer range and a higher rate of fire than the French bronze muzzle loading cannon, which relied on faulty time fuses.
***
Question:  What type of shell did the Krupp weapon fire?
***
Correct:  a contact-detonated shell
***
predicted:  faulty time fuses


Support:  A pre-war plan laid out by the late Marshal Niel called for a strong French offensive from Thionville towards Trier and into the Prussian Rhineland.
***
Question:  From Thionville towards Trier, what was the final destination of the offensive?
***
Correct:  the prussian rhineland
***
predicted:  prussian rhineland


Support:  General Frossard's II Corps and Marshal Bazaine's III Corps crossed the German border on 2 August, and began to force the Prussian 40th Regiment of the 16th Infantry Division from the town of Saarbrücken with a series of direct attacks.
***
Question:  What town were the III Corps able to capture?
***
Correct:  vionville
***
predicted:  dir

Support:  President Trochu resigned on 25 January and was replaced by Favre, who signed the surrender two days later at Versailles, with the armistice coming into effect at midnight.
***
Question:  On what date did president Trochu resign?
***
Correct:  25 january
***
predicted:  midnight


Support:  On 19 July 1870 a declaration of war was sent to the Prussian government.
***
Question:  On which date did France issue a declaration of war to the Prussian government?
***
Correct:  19 july 1870
***
predicted:  19 july


Support:  More recent histories, based on studies of the number buried in Paris cemeteries and in mass graves after the fall of the Commune, put the number killed at between 6,000 and 10,000.
***
Question:  What was the number of French prisoners?
***
Correct:  4,420
***
predicted:  commune


Support:  On 10 October, hostilities began between German and French republican forces near Orléans.
***
Question:  On which date did hostilities between the German and French troops

KeyboardInterrupt: 