In [11]:
import nltk
import json
import spacy
from nltk.corpus import stopwords
from math import log
from collections import defaultdict, Counter
from string import punctuation
from nltk.stem.wordnet import WordNetLemmatizer

OPEN_QUESTION_WORDS = ['what', 'who', 'whose', 'whom', 'where', 'when', 'why', 'how',
                       'which', "what's", "who's", "where's", "how's"]
CLOSED_QUESTION_WORDS = ['is', 'are', 'am', 'was', 'were', 'do', 'does,', 'did', 'can',
                         'could', 'will', 'would', 'shall', 'should', 'have', 'has',
                         'had']

# Stop words
stop = set(stopwords.words('english'))

lmtz = WordNetLemmatizer()

with open('testing.json') as json_data:
    test = json.load(json_data)

with open('documents.json') as json_data:
    documents = json.load(json_data)

# Spacy toolkit
nlp = spacy.load('en_core_web_sm')

punc = set(punctuation)


def strip_punctuation(s):
    return ''.join(c for c in s if c not in punc)


def lemmatize(token):
    lemma = lmtz.lemmatize(token, 'v')
    if lemma == token:
        lemma = lmtz.lemmatize(token, 'n')
    return lemma


def extract_term_freqs(doc):
    tfs = {}
    for token in nltk.word_tokenize(doc):
        lemma = lemmatize(token.lower())
        if lemma not in stop and lemma.isalpha():
            tfs[lemma] = tfs.get(lemma, 0) + 1
    return tfs


def compute_doc_freqs(doc_term_freqs):
    dfs = Counter()
    for tfs in doc_term_freqs.values():
        for term in tfs.keys():
            dfs[term] += tfs[term]
    return dfs


def query_vsm(query, index, k=10):
    accumulator = Counter()
    for term in query:
        postings = index[term]
        for docid, weight in postings:
            accumulator[docid] += weight
    return accumulator.most_common(k)


# Find the question word
def get_qword(question):
    tokens = nltk.word_tokenize(question.lower())
    for token in tokens:
        if token in OPEN_QUESTION_WORDS:
            return token
    for token in tokens:
        if token in CLOSED_QUESTION_WORDS:
            return token
    return 'others'


file = open('Second_method.csv', 'w')
file.write('id,answer\n')

case_count = 0
# test = [test[17]]
for test_case in test:
    question = test_case['question']
    docid = test_case['docid']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)

    # TFIDF
    doc_term_freqs = {}
    for (id, raw_doc) in enumerate(raw_docs):
        term_freqs = extract_term_freqs(raw_doc)
        doc_term_freqs[id] = term_freqs
    M = len(doc_term_freqs)

    doc_freqs = compute_doc_freqs(doc_term_freqs)

    vsm_inverted_index = defaultdict(list)
    for docid, term_freqs in doc_term_freqs.items():
        N = sum(term_freqs.values())
        length = 0

        # find tf*idf values and accumulate sum of squares
        tfidf_values = []
        for term, count in term_freqs.items():
            tfidf = float(count) / N * log(M / float(doc_freqs[term]))
            tfidf_values.append((term, tfidf))
            length += tfidf ** 2

        # normalise documents by length and insert into index
        length = length ** 0.5
        for term, tfidf in tfidf_values:
            # inversion of the indexing, term -> (doc_id, score)
            vsm_inverted_index[term].append([docid, tfidf / length])

    for term, docids in vsm_inverted_index.items():
        docids.sort()

    terms = extract_term_freqs(question)
    results = query_vsm(terms, vsm_inverted_index)

    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    target = [] # target dep

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''
    attr = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of question
    for token in doc:
        if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
            dobj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'attr' and token.head.dep_ == 'ROOT':
            attr = lemmatize(strip_punctuation(token.text))

    # determine answer dependency
    if qword in ['who', "who's", 'whom', 'whose']:
        for chunk in doc.noun_chunks:
            if qword in chunk.text:
                target = [chunk.root.dep_]

    elif qword in ['where', "where's", 'when']:
        target = ['advmod','prep']

    elif qword in ['how', "how's"]:
        if next_token in ['much', 'many','long','far', 'big', 'wide', 'deep', 'tall', 'high', 'fast', 'heavy','old', 'young']:
            target = ['attr']
        
        elif next_token in ['does', 'did', 'do', 'have', 'has', 'had', 'should',
                            'can', 'could', 'will', 'would', 'must']:
            if dobj != '':
                target = ['advmod', 'acomp']
            else:
                target = ['ROOT']

    elif qword in ['what', "what's", 'which']:
        # what...do type question
        tokens.remove(next_token)
        if 'do' in tokens:
            target = ['ROOT']
        else:
            for chunk in doc.noun_chunks:
                if qword in chunk.text:
                    target = [chunk.root.dep_]

    elif qword == 'why':
        target = ['mark','prep']

    elif qword in CLOSED_QUESTION_WORDS:
        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            
            # dep of phrase before and after 'or'
            prev = tokens[index - 1]
            for chunk in doc.noun_chunks:
                if prev in chunk.text:
                    target = [chunk.root.dep_]
            
            if target == '':
                for token in doc:
                    if prev in token.text:
                        target = [token.dep_]

    # find answer with highest score from the 10 most similar sentences
    max_score = -1
    answer = ''

    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        # find sentence structure
        sent_nsubj = ''
        sent_ROOT = ''
        sent_dobj = ''
        sent_attr = ''
        for token in doc:
            if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
                sent_nsubj = lemmatize(strip_punctuation(token.text))
            if token.dep_ == 'ROOT':
                sent_ROOT = lemmatize(strip_punctuation(token.text))
            if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
                sent_dobj = lemmatize(strip_punctuation(token.text))
            if 'attr' in token.dep_ and token.head.dep_ == 'ROOT':
                sent_attr = lemmatize(strip_punctuation(token.text))

        score = 0
        if nsubj == sent_nsubj:
            score += 1
            if ROOT == sent_ROOT:
                score += 1
                if dobj == sent_dobj:
                    score += 1
                if attr == sent_attr:
                    score += 1

        max_score = -1
        answer = ''
        if target == ['ROOT'] or target == ['attr'] or target == ['advmod','acomp']:
            for token in doc:
                if token.dep_ in target:
                    score += 1
                    
                    if token.text not in question:
                        score += 1
                        
                    if strip_punctuation(token.text).lower() in stop:
                        score = -1
                        
                    if token.text.lower() == 'it':
                        score = -1
                    
                    if score > max_score:
                        max_score = score
                        answer = token.text
                        
        elif target == ['mark','prep'] or target == ['advmod','prep']:
            for token in doc:
                if token.dep_ in target:
                    score += 1
                    
                    if token.text not in question:
                        score += 1
                        
                    if strip_punctuation(token.text).lower() in stop:
                        score = -1
                        
                    if token.text.lower() == 'it':
                        score = -1
                    
                    if score > max_score:
                        max_score = score
                        substr = sent[sent.index(token.text):]
                        tokens = nltk.word_tokenize(substr)
                        del tokens[0]
                        for token in tokens:
                            if token in punc:
                                break
                            answer += token+' '
                        
        else:
            for chunk in doc.noun_chunks:
                if chunk.root.dep_ in target:
                    score += 1
                    
                    if chunk.text not in question:
                        score += 1
                        
                    if strip_punctuation(chunk.text).lower() in stop:
                        score = -1
                        
                    if chunk.text.lower() == 'it':
                        score = -1
                    
                    if score > max_score:
                        max_score = score
                        answer = chunk.text
        
    # default answer
    if answer == '':
        for chunk in doc.noun_chunks:
            if chunk.text not in stop and chunk.text not in question:
                answer = chunk.text
        
        
    file.write(str(case_count))
    file.write(',')
    file.write(strip_punctuation(answer).strip().lower())
    file.write('\n')
    print(case_count, ' ', answer)
    case_count += 1

file.close()

0   all capabilities
1   the World Wide Web Foundation
2   the user
3   the open source software model
4   started his own company 
5   month
6   referred to as a browser 
7   Java applets
8   Internet Explorer
9   the World Wide Web
10   part
11   Sir Tim Berners-Lee
12   merely "web browsers
13   a web browser
14   merely "web browsers
15   started his own company 
16   Commission Safari and Mobile Safari were likewise always included with OS X and iOS respectively always included with OS X and iOS respectively included with OS X and iOS respectively 
17   local files
18   a process
19   history
20   a web browser
21   released in September 2008 
22   Net Applications
23   the World Wide Web
24   over 95%
25   e-mail
26   reduces consumer choice 
27   their managers 
28   web browsers
29   (HTTP
30   CSS
31   the browser
32   web browsers
33   the World Wide Web Foundation
34   users
35   over 40 million phones
36   the average person
37   Internet Explorer
38   a web browser
39   Mi

337   more than the number of protesters 
338   the United States
339   their own "Human Rights Torch
340   all times
341   the Chinese government's crackdown
342   protests
343   Tibet
344   requested the torch route in San Francisco be shortened 
345   Jinnah Stadium
346   August
347   the Chinese media
348   a Beijing citizen
349   to Asian Times 
350   the curtailed 3 km route 
351   propane
352   its own face
353   a sound
354   slower oscillation
355   C♯
356   frequency
357   the waveform
358   the number
359   frequency
360   higher frequencies
361   fundamental frequency
362   upper partials
363   the frequencies
364   the pitches
365   the basic prerequisite
366   higher frequencies
367   fundamental frequency
368   a pitch
369   any specific pitch
370   several permanent and travelling exhibitions
371   the River Itchen
372   apparent crime figures
373   ruins
374   local public transport
375   jobs
376   England
377   Carnival UK
378   boys’ and girls’ teams
379   elections

702   cruise operator Carnival UK
703   the city's buildings
704   the base for any cross-channel ferries 
705   the River Itchen
706   dance music
707   the town
708   Southampton Town
709   England
710   the city's waterfront
711   the town
712   the friars
713   Ronald Koeman
714   Southampton Central
715   an ethnic group
716   this reference point
717   association
718   Jews
719   local Christian rulers
720   Ashkenazi maternal lineages
721   the Caucasus region
722   3 million DNA letters
723   the general European population
724   outnumbered Ashkenazim three to two '' 
725   the pre-2006 origin hypothesis
726   the Bray et al
727   the second half
728   recent times
729   subgroup
730   their fellow non-Jewish countrymen
731   Israelis
732   other parts
733   the Bray et al
734   together
735   differing rates
736   the Roman empire
737   the practice
738   a nun נ
739   refers to the nusach Ashkenaz 
740   the Khazar hypothesis
741   Orthodox Judaism
742   faiths
743   the la

1019   the Theotokos
1020   Constantinople
1021   Georgia
1022   Roman prototypes
1023   strong Italianate influence
1024   the Theotokos
1025   The mosaics
1026   a beautiful mosaic pavement
1027   three medallions
1028   a Samaritan synagogue
1029   initiated in these arts '' – says the chronicler about the role of the Greeks in the revival of mosaic art in medieval Italy 
1030   Kiev
1031   The most superb example
1032   fully coloured work
1033   slaves
1034   the Lusosphere
1035   the city
1036   Hungary
1037   a robot
1038   Empress Zoe
1039   the 8th century
1040   grains
1041   Cipriana
1042   bland mosaics
1043   figures
1044   the Dome
1045   the Deacon
1046   
1047   , lesser known sites
1048   Georgia
1049   initiated in these arts '' – says the chronicler about the role of the Greeks in the revival of mosaic art in medieval Italy 
1050   the 16th century
1051   Thessaloniki
1052   the church
1053   Bethlehem
1054   the 8th century
1055   only the floor panels
1056   mosaic

1311   Eastern Orthodoxy
1312   batteries
1313   the Crimean peninsula
1314   disaster
1315   a Franco-British naval force
1316   one American officer
1317   orders
1318   equipment
1319   30 March
1320   the Greek army
1321   part
1322   the more sweeping demands
1323   Sinop
1324   the nearest future
1325   his protection
1326   equipment
1327   the Crimean Peninsula
1328   the Crimean peninsula
1329   Charlemagne
1330   the previous war
1331   Alger
1332   Alfred Nobel
1333   ordered
1334   a German satellite
1335   Rear Admiral Yevfimy Putyatin
1336   problems to the south and the heavy artillery was brought ashore with batteries and connecting trenches built so that by 10 October some batteries were ready and by 17 October—when the bombardment commenced—126 guns were firing 
1337   the day
1338   direct conflict
1339   Oltenița
1340   Italy
1341   the attack
1342   27/28 October
1343   Gallipoli
1344   Russia
1345   the Russians
1346   the French Emperor
1347   William Howard Russ

1642   this city
1643   all Eight Han Banners
1644   foreign control
1645   local elites
1646   the Chongzhen Emperor
1647   Qing Dynasty
1648   incorporated them into the Jurchen `` nation '' as full 
1649   the provinces
1650   the 20th century
1651   their names
1652   the war
1653   local elites
1654   China
1655   the war
1656   the early 19th century
1657   the Qing
1658   Wu Sangui's appeal
1659   Vietnam
1660   simultaneously
1661   Han Chinese civilians
1662   Beijing
1663   the Eight Banners
1664   Liaodong
1665   the Foreign Legation Quarter
1666   the already hugely unpopular regime
1667   inferior status
1668   a short, but hard-fought campaign
1669   instruction
1670   the imperial examination
1671   an emergency army
1672   these military reforms enabled Hong Taiji to resoundingly defeat Ming forces in a series of battles from 1640 to 1642 for the territories of Songshan and Jinzhou defeat Ming forces in a series of battles from 1640 to 1642 for the territories of Songsh

1986   KU Med
1987   the three campuses
1988   "Bleeding Kansas
1989   the first college building
1990   seven locations
1991   Lawrence, Kansas
1992   university
1993   the Kansas City metropolitan area
1994   three NCAA tournament championships in 1952 
1995   accounting
1996   urban planning
1997   accounting
1998   university
1999   any other university
2000   Kansas City
2001   "Bleeding Kansas
2002   a public university
2003   the 2014 U.S. News & World Report "Best Graduate Schools" edition
2004   Kansas
2005   accounting
2006   includes KU Dining Services the Kansas Union 
2007   graduate study
2008   Student Union Activities
2009   additional fees
2010   any other university
2011   accounting
2012   295
2013   Planning
2014   Social Welfare
2015   of the university 's William Allen White School of Journalism and reaches an audience of at least 30,000 daily readers through its print and online publications The university houses the following public broadcasting stations 
2016  

2302   traditional racial groupings
2303   some European ancestors
2304   their descent groups
2305   biological races
2306   such [racial] theories
2307   Homo
2308   Biological Races
2309   other such means
2310   groups
2311   various human groups
2312   recent times
2313   each race/ethnicity group
2314   population
2315   haplogroup
2316   the limits
2317   race
2318   social categorization
2319   European Y-DNA
2320   their descent groups
2321   intellectual, behavioral, and moral qualities
2322   Homo
2323   such [racial] theories
2324   East Asian sweeps
2325   individual phenotypes
2326   European Y-DNA
2327   the variation
2328   traditional racial groupings
2329   nineteen rejected race
2330   this African population
2331   the emigrating groups
2332   this new color-blind racism
2333   "Indian blood
2334   social constructs
2335   each group
2336   New Guinea
2337   the popular imagination
2338   minor ones
2339   intellectual, behavioral, and moral qualities
2340   this Af

2636   passengers
2637   the local jurisdiction
2638   The first elevator shaft
2639   the most efficient design
2640   the ASME A17.2 Standard
2641   the ASME A17.2 Standard
2642   machine room
2643   the lower city
2644   the car
2645   a plunger gripping device
2646   the car
2647   Wembley
2648   West Germany
2649   their first and only World Cup title
2650   two years
2651   the final
2652   their first and only World Cup title
2653   the away shirt
2654   King Richard I
2655   the 2006 FIFA World Cup's conclusion
2656   their first and only World Cup title
2657   at least one FIFA World Cup title
2658   the away shirt
2659   The team
2660   the World Cup
2661   West Germany
2662   the 2006 FIFA World Cup
2663   Central Europe
2664   West Germany
2665   third place
2666   Ramsey's dismissal
2667   central and northern Scotland
2668   the first round
2669   King Richard I
2670   Central Europe
2671   West Germany
2672   that company's collapse
2673   King Richard I
2674   the 2006 

2962   battery life
2963   the memory issue
2964   Apple
2965   the more expensive Macintosh systems
2966   a dealer
2967   other personal computers
2968   keyboard
2969   Claris Impact
2970   potential buyers
2971   second
2972   the iMac G4
2973   the problem
2974   larger monitors
2975   the average consumer
2976   a whole
2977   security updates
2978   from its fast 40 MHz 68030 processor 
2979   hard drives
2980   later appeals
2981   version
2982   nine months
2983   Snow Leopard
2984   VirtualBox
2985   iteration
2986   a price
2987   Microsoft's formerly separate MS-DOS and Windows products
2988   1998 phased out the Macintosh name in favor of `` Mac '' 
2989   Microsoft Windows
2990   laptops
2991   the Legislative Assembly
2992   cable
2993   the third-fastest growing economy
2994   its difficult terrain
2995   marketing facilities
2996   Pathankot–Jogindernagar
2997   various other house hold items
2998   in the list of the highest per capita incomes of Indian states 
2999  

3295   the underside
3296   plants
3297   use suckers at their front and rear ends to move like inchworms 
3298   ammonia
3299   trademarks
3300   the general and permanent federal statutes
3301   wire fraud
3302   the Supreme Court
3303   the federal judiciary
3304   contemporary English cases
3305   American reports
3306   the Constitution
3307   criminal law
3308   the rest
3309   precedents
3310   given
3311   stare decisis
3312   the territories
3313   the Constitution
3314   mail
3315   mail
3316   American tort law
3317   the Constitution
3318   the territories
3319   federal agencies
3320   on 
3321   law enforcement powers
3322   abolished
3323   state courts
3324   the Constitution
3325   mail
3326   the Uniform Commercial Code
3327   criminal law
3328   272,795 new cases
3329   stare decisis
3330   issue
3331   a settlement
3332   post-Revolution Commonwealth rulings
3333   1.9 million juvenile cases
3334   state courts
3335   West Publishing
3336   several ways
3337   sever

# Train set test

In [15]:
import nltk
import json
import spacy
from nltk.corpus import stopwords
from math import log
from collections import defaultdict, Counter
from string import punctuation
from nltk.stem.wordnet import WordNetLemmatizer

OPEN_QUESTION_WORDS = ['what', 'who', 'whose', 'whom', 'where', 'when', 'why', 'how',
                       'which', "what's", "who's", "where's", "how's"]
CLOSED_QUESTION_WORDS = ['is', 'are', 'am', 'was', 'were', 'do', 'does,', 'did', 'can',
                         'could', 'will', 'would', 'shall', 'should', 'have', 'has',
                         'had']

# Stop words
stop = set(stopwords.words('english'))

lmtz = WordNetLemmatizer()

with open('testing.json') as json_data:
    test = json.load(json_data)

with open('training.json') as json_data:
    train = json.load(json_data)
    
with open('documents.json') as json_data:
    documents = json.load(json_data)

# Spacy toolkit
nlp = spacy.load('en_core_web_sm')

punc = set(punctuation)


def strip_punctuation(s):
    return ''.join(c for c in s if c not in punc)


def lemmatize(token):
    lemma = lmtz.lemmatize(token, 'v')
    if lemma == token:
        lemma = lmtz.lemmatize(token, 'n')
    return lemma


def extract_term_freqs(doc):
    tfs = {}
    for token in nltk.word_tokenize(doc):
        lemma = lemmatize(token.lower())
        if lemma not in stop and lemma.isalpha():
            tfs[lemma] = tfs.get(lemma, 0) + 1
    return tfs


def compute_doc_freqs(doc_term_freqs):
    dfs = Counter()
    for tfs in doc_term_freqs.values():
        for term in tfs.keys():
            dfs[term] += tfs[term]
    return dfs


def query_vsm(query, index, k=10):
    accumulator = Counter()
    for term in query:
        postings = index[term]
        for docid, weight in postings:
            accumulator[docid] += weight
    return accumulator.most_common(k)


# Find the question word
def get_qword(question):
    tokens = nltk.word_tokenize(question.lower())
    for token in tokens:
        if token in OPEN_QUESTION_WORDS:
            return token
    for token in tokens:
        if token in CLOSED_QUESTION_WORDS:
            return token
    return 'others'


# file = open('Second_method.csv', 'w')
# file.write('id,answer\n')

case_count = 0
# test = [test[17]]
for train_case in train:
    question = train_case['question']
    docid = train_case['docid']
    correct_answer = train_case['text']

    # Convert doc into one string, then tokenize sentences
    corpus = ''
    for para in documents[docid]['text']:
        corpus += para + ' '

    # sentence as a document
    raw_docs = nltk.sent_tokenize(corpus)

    # TFIDF
    doc_term_freqs = {}
    for (id, raw_doc) in enumerate(raw_docs):
        term_freqs = extract_term_freqs(raw_doc)
        doc_term_freqs[id] = term_freqs
    M = len(doc_term_freqs)

    doc_freqs = compute_doc_freqs(doc_term_freqs)

    vsm_inverted_index = defaultdict(list)
    for docid, term_freqs in doc_term_freqs.items():
        N = sum(term_freqs.values())
        length = 0

        # find tf*idf values and accumulate sum of squares
        tfidf_values = []
        for term, count in term_freqs.items():
            tfidf = float(count) / N * log(M / float(doc_freqs[term]))
            tfidf_values.append((term, tfidf))
            length += tfidf ** 2

        # normalise documents by length and insert into index
        length = length ** 0.5
        for term, tfidf in tfidf_values:
            # inversion of the indexing, term -> (doc_id, score)
            vsm_inverted_index[term].append([docid, tfidf / length])

    for term, docids in vsm_inverted_index.items():
        docids.sort()

    terms = extract_term_freqs(question)
    results = query_vsm(terms, vsm_inverted_index)

    # Step 2
    # Analyse question type
    qword = get_qword(question)

    # the word after question word, such as 'what value', 'which gender'
    next_token = ''

    target = [] # target dep

    # dependency parsing
    dep = ''

    # head word
    head = ''

    # head dependency
    head_dep = ''

    # subject, root, object
    nsubj = ''
    ROOT = ''
    dobj = ''
    attr = ''

    # yes or no questions have two options
    closed_q_choices = ('', '')

    doc = nlp(question)

    tokens = nltk.word_tokenize(question.lower())

    # get next word
    if qword in tokens:
        if tokens.index(qword) < len(tokens) - 1:
            next_token = tokens[tokens.index(qword) + 1]

    # get structure of question
    for token in doc:
        if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
            nsubj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'ROOT':
            ROOT = lemmatize(strip_punctuation(token.text))
        if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
            dobj = lemmatize(strip_punctuation(token.text))
        if token.dep_ == 'attr' and token.head.dep_ == 'ROOT':
            attr = lemmatize(strip_punctuation(token.text))

    # determine answer dependency
    if qword in ['who', "who's", 'whom', 'whose']:
        for chunk in doc.noun_chunks:
            if qword in chunk.text:
                target = [chunk.root.dep_]

    elif qword in ['where', "where's", 'when']:
        target = ['advmod','prep']

    elif qword in ['how', "how's"]:
        if next_token in ['much', 'many','long','far', 'big', 'wide', 'deep', 'tall', 'high', 'fast', 'heavy','old', 'young']:
            target = ['attr']
        
        elif next_token in ['does', 'did', 'do', 'have', 'has', 'had', 'should',
                            'can', 'could', 'will', 'would', 'must']:
            if dobj != '':
                target = ['advmod', 'acomp']
            else:
                target = ['ROOT']

    elif qword in ['what', "what's", 'which']:
        # what...do type question
        tokens.remove(next_token)
        if 'do' in tokens:
            target = ['ROOT']
        else:
            for chunk in doc.noun_chunks:
                if qword in chunk.text:
                    target = [chunk.root.dep_]

    elif qword == 'why':
        target = ['mark','prep']

    elif qword in CLOSED_QUESTION_WORDS:
        # answer is one of the 'or' options in the question
        if 'or' in tokens:
            index = tokens.index('or')
            
            # dep of phrase before and after 'or'
            prev = tokens[index - 1]
            for chunk in doc.noun_chunks:
                if prev in chunk.text:
                    target = [chunk.root.dep_]
            
            if target == '':
                for token in doc:
                    if prev in token.text:
                        target = [token.dep_]

    # find answer with highest score from the 10 most similar sentences
    max_score = -1
    answer = ''

    for id, _ in results:
        sent = raw_docs[id]
        doc = nlp(sent)

        # find sentence structure
        sent_nsubj = ''
        sent_ROOT = ''
        sent_dobj = ''
        sent_attr = ''
        for token in doc:
            if 'nsubj' in token.dep_ and token.head.dep_ == 'ROOT':
                sent_nsubj = lemmatize(strip_punctuation(token.text))
            if token.dep_ == 'ROOT':
                sent_ROOT = lemmatize(strip_punctuation(token.text))
            if 'dobj' in token.dep_ and token.head.dep_ == 'ROOT':
                sent_dobj = lemmatize(strip_punctuation(token.text))
            if 'attr' in token.dep_ and token.head.dep_ == 'ROOT':
                sent_attr = lemmatize(strip_punctuation(token.text))

        score = 0
        if nsubj == sent_nsubj:
            score += 1
            if ROOT == sent_ROOT:
                score += 1
                if dobj == sent_dobj:
                    score += 1
                if attr == sent_attr:
                    score += 1

        max_score = -1
        answer = ''
        if target == ['ROOT'] or target == ['attr'] or target == ['advmod','acomp']:
            for token in doc:
                if token.dep_ in target:
                    score += 1
                    
                    if token.text not in question:
                        score += 1
                        
                    if strip_punctuation(token.text).lower() in stop:
                        score = -1
                        
                    if token.text.lower() == 'it':
                        score = -1
                    
                    if score > max_score:
                        max_score = score
                        answer = token.text
                        
        elif target == ['mark','prep'] or target == ['advmod','prep']:
            for token in doc:
                if token.dep_ in target:
                    score += 1
                    
                    if token.text not in question:
                        score += 1
                        
                    if strip_punctuation(token.text).lower() in stop:
                        score = -1
                        
                    if token.text.lower() == 'it':
                        score = -1
                    
                    if score > max_score:
                        max_score = score
                        substr = sent[sent.index(token.text):]
                        tokens = nltk.word_tokenize(substr)
                        del tokens[0]
                        for token in tokens:
                            if token in punc:
                                break
                            answer += token+' '
                        
        else:
            for chunk in doc.noun_chunks:
                if chunk.root.dep_ in target:
                    score += 1
                    
                    if chunk.text not in question:
                        score += 1
                        
                    if strip_punctuation(chunk.text).lower() in stop:
                        score = -1
                        
                    if chunk.text.lower() == 'it':
                        score = -1
                    
                    if score > max_score:
                        max_score = score
                        answer = chunk.text
        
    # default answer
    if answer == '':
        for chunk in doc.noun_chunks:
            if chunk.text not in stop and chunk.text not in question:
                answer = chunk.text
        
        
#     file.write(str(case_count))
#     file.write(',')
#     file.write(strip_punctuation(answer).strip().lower())
#     file.write('\n')
    if answer != correct_answer:
        print('Correct: ',correct_answer)
    
        print('Predicted: ',answer)
       
        print('Question: ',question)
        
        for id, _ in results:
            sent = raw_docs[id]
            doc = nlp(sent)
            
        print('\n')
    case_count += 1


Correct:  6966662606895999999♠6.62606896×10−34 j⋅s
Predicted:  more detailed information
Question:  A kilogram could be definined as having a Planck constant of what value?


Correct:  cylinder
Predicted:  diagram
Question:  What is the shape of the object that establishes the base unit of the kilogram?


Correct:  time vs. energy
Predicted:  a given time
Question:  What example is given as another paired relationship of uncertainly related to standard deviation?


Correct:  quantum of action
Predicted:  angular momentum
Question:  What does the Planck Constant refer to?


Correct:  1913
Predicted:  calculated in the early twentieth century 
Question:  When was the first quantized model of the atom introduced?


Correct:  kirchhoff
Predicted:  J⋅s
Question:  What scientist first studied black body radiation?


Correct:  schrödinger
Predicted:  the Bohr model
Question:  Who helped to give the correct quantization rules for electrons in 1926?


Correct:  h
Predicted:  Rutherford's classi

Correct:  the rayleigh–jeans
Predicted:  a continuous variable
Question:  What rule predicted narrow range of energy values at lower temperatures?


Correct:  platinum–iridium
Predicted:  conventional electrical units
Question:  What alloy is the base unit of the kilogram made from?


Correct:  blackbody emissions
Predicted:  the new definition
Question:  What does Planck's law correctly predict?


Correct:  niels bohr
Predicted:  angular momentum
Question:  Who modeled the atom in 1913, challenging Rutherford's model?


Correct:  2007
Predicted:  present
Question:  The value quoted here for the Planck constant is based on a measurement in what year?


Correct:  thermal light emission
Predicted:  the colour
Question:  What is maximized as a result of a black object absorbing all the light that hits it?


Correct:  6981358000000000000♠3.58×10−19 j
Predicted:  a certain kinetic energy
Question:  What is the energy of a photon?


Correct:  white hot
Predicted:  hot atoms
Question:  What c

Correct:  lester brown
Predicted:  the RES Directive
Question:  Who believes that the market does not value nature's service adequately?


Correct:  144
Predicted:  plants
Question:  How many countries now have renewable energy policies?


Correct:  19 percent
Predicted:  an indicative renewable energy target
Question:  Renewables contributed what percentage to our energy consumption?


Correct:  dedicated energy crops
Predicted:  many countries
Question:  What is a promising cellulose source that can be sustainably produced in many regions?


Correct:  coal
Predicted:  many countries
Question:  Some countries have phased out all subsidies for what substance?


Correct:  the internet
Predicted:  the renewable energy and energy efficiency industries
Question:  What was the result of publicly funded links among computers in government labs and reserach institutes?


Correct:  2011
Predicted:  conventional energy sources
Question:  As of what year have there been substantial reductions in

Correct:  brussels , belgium
Predicted:  the Indiana Jones films
Question:  Where did Adventures of Tintin debut?


Correct:  co-producer
Predicted:  Winfrey
Question:  What was Spielberg's role on 'Batteries Not Included'?


Correct:  five
Predicted:  the first Monkey Island games
Question:  How many Oscar nominations did 'Munich' get?


Correct:  26-minute
Predicted:  Broadway
Question:  How long was 'Amblin'?


Correct:  paramount pictures
Predicted:  the editing department
Question:  Which studio produced 'Super 8'?


Correct:  actress
Predicted:  their relationship
Question:  What was Spielberg's future wife's career?


Correct:  four
Predicted:  a major Hollywood studio
Question:  How many TV movies was Spielberg signed to direct?


Correct:  allen daviau
Predicted:  the Third Kind
Question:  Which childhood friend worked on Spielberg's films?


Correct:  brian de palma
Predicted:  the film
Question:  Who introduced Irving to Spielberg?


Correct:  sean connery
Predicted:  the In

Correct:  oskar schindler
Predicted:  the Normandy landing
Question:  Whose life was 'Schindler's List' based on?


Correct:  jaws
Predicted:  the Third Kind
Question:  What movie was Spielberg working on when he first played Pong?


Correct:  two
Predicted:  his direction
Question:  How many Oscars did Close Encounters win?


Correct:  richmond , virginia
Predicted:  never worked with the same screenwriter in his films 
Question:  Where was 'Lincoln' filmed?


Correct:  1975
Predicted:  the water scenes in Jaws are filmed from the low-angle perspective of someone swimming 
Question:  When was Jaws released?


Correct:  `` bachelor funky ''
Predicted:  a movie-making simulation game
Question:  How did Irving describe Spielberg's house?


Correct:  firelight
Predicted:  an actress
Question:  What film led to Close Encounters?


Correct:  capt . miller
Predicted:  that end scene
Question:  Who did Tom Hanks play in 'Saving Private Ryan'?


Correct:  halo
Predicted:  Martin Landau
Questio

Correct:  june 2006
Predicted:  as the first to win for a performance directed by Spielberg 
Question:  When did Spielberg announce what would become 'Interstellar'?


Correct:  47 %
Predicted:  enough money
Question:  How much of the US believes Spielberg is influential?


Correct:  cincinnati , ohio
Predicted:  the film
Question:  Where was Spielberg born?


Correct:  best picture
Predicted:  Best Picture
Question:  What was the film "Jaws" nominated for?


Correct:  john williams
Predicted:  the film audience
Question:  Who composed music for 'Munich'?


Correct:  salt lake city
Predicted:  the player
Question:  Where was Spielberg an Olympic flagbearer?


Correct:  electrical engineer
Predicted:  often fathers
Question:  What job did Steven Spielberg's father have?


Correct:  joan crawford
Predicted:  Universal
Question:  Who starred in the TV show segment "Eyes"


Correct:  john williams
Predicted:  Jaws
Question:  Who has composed most of Steven Spielberg's movies?


Correct:  t

Correct:  bombers
Predicted:  positions
Question:  What particular aircraft did the interceptor target most?


Correct:  2 to 12
Predicted:  battery
Question:  How many guns or missile launchers are typically in a battery?


Correct:  height/fuse indicator ( hfi )
Predicted:  flight
Question:  What was the HRF used with to set fuses?


Correct:  manpads
Predicted:  up to 30 nmi
Question:  What are man-portable missiles better known as?


Correct:  elevation angle
Predicted:  fuse setting
Question:  The HRF measured target distance and what else?


Correct:  1940
Predicted:  little financing
Question:  What year was the M1 version approved?


Correct:  pvo sv
Predicted:  the NATO Integrated Air Defence System
Question:  What was the arm that was the Air Defence of the Ground Forces?


Correct:  multiple transmitter radars
Predicted:  an enemy aircraft
Question:  What can detect stealth aircraft?


Correct:  altitude
Predicted:  neither answer
Question:  Along with speed of the modern je

Correct:  guided missiles
Predicted:  any projectile
Question:  In addition to radar and computing, what else did air defence want to exploit?


Correct:  important buildings
Predicted:  John M. Browning
Question:  What was camouflaged during World War II?


Correct:  britain
Predicted:  UK
Question:  Where was the Observer Corps based?


Correct:  1950s
Predicted:  high speeds
Question:  What decade were the 90 and 120 millimeter guns used until?


Correct:  world war ii
Predicted:  this role
Question:  Rocket research began prior to which war in some countries?


Correct:  the u.s. army
Predicted:  anti-aircraft warfare
Question:  Which military set up a large air defence network surrounding its larger cities?


Correct:  maximum ceiling
Predicted:  fuse length
Question:  What is the term used to describe the height that a projectile would go to if it was fired vertically?


Correct:  john m. browning
Predicted:  SAM Roland
Question:  Who designed this cannon?


Correct:  .50-inch
Pr

Correct:  hydraulics
Predicted:  less sophisticated arrangements
Question:  What was used to aim the guns after electrical commands were sent?


Correct:  1935
Predicted:  followed by the 39 for static sites but had a mobile mounting and the unit had 220v 24 kW generators 
Question:  When was the first 3.7 cm FlaK 18 introduced?


Correct:  1938
Predicted:  static positions
Question:  When did the design begin for the 12.8 centimeter FlaK?


Correct:  amyas borton
Predicted:  the 1940-origin Flakvierling quadruple-20 mm-gun antiaircraft weapon system
Question:  Who is probably the one who coined the term archie for anti-aircraft guns?


Correct:  anti-aircraft warfare
Predicted:  a Royal Air Force command
Question:  What does NATO ascribe naval air defence as?


Correct:  raf regiment
Predicted:  the balloon
Question:  What was formed in 1941 to protect airfields?


Correct:  the user
Predicted:  the instruments
Question:  When an RPG is fired at a steep angle, who is in danger?


Corr

Correct:  45 days
Predicted:  species
Question:  How long after a published notice of the listing may a public hearing regarding the listing be requested?


Correct:  cotton-top tamarin
Predicted:  the species' area
Question:  The Center for Public Integrity found that 151 of what primate were moved from the New England Primate Research Center into exotic pet trade?


Correct:  twenty-eight
Predicted:  16 U.S.C.
Question:  As of September 2012, how many species had been delisted due to recovery?


Correct:  candidate assessment program
Predicted:  the currently listed species
Question:  What federal program is used to list a species?


Correct:  `` class of '67 ''
Predicted:  state wildlife agencies
Question:  What is the nickname given to the first listing of endangered species?


Correct:  over 2,000
Predicted:  a species
Question:  How many species are currently listed?


Correct:  president obama
Predicted:  a "distinct population segment
Question:  Which president signed an Act di

Correct:  ngwane iii
Predicted:  five-year terms
Question:  Who was the leader of the 18th century Swazi kingdom?


Correct:  anglo-boer war
Predicted:  approximately 26°
Question:  What 1903 conflict involving the British caused Swaziland to become a protectorate?


Correct:  less than 20 %
Predicted:  South Africa
Question:  What percentage of the GDP was represented by outside debt in Swaziland in 2006?


Correct:  83 %
Predicted:  its exports
Question:  What percentage of the Swazi population are Christian?


Correct:  childless , unmarried girls
Predicted:  the inkhundla
Question:  What individuals can take part in the Umhlanga Reed Dance?


Correct:  81 mi
Predicted:  country
Question:  How wide is Swaziland in miles??


Correct:  declined markedly
Predicted:  a British protectorate
Question:  What has happened to debt external onus  in Swaziland in the past two decades?


Correct:  siteki
Predicted:  the country
Question:  Where in Swaziland is The Good shepherd Hospital?


Corr

Correct:  153,600,000,000 barrels
Predicted:  the world
Question:  How much oil reserves does Iran have?


Correct:  six
Predicted:  the Supreme Leader
Question:  The Supreme Leader appoints how many members of the Guardian Council?


Correct:  2010
Predicted:  literature
Question:  What year did Iran pass an economic reform plan that would replace subsidies with targeted social assistance programs?


Correct:  25 commercial films
Predicted:  technological lags
Question:  How many commercial films were produced yearly on average in the early 1960s in Iran?


Correct:  khosrow sinai
Predicted:  a nuclear weapon
Question:  Which director led off a new age of Iranian film after the 1979 Revolution?


Correct:  in 1906
Predicted:  Karim Khan
Question:  The first Constitution of Iran was founded in what year?


Correct:  armenia
Predicted:  its leading cultural and economic center
Question:  Which country borders Iran to Iran's northwest?


Correct:  construction materials
Predicted:  more 

Correct:  9th century bc
Predicted:  Greece
Question:  Persians have been in the Fars Province since what century?


Correct:  86
Predicted:  Iran
Question:  The Assembly of Experts has how many clerics?


Correct:  samuel rahbar
Predicted:  "skillful assembler
Question:  Which Iranian scientist discovered HbA1c to further treatment and research of diabetes?


Correct:  jizya
Predicted:  the Berlin International Film Festival
Question:  What special tax did the discriminated Iranians - both converted and nonconverted - have to pay?


Correct:  an economic recession
Predicted:  Encyclopædia Iranica
Question:  Iran's inflation led to what in 1975-1976?


Correct:  the caucasus
Predicted:  the Hellenistic Seleucid Empire
Question:  What region consisting of Dagestan, Georgia, Armenia, & Azerbaijan did Iran lose control of to the Russians?


Correct:  ranked 69
Predicted:  country in the Middle East and the 18th-largest in the world 
Question:  The Global COmpetitiveness Report in 2010 ran

Correct:  2006
Predicted:  middle class minorities 
Question:  When was there a vote regarding affirmative action in Michigan?


Correct:  june 2016
Predicted:  some of the country 's lead statistical methodologists told the Supreme Court that Sander 's analyses were sufficiently flawed that the Court would be wise to ignore them entirely 
Question:  When will the Supreme Court likely hear the case for the second time?


Correct:  lower-class european americans
Predicted:  the African American community
Question:  At who's expense other than Asian Americans are upper-class African Americans and Hispanic Americans supposedly benefiting?


Correct:  whites and males
Predicted:  past discrimination
Question:  Studies showed that discrimination in both business sectors and education resulted in advantages for what group of people?


Correct:  two
Predicted:  all qualified people
Question:  How many executive orders were issued to help prevent discrimination?


Correct:  enforcing equal opp

Correct:  equality of opportunity
Predicted:  all Americans
Question:  During the speech, Truman made the statement that each man should be guaranteed what?


Correct:  against affirmative action
Predicted:  de facto racial quotas
Question:  What moral position does Shelby Steele hold?


Correct:  four times
Predicted:  their graduating class
Question:  What is the failure rate for black law school graduates compared to whites for the bar exam?


Correct:  eight percent
Predicted:  the Supreme Court
Question:  How much does the article estimate the black lawyer population would potentially grow if affirmative action was ended?


Correct:  dr. paul brest
Predicted:  Latino groups
Question:  Who conducted a study about the origins of "Latinos"?


Correct:  3.1 %
Predicted:  the Reconstruction
Question:  How many of the total bachelor degrees awarded in 1990 went to Latinos?


Correct:  reverse
Predicted:  job discrimination
Question:  Having quotas regarding admissions or employment has 

Correct:  executive order 10925
Predicted:  Lockheed Aircraft Corporation
Question:  Which order was issued soon after Kennedy took office?


Correct:  1 september 1870
Predicted:  St. Privat
Question:  On which date did the battle begin with the Army of Chalons attacking various Prussian divisions?


Correct:  the main power
Predicted:  the war
Question:  At the end of the Napoleonic wars, Germany had established itself as what, in continental Europe?


Correct:  bronze , rifled muzzle-loading
Predicted:  a French artillery battery
Question:  What type of artillery were the French equipped with?


Correct:  over 17,000 men
Predicted:  the peace
Question:  How many casualties did the French suffer?


Correct:  400,000
Predicted:  his soldiers
Question:  In peacetime, what the approximate number of French soldiers?


Correct:  35,000
Predicted:  Otto von Bismarck
Question:  What number of troops did France have to contend with?


Correct:  uniforms
Predicted:  the Mance Ravine
Question:

Correct:  462,000
Predicted:  alternative
Question:  At the start of the war, how many German troops were focused on the French frontier?


Correct:  steinmetz
Predicted:  a French artillery battery
Question:  Who commanded the German 1st Army?


Correct:  siege of metz
Predicted:  inland
Question:  What seige did these victories lead to?


Correct:  the north sea
Predicted:  Gambetta
Question:  The autumn storms of what sea forced the return of yet more French ships?


Correct:  four to one
Predicted:  the west
Question:  What odds were at stake for the attack launched by the III Corps?


Correct:  working-class neighbourhoods
Predicted:  this role
Question:  In what type of neighborhoods did the occur especially?


Correct:  french chassepot rifles
Predicted:  the all-steel Krupp breech-loading gun
Question:  What did most Prussians fall under?


Correct:  marshal adolphe niel
Predicted:  the number
Question:  Who reformed the low numbers of troops by implementing universal conscript

Correct:  literally `` circle ''
Predicted:  the line
Question:  What is the literal meaning of the Prussian word "Kreis?"


Correct:  musagetes
Predicted:  poetry
Question:  Which epithet did Apollo have as god of music and arts?


Correct:  hera
Predicted:  labor
Question:  Who kidnapped Eileithyia?


Correct:  artemis
Predicted:  Apollo
Question:  To whom was the Delos sanctuary dedicated?


Correct:  acraepheus
Predicted:  marriage
Question:  Who is Apollo's son?


Correct:  dionysus
Predicted:  the sun
Question:  Who was the god of wine?


Correct:  eidos
Predicted:  something
Question:  What is one Greek word that has the same root as the word idea?


Correct:  polykleitos
Predicted:  the Athenian state
Question:  Who wrote that beauty consists in the proportion not of the elements?


Correct:  labrys
Predicted:  the statues
Question:  What is another name for the double-axe?


Correct:  apollo
Predicted:  column
Question:  Who brought the art of inspection of "symbols and omina?

Correct:  paean
Predicted:  the instrument
Question:  About the 4th Century BCE, what became merely a formula of adulation?


Correct:  marsyas
Predicted:  Thessaly
Question:  Who could not sing at the same time he played the flute?


Correct:  212 bce
Predicted:  a supposed Greek bronze original made in the second quarter of the 5th century BCE 
Question:  When was the Second Punic war?


Correct:  praxiteles
Predicted:  apollymi
Question:  In whose depictions can the evolution of Greek sculpture be observed?


Correct:  peneus
Predicted:  Apollo
Question:  Who is Daphne's father?


Correct:  asia minor
Predicted:  Apollo
Question:  Where was Leto worshipped?


Correct:  midas
Predicted:  skill
Question:  Who was a faithful follower of Pan?


Correct:  orchamus
Predicted:  κτείνειν
Question:  Who was Leucothea's mother?


Correct:  leto
Predicted:  the same locality
Question:  Who was Apollo's mother?


Correct:  a crow
Predicted:  the Delphic Oracle
Question:  How was Apollo informed

Correct:  paean
Predicted:  literally "physician
Question:  Who is the physician of the Gods in the Iliad?


Correct:  artemis
Predicted:  the animals
Question:  What was the name of Apollo's sister?


Correct:  agyieus
Predicted:  (μάντις, "prophet
Question:  What word means "to avert?"


Correct:  kouros
Predicted:  Polykleitos
Question:  What is the modern term given to those representations of stnding male youths that first appear in the archaic period in Greece?


Correct:  discus
Predicted:  κτείνειν
Question:  What hit Hyacinthus in the head, killing him?


Correct:  one
Predicted:  the finished instrument
Question:  Into how many characteristics are the factors of wood combined?


Correct:  earlywood
Predicted:  the change
Question:  What name, besides springwood, is used for the part of a growth ring formed in the beginning of the growing season?


Correct:  cells
Predicted:  species
Question:  What does wood consist of?


Correct:  circular
Predicted:  the regular wood
Questi

Correct:  walls
Predicted:  the strength
Question:  Does strength come from the walls or cavities of wood cells?


Correct:  not attached
Predicted:  the same tree
Question:  Are the knots that dead tree limbs form attached or not attached?


Correct:  rate of growth
Predicted:  the attaching stem
Question:  Ring-porous hardwoods have a clear relationship between their properties and what other factor?


Correct:  softer
Predicted:  the roots
Question:  Often the water in wood makes it more pliable and what else?


Correct:  construction material
Predicted:  fiber
Question:  What has been the primary purpose of wood for millennia other than fuel?


Correct:  western red cedar
Predicted:  other trees
Question:  What type of tree was often used for totem poles?


Correct:  the trunk
Predicted:  dense forests
Question:  Decay can spread to all of what part of a tree?


Correct:  hickory
Predicted:  many hardwoods
Question:  What specific kind of "second-growth" hardwood is often used to m

Correct:  synthetic materials
Predicted:  the bacteria
Question:  What is the golf club known as the "wood" usually made out of today?


Correct:  3.5
Predicted:  the unaided eye
Question:  About how many cubic kilometers of the vast stock forest's wood were harvested in 1991?


Correct:  rural
Predicted:  the main consideration
Question:  What type of areas use more wood for fuel?


Correct:  palms
Predicted:  that wood
Question:  Along with bamboo, what's the other monocot that's a major source of so-called "wood"?


Correct:  application-specific performance requirements
Predicted:  made
Question:  What requirements do engineered wood products meet?


Correct:  chopsticks
Predicted:  used
Question:  What special wooden utensils do many people use to eat Chinese takeout?


Correct:  temperate
Predicted:  the comparative amounts
Question:  What kind of softwoods often have significant differences in their earlywood and latewood?


Correct:  easy
Predicted:  the strength
Question:  Is 

Correct:  thick-walled
Predicted:  pieces
Question:  What kind of fibers are in the latewood of good oak, making it very firm?


Correct:  decay
Predicted:  Such resin-saturated heartwood
Question:  Along with insect infestation, what process can discolor wood and make it look like heartwood?


Correct:  lignin
Predicted:  a dramatic chemical difference
Question:  What substance is hemicellulose impregnated with?


Correct:  water
Predicted:  no seasonal difference growth rings
Question:  The capability of diffuse-porous woods to carry what substance is spread out in the growth ring?


Correct:  physiological
Predicted:  the material
Question:  Along with growth, what kind of factors determine the density of wood?


Correct:  395 to 400
Predicted:  the marketplace
Question:  How many millions of years old were the plants that were discovered in New Brunswick?


Correct:  grading
Predicted:  visual interest
Question:  What's the process in which knots are classified called?


Correct:  

Correct:  paper
Predicted:  MDF
Question:  What material we use to write or print on, like wood, becomes softer and more pliable when wet?


Correct:  resin
Predicted:  the past
Question:  Along with fatty acids, what kind of acids are in the extractives in wood?


Correct:  symbiotic
Predicted:  composite materials
Question:  What type of bacteria are present in Xylophaga?


Correct:  hardwood
Predicted:  hardwoods
Question:  Is mahogany a hardwood or a softwood?


Correct:  flavobacteria
Predicted:  many years
Question:  What bacteria starting with the letter "F" were found in wood after it was underwater for more than a year?


Correct:  wider
Predicted:  sapwood
Question:  Is harder, heavier wood indicated by narrower or wider growth rings?


Correct:  inner heartwood
Predicted:  the grain
Question:  What part of a tree often stays impressively sound even when hundreds or thousands of years old?


Correct:  thicker
Predicted:  the inner heartwood
Question:  Does a tree growing quic

Correct:  halves
Predicted:  C. W. Alcock
Question:  What are the periods in a football match called?


Correct:  england 's football association
Predicted:  the ball
Question:  Who voted to ban women's football from its grounds in 1921?


Correct:  the pitch
Predicted:  the penalty area in front of their own goal 
Question:  throughout when can players move the ball in any direction?


Correct:  fa cup
Predicted:  Aston Villa director William McGregor
Question:  What is C.W. Alcock responsible for founding?


Correct:  goalkeepers
Predicted:  a penalty kick
Question:  Who can only handle the ball with their hands or arms during play?


Correct:  goalkeeper
Predicted:  three
Question:  Out of the maximum amount of players allowed, one must be a what?


Correct:  trinity college
Predicted:  rules
Question:  Which college where the Cambridge Rules written at?


Correct:  yugoslav wars
Predicted:  the Football Association
Question:  Football made which wars more tense in the 1990's?


Cor

Correct:  her parents
Predicted:  number
Question:  Like a Prayer reflects Madonna's relationship with who?


Correct:  madonna
Predicted:  band
Question:  Who changed the dynamics of the singers back to mostly female?


Correct:  thirteenth
Predicted:  the female fashion trends
Question:  4 minutes became Madonna's which number one single in the UK?


Correct:  madonna
Predicted:  the award
Question:  The most remarkable creation in MTV is whom?


Correct:  around $ 120 million
Predicted:  tour
Question:  HOw much did the tour earn?


Correct:  june 1986
Predicted:  the Billboard
Question:  When was Madonna's third album released?


Correct:  causing a commotion
Predicted:  the film critic community
Question:  Name a soundtrack in the film Who's That Girl?


Correct:  dunkin ' donuts
Predicted:  all time
Question:  Which restaurant did Madonna work in New York City?


Correct:  tony sclafani
Predicted:  Emmy
Question:  Who stated that Madonna's voice are key to her rock roots?


Corre

Correct:  with honors
Predicted:  a record company
Question:  The song "I'll Remember" was recorded for which film?


Correct:  morton
Predicted:  the movie
Question:  Who wrote that Madonna is opportunistic, manipulative and ruthless?


Correct:  the english roses ,
Predicted:  a taxi cab
Question:  What was the title of the first book Madonna penned?


Correct:  three
Predicted:  another record
Question:  Madonna's dance singles reached which number in the "Hot Dance Club Songs" by the Billboard Magazine?


Correct:  j. randy taraborrelli
Predicted:  the public
Question:  Which biographer described her song "I'll Remember" as trying to clean up Madonna's image?


Correct:  stephanie zacharek
Predicted:  Billboard magazine
Question:  Who stated that Madonna was a bad actress to watch?


Correct:  1979
Predicted:  That Girl
Question:  Which year was Patrick Hernandez's world tour held?


Correct:  warner music
Predicted:  Warner
Question:  Maverick became a wholly owned  subsidiary of 

Correct:  archimedes
Predicted:  the famous bath-house
Question:  Who shouted "Eureka!" while checking the purity of a crown?


Correct:  dissolves
Predicted:  done
Question:  What does mercury make most metals do?


Correct:  tin
Predicted:  Mercury
Question:  During the bronze age, which metal was valued higher than gold in Europe and the Mediterranean?


Correct:  nickel and iron
Predicted:  the steel
Question:  What is meteoric iron composed of?


Correct:  chromium
Predicted:  many forms
Question:  What can be added to steel to enhance is corrosion resistance?


Correct:  mangalloy
Predicted:  two or more metals
Question:  Steel and manganese combines form to make what?


Correct:  heat treatment
Predicted:  steel
Question:  What kind of treatment can be made to alter it's properties?


Correct:  steel
Predicted:  magnesium alloys
Question:  What is the name of a common alloy?


Correct:  brass
Predicted:  its ingredients
Question:  What does copper and zinc form to make?


Correc

Correct:  `` île-de-france ''
Predicted:  the 7th arrondissement
Question:  What was the Paris region renamed to?


Correct:  commune of paris
Predicted:  2,300 km2
Question:  What was the most populated city in the EU in 2012?


Correct:  50 m
Predicted:  the 2nd, 8th, 9th, 16th and 18th arrondissements
Question:  What is the current height limitation in Paris in central areas?


Correct:  7 °c
Predicted:  night
Question:  What is the average temperature during winter?


Correct:  10,550,350
Predicted:  incomes
Question:  According to the INSEE what is Paris Urban Area population?


Correct:  bal-musette
Predicted:  collections
Question:  What style of french music became populars in the 1870sto 1880s?


Correct:  louis vii
Predicted:  its metropolitan area
Question:  Under who's reign was the construction of Notre Dame Cathedral?


Correct:  1968
Predicted:  a 2012 GDP
Question:  WHen was District de la region parisienne reorganised?


Correct:  1951
Predicted:  the 18th arrondisseme

Correct:  second empire
Predicted:  northern central France
Question:  During what period of time was Victor Hugo exiled from France?


Correct:  henry iv
Predicted:  existence
Question:  Who was responsible for rebuilding Paris in the 1600's


Correct:  stade français
Predicted:  roll
Question:  What is the rugby club in Paris?


Correct:  georges bizet 's
Predicted:  other musical forms
Question:  Who wrote Carmen?


Correct:  epistolae
Predicted:  France's top art school
Question:  What was the title of the first book printed in France?


Correct:  thirteen
Predicted:  the city
Question:  How many people were killed at the Charlie Hebdo attack?


Correct:  1977
Predicted:  the year
Question:  In what year was the Centre Georges Pompidou erected?


Correct:  beauvais-tillé airport
Predicted:  the nation's flag carrier Air France
Question:  What airport does Ryanair use?


Correct:  €624 billion
Predicted:  the same period
Question:  What was Paris Region's GDP in 2012?


Correct:  14

Correct:  1929
Predicted:  its current limits
Question:  In what year were Bois de Boulogne and Bois de Vincennes annexed? 


Correct:  1961
Predicted:  the French Kings
Question:  When did Paul Delouvrier become the head of the Paris region?


Correct:  25 august 1944
Predicted:  2,300 km2
Question:  On what date was the city liberated?


Correct:  1809
Predicted:  roi des Franks
Question:  WHen did the Canal de l'Ourcq start providing Paris with water?


Correct:  1648
Predicted:  the Champs-Élysées
Question:  In what year was the Academie royale de peinture et de sculpture founded?


Correct:  1875
Predicted:  of Paris in the commune of Saint-Denis 
Question:  When was teh Palais Garnier Opera House built?


Correct:  12th
Predicted:  the Hôtel de Ville
Question:  in what century was a school of polyphony established at Notre Dame?


Correct:  northern central
Predicted:  the Notre Dame Cathedral and the Louvre as well as the Sainte-Chapelle 
Question:  Where in France is Paris loca

Correct:  620
Predicted:  mandatory markets
Question:  How many analogue transmitters were shut down on July 31, 2012?


Correct:  chbc-tv in kelowna
Predicted:  CBC's afternoon schedule
Question:  Which CBC affiliate joined E! in February 2006?


Correct:  curling
Predicted:  Saturday nights
Question:  CBC was eclusive carrier of what other sport during the 2004-2005 season?


Correct:  2 to 51
Predicted:  11:59 p.m. in each time zone 
Question:  Where could CBC's channels need to be relocated in order to continue operating?


Correct:  `` t ''
Predicted:  the merger
Question:  What is the last letter of nearly all CBC stations?


Correct:  august 31 , 2012
Predicted:  T
Question:  When was CBC's anologue upgrade extension set to expire?


Correct:  1952
Predicted:  each episode
Question:  What year did HNIC begin being broadcasted on CBC?


Correct:  ashtabula , ohio
Predicted:  those areas
Question:  Which US city can receive broadcasts from CBC's London transmitter?


Correct:  fin

Correct:  prettiest
Predicted:  no continuous fossil evidence
Question:  What feature helped flowers not get plucked?


Correct:  different flowers
Predicted:  an outer cortex
Question:  How are some male and female parts separated for some species?


Correct:  two
Predicted:  nature
Question:  How many distinct rounds of genome duplication events are suspected in the evolution of seed plants?


Correct:  an enclosure
Predicted:  a seed
Question:  What does an angiosperm produce its seeds within?


Correct:  embryo
Predicted:  a seed
Question:  What does the seed coat protect?


Correct:  basal
Predicted:  the seeds
Question:  What term refers to the first three groups to diverge from angiosperm?


Correct:  spores
Predicted:  new shoots
Question:  Upright meisporangia allowed what to be dispersed to new habitats?


Correct:  hermaphrodite
Predicted:  influenced
Question:  What sexual feature do a majority of flowers demonstrate?


Correct:  sepals and petals
Predicted:  his class
Ques

Correct:  seven
Predicted:  the "25 Most Wired Law Schools
Question:  How many schools does Washington University have?


Correct:  shanghai
Predicted:  their own accord
Question:  Where was an Executive MBA program established by Washington University in 2002?


Correct:  division iii
Predicted:  the top departments
Question:  At what division level does the Washington University sports teams compete?


Correct:  25 %
Predicted:  campus
Question:  What percentage of the sequencing did the Washington University Medical School contribute to the Human Genome Project?


Correct:  60 %
Predicted:  Sciences
Question:  What percent of undergraduate students are involved in faculty research?


Correct:  14 %
Predicted:  over 180 law schools
Question:  What percent of applicants were admitted to Olin Business School in 2007? 


Correct:  80 %
Predicted:  World Report
Question:  What percentage of NH grants in Missouri went to Washington University? 


Correct:  $ 537.5 million
Predicted:  Geor

Correct:  the federal government
Predicted:  research/education center
Question:  Who previously owned the property where Tyson Research Center is located?


Correct:  2008
Predicted:  campus
Question:  What year did Chancellor Wrighton submit another bid for a presidential debate at Washington University after 2004? 


Correct:  over two-thirds
Predicted:  campus
Question:  What proportion of students at Washington University came from outside the St. Louis area by 1964?


Correct:  1891
Predicted:  to 1909 in the university 's School of Social Economy 
Question:  When was the Washington University School of Medicine founded?


Correct:  1899
Predicted:  a national research institution
Question:  When did Washington University hold a national design contest for the new campus?


Correct:  co-ed
Predicted:  modern residential halls
Question:  What is the gender make-up of the residence halls at Washington University?


Correct:  1976
Predicted:  their own accord
Question:  When was the

Correct:  northern desert
Predicted:  the other group
Question:  What part of the country has the Köppen Climate Classification of (BWh)?


Correct:  1968 to 1974
Predicted:  just over 1,240,000 square kilometres
Question:  Between what years did famine and drought afflict the country?


Correct:  keepers of memories
Predicted:  the country
Question:  What is the translation or meaning of a griot?


Correct:  march revolution
Predicted:  the government
Question:  There was a nationwide strike being held that was called les envenements and also named what?


Correct:  occupational settings
Predicted:  a somewhat different emphasis
Question:  The implications of identity and identity construction are discussed in what settings?


Correct:  a primordialist approach
Predicted:  conceive ethnic boundaries
Question:  What approach takes the sense of self and belonging as a fixed thing?


Correct:  foreclosed or defensive
Predicted:  either the social identity
Question:  Those with low levels

Correct:  inclusive
Predicted:  a negative boundary
Question:  A marker that people are ready and willing to associate with is what kind of boundary?


Correct:  weinreich 's identity variant
Predicted:  occupational settings
Question:  Identity diffusion, foreclosure, and crisis are categories of what?


Correct:  define it empirically
Predicted:  other disciplines
Question:  What is it impossible to do with identity?


Correct:  benign
Predicted:  certain characteristics
Question:  What is it called when someone aspires to the characteristics of significant others?


Correct:  one 's total identity
Predicted:  identity achievement
Question:  What are gender identity, ethnic identity, and occupational identity aspects of?


Correct:  the cardinal protodeacon
Predicted:  unity
Question:  Who announces the election of a new pope?


Correct:  pope innocent iv
Predicted:  the pope
Question:  Who was the pope who gave cardinals the right to wear the hats?


Correct:  120
Predicted:  14 car

Correct:  sun tzu
Predicted:  writing
Question:  Who wrote The Art Of War?


Correct:  herodotus and thucydides
Predicted:  the last two centuries
Question:  The two great ancient Greek historians were?


Correct:  the byzantine empire
Predicted:  significant literature
Question:  What empire did Justinian preside over?


Correct:  the nineteenth century
Predicted:  focused on philosophy 
Question:  Poetry was considered to need lines and meter until when?


Correct:  logic
Predicted:  new emotional experiences
Question:  What aspect of modern academic philosophy is less literary than technical in nature?


Correct:  science fiction
Predicted:  real life
Question:  Alternative reality genre fiction is also known as what?


Correct:  academic journals
Predicted:  history
Question:  Most serious studies in philosophy are segregated to what publications?


Correct:  journals
Predicted:  pronounced over the last two centuries 
Question:  The segregation of science writing means that articl

Correct:  larger
Predicted:  the human brain article
Question:  Do predators have larger or smaller brains compared to their prey?


Correct:  rhombencephalon
Predicted:  a "tail brain
Question:  The hindbrain during development is known as what?


Correct:  glial cells
Predicted:  Neurons
Question:  Which type of cells in the brain are generated throughout your lifetime?


Correct:  decisions
Predicted:  hydraulic terms
Question:  The basal ganglia is thought to be the central location at which what are made?


Correct:  snakes
Predicted:  the lifespan
Question:  What type of animal uses infrared heat to sense?


Correct:  teleost fishes
Predicted:  the forebrain
Question:  The forebrain is everted in what type of fishes?


Correct:  the forebrain
Predicted:  the hypothalamus
Question:  The hypothalamus is located at the base of what?


Correct:  neocortex
Predicted:  a complex six-layered structure
Question:  The hippocampus and amygdala are ares inside what structure?


Correct:  ga

Correct:  neuroscience and psychology
Predicted:  the forebrain area
Question:  Cognitive science seeks to join what two branches of science with other fields?


Correct:  magnetoencephalography
Predicted:  the brain
Question:  MEG of the brain is an abbreviation of what?


Correct:  sense of smell
Predicted:  a computer
Question:  The olfactory bulb is related to what sense?


Correct:  ketones
Predicted:  other parameters
Question:  Other sources than glucose that provide energy to the brain are what?


Correct:  mice
Predicted:  invertebrates
Question:  What is the most common test subjects for studying of the brain?


Correct:  rem sleep
Predicted:  a nearby small area
Question:  What type of sleep involves dreaming?


Correct:  12 to 14-year-old
Predicted:  increasingly complex responses
Question:  The oldest known brain discovered was found in how old of a person?


Correct:  200 mya
Predicted:  computation
Question:  At how many mya did mammals first appear in time?


Correct:  

Correct:  pyramidal tract .
Predicted:  one specific sensory modality
Question:  The primary motor cortex sends signals to the spinal cord through what?


Correct:  ten times
Predicted:  the cerebral cortex
Question:  A mammal's brain is how many times larger than a reptiles relative to body size?


Correct:  the skull
Predicted:  variations
Question:  Meninges separate what structure from the brain?


Correct:  electrochemical
Predicted:  specific brain areas
Question:  What type of signals do neurons transfer from one another?


Correct:  14th century
Predicted:  Europe and indeed the world in the period between 1610 and 1700 the world in the period between 1610 and 1700 
Question:  When was the start of the period known as the Renaissance?


Correct:  english
Predicted:  the Mediterranean islands
Question:  What is considered a de facto second language in Mediterranean Europe?


Correct:  phytochoria
Predicted:  Southwestern Europe
Question:  What is a word that can be used to descr

Correct:  the surgeon general
Predicted:  the Ministry
Question:  Who leads the Defence Medical Services?


Correct:  british armed forces
Predicted:  the purposes
Question:  For which part of the government is the MoD the headquarters?


Correct:  vincent harris
Predicted:  April
Question:  Who designed the headquarters of the MoD?


Correct:  permanent secretary
Predicted:  terrorism
Question:  What is the Permanent Under-Secretary of State for Defence generally known as?


Correct:  equipment programme
Predicted:  campaigns
Question:  Which programme was mentioned as being underfunded?


Correct:  1971
Predicted:  Clement Attlee's government
Question:  When did the defence functions of the Ministry of Aviation Supply merge into the Ministry of Defence?


Correct:  brick
Predicted:  World War II
Question:  What is the main material used to build the cellar in the basement of Main Building?


Correct:  cadets
Predicted:  cutbacks
Question:  One of the Assistant Chiefs of the Defence S

Correct:  late vedic
Predicted:  content
Question:  In what period did Vyakarana begin?


Correct:  gurukulas
Predicted:  the curriculum
Question:  In what other type of schools is Sanskrit also taught?


Correct:  close
Predicted:  the schools
Question:  What is the relationship between Indo-Iranian and Baltic languages?


Correct:  oral tradition
Predicted:  no native script
Question:  Under what type of tradition did Sanskrit begin?


Correct:  6:55 am ist
Predicted:  separate dialects
Question:  What is the broadcast time for Sanskrit news on the DD National channel?


Correct:  mattur village
Predicted:  Uttarakhand
Question:  Where in India is Sanskrit still spoken by the population?


Correct:  devanagari
Predicted:  the scribes
Question:  Which script replaced the Gupta script?


Correct:  mysore , india
Predicted:  Sanskrit
Question:  Where is the newspaper Sudharma published?


Correct:  spanish
Predicted:  Buddhist Hybrid Sanskrit
Question:  From what language is a large pro

Correct:  experimental
Predicted:  BYU No
Question:  What type of theater is Nelke Theatre primarily used for?


Correct:  88
Predicted:  study
Question:  How many freshman at BYU were Merit Scholars in 2006?


Correct:  19
Predicted:  the school's mascot
Question:  At what age, since 2012, are women allowed to serve a mission after high school graduation?


Correct:  art rascon
Predicted:  The Ohio State University
Question:  Which former CBS News correspondent graduated from BYU?


Correct:  three
Predicted:  one
Question:  How many television production studios is BYU Broadcasting Technical Operations Center home to?


Correct:  98
Predicted:  Merrill J. Bateman
Question:  How many miles of shelving are in BYU's Harold B. Lee Library?


Correct:  university of deseret
Predicted:  their fields
Question:  What college did BYU separate from to become its own entity?


Correct:  68
Predicted:  these programs
Question:  How many master's programs does BYU have?


Correct:  byu recycles
P

Correct:  culturally conservative
Predicted:  its US students
Question:  What BYU environment seems to surprise many visitors to BYU as well as to the Utah Valley?


Correct:  college of engineering
Predicted:  an independent
Question:  Which BYU college was founded by former alumnus Harvey Fletcher?


Correct:  warren dusenberry
Predicted:  two No
Question:  Who began the school that previously existed at the site where BYU is now located?


Correct:  tracy hall
Predicted:  the rating
Question:  Which notable former BYU student invented the man-made diamond?


Correct:  over three quarters
Predicted:  their apartments
Question:  What percentage of the student body of BYU has some proficiency in a second language?


Correct:  brigham young
Predicted:  the current campus
Question:  Which president of the LDS church purchased the Lewis Building after hinting at the building of a school?


Correct:  22
Predicted:  the previous policy
Question:  What is the average age that BYU students ma

Correct:  progressive-scan
Predicted:  NBC owned and operated station WRC-TV
Question:  Most computer monitors operate in what mode?


Correct:  pal-m
Predicted:  resolution
Question:  In addition to the NTSC system, what 525-line system is considered a standard definition television system?


Correct:  185 million
Predicted:  SES's Astra 1H satellite
Question:  According to the 2010 Satellite Monitor market survey, how many HD capable TV's were sold in Europe?


Correct:  euro1080
Predicted:  kick-start HDTV interest
Question:  Which European company launched HD1?


Correct:  japan
Predicted:  satellite
Question:  Which country had successful public analog HDTV broadcasting?


Correct:  two
Predicted:  the newer and more efficient H.264/MPEG-4 AVC compression standards
Question:  How many frame/field rates were primarily being used in 1983?


Correct:  seven
Predicted:  these files
Question:  How many broadcasters did Japan have sharing a single channel?


Correct:  analog hd
Predicte

Correct:  1989
Predicted:  GoogleTV Roku boxes and AppleTV or built into `` Smart Televisions '' 
Question:  When did satellite test broadcasts of Hi-Vision start?


Correct:  the soviet union
Predicted:  military command
Question:  Who developed Transformator in 1958?


Correct:  hd1
Predicted:  ATSC
Question:  The New Year's Day broadcast officially launched which channel?


Correct:  1979
Predicted:  the 405-line system
Question:  In what year did NHK first develop consumer HD television with a 5:3 aspect ratio?


Correct:  mbaff
Predicted:  a USB 2.0 interface
Question:  Which encoding contains both progressive and interlaced content?


Correct:  digital video broadcasting
Predicted:  standard definition television systems
Question:  What does DVB stand for?


Correct:  1983
Predicted:  to be inaccurately rechristened 'progressive rechristened 'progressive 
Question:  When was France's 819 line system discontinued?


Correct:  8k
Predicted:  normal television
Question:  What is the

KeyboardInterrupt: 