In [203]:
import sys
sys.path.append("..")
sys.path.insert(0, './app')
from flask import Flask, jsonify, request
import pickle
# from util import *
# from app import util
from flask import Blueprint, render_template, redirect
from transformers import BertTokenizer, BertForSequenceClassification
import re

# 加载模型

In [204]:
def load_bert_model():
    model = BertForSequenceClassification.from_pretrained(
        '../model/difficulty_models/BERT_full_question')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    return model, tokenizer

In [205]:
def get_pretrained_tfidf_vectorizer():
    with open("../model/model.pickle", "rb") as f:
        params = pickle.load(f)

    vectorizer = params["tfidf_vectorizer"]
    Matrix = params["tfidf_matrix"]
    ans = params["i_to_ans"]
    return vectorizer, Matrix, ans

In [206]:
model, tokenizer = load_bert_model()
params = get_pretrained_tfidf_vectorizer()



In [207]:
vectorizer, Matrix, ans = params[0], params[1], params[2]

In [208]:
vectorizer

TfidfVectorizer(dtype=<class 'numpy.int64'>, max_df=0.9, min_df=2,
                ngram_range=(1, 3))

In [209]:
Matrix

<24448x1460332 sparse matrix of type '<class 'numpy.float64'>'
	with 13702827 stored elements in Compressed Sparse Row format>

# 计算结果

In [219]:
def get_actual_guess_with_index(question, max=12):
    answer = []
    repre = vectorizer.transform(question)
    matrix = Matrix.dot(repre.T).T
    indices = (-matrix).toarray().argsort(axis=1)[:, 0:max]
    for i in range(len(question)):
        idx = indices[i]
        answer.append([(ans[j], matrix[i, j]) for j in idx])
    return answer[0][0][0:], indices[0][0]

In [349]:
question="Along with orbitons and holons, quasiparticles carrying this property are formed from electrons in Luttinger liquids, which carry density waves in charge and this property. Similar wave-like disturbances in the collective structure of this property for a crystal lattice are quantized into magnons. This property separates orthohydrogen from parahydrogen, and its total value is 0 for singlet states and 1 for triplet states. This property causes a beam of silver atoms sent through an inhomogeneous magnetic field to split into two beams instead of forming a continuous band. This property, described by a set of Hermitian unitary matrices named for Wolfgang Pauli, was discovered in the Stern-Gerlach experiment. For 10 points, name this intrinsic form of angular momentum, whose quantum number can be plus or minus one-half for electrons."
question=[question]
answer = []
repre = vectorizer.transform(question)
matrix = Matrix.dot(repre.T).T  # 结果就已经是相似度了
indices = (-matrix).toarray().argsort(axis=1)[:, 0:12]  #排序，结果是索引
i=0
idx = indices[i]
answer.append([(ans[j], matrix[i, j]) for j in idx])

In [350]:
answer[0][0:]

[('Spin_(physics)', 0.3896472256623267),
 ('Commutative_property', 0.2971257586424044),
 ('Chirality', 0.26751165604533755),
 ('Differentiable_function', 0.2669230106098698),
 ('Aromaticity', 0.26159644234522683),
 ('Compact_space', 0.26149545892847254),
 ('Solubility', 0.24626703885793874),
 ('CONvergence', 0.24431863893048913),
 ('Ferroelectricity', 0.2270951513317122),
 ('Hydrophobe', 0.22655805999737152),
 ('Associative_property', 0.2263659955317855),
 ('The_Open_Championship', 0.22595733950315214)]

In [255]:
indices

array([[14647,  7322, 12451,  3689, 13065,  8923, 10536,  7206,  7594,
         7862, 21926, 12121]])

In [256]:
answer

[[('Texas_annexation', 0.10866806717246956),
  ('Alaska_Purchase', 0.0660379528687406),
  ('Back-to-Africa_movement', 0.06506766093835703),
  ('Gadsden_Purchase', 0.06415168247695884),
  ('German_reunification', 0.05334426633591013),
  ('Republic_of_Texas', 0.052787021359412774),
  ('French_Revolution', 0.052236304814261995),
  ('Panic_of_1837', 0.049619827176177356),
  ('Great_Famine_(Ireland)', 0.04795355781457573),
  ('Assassination_of_John_F._Kennedy', 0.04781273760267537),
  ('Religious_conversion', 0.047740971767453365),
  ('Glorious_Revolution', 0.04759893642134324)]]

In [223]:
actual_answer, index_of_answer = get_actual_guess_with_index(question=question)

In [224]:
actual_answer

('Counties_of_Ireland', 0.3952392588292712)

In [225]:
index_of_answer

197

# 句子分句

In [226]:
import nltk

In [227]:
question="After making a deal with Akbar Bugti, this leader launched a raid on the Iraqi embassy that revealed a startlingly robust arms cache intended to support an uprising in Balochistan. He promised to \"break the legs\" of anyone who broke his parliamentary boycott following the victory of Sheikh Mujibur Rahman's Awami League. This leader resolved to develop nuclear weapons \"even if we have to eat grass\" after his nation's major rival conducted the Smiling Buddha test. Operation Fair Play ousted this successor of Yahya Khan, who signed the Simla Agreement with Indira Gandhi. This founder of the PPP was executed after a coup by Zia-ul-Haq. For 10 points, name this Pakistani leader of the 1970's, whose office was later held by his daughter Benazir."
array_of_sentences_in_question = nltk.tokenize.sent_tokenize(question)


In [None]:
array_of_sentences_in_question

# 重要的单词

In [294]:
def guess_top_n(question, params, max=12, n=3):
    vectorizer, Matrix, ans = params[0], params[1], params[2]
    answer = []
    repre = vectorizer.transform(question)
    matrix = Matrix.dot(repre.T).T
    indices = (-matrix).toarray().argsort(axis=1)[:, 0:max]
    for i in range(len(question)):
        answer.append([(ans[j], matrix[i, j]) for j in indices[i]])
    return answer[0][0:n]

In [295]:
def buzz_Cai(question, answer, min_index=0, threshold_buzz=0.3):
    temp_word_array = question.split(' ')
    # check if buzzer ever goes above threshold
    index_of_bin_search = len(temp_word_array)
    question_sentence = question
    temp_var = guess_top_n(question=[question_sentence], params=params, max=3, n=1)
    if(temp_var[0][1] < threshold_buzz and temp_var[0][0] == answer):
        return "", "1", False
    elif(temp_var[0][0] != answer):
        return "", "2", False
    else:
        pass
    max_index = index_of_bin_search - 1

    while max_index >= min_index:
        index_of_bin_search = (max_index+min_index)//2
        question_sentence = " ".join(temp_word_array[:index_of_bin_search])
        temp_var = guess_top_n(question=[question_sentence], params=params, max=1, n=1)
        if (temp_var[0][1] > threshold_buzz):
            max_index = index_of_bin_search-1
        else:
            min_index = index_of_bin_search+1
    rest_of_sentence = " ".join(temp_word_array[index_of_bin_search:])
    return question_sentence, index_of_bin_search, rest_of_sentence, True

In [357]:
def get_important_sentence_to_get_right_answer(question, answer):
    temp_sentence_array = break_into_sentences(question)
    array_of_important_sentence = []
    for i in range(len(temp_sentence_array)):
        temp_sentence = temp_sentence_array[:i] + temp_sentence_array[i+1:]
        temp_sentence_string = ' '.join(temp_sentence)
        curr_answer, index_of_answer = get_actual_guess_with_index(question=[temp_sentence_string])
        if curr_answer[0] != answer:
            array_of_important_sentence.append(temp_sentence_array[i])
    return array_of_important_sentence

In [344]:
def get_important_word_to_delay_the_buzzer(question, answer):
    temp_word_array = question.split(' ')
    array_of_important_word_to_delay_buzzer = []
    array_of_important_word_to_right_answer = []
    init_question_sentence, index_buzzer, init_rest_of_sentence, flag = buzz_Cai(
        question=question, answer=answer)
    if flag == False:
        return array_of_important_word_to_delay_buzzer, array_of_important_word_to_right_answer
    length = len(init_question_sentence)
    for i in range(int(index_buzzer))//2:
        if len(temp_word_array[i]) < 6:
            continue
        temp_sentence = temp_word_array[:i*2] + temp_word_array[i*2+2:]
        temp_sentence_string = ' '.join(temp_sentence)
        question_sentence, index_buzzer_nouse, rest_of_sentence, flag = buzz_Cai(
            question=temp_sentence_string, answer=answer)
        if flag == True and len(question_sentence) > length:
            array_of_important_word_to_delay_buzzer.append(temp_word_array[i])
        elif flag == False and rest_of_sentence == "2":
            array_of_important_word_to_right_answer.append(temp_word_array[i])
        elif flag == False and rest_of_sentence == "1":
            array_of_important_word_to_right_answer.append(temp_word_array[i])
        else:
            pass
    return array_of_important_word_to_delay_buzzer, array_of_important_word_to_right_answer

In [358]:
def high_light(question, answer):

    temp_var = guess_top_n(question=[question], params=params, n=1)[0][0]
    if answer != temp_var:
        return []

    highlight = []
    array_of_important_word_to_delay_buzzer, array_of_important_word_to_right_answer = get_important_word_to_delay_the_buzzer(question, answer)
    array_of_important_sentence_to_right_answer = get_important_sentence_to_get_right_answer(question, answer)


    highlight.extend(highlight_json(items=array_of_important_word_to_right_answer, color="#e91640"))
    highlight.extend(highlight_json(items=array_of_important_sentence_to_right_answer, color="#e91640"))
    highlight.extend(highlight_json(items=array_of_important_word_to_delay_buzzer, color="#fff05e"))

    print(highlight)
    # return jsonify({"highlight": highlight})


In [346]:
def highlight_json(items = None, color = None):
    '''
    Organize the json structure for text highlighting in frontend
    highlight: [
        { text: "American", style: "background-color:#f37373" },
        { text: "India", style: "background-color:#f37373" },
        { text: "Jack", style: "background-color:#fff05e" },
        { text: "Mary", style: "background-color:#fff05e" },
      ],
    '''
    highlight = []
    for item in items:
        temp = {}
        temp['text'] = item
        temp['style'] = "background-color:"+color
        highlight.append(temp)
    return highlight


In [326]:
highlight_json(items=['Jack', 'Mary'], color="#f37373")

[{'text': 'Jack', 'style': 'background-color:#f37373'},
 {'text': 'Mary', 'style': 'background-color:#f37373'}]

In [300]:
import nltk
def break_into_sentences(question):
    array_of_sentences_in_question = nltk.tokenize.sent_tokenize(question)
    return array_of_sentences_in_question

In [351]:
question="Along with orbitons and holons, quasiparticles carrying this property are formed from electrons in Luttinger liquids, which carry density waves in charge and this property. Similar wave-like disturbances in the collective structure of this property for a crystal lattice are quantized into magnons. This property separates orthohydrogen from parahydrogen, and its total value is 0 for singlet states and 1 for triplet states. This property causes a beam of silver atoms sent through an inhomogeneous magnetic field to split into two beams instead of forming a continuous band. This property, described by a set of Hermitian unitary matrices named for Wolfgang Pauli, was discovered in the Stern-Gerlach experiment. For 10 points, name this intrinsic form of angular momentum, whose quantum number can be plus or minus one-half for electrons."
answer='Spin_(physics)'

Along with orbitons and holons, quasiparticles carrying this property are formed from electrons in Luttinger liquids, which carry density waves in charge and this property. Similar wave-like disturbances in the collective structure of this property for a crystal lattice are quantized into magnons. This property separates orthohydrogen from parahydrogen, and its total value is 0 for singlet states and 1 for triplet states. This property causes a beam of silver atoms sent through an inhomogeneous magnetic field to split into two beams instead of forming a continuous band. This property, described by a set of Hermitian unitary matrices named for Wolfgang Pauli, was discovered in the Stern-Gerlach experiment. For 10 points, name this intrinsic form of angular momentum, whose quantum number can be plus or minus one-half for electrons.
Spin_(physics)

In [352]:
actual_answer, index_of_answer = get_actual_guess_with_index(question=[question])

In [312]:
result=word(question, answer)

In [314]:
print(result)

None


In [353]:
actual_answer

('Spin_(physics)', 0.3896472256623267)

In [354]:
array_of_important_word=get_important_word_to_delay_the_buzzer(question, answer)

In [None]:
buzz_Cai(question, answer)

In [355]:
array_of_important_word

(['Along',
  'with',
  'and',
  'holons,',
  'this',
  'property',
  'are',
  'formed',
  'in',
  'carry',
  'this',
  'property.',
  'Similar',
  'the',
  'of',
  'this',
  'property',
  'for',
  'a',
  'crystal',
  'are',
  'into',
  'This'],
 [])

In [359]:
high_light(question, answer)

[{'text': 'For 10 points, name this intrinsic form of angular momentum, whose quantum number can be plus or minus one-half for electrons.', 'style': 'background-color:#e91640'}, {'text': 'Along', 'style': 'background-color:#fff05e'}, {'text': 'with', 'style': 'background-color:#fff05e'}, {'text': 'and', 'style': 'background-color:#fff05e'}, {'text': 'holons,', 'style': 'background-color:#fff05e'}, {'text': 'this', 'style': 'background-color:#fff05e'}, {'text': 'property', 'style': 'background-color:#fff05e'}, {'text': 'are', 'style': 'background-color:#fff05e'}, {'text': 'formed', 'style': 'background-color:#fff05e'}, {'text': 'in', 'style': 'background-color:#fff05e'}, {'text': 'carry', 'style': 'background-color:#fff05e'}, {'text': 'this', 'style': 'background-color:#fff05e'}, {'text': 'property.', 'style': 'background-color:#fff05e'}, {'text': 'Similar', 'style': 'background-color:#fff05e'}, {'text': 'the', 'style': 'background-color:#fff05e'}, {'text': 'of', 'style': 'background-co

In [402]:
list1=['Jack']
list2=['Mary']
list3=["Lilaa"]
list4=[]
list4.extend(list1)
list4.extend(list2)
list4.extend(list3)

In [406]:
list4=['aaaaa', 'bbb', 'ccc', 'rrrr', 'eeeeeee']

In [407]:
for word in list4:
    if len(word) < 5:
        list4.remove(word)

In [408]:
list4

['aaaaa', 'ccc', 'eeeeeee']

In [397]:
list4.remove('Mary')

In [398]:
list4

['Lilaa']

In [411]:
s=int("3")

In [412]:
s

3

In [414]:
3/2

1.5

In [415]:
i=1
print("the %d sentence" % i)

the 1 sentence
