In [46]:
"""Task 0: Question Answering"""
import os
import glob
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from transformers import BertTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
# Holds the tokenizer and QA model once they have been loaded, so repeated
# calls (e.g. from an interactive loop) do not reload them every time.
_QA_RESOURCES = {}


def _load_qa_resources():
    """Returns the (tokenizer, model) pair, loading both at most once."""
    if not _QA_RESOURCES:
        # Load into locals first so a failure in either load leaves the
        # cache empty instead of half-populated.
        tokenizer = BertTokenizer.from_pretrained(
            'bert-large-uncased-whole-word-masking-finetuned-squad')
        model = hub.load('https://tfhub.dev/see--/bert-uncased-tf2-qa/1')
        _QA_RESOURCES.update(tokenizer=tokenizer, model=model)
    return _QA_RESOURCES['tokenizer'], _QA_RESOURCES['model']


def question_answer(question, reference):
    """Finds a snippet of text within a reference document to
    answer a question.
    Args:
        question (str): Contains the question to answer.
        reference (str): Contains the reference document from which to find
                         the answer.
    Returns:
        (str): Contains the answer, or None when the selected token span is
               empty (for example when the chosen end index precedes the
               chosen start index)."""

    tkn, model = _load_qa_resources()

    q_tokens = tkn.tokenize(question)
    ref_tokens = tkn.tokenize(reference)
    tokens = ['[CLS]'] + q_tokens + ['[SEP]'] + ref_tokens + ['[SEP]']
    input_word_ids = tkn.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_word_ids)
    # Segment 0 covers [CLS] + question + [SEP]; segment 1 covers the
    # reference tokens and the closing [SEP].
    input_type_ids = [0] * (len(q_tokens) + 2) + [1] * (len(ref_tokens) + 1)

    # Each input becomes an int32 tensor with a leading batch axis of size 1.
    input_word_ids, input_mask, input_type_ids = (
        tf.expand_dims(tf.convert_to_tensor(t, dtype=tf.int32), 0)
        for t in (input_word_ids, input_mask, input_type_ids))
    outputs = model([input_word_ids, input_mask, input_type_ids])

    # outputs[0] / outputs[1] are treated as per-token start / end scores.
    # Position 0 ([CLS]) is excluded from the argmax, hence the +1 to map
    # the result back onto indices of `tokens`.
    short_start = int(tf.argmax(outputs[0][0][1:])) + 1
    short_end = int(tf.argmax(outputs[1][0][1:])) + 1
    answer = tkn.convert_tokens_to_string(tokens[short_start:short_end + 1])

    # An empty span yields an empty string; report that as None.
    return answer or None

In [15]:
# Smoke-test question_answer against a single article.
# The file is read explicitly as UTF-8 (matching load_corpus) so the result
# does not depend on the platform's default encoding.
with open('ZendeskArticles/PeerLearningDays.md', encoding='utf-8') as f:
    reference = f.read()

print(question_answer('What are Mock Interviews?', reference))

2023-08-21 13:35:33.740172: E tensorflow/core/framework/node_def_util.cc:675] NodeDef mentions attribute epsilon which is not in the op definition: Op<name=_MklFusedBatchMatMulV2; signature=x:T, y:T, args:num_args*T -> output:T; attr=T:type,allowed=[DT_BFLOAT16, DT_FLOAT]; attr=adj_x:bool,default=false; attr=adj_y:bool,default=false; attr=num_args:int,min=0; attr=fused_ops:list(string),default=[]> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node encoder/layer_._0/attention/self/add}}


None


In [20]:
def answer_loop(reference):
    """Runs an interactive prompt that answers questions from one text.

    Reads questions from standard input until the user types one of the
    farewell words (case-insensitive), printing an answer for each question.
    Args:
        reference (str): Contains the reference document from which to find
                         the answer.
    """
    farewells = ['exit', 'quit', 'goodbye', 'bye']
    fallback = 'Sorry, I do not understand your question.'
    while True:
        user_query = input('Q: ')
        if user_query.lower() in farewells:
            print('A: Goodbye')
            return
        reply = question_answer(user_query, reference)
        # A falsy reply (None) means no answer span was found.
        print('A: {}'.format(reply or fallback))

In [21]:
# Start the interactive single-document Q&A loop.
# The file is read explicitly as UTF-8 (matching load_corpus) so the result
# does not depend on the platform's default encoding.
with open('ZendeskArticles/PeerLearningDays.md', encoding='utf-8') as f:
    reference = f.read()

answer_loop(reference)

2023-08-21 14:37:33.758503: E tensorflow/core/framework/node_def_util.cc:675] NodeDef mentions attribute epsilon which is not in the op definition: Op<name=_MklFusedBatchMatMulV2; signature=x:T, y:T, args:num_args*T -> output:T; attr=T:type,allowed=[DT_BFLOAT16, DT_FLOAT]; attr=adj_x:bool,default=false; attr=adj_y:bool,default=false; attr=num_args:int,min=0; attr=fused_ops:list(string),default=[]> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node encoder/layer_._0/attention/self/add}}


A: Sorry, I do not understand your question.
A: Goodbye


In [47]:
def load_corpus(corpus_path):
    """Loads every regular file of a directory into memory.
    Args:
        corpus_path (str): Path of the directory holding the documents.
    Returns:
        (list of str): The UTF-8 decoded content of each file, ordered by
                       file name. Empty when the directory has no files.
    """
    documents = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # document order (and therefore any index into it) reproducible.
    for filename in sorted(os.listdir(corpus_path)):
        file_path = os.path.join(corpus_path, filename)
        # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints), which
        # open() cannot read and would otherwise abort the whole load.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, "r", encoding="utf-8") as f:
            documents.append(f.read())
    return documents


# Maps a TF Hub URL to its loaded module so the encoder is fetched and
# initialised only once per kernel rather than on every search.
_ENCODER_CACHE = {}


def _load_sentence_encoder(model_url):
    """Returns the TF Hub module at `model_url`, loading it at most once."""
    if model_url not in _ENCODER_CACHE:
        _ENCODER_CACHE[model_url] = hub.load(model_url)
    return _ENCODER_CACHE[model_url]


def semantic_search(corpus_path, sentence):
    """
    Performs semantic search on a corpus of documents.
    Args:
        corpus_path (str): Path of the directory holding the documents.
        sentence (str): The sentence to compare against each document.
    Returns: the reference text of the best-matching document.
    Raises:
        ValueError: If no documents are found under `corpus_path`.
    """

    model = _load_sentence_encoder(
        "https://tfhub.dev/google/universal-sentence-encoder-large/5")

    documents = load_corpus(corpus_path)
    if not documents:
        # Fail early with a clear message instead of an opaque error from
        # the encoder or the similarity computation.
        raise ValueError(
            'No documents found in corpus: {}'.format(corpus_path))

    # The query is embedded as a batch of one; [0] takes its single row.
    sentence_embeddings = model([sentence])[0]
    corpus_embeddings = model(documents)
    similarity_scores = cosine_similarity([sentence_embeddings],
                                          corpus_embeddings)[0]
    # Index of the document with the highest cosine similarity to the query.
    idx = np.argmax(similarity_scores)
    return documents[idx]

In [48]:
# Show the corpus document that best matches a sample question.
print(semantic_search(corpus_path='ZendeskArticles',
                      sentence='When are PLDs?'))

PLD Overview
Peer Learning Days (PLDs) are a time for you and your peers to ensure that each of you understands the concepts you've encountered in your projects, as well as a time for everyone to collectively grow in technical, professional, and soft skills. During PLD, you will collaboratively review prior projects with a group of cohort peers.
PLD Basics
PLDs are mandatory on-site days from 9:00 AM to 3:00 PM. If you cannot be present or on time, you must use a PTO. 
No laptops, tablets, or screens are allowed until all tasks have been whiteboarded and understood by the entirety of your group. This time is for whiteboarding, dialogue, and active peer collaboration. After this, you may return to computers with each other to pair or group program. 
Peer Learning Days are not about sharing solutions. This doesn't empower peers with the ability to solve problems themselves! Peer learning is when you share your thought process, whether through conversation, whiteboarding, debugging, or li

In [49]:
def question_answer_semantic(corpus_path):
    """Runs an interactive prompt that answers questions from a corpus.

    For each question read from standard input, the best-matching document
    of the corpus is selected and searched for an answer. The loop ends when
    the user types one of the farewell words (case-insensitive).
    Args:
        corpus_path (str): Path of the directory holding the documents.
    """
    farewells = ['exit', 'quit', 'goodbye', 'bye']
    fallback = 'Sorry, I do not understand your question.'
    while True:
        user_query = input('Q: ')
        if user_query.lower() in farewells:
            print('A: Goodbye')
            return
        best_document = semantic_search(corpus_path, user_query)
        reply = question_answer(user_query, best_document)
        # A falsy reply (None) means no answer span was found.
        print('A: {}'.format(reply or fallback))

In [50]:
# Start the interactive multi-document Q&A loop over the article corpus.
question_answer_semantic(corpus_path='ZendeskArticles')

2023-08-21 15:31:18.519253: E tensorflow/core/framework/node_def_util.cc:675] NodeDef mentions attribute epsilon which is not in the op definition: Op<name=_MklFusedBatchMatMulV2; signature=x:T, y:T, args:num_args*T -> output:T; attr=T:type,allowed=[DT_BFLOAT16, DT_FLOAT]; attr=adj_x:bool,default=false; attr=adj_y:bool,default=false; attr=num_args:int,min=0; attr=fused_ops:list(string),default=[]> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node encoder/layer_._0/attention/self/add}}


A: on - site days from 9 : 00 am to 3 : 00 pm


2023-08-21 15:32:29.971167: E tensorflow/core/framework/node_def_util.cc:675] NodeDef mentions attribute epsilon which is not in the op definition: Op<name=_MklFusedBatchMatMulV2; signature=x:T, y:T, args:num_args*T -> output:T; attr=T:type,allowed=[DT_BFLOAT16, DT_FLOAT]; attr=adj_x:bool,default=false; attr=adj_y:bool,default=false; attr=num_args:int,min=0; attr=fused_ops:list(string),default=[]> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node encoder/layer_._0/attention/self/add}}


A: help you train for technical interviews


2023-08-21 15:33:43.016815: E tensorflow/core/framework/node_def_util.cc:675] NodeDef mentions attribute epsilon which is not in the op definition: Op<name=_MklFusedBatchMatMulV2; signature=x:T, y:T, args:num_args*T -> output:T; attr=T:type,allowed=[DT_BFLOAT16, DT_FLOAT]; attr=adj_x:bool,default=false; attr=adj_y:bool,default=false; attr=num_args:int,min=0; attr=fused_ops:list(string),default=[]> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node encoder/layer_._0/attention/self/add}}


A: peer learning days
A: Goodbye
