Model for Q&A


In [None]:
!pip install -qU datasets pinecone-client sentence-transformers torch
!pip install apache_beam
!pip install ndg-httpsclient
!pip install pyopenssl
!pip install pyasn1

In [1]:
from datasets import load_dataset
from tqdm.auto import tqdm
import pandas as pd
import pinecone
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

In [2]:
API_KEY = "4da56e04-60e9-4e45-90ae-c9cba9b01686"
ENV = "us-west1-gcp-free"

Retriver Model

In [3]:
# set device to GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# load the retriever model from huggingface model hub
retriever = SentenceTransformer("multi-qa-mpnet-base-dot-v1", device=device)
retriever

Downloading (…)16ebc/.gitattributes:   0%|          | 0.00/737 [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b6b5d16ebc/README.md:   0%|          | 0.00/8.65k [00:00<?, ?B/s]

Downloading (…)b5d16ebc/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)ebc/data_config.json:   0%|          | 0.00/25.5k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)16ebc/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)6ebc/train_script.py:   0%|          | 0.00/13.9k [00:00<?, ?B/s]

Downloading (…)b6b5d16ebc/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5d16ebc/modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
)

Picone initialization

In [None]:
# connect to pinecone environment
pinecone.init(
    api_key=API_KEY,
    environment=ENV)

In [None]:
index_name = "abstractive-question-answering-bologna"

# check if the abstractive-question-answering index exists
if index_name not in pinecone.list_indexes():
    # create the index if it does not exist
    pinecone.create_index(
        index_name,
        dimension=retriever.get_sentence_embedding_dimension(),
        metric="cosine"
    )

# connect to abstractive-question-answering index we created
index = pinecone.Index(index_name)

## QA Module

In [None]:
tk_summary = AutoTokenizer.from_pretrained("t5-small")
summarizer = AutoModelForSeq2SeqLM.from_pretrained("t5-small").to(device)
summarizer.from_pretrained("./Trained Models/trained_summary")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Utility Functions

In [None]:
def query_pinecone(query, top_k):
    # generate embeddings for the query
    xq = retriever.encode([query]).tolist()
    # search pinecone index for context passage with the answer
    xc = index.query(xq, top_k=top_k, include_metadata=True)
    return xc

def format_query(query, context):
    # extract passage_text from Pinecone search result and add the <P> tag
    context = [f"<P> {m['metadata']['passage_text']}" for m in context]
    # concatinate all context passages
    context = " ".join(context)
    # contcatinate the query and context passages
    query = f"question: {query} context: {context}"
    return query

def generate_answer(query):
    # tokenize the query to get input_ids
    inputs = tk_summary([query], max_length=1024, return_tensors="pt",device=device)
    # use generator to predict output ids
    ids = summarizer.generate(inputs["input_ids"].to(device), num_beams=2, min_length=20, max_length=100)
    # use tokenizer to decode the output ids
    answer = tk_summary.batch_decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    return print(answer)

Example of QA

In [None]:
query = "Describe the history of Bologna cathedral"
context = query_pinecone(query, top_k=2)
query = format_query(query, context["matches"])
generate_answer(query)

## Translator Module

In [9]:
tk_translator = AutoTokenizer.from_pretrained("t5-small")
translator = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
translator.from_pretrained("./Trained Models/trained_translator")

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [10]:
def translate(text):
    model_inputs = tk_translator(text, return_tensors="pt")
    gen_tokens = translator.generate(**model_inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
    translated_text = tk_translator.batch_decode(gen_tokens, skip_special_tokens=True)

    return str(translated_text)

Example of translation

In [12]:
test_text = 'Oggi voglio visitare la cattedrale di Bologna'
translated_text = translate(test_text)
translated_text

"['Oggi voglio visitare la cattedrale di Bologna']"

In [None]:
print("Hi, I'm an helper Bot for tourists. Please enter your question below!")
print("Ciao, sono un Bot aiutante per turisti. Per favore inserisci la tua domanda qui sotto!")

language = input('en/it')


query = input("Question/Domanda/问题? ")

if language != 'en':
    query = translate(query)

context = query_pinecone(query, top_k=2)
query = format_query(query, context["matches"])
answer = generate_answer(query)

answer