In [None]:
!pip -q install datasets transformers sentence-transformers accelerate gradio

In [None]:
import math
from datetime import datetime

from datasets import Dataset
from sentence_transformers import SentenceTransformer, losses, models, util
from sentence_transformers.readers import InputExample
from torch.utils.data import DataLoader

# Notebook configuration.
# DATA_PATH: JSON-lines file that the dataset is read from.
# BASE_MODEL: model identifier kept for reference; only printed in this cell.
DATA_PATH = "/content/drive/MyDrive/Pj3/clear_data.jsonl"
BASE_MODEL = "intfloat/multilingual-e5-base"

# Echo both settings, one per line, so they show up in the cell output.
print(DATA_PATH, BASE_MODEL, sep="\n")

In [None]:
# Load the sentence-embedding model used for all encoding below from the
# named checkpoint.
model = SentenceTransformer(
    model_name_or_path='ThanhRise/intfloat-multilingual-e5-base2023-08-30_16-52-03',
)

In [None]:
# Read the JSON-lines file at DATA_PATH into a `datasets.Dataset`.
Full_Data = Dataset.from_json(path_or_paths=DATA_PATH)

In [None]:
# Sanity check: shuffle the dataset (no seed is passed) and show the first
# record's `user_question` and `question` fields, separated by a blank line.
example = Full_Data.shuffle()
first_row = example[0]
print(first_row['user_question'], first_row['question'], sep="\n\n")

In [None]:
# Retrieval corpus: the dataset's `question` column, embedded once up front
# so that answering a query only requires encoding the query itself.
corpus_full = Full_Data['question']
corpus_full_embeddings = model.encode(sentences=corpus_full)

In [None]:
def get_answer(question, topk=3):
    """Return the ``topk`` corpus entries most similar to ``question``.

    The question is encoded with the module-level ``model`` and compared
    against ``corpus_full_embeddings`` via ``util.semantic_search``.

    Args:
        question: Query text to look up.
        topk: Maximum number of matches to return. It is forwarded to
            ``semantic_search`` as ``top_k``; previously this argument was
            ignored and the library's default number of hits was returned.

    Returns:
        A list of ``(corpus_question, url, answer, score)`` tuples, in the
        order produced by ``semantic_search``.
    """
    query_embedding = model.encode(question)
    # semantic_search returns one hit list per query; a single query is sent,
    # so only the first list is relevant.
    hits = util.semantic_search(query_embedding, corpus_full_embeddings, top_k=topk)[0]

    matches = []
    for hit in hits:
        idx = hit['corpus_id']
        row = Full_Data[idx]  # look the record up once, not once per field
        matches.append((corpus_full[idx], row["url"], row["answer"], hit['score']))
    return matches

In [None]:
import gradio as gr

def generate_answer(question):
    """Format the matches for ``question`` as a single plain-text report.

    Every tuple returned by ``get_answer`` becomes one section listing the
    matched question, its URL, the answer and the score, followed by a dashed
    separator line. An empty result yields an empty string.
    """
    separator = '-----'*30 + '\n\n'
    sections = [
        f"Question: {matched}\n\nUrl: {url}\n\nAnswer: {answer}\n\nScore: {score}\n\n" + separator
        for matched, url, answer, score in get_answer(question)
    ]
    return ''.join(sections)

# Gradio front end: a two-line text box for the question, wired to
# generate_answer, with the formatted matches shown in a second text box.
question_box = gr.components.Textbox(lines=2, placeholder="Enter your question here...")
answer_box = gr.components.Textbox(lines=2, placeholder="Answer...")

iface = gr.Interface(
    fn=generate_answer,
    inputs=question_box,
    outputs=answer_box,
    title="Question Answering",
    description="Answering question from law documents",
)

# Launch the interface.
iface.launch()