<a href="https://colab.research.google.com/github/Akshith213/HackOffV3---Team-DrainBrain/blob/main/Final_Chatbot_Deployment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Run this cell to install/import all the necessary libraries, instantiate the 'FAISS' document store, and store the medical corpus in it.
!pip install git+https://github.com/deepset-ai/haystack.git
!pip install urllib3==1.25.4
!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html

from haystack import Finder
from haystack.preprocessor.cleaning import clean_wiki_text
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader
from haystack.utils import print_answers

from haystack.document_store.faiss import FAISSDocumentStore

# Create the FAISS-backed document store. "Flat" is the index factory string
# handed to FAISS (presumably an exact, uncompressed index; confirm in the FAISS docs).
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")

# Local directory that receives the corpus, and the URL of the corpus archive.
doc_dir = "data/healthcare_corpus"
s3_url = "https://drainbrain.s3.amazonaws.com/Hackoff+Data-20201129T103328Z-001.zip"
# Alternative corpus archive, currently disabled:
#s3_url = "https://drainbrain.s3.amazonaws.com/Drugs+hack+off-20201213T060446Z-001+(2).zip"

# Download the archive at s3_url into doc_dir (presumably unpacking it as well; verify against the helper).
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)

# Convert the files under doc_dir into dicts, passing clean_wiki_text as the cleaning
# function and asking for the text to be split into paragraphs.
dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)

# Store the converted documents; later cells read them back through `document_store`.
document_store.write_documents(dicts)

In [None]:
# Run this cell to instantiate the 'Dense Passage Retrieval' (DPR) retriever on top of
# `document_store` and then update the store's embeddings with it.
# Separate pretrained encoders are named for queries and for passages.
# NOTE(review): use_gpu=True presumably requires a GPU runtime; confirm the CPU fallback.
# NOTE(review): max_seq_len_query / max_seq_len_passage look like token limits (64 / 256); confirm units.
from haystack.retriever.dense import DensePassageRetriever
retriever = DensePassageRetriever(document_store=document_store,
                                  query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
                                  passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
                                  max_seq_len_query=64,
                                  max_seq_len_passage=256,
                                  batch_size=16,
                                  use_gpu=True,
                                  embed_title=True,
                                  use_fast_tokenizers=True)
# Must run after the documents have been written to the store in the first cell.
document_store.update_embeddings(retriever)


In [None]:
# Run this cell to instantiate the reader: a FARMReader loading the
# "deepset/roberta-base-squad2" model (a RoBERTa base model; per its name, trained on the SQuAD 2.0 dataset).
# NOTE(review): use_gpu=True presumably requires a GPU runtime; confirm the CPU fallback.
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)

In [4]:
# Run this cell to create a pipeline containing both the retriever and the reader.
# NOTE: on construction Haystack warns (see this cell's output) that `Finder` will be
# deprecated in favour of the `Pipeline` class and that the `question` parameter is
# renamed to `query`; later cells still call finder.get_answers(question=...).
finder = Finder(reader, retriever)

            1. The 'Finder' class will be deprecated in the next Haystack release in 
            favour of a new `Pipeline` class that supports building custom search pipelines using Haystack components
            including Retriever, Readers, and Generators.
            For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/544
            2. The `question` parameter in search requests & results is renamed to `query`.


In [5]:
# Function to serialise the predictions from the finder into a single string
def print_final_answers(predictions):
  """Flatten the finder's predictions into one delimited string.

  Every entry of ``predictions['answers']`` contributes one record of the form
  ``<answer>;<context>;<probability>@`` and the records are concatenated in
  their original order. Despite the name, nothing is printed: the string is
  returned to the caller.

  Args:
    predictions: Mapping with an ``'answers'`` key holding a sequence of
      mappings, each with ``'answer'``, ``'context'`` and ``'probability'`` keys.

  Returns:
    str: The concatenated records, or ``''`` when there are no answers.

  Raises:
    KeyError: If ``'answers'`` or one of the per-answer keys is missing.
  """
  # A missing (None) answer list is treated like an empty one.
  answers = predictions['answers'] or []
  # NOTE(review): ';' and '@' are not escaped, so an answer or context containing
  # either character cannot be told apart from a delimiter by whoever parses this.
  return ''.join(
    ';'.join((str(item['answer']), str(item['context']), str(item['probability']))) + '@'
    for item in answers
  )

In [None]:
# To check the model's performance without an interface, uncomment the 3 lines of code below, run this cell and type your medical query
#que=input()
#prediction = finder.get_answers(question=que, top_k_retriever=15, top_k_reader=3)
#print_final_answers(prediction)


In [None]:
# Run this cell to install mysql packages
!pip install mysql-connector-python

In [7]:
# Run this cell to import the necessary libraries for connecting to the sql database
import mysql.connector
from mysql.connector import Error

In [None]:
# Run this cell to poll the database: retrieve a question, find an answer to that question and
# write it back to the asking user's row. Once you are done, just stop the execution of the cell.
import os
import time

# Connection settings, defined once instead of being repeated at every connect() call.
# Each value can be overridden through an environment variable.
# NOTE(security): the fallback values are the credentials this notebook has always shipped
# with; they are kept only so the cell keeps working unchanged. Rotate them and remove the
# fallbacks so that no secret lives in the notebook.
DB_CONFIG = {
  'host': os.environ.get('CHATBOT_DB_HOST', 'remotemysql.com'),
  'database': os.environ.get('CHATBOT_DB_NAME', 'E786ozefXp'),
  'user': os.environ.get('CHATBOT_DB_USER', 'E786ozefXp'),
  'password': os.environ.get('CHATBOT_DB_PASSWORD', 'AJVykSfVuB'),
}

# Pause between polls while the requests table is empty, so the loop does not
# reconnect to the remote database as fast as it can.
POLL_INTERVAL_SECONDS = 1.0


def run_db_statement(statement, params=None, fetch_one=False):
  """Run one SQL statement on a fresh connection and always close that connection.

  Args:
    statement: SQL text, using %s placeholders for any parameters.
    params: Values bound to the placeholders, or None when there are none.
    fetch_one: When True, return the first row of the result (None if there is
      no row) and skip the commit; when False, commit the statement.

  Returns:
    The fetched row when ``fetch_one`` is True, otherwise None.
  """
  connection = mysql.connector.connect(**DB_CONFIG)
  try:
    cursor = connection.cursor()
    cursor.execute(statement, params)
    if fetch_one:
      return cursor.fetchone()
    connection.commit()
    return None
  finally:
    # Runs on errors and on a manual interrupt too, so no connection is leaked.
    connection.close()


while True:
  # Only the first row returned is used; the query has no ORDER BY, so which
  # request comes first is up to the database.
  myresult = run_db_statement("SELECT * from requests", fetch_one=True)
  if not myresult:
    time.sleep(POLL_INTERVAL_SECONDS)
    continue

  req_id = myresult[0]
  username = myresult[1]
  question_user = myresult[2]
  print(req_id)
  print(username)
  print(question_user)

  # NOTE(review): the request row is deleted before the answer is computed, so a
  # failure in the finder below loses the request. The original order is kept.
  run_db_statement("DELETE FROM requests WHERE req_id = %s ", (req_id,))
  print('Success1!')

  predictions = finder.get_answers(question=question_user, top_k_retriever=10, top_k_reader=2)
  pred = print_final_answers(predictions)
  print('Success2')

  run_db_statement(
    "UPDATE users SET question = %s, answer = %s WHERE username = %s",
    (question_user, pred, username),
  )
  print('Success3')
