In [None]:
!pip install -qqq torch==2.0.1 --progress-bar off
!pip install -qqq transformers==4.31 --progress-bar off
!pip install -qqq langchain==0.0.266 --progress-bar off
!pip install -qqq pypdf==3.15.0 --progress-bar off
!pip install -qqq xformers==0.0.20 --progress-bar off
!pip install -qqq sentence_transformers==2.2.2 --progress-bar off
!pip install -qqq InstructorEmbedding==1.0.1 --progress-bar off
!pip install -q kedro --progress-bar off
!wget -qqq https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl
!pip install -qqq auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl --progress-bar off



In [None]:
# Fetch config.yml, the file that `kedro new -c config.yml` reads in the next cell.
!wget -q https://raw.githubusercontent.com/RCostaBooks/ScalableMLPipelines/main/code/config.yml

In [None]:
# Create the Kedro project from the settings in config.yml.
# Later cells treat ./chatbot as the resulting project folder.
!kedro new -c config.yml

In [None]:
!wget -q -P ./chatbot/data/01_raw https://raw.githubusercontent.com/RCostaBooks/ScalableMLPipelines/main/pdfs/2022-TSLA-F10K.pdf
!wget -q -P ./chatbot/data/01_raw https://raw.githubusercontent.com/RCostaBooks/ScalableMLPipelines/main/pdfs/2022-NVDA-F10K.pdf
!wget -q -P ./chatbot/data/01_raw https://raw.githubusercontent.com/RCostaBooks/ScalableMLPipelines/main/pdfs/2022-AMD-F10K.pdf

In [None]:
# Move the kernel's working directory into the project folder. The change persists
# for every later cell, so relative paths below are resolved against ./chatbot.
%cd chatbot
# Scaffold the ingestDocuments pipeline package; later cells write its
# pipeline.py and nodes.py under ./src/chatbot/pipelines/ingestDocuments.
!kedro pipeline create ingestDocuments

## Code: Pipeline from ingestDocuments

In [None]:
from kedro.pipeline import Pipeline, node, pipeline
from .nodes import loadDocuments, textSplitting, createEmbeddings

def create_pipeline(**kwargs) -> Pipeline:
    """Assemble the ingestDocuments pipeline.

    The three nodes are chained through the datasets "docs" and "texts":
    loadDocuments -> textSplitting -> createEmbeddings. The first node takes
    no input and the last one declares no output.

    Args:
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The pipeline built from the three nodes, in the order listed above.
    """
    load_node = node(
        func=loadDocuments,
        inputs=None,
        outputs="docs",
        name="loadDocuments_node",
    )
    split_node = node(
        func=textSplitting,
        inputs="docs",
        outputs="texts",
        name="textSplitting_node",
    )
    embed_node = node(
        func=createEmbeddings,
        inputs="texts",
        outputs=None,
        name="createEmbeddings_node",
    )
    return pipeline([load_node, split_node, embed_node])

In [None]:
# Instead of typing the pipeline code, download pipeline.py into the pipeline package
# (presumably the same code as the listing above -- verify against the repository).
# NOTE(review): -O already names the full output path, so -P looks redundant here -- confirm and drop it.
!wget -q -P ./src/chatbot/pipelines/ingestDocuments https://raw.githubusercontent.com/RCostaBooks/ScalableMLPipelines/main/code/pipeline.py -O ./src/chatbot/pipelines/ingestDocuments/pipeline.py

## Code: Nodes from Pipeline ingestDocuments

In [None]:
import torch
import logging
from typing import List
import pandas as pd
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import SKLearnVectorStore

# Module-level logger used by the node functions below.
log = logging.getLogger(__name__)
# Run the embedding model on the first CUDA device when torch reports one, else on the CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

def loadDocuments() -> List:
  """Load every PDF found under ./data/01_raw.

  Returns:
    The list returned by ``PyPDFDirectoryLoader.load()``. The log message
    counts its entries as pages.
  """
  log.info("Starting PDFLoader Node.")

  loader = PyPDFDirectoryLoader("./data/01_raw")
  docs = loader.load()
  log.info(f"Total pages loaded: {len(docs)}")
  if not docs:
    # An empty folder is almost certainly a setup mistake; make it visible in the run log.
    log.warning("No PDF pages were found under ./data/01_raw.")
  return docs

def textSplitting(docs: List, chunk_size: int = 1024, chunk_overlap: int = 64) -> List:
  """Split the loaded documents into overlapping chunks.

  Args:
    docs: The documents produced by ``loadDocuments``.
    chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
    chunk_overlap: ``chunk_overlap`` passed to ``RecursiveCharacterTextSplitter``.

  Returns:
    The chunks returned by ``split_documents``.
  """
  log.info('Splitting Text into Chunks...')
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
  )
  texts = text_splitter.split_documents(docs)
  log.info(f"Total Chunks created: {len(texts)}")
  return texts

def createEmbeddings(texts: List, persist_path: str = "./data/05_model_input/vectordb.parquet") -> None:
  """Embed the chunks and persist them as a parquet-backed SKLearnVectorStore.

  A store file left over from an earlier run is moved aside before the new
  store is built. SKLearnVectorStore loads an existing ``persist_path`` when it
  is constructed (the server section of this notebook relies on that), so
  building on top of an old file would append every chunk a second time.
  The old file is restored if building or persisting the new store fails.

  Args:
    texts: The chunks produced by ``textSplitting``.
    persist_path: Where the vector store file is written.
  """
  from pathlib import Path  # function-scope import: the module header does not import pathlib

  store_file = Path(persist_path)
  stale_file = store_file.with_name(store_file.name + ".stale")
  store_file.parent.mkdir(parents=True, exist_ok=True)

  log.info(f'Loading Embedding Model on {DEVICE}...')

  embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs={"device": DEVICE}
  )

  if store_file.is_file():
    log.info(f"Replacing existing vector store at {persist_path}")
    store_file.replace(stale_file)

  log.info('Saving Persistent Vector Database')
  try:
    db = SKLearnVectorStore.from_documents(
      documents=texts,
      embedding=embeddings,
      persist_path=persist_path,
      serializer="parquet"
    )
    db.persist()
  except Exception:
    # Put the previous store back so a failed run does not destroy a working one.
    if stale_file.is_file():
      stale_file.replace(store_file)
    raise
  else:
    if stale_file.is_file():
      stale_file.unlink()

  log.info('Ingestion process completed for all pdf files.')
  return

In [None]:
# Download nodes.py into the pipeline package
# (presumably the same code as the listing above -- verify against the repository).
# NOTE(review): -O already names the full output path, so -P looks redundant here -- confirm and drop it.
!wget -q -P ./src/chatbot/pipelines/ingestDocuments https://raw.githubusercontent.com/RCostaBooks/ScalableMLPipelines/main/code/nodes.py -O ./src/chatbot/pipelines/ingestDocuments/nodes.py

In [None]:
# Finally, run the project from its root folder (the earlier `%cd chatbot` put us there).
# With the ingestDocuments pipeline above this loads the PDFs, splits them and writes the vector store.
!kedro run

## API Server Flask

In [None]:
!pip install flask-ngrok
!pip install pyngrok
!ngrok authtoken 'YOUR NGROK API KEY'

In [None]:
import torch
from flask import Flask, request, jsonify
from flask_ngrok import run_with_ngrok
from langchain.vectorstores import SKLearnVectorStore
from langchain import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.output_parsers import RegexParser
from langchain.embeddings import HuggingFaceInstructEmbeddings
from transformers import AutoTokenizer, TextStreamer, pipeline
from auto_gptq import AutoGPTQForCausalLM

In [None]:
# Device shared by the embedding model and the LLM below: first CUDA device if available, else CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
# Same embedding model name ("hkunlp/instructor-large") that createEmbeddings used at
# ingestion time; queries are embedded with it before searching the vector store.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs={"device": DEVICE}
)

In [None]:
# Hub id of the GPTQ-quantized chat model; used for both the tokenizer and the weights.
model_name_or_path= "TheBloke/Llama-2-13B-chat-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Load the quantized weights onto DEVICE. quantize_config=None leaves the
# quantization settings to be resolved by the loader (presumably from the
# model repository -- confirm).
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    use_safetensors=True,
    trust_remote_code=True,
    inject_fused_attention=False,
    device=DEVICE,
    quantize_config=None,
)

# Passed to the generation pipeline below; skips the prompt and special tokens in what it emits.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# NOTE(review): temperature / top_p normally only matter when sampling is enabled
# (do_sample=True), which is not set here -- confirm the intended decoding mode.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    temperature=0.5,
    top_p=0.95,
    repetition_penalty=1.15,
    streamer=streamer,
    )

# NOTE(review): model_kwargs={"temperature": 0} disagrees with temperature=0.5 above;
# check which value is actually applied when a ready-made pipeline is passed in.
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})

In [None]:
from pathlib import Path

# The ingestion section runs `%cd chatbot`. When the whole notebook runs in one
# kernel the store is therefore at ./data/...; when this section is started from
# the notebook's original folder it is at ./chatbot/data/.... Accept either, and
# fail loudly instead of silently opening an empty store at a path that does not exist.
_VECTORDB_CANDIDATES = (
    Path("./chatbot/data/05_model_input/vectordb.parquet"),
    Path("./data/05_model_input/vectordb.parquet"),
)
VECTORDB_PATH = next((p for p in _VECTORDB_CANDIDATES if p.is_file()), None)
if VECTORDB_PATH is None:
    raise FileNotFoundError(
        "vectordb.parquet not found; run the ingestion pipeline (`kedro run`) first. "
        f"Looked in: {[str(p) for p in _VECTORDB_CANDIDATES]}"
    )

vector_store2 = SKLearnVectorStore(
    embedding=embeddings, persist_path=str(VECTORDB_PATH), serializer="parquet"
)

In [None]:
# Prompt for the question-answering chain. {context} and {question} are the two
# template variables; the text asks the model to finish with a "Score:" line,
# which the output parser defined after this string looks for.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

This should be in the following format:

Question: [question here]
Helpful Answer: [answer here]
Score: [score between 0 and 100]

Begin!

Context:
---------
{context}
---------
Question: {question}
Helpful Answer:"""
# Splits the model reply into the answer text and the numeric score.
# (?s) lets the answer span several lines: without it "." stops at newlines and
# only the last line before "Score:" is captured.
# \d+ keeps just the digits, so a reply such as "Score: 95/100" still yields "95".
output_parser = RegexParser(
    regex=r"(?s)(.*?)\nScore:\s*(\d+)",
    output_keys=["answer", "score"],
)

# Bind the template text to its two variables and attach the answer/score parser.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
    output_parser=output_parser
)

# "map_rerank" question-answering chain built on the LLM and PROMPT above.
# return_intermediate_steps=True asks the chain to also return its intermediate results.
chain = load_qa_chain(llm=llm, chain_type="map_rerank",
                      return_intermediate_steps=True, prompt=PROMPT)


def getanswer(query, k=2):
    """Answer a question from the chunks stored in the vector store.

    Args:
        query: The question text.
        k: Number of chunks to retrieve from the vector store (default 2,
            the value that used to be hard-coded).

    Returns:
        A dict with two keys: "Answer" holds the chain's "output_text", and
        "Reference" holds the page_content of the chunks handed to the chain,
        concatenated without a separator.
    """
    print(f'Function getanswer...{query}')
    relevant_chunks = vector_store2.similarity_search_with_score(query, k=k)
    print(f'relevant: {relevant_chunks}')
    # Each hit is indexed as (document, ...); only the document goes to the chain.
    chunk_docs = [chunk[0] for chunk in relevant_chunks]
    results = chain({"input_documents": chunk_docs, "question": query})
    text_reference = "".join(doc.page_content for doc in results["input_documents"])
    return {"Answer": results["output_text"], "Reference": text_reference}

# Flask application that serves the question-answering endpoint defined below.
app = Flask(__name__)
# flask_ngrok hook; presumably opens an ngrok tunnel when app.run() is called -- confirm.
run_with_ngrok(app)

@app.route('/', methods=["POST"])
def processclaim():
    """Handle POST / : answer the question given in the JSON body.

    Expects a JSON object with a non-empty "query" entry.

    Returns:
        200 with the dict produced by ``getanswer`` on success;
        400 with a {"Status": ...} body when the request body is unusable;
        500 with the original {"Status": "Failure --- some error occured"} body
        when answering fails (the traceback goes to the application log).
    """
    print('Got Question...')
    # silent=True returns None for a malformed body instead of raising.
    input_json = request.get_json(force=True, silent=True)
    print(f'Question: {input_json}')
    if not isinstance(input_json, dict) or not input_json.get("query"):
        return jsonify(
            {"Status": 'Failure --- request body must be a JSON object with a non-empty "query"'}
        ), 400

    query = input_json["query"]
    print(f'Query: {query}')
    try:
        output = getanswer(query)
    except Exception:
        # Keep the client-facing message unchanged, but record what actually went wrong.
        app.logger.exception("getanswer failed")
        return jsonify({"Status": "Failure --- some error occured"}), 500
    print(f'Output: {output}')
    return jsonify(output)

In [None]:
# Quick manual check of the retrieval + QA chain before starting the server.
getanswer('Please give me the name of the CTO of the companies in our database')

In [None]:
# Start the Flask server. In a notebook kernel __name__ is "__main__", so this runs;
# the cell keeps running until the server is stopped.
if __name__ == "__main__":
    app.run()