In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFaceTextGenInference
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import pypdf


In [None]:
def llm_initalization(model, max_new_tokens, top_k, top_p, temperature):
    """Create a text-generation client for the requested model.

    Args:
        model: Model name; it is lower-cased before being looked up in the
            endpoint table. Names without an entry use the 'gpt2' endpoint.
        max_new_tokens: Passed through to the client as ``max_new_tokens``.
        top_k: Passed through to the client as ``top_k``.
        top_p: Passed through to the client as ``top_p``.
        temperature: Passed through to the client as ``temperature``.

    Returns:
        A ``HuggingFaceTextGenInference`` instance whose callbacks list holds
        a single ``StreamingStdOutCallbackHandler``.
    """
    endpoints = {'gpt2':"http://127.0.0.1:5000/own_gpt2"}
    model_key = model.lower()
    # Unknown model names fall back to the gpt2 endpoint.
    if model_key in endpoints:
        server_url = endpoints[model_key]
    else:
        server_url = endpoints['gpt2']
    client_options = dict(
        inference_server_url = server_url,
        max_new_tokens = max_new_tokens,
        top_k = top_k,
        top_p = top_p,
        temperature = temperature,
        callbacks = [StreamingStdOutCallbackHandler()],
    )
    return HuggingFaceTextGenInference(**client_options)

In [None]:
def text_extract_pypdf(file):
    """Concatenate the extracted text of every page of a PDF, using pypdf.

    Args:
        file: Whatever ``pypdf.PdfReader`` accepts as its first argument.

    Returns:
        The page texts joined back to back with no separator. If anything
        raises along the way, the exception is not propagated: the string
        ``'Something went wrong <exception>'`` is returned instead.

    NOTE(review): callers cannot tell the failure message apart from real
    document text by type alone; they have to inspect the returned string.
    """
    try:
        reader = pypdf.PdfReader(file)
        collected = ''
        for pdf_page in reader.pages:
            collected = collected + pdf_page.extract_text()
    except Exception as err:
        return f'Something went wrong {err}'
    return collected


In [None]:
def get_text_chunk(text, chunk_size = 1000, chunk_overlap = 100):
    """Split text into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text: The string to split.
        chunk_size: Passed to the splitter as ``chunk_size``. Defaults to
            1000, the value that used to be fixed inside the function.
        chunk_overlap: Passed to the splitter as ``chunk_overlap``. Defaults
            to 100, the value that used to be fixed inside the function.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = chunk_size, chunk_overlap = chunk_overlap)
    return text_splitter.split_text(text)

In [None]:
# Alternative embedding model directory, kept for reference (not loaded):
# model_path ='/home/laxmidhar/Coding Stuff/AI Models/GTE-Base/'
# Local directory of the embedding model that is actually loaded below.
model_path = '/home/laxmidhar/Coding Stuff/AI Models/e5-large-v2/'
# Module-level embedding object. get_vector_store and QnA both look this name
# up when they are called, so this cell has to run before either of them is used.
embedings = HuggingFaceEmbeddings(model_name = model_path)

In [None]:
def get_vector_store(text_chunks):
    """Build a FAISS index from the chunks and save it to disk.

    The index is created with the module-level ``embedings`` object and
    written to the fixed directory below. Nothing is returned.

    Args:
        text_chunks: The texts handed to ``FAISS.from_texts``.
    """
    FAISS.from_texts(text_chunks, embedding = embedings).save_local(
        '/home/laxmidhar/Coding Stuff/Vector Database/'
    )

In [None]:
def get_conversational_chain():
    """Assemble the question-answering chain used by the notebook.

    The chain is created by ``load_qa_chain`` with ``chain_type='stuff'``.
    Its LLM comes from ``llm_initalization('gpt2', ...)`` with the sampling
    settings fixed below, and its prompt is a ``PromptTemplate`` with the two
    input variables ``context`` and ``question``.

    Returns:
        The chain object returned by ``load_qa_chain``.
    """
    # Runtime prompt text: kept exactly as written, including its spelling.
    qa_template = """
    Answer the question as detailed as possible from the providede context, make sure to provide all the details if the answer is not in provided
    context just say, "Answer is not available in the context", dont't provide the wrong answer\n\n
    Context:\n{context}?\n
    Question:\n{question}\n
    
    Answer:
    """
    llm = llm_initalization('gpt2', max_new_tokens=512, top_k = 50, top_p = 0.95, temperature = 0.4)
    return load_qa_chain(
        llm,
        chain_type='stuff',
        prompt = PromptTemplate(template=qa_template, input_variables=['context', 'question']),
    )

In [None]:
def QnA(user_question):
    """Answer a question from the FAISS index previously saved on disk.

    Loads the index from the fixed directory with the module-level
    ``embedings`` object, runs a similarity search for the question, and
    passes the matching documents plus the question to the chain from
    ``get_conversational_chain``.

    Args:
        user_question: The question text; used both as the search query and
            as the chain's ``question`` input.

    Returns:
        Whatever the chain returns when called with the input mapping.
    """
    index_dir = '/home/laxmidhar/Coding Stuff/Vector Database/'
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # serialized data from disk; presumably acceptable only because the index
    # is written by get_vector_store in this same notebook - confirm before
    # pointing this at files from anywhere else.
    vector_index = FAISS.load_local(index_dir, embedings, allow_dangerous_deserialization = True)
    matching_docs = vector_index.similarity_search(user_question)
    qa_chain = get_conversational_chain()
    chain_inputs = {'input_documents': matching_docs, 'question': user_question}
    return qa_chain(chain_inputs)

In [None]:
def main(file, query):
    """Index one PDF and answer a single question against it.

    Steps, in order: extract the text with ``text_extract_pypdf``, split it
    with ``get_text_chunk``, build and save the index with
    ``get_vector_store``, then answer through ``QnA``.

    Args:
        file: Passed to ``text_extract_pypdf``.
        query: Passed to ``QnA``.

    Returns:
        The value returned by ``QnA(query)``.
    """
    chunks = get_text_chunk(text_extract_pypdf(file))
    get_vector_store(chunks)
    return QnA(query)

In [None]:
# Example run: the PDF to index and the question to ask about it.
file = '/home/laxmidhar/Coding/data/6.3.+Modifying+strings.pdf'
question = 'Which method change all first letters to uppercase'
# Last expression of the cell, so the notebook displays the value main returns.
main(file, question)

In [None]:
from pydantic import Extra
import requests
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM


In [None]:
class LlamaLLM(LLM):
    """LangChain ``LLM`` subclass that forwards prompts to an HTTP endpoint.

    ``_call`` POSTs a JSON payload to ``llm_url`` and returns the decoded
    JSON body of the response.
    """

    # Endpoint that receives the payload built in `_call`. Declared as a model
    # field so that `_identifying_params` can report it and so that it can be
    # overridden at construction time; the default is the local server URL.
    llm_url: str = "http://127.0.0.1:5000/own_gpt2"

    class Config:
        extra = Extra.forbid

    @property
    def _llm_type(self) -> str:
        """Short identifier for this LLM implementation."""
        # A property getter is invoked with `self` only, so it must not take
        # any further required parameters.
        return "llama_llm"

    def _call(self, query: str, context = None, run_manager: Optional[CallbackManagerForLLMRun] = None, max_length = 512, temperature = 0.3, **kwargs: Any) -> str:
        """Send one request to the endpoint and return its decoded JSON reply.

        Args:
            query: Sent as the ``question`` field of the payload.
            context: Sent as the ``context`` field (``None`` by default).
            run_manager: Accepted but not used here.
            max_length: Sent as the ``max_length`` field.
            temperature: Sent as the ``temperature`` field.
            **kwargs: Accepted but not used here.

        Returns:
            The result of ``response.json()``.
            NOTE(review): the annotation says ``str``, but this returns
            whatever JSON value the server sends back - confirm the endpoint
            replies with a bare JSON string.
        """
        data = {"context": context, "question": query, "max_length": max_length, "temperature": temperature}
        response = requests.post(url = self.llm_url, json = data)
        result = response.json()
        return result

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"llmUrl": self.llm_url}


In [None]:
llm = LlamaLLM()
# Smoke test: send one hand-written prompt straight through _call and print
# whatever comes back from the endpoint.
prompt = "Question: Who is Albert Einstein? \n Answer:"
result = llm._call(prompt)
print(result)
