In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


In [2]:
PINECONE_API_KEY = "426dfc89-b204-42bb-ace9-ef8d7033c55e"
PINECONE_API_ENV = "gcp-starter"

In [3]:
#extracting data
def load_pdf(data):
    loader = DirectoryLoader(data,
                             glob="*.pdf",
                             loader_cls=PyPDFLoader)
    
    documents = loader.load()
    return documents

In [4]:
extracted_data = load_pdf("data/")

In [5]:
#extracted_data

In [6]:
# create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [7]:
text_chunks = text_split(extracted_data)
print("Length of my chunk:", len(text_chunks))

Length of my chunk: 25687


In [8]:
# download embedding model
def download_hugging_face_embedding():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [9]:
embeddings = download_hugging_face_embedding()

  return self.fget.__get__(instance, owner)()


In [10]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [11]:
query_result = embeddings.embed_query("Hello World")
print("length", len(query_result))

length 384


In [12]:
#query_result

In [13]:
#Initializing the Pinecone
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)

index_name="medical--chatbot"


In [None]:

#Creating Embeddings for Each of The Text Chunks & storing
docsearch=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [14]:
#if we already have index then we can load like this

docsearch=  Pinecone.from_existing_index(index_name, embeddings)

querry = "What are Allergies?"

docs = docsearch.similarity_search(querry, k=3)

print("Result", docs)

Result [Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={}), Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE", metadata={}), Document(page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body parts from house mites (microscopic creatures\nfound in all houses)\n• house dust• mold spores• cigarette smoke• solvents• cleaners\nCommon food allergens include the following:\n• 

In [15]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know , don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else .
Helpful answer:
"""

In [16]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context","question"])
chain_type_kwargs= {"prompt": PROMPT}

In [17]:
llm=CTransformers(model="Model\llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [18]:
qa=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k':2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [19]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response: ", result["result"])

Response:  For long-lasting sex, it is important to ensure that you are in a comfortable and relaxed state. This can involve taking steps such as practicing deep breathing exercises, communicating with your partner about what feels good and what doesn't, and taking breaks when needed. Additionally, making sure that you are physically comfortable, such as by adjusting positions or adding padding, can also help to extend the duration of sex.
Don't worry too much about the length of time during sex, as this is not necessarily an indicator of sexual satisfaction. Instead, focus on enjoying the experience and connecting with your partner.
Response:  If your friend is experiencing symptoms such as frequent urination or difficulty with urination due to a condition that affects the penis, it is important that they consult with a medical professional for proper diagnosis and treatment. While some alternative medicine practitioners may offer remedies for small penises, these remedies are not sci

KeyboardInterrupt: Interrupted by user