In [1]:
# !pip install -qqq langchain --progress-bar off
# !pip install -qqq langchain-community --progress-bar off
# !pip install -qqq sentence-transformers --progress-bar off
# !pip install -qqq pinecone-client --progress-bar off
# !pip install -qqq langchain_pinecone --progress-bar off
# !pip install -Uqqq bitsandbytes --progress-bar off

In [2]:
# !pip install python-dotenv
# !pip install ctransformers

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
import torch
from dotenv import load_dotenv
import os
from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


In [4]:
with open("Harry_Potter_all_books_preprocessed.txt") as f:
    data = f.read()

In [5]:
data[:1000]

'THE BOY WHO LIVED Mr and Mrs Dursley of number four Privet Drive were proud to say that they were perfectly normal thank you very much .They were the last people youd expect to be involved in anything strange or mysterious because they just didnt hold with such nonsense .Mr Dursley was the director of a firm called Grunnings which made drills .He was a big beefy man with hardly any neck although he did have a very large mustache .Mrs Dursley was thin and blonde and had nearly twice the usual amount of neck which came in very useful as she spent so much of her time craning over garden fences spying on the neighbors .The Dursley s had a small son called Dudley and in their opinion there was no finer boy anywhere .The Dursleys had everything they wanted but they also had a secret and their greatest fear was that somebody would discover it .They didnt think they could bear it if anyone found out about the Potters .Mrs Potter was Mrs Dursleys sister but they hadnt met for several years in 

In [6]:
type(data)

str

In [7]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 200, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [8]:
data = [Document(page_content=data)]

In [9]:
text_chunks = text_split(data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 33284


In [10]:
print(len(text_chunks[4].page_content))

194


In [11]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [12]:
embeddings = download_hugging_face_embeddings()

  warn_deprecated(


In [13]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [14]:
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

In [15]:
#Initializing the Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)

In [16]:
pc.list_indexes().names()

['medical-chatbot',
 'harry-potter-chatbot',
 'hybrid-search-langchain-pinecone',
 'medical-bot',
 'harrypotter-chatbot']

In [17]:
index_name = "harry-potter-chatbot"

In [18]:
# Create the index
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384, 
        metric='cosine',  
        spec=ServerlessSpec(cloud='aws', region="us-east-1")
    )

In [19]:
index=pc.Index(index_name)
index

<pinecone.data.index.Index at 0x28f5555b160>

In [20]:
os.environ['PINECONE_API_KEY'] = "4d449dd9-3d9a-418c-9857-1547e946d530"

In [21]:
#Creating Embeddings for Each of The Text Chunks & storing in pinecone
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

# vectorstore.add_texts([t.page_content for t in text_chunks])

In [22]:
query= "What are the key events that lead to Harry Potter discovering he is a wizard?"

In [23]:
context = vectorstore.similarity_search(query,k=3)

In [24]:
context

[Document(page_content='Potter had become famous .It had been enough of a shock for Harry to discover on his eleventh birthday that he was a wizard it had been even more disconcerting to find out that everyone in the hidden'),
 Document(page_content='of his years at Hogwarts .You will remember how excited he was to hear that he was a wizard that he refused my company on a trip to Diagon Alley and that I in turn warned him against continued'),
 Document(page_content='you think you can explain all this in a letter ?These people will never understand him !Hell be famous a legend I wouldnt be surprised if today was known as Harry Potter Day in the future there will')]

In [25]:
new_context=""
for i in range(3):
    new_context+=context[i].page_content

In [26]:
new_context

'Potter had become famous .It had been enough of a shock for Harry to discover on his eleventh birthday that he was a wizard it had been even more disconcerting to find out that everyone in the hiddenof his years at Hogwarts .You will remember how excited he was to hear that he was a wizard that he refused my company on a trip to Diagon Alley and that I in turn warned him against continuedyou think you can explain all this in a letter ?These people will never understand him !Hell be famous a legend I wouldnt be surprised if today was known as Harry Potter Day in the future there will'

In [27]:
print(context[0].page_content)

Potter had become famous .It had been enough of a shock for Harry to discover on his eleventh birthday that he was a wizard it had been even more disconcerting to find out that everyone in the hidden


In [30]:
from transformers import AutoTokenizer

# Initialize the tokenizer for your model
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")


query_tokens = tokenizer.tokenize(query)
context_tokens = tokenizer.tokenize(new_context)
prompt = tokenizer.tokenize(prompt_template)

print(f"Query tokens: {len(query_tokens)}")
print(f"Context tokens: {len(context_tokens)}")
print(f"Total tokens: {len(query_tokens) + len(context_tokens)+len(prompt)}")


Query tokens: 19
Context tokens: 137
Total tokens: 232


In [99]:
prompt_template="""
Use the following pieces of information to answer the user's question in a funny or sarcasm way , some spicy way.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [100]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [101]:
huggingface_token = os.getenv("HF_TOKEN")

In [102]:
# Load the LLM
llm = CTransformers(
    model="llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={
        'max_new_tokens': 200,
        'temperature': 0.8
    }
)

In [103]:
qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": PROMPT},
        return_source_documents=False)

In [104]:
response = qa_chain(query)

In [105]:
print(response['result'])

Woah, you're asking me to reveal the biggest secrets of the magical world! *covers mouth* I can't satisfy your request, I'm just an AI, I don't have access to that kind of information. *winks* But I can tell you that Harry Potter is a very special wizard who discovered his powers on his eleventh birthday. *giggles* It's not every day someone finds out they're a wizard! 
