In [None]:
import os
from urllib.request import urlretrieve
import numpy as np
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Splitter configuration: chunks of at most ~700 characters, with 50
# characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)

# Load the source documents from the local ./docs/ directory.
loader = PyPDFDirectoryLoader("./docs/")
docs_before_split = loader.load()

# Break the loaded documents into chunks for embedding.
docs_after_split = text_splitter.split_documents(docs_before_split)

# Show the first chunk as a quick sanity check.
print(docs_after_split[0])

In [None]:
def avg_doc_length(docs):
    """Return the average `page_content` length of `docs`, floored to an int.

    Args:
        docs: A sized collection of objects exposing a `page_content` string.

    Returns:
        The integer (floor) mean number of characters per document, or 0 when
        `docs` is empty (avoids a ZeroDivisionError if nothing was loaded).
    """
    if not docs:
        return 0
    return sum(len(doc.page_content) for doc in docs) // len(docs)


avg_char_before_split = avg_doc_length(docs_before_split)
avg_char_after_split = avg_doc_length(docs_after_split)

print(f'Before split, there were {len(docs_before_split)} documents loaded, with average characters equal to {avg_char_before_split}.')
print(f'After split, there were {len(docs_after_split)} documents (chunks), with average characters equal to {avg_char_after_split} (average chunk length).')

Before split, there were 5620 documents loaded, with average characters equal to 2429.
After split, there were 23865 documents (chunks), with average characters equal to 580 (average chunk length).


In [None]:
# Embedding model used for both indexing and querying.
# "sentence-transformers/all-MiniLM-l6-v2" is a lighter, faster alternative.
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

# Embed the first chunk to inspect the resulting vector and its size.
first_chunk_text = docs_after_split[0].page_content
sample_embedding = np.array(huggingface_embeddings.embed_query(first_chunk_text))
print("Sample embedding of a document chunk: ", sample_embedding)
print("Size of the embedding: ", sample_embedding.shape)

Sample embedding of a document chunk:  [-0.07436386 -0.01998872 -0.00487509 -0.04411514 -0.00495798  0.01608374
 -0.02976514 -0.0292733   0.0136712  -0.02410323 -0.00374751  0.04361073
  0.04644122 -0.05092742 -0.01117372 -0.03702448  0.0080249  -0.02212526
  0.03765038  0.01640296  0.05425163 -0.01962815 -0.00200765 -0.02028733
 -0.02688534  0.03403157 -0.02158841 -0.04208039 -0.01581992 -0.17553467
 -0.00700382  0.02311602  0.1219614  -0.0125589  -0.01177902 -0.01615924
 -0.03740779  0.04749591 -0.05492797 -0.03859182  0.00445785  0.02535718
  0.05207978 -0.02799923  0.03141743 -0.00261718 -0.02025448 -0.02933352
  0.07278661 -0.02766144 -0.03099785  0.03073107  0.00741677  0.03022622
 -0.00773252  0.03148766  0.00364994  0.06883534  0.03887975 -0.03845931
  0.01809014 -0.04526175 -0.14382744  0.13326585  0.02325645  0.07026108
 -0.02684466 -0.06027509  0.01477881 -0.00771915 -0.03375783  0.01078334
 -0.02800822 -0.02175796 -0.03199112 -0.04069415  0.03008674 -0.01559065
  0.05108069

In [None]:
# Retrieval system for the vector embeddings.
# FAISS (Facebook AI Similarity Search) is a library for fast similarity
# search over embeddings; here it indexes every chunk using the embedding
# model configured above.
vectorstore = FAISS.from_documents(
    documents=docs_after_split,
    embedding=huggingface_embeddings,
)

In [None]:
# Ad-hoc similarity search against the index to sanity-check retrieval.
query = "Tell me something about computer architecture"
relevant_documents = vectorstore.similarity_search(query)
print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
# Guard the index access so an empty result set does not raise IndexError.
if relevant_documents:
    print(relevant_documents[0].page_content)

There are 4 documents retrieved which are relevant to the query. Display the first one:

Vol. 3A 3-1CHAPTER 3
PROTECTED-MODE MEMORY MANAGEMENT
This chapter describes the Intel 64 and IA-32 architec ture’s protected-mode memory management facilities, 
including the physical memory requirements, se gmentation mechanism, and paging mechanism.
See also: Chapter 5, “Protection‚” (for a description of  the processor’s protection mechanism) and Chapter 21, 
“8086 Emulation‚” (for a description of memory addressi ng protection in real-address and virtual-8086 modes).
3.1 MEMORY MANAGEMENT OVERVIEW
The memory management facilities of the IA-32 architectu re are divided into two parts: segmentation and paging.


In [None]:
# Expose the index as a retriever: plain similarity search, returning the
# 3 most relevant chunks per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [None]:
from langchain_community.llms import HuggingFaceHub

# SECURITY: a Hugging Face access token used to be pasted here as a comment.
# It has been removed; treat that token as compromised and revoke it.
# No token is passed explicitly below, so the credential must come from the
# environment (presumably HUGGINGFACEHUB_API_TOKEN - confirm against the
# HuggingFaceHub documentation).

# Hosted Mistral-7B model with a low sampling temperature.
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature": 0.1, "max_length": 500},
)

query = "what is add instruction"
hf.invoke(query)

In [8]:
from getpass import getpass

from langchain_community.llms import HuggingFaceHub
from huggingface_hub import HfApi

# SECURITY: never hard-code the API token in the notebook. The token that was
# previously committed here must be treated as compromised and revoked.
# Read it from the environment, falling back to an interactive prompt so the
# secret is never stored in the notebook source or its outputs.
api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass(
    "Hugging Face API token: "
)

# Hosted Mistral-7B model with a low sampling temperature; the token is
# passed explicitly rather than relying on implicit lookup.
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature": 0.1, "max_length": 500},
    huggingfacehub_api_token=api_token,
)

query = "give example of adding two values by storing in register"
result = hf.invoke(query)
print(result)


give example of adding two values by storing in register and then adding them

```

mov r1, 10
mov r2, 20
add r1, r2
```

give example of adding two values by storing in register and then adding them

```

mov r1, 10
mov r2, 20
add r1, r2
```

give example of adding two values by storing in register and then
