In [None]:
!pip install langchain openai faiss-cpu tiktoken

In [6]:
from dotenv import load_dotenv
import os

In [7]:
load_dotenv()
HF_TOKEN = os.getenv("Huggingface_api")

# Indexing

### Loading the documents

In [9]:
from langchain.document_loaders import WebBaseLoader

yolo_nas_loader = WebBaseLoader("https://deci.ai/blog/yolo-nas-object-detection-foundation-model/").load()
decicoder_loader = WebBaseLoader("https://deci.ai/blog/decicoder-efficient-and-accurate-code-generation-llm/").load()
yolo_newsletter_loader = WebBaseLoader("https://deeplearningdaily.substack.com/p/unleashing-the-power-of-yolo-nas").load()

In [10]:
yolo_nas_loader

[Document(page_content='\n\n\n\n\n\nYOLO-NAS by Deci Achieves State-of-the-Art Performance on Object Detection Using Neural Architecture Search\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSkip to content\n\n\n\n\n\n\n \n \n\n\n\n\n\n\n\nPlatform\n\nCOMPUTER VISION\nModels\nCustom Models\nTrain\nGENERATIVE AI\nModels\nDeployment\n\n\nTechnology\nSolutions\n\nUSE CASES\nOptimize Generative AI Models\nRun on Edge Devices\nReduce Cloud Cost\nShorten Development Time\nMaximize Data Center Utilization\nINDUSTRIES\nAutomotive\nSmart Retail\nPublic Sector\nSmart Manufacturing\nVideo Analytics\n\n\nPricing\nResources\n\nResource Center\nBlog\nGlossary\nModel Zoo\nThe Deep Learning Podcast\nNeural Architecture Search 101\nQuantization Aware Training 101\nDeci University\nCV Dataset Profiling & Analysis\nEfficient CV Model Training\nDL Inference Acceleration\nDownload Guide\n\n\nCommunity\nCompany\n\nAbout Us\nCareers\nPartners\nNewsroom\nContact Us\n\

### Chunking the documents

In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 25,
    length_function = len
)


In [12]:
yolo_nas_chunks = text_splitter.transform_documents(yolo_nas_loader)
decicoder_chunks = text_splitter.transform_documents(decicoder_loader)
yolo_newsletter_chunks = text_splitter.transform_documents(yolo_newsletter_loader)

### create an index

In [14]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore

In [16]:
store = LocalFileStore("./cachce/")

# create an embedder
core_embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

embedder = CacheBackedEmbeddings.from_bytes_store(
    core_embeddings_model,
    store,
    namespace = "Huggingface_embeddings"
)

In [17]:
# store embeddings in vector store
vectorstore = FAISS.from_documents(yolo_nas_chunks, embedder)

vectorstore.add_documents(decicoder_chunks)

vectorstore.add_documents(yolo_newsletter_chunks)

# instantiate a retriever
retriever = vectorstore.as_retriever()

# Retrieval System

In [18]:
#from langchain.llms.openai import OpenAIChat
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.callbacks import StdOutCallbackHandler

In [19]:
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
llm = HuggingFaceHub(repo_id=repo_id, huggingfacehub_api_token=HF_TOKEN)

  warn_deprecated(


In [20]:
handler =  StdOutCallbackHandler()

In [21]:
# this is the entire retrieval system
qa_with_sources_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    callbacks=[handler],
    return_source_documents=True
)

# Augment System

In [22]:
# This is the entire augment system!
response = qa_with_sources_chain({"query":"What does Neural Architecture Search have to do with how Deci creates its models?"})

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [25]:
response

{'query': 'What does Neural Architecture Search have to do with how Deci creates its models?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nDeci’s suite of Large Language Models and text-to-Image models, with DeciCoder leading the charge, is spearheading the movement to address this gap.DeciCoder’s efficiency is evident when compared to other top-tier models. Owing to its innovative architecture, DeciCoder surpasses models like SantaCoder in both accuracy and speed. The innovative elements of DeciCoder’s architecture were generated using Deci’s proprietary Neural Architecture Search technology, AutoNAC™.\xa0\nAnother Win for AutoNAC\n\nto previous SOTA on COCOSuperGradients star historyDesigning the BlueprintInspired by the success of modern YOLO architectures, our team set out to create a new quantization-friendly architecture - and it all started with N

In [26]:
print(response['result'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Deci’s suite of Large Language Models and text-to-Image models, with DeciCoder leading the charge, is spearheading the movement to address this gap.DeciCoder’s efficiency is evident when compared to other top-tier models. Owing to its innovative architecture, DeciCoder surpasses models like SantaCoder in both accuracy and speed. The innovative elements of DeciCoder’s architecture were generated using Deci’s proprietary Neural Architecture Search technology, AutoNAC™. 
Another Win for AutoNAC

to previous SOTA on COCOSuperGradients star historyDesigning the BlueprintInspired by the success of modern YOLO architectures, our team set out to create a new quantization-friendly architecture - and it all started with Neural Architecture Search (NAS).NAS:Everything You Need to KnowThe first thing you need to do when performing Neura

In [27]:
print(response['source_documents'])

[Document(page_content='Deci’s suite of Large Language Models and text-to-Image models, with DeciCoder leading the charge, is spearheading the movement to address this gap.DeciCoder’s efficiency is evident when compared to other top-tier models. Owing to its innovative architecture, DeciCoder surpasses models like SantaCoder in both accuracy and speed. The innovative elements of DeciCoder’s architecture were generated using Deci’s proprietary Neural Architecture Search technology, AutoNAC™.\xa0\nAnother Win for AutoNAC', metadata={'source': 'https://deci.ai/blog/decicoder-efficient-and-accurate-code-generation-llm/', 'title': 'Introducing DeciCoder: The New Gold Standard in Efficient and Accurate Code Generation', 'description': 'Today, we introduce DeciCoder, our 1B-parameter open-source Large Language Model for code generation, equipped with a 2048-context window.', 'language': 'en-US'}), Document(page_content='to previous SOTA on COCOSuperGradients star historyDesigning the Blueprin

In [28]:
response = qa_with_sources_chain({"query":"What is DeciCoder"})



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [29]:
response

{'query': 'What is DeciCoder',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nTo learn more about DeciCoder, check out the model on Hugging Face.\n\nReady for Commercial Applications: Beyond just experimentation and personal projects, Deci’s permissive licensing means you can confidently deploy DeciCoder in commercial applications. Whether you’re looking to enhance your product, offer new services, or simply leverage the model for business growth, DeciCoder is ready to be your partner in innovation.\n\nSo, what drives DeciCoder’s impressive throughput? A combination of architectural efficiency and optimized implementation. Notably, DeciCoder is significantly more memory efficient, allowing it to manage larger batch sizes. This memory efficiency means that Deci’s throughput reaches its maximum when its batch size is at 128, whereas SantaCoder capped at 32. W

In [33]:
print(response['result'].split(':')[-1])

 DeciCoder is a text-to-text model developed by Deci, a leading company in the field of AI research. It is a permissively licensed model, meaning it can be used in commercial applications without any major legal concerns. DeciCoder is known for its impressive throughput, which is achieved through architectural efficiency and optimized implementation. It is significantly more memory efficient than other models, allowing it to manage larger batch sizes and process more data at once.


In [34]:
response = qa_with_sources_chain({"query":"How many version are there in YOLO"})



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [36]:
print(response['result'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

The first version of YOLO was introduced in 2016 and changed how object detection was performed by treating object detection as a single regression problem. It divided images into a grid and simultaneously predicted bounding boxes and class probabilities. Though it was faster than previous object detection methods, YOLOv1 had limitations in detecting small objects and struggled with localization accuracy. Since the first YOLO architecture hit the scene, several YOLO-based architectures have

hesitate to reach out with any questions or feedback. The Team Behind YOLO-NASThe success of YOLO-NAS can be attributed to the hard work, dedication, and brilliance of the following individuals:Research: Amos Gropp, Ido Shahaf, Ran El-Yaniv, Akhiad BercovichEngineering: Ofri Masad, Shay Aharon, Eugene Khvedchenia, Louis Dupont, Kate Yurk

In [38]:
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
    huggingfacehub_api_token = HF_TOKEN,
    model_kwargs={
        "max_new_tokens": 512,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.1,
    },
)

In [39]:
from langchain_community.chat_models.huggingface import ChatHuggingFace
chat_model = ChatHuggingFace(llm=llm)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [42]:
# this is the entire retrieval system
qa_with_sources_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    callbacks=[handler],
    return_source_documents=True
)

In [43]:
response = qa_with_sources_chain({"query":"What does Neural Architecture Search have to do with how Deci creates its models?"})



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [44]:
response

{'query': 'What does Neural Architecture Search have to do with how Deci creates its models?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nDeci’s suite of Large Language Models and text-to-Image models, with DeciCoder leading the charge, is spearheading the movement to address this gap.DeciCoder’s efficiency is evident when compared to other top-tier models. Owing to its innovative architecture, DeciCoder surpasses models like SantaCoder in both accuracy and speed. The innovative elements of DeciCoder’s architecture were generated using Deci’s proprietary Neural Architecture Search technology, AutoNAC™.\xa0\nAnother Win for AutoNAC\n\nto previous SOTA on COCOSuperGradients star historyDesigning the BlueprintInspired by the success of modern YOLO architectures, our team set out to create a new quantization-friendly architecture - and it all started with N

In [46]:
print(response['result'].split(':')[-1])

 Neural Architecture Search (NAS) is a technique used by Deci to automatically discover the optimal neural network architecture for its models, such as DeciCoder, using its proprietary AutoNAC algorithm. This approach allows Deci to create more efficient and accurate models than manually designing them.
