In [56]:
#!pip install langchain sentence-transformers chromadb

In [2]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings  # embeddings will not downloaded, it will directly run on cloud base.
from langchain.vectorstores import Chroma

from langchain.llms import HuggingFaceHub   # llm model
from langchain.chains import RetrievalQA

In [4]:
import os
from getpass import getpass

In [5]:
HF_token = getpass()

··········


In [6]:
# creating environ for huggingface_api
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_token

In [57]:
URL = "https://www.innomatics.in/"

In [58]:
data = WebBaseLoader(URL)

In [59]:
content = data.load()

In [60]:
print(content)

[Document(page_content=' Best Data Science & Big Data EdTech Company Hyderabad, India      \n   \n\n\n¬†+91 9951666670¬†|  ¬†For Inquiry:¬† +91 9347804375¬†|¬†+91 6304126766\nHomeCoursesData ScienceIBM Certified Data scienceOnline Data SciencePre-Placement ProgramData Science Summer InternshipFull Stack DevelopmentDigital MarketingAmazon Web Services (AWS)EventsFree Offline Data Science BootcampInternshipFree Data Science InternshipData Science Summer InternshipAbout usAbout usPlacementsContact usBlogReviewsGalleryRefer a Friend\n  HomeCoursesData ScienceIBM Certified Data scienceOnline Data SciencePre-Placement ProgramData Science Summer InternshipFull Stack DevelopmentDigital MarketingAmazon Web Services (AWS)EventsFree Offline Data Science BootcampInternshipFree Data Science InternshipData Science Summer InternshipAbout usAbout usPlacementsContact usBlogReviewsGalleryRefer a Friend\n\nPay Now\n\n\uf8ffüìû 9951666670¬†+91 9951666670¬†|  ¬†For Inquiry:¬† +91 9347804375¬†|¬†+91 6304126

# Chunking --> Text_splitter

In [61]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=5)

In [62]:
chunking = text_splitter.split_documents(content)

In [63]:
len(chunking)

81

In [64]:
chunking

[Document(page_content='Best Data Science & Big Data EdTech Company Hyderabad, India', metadata={'source': 'https://www.innomatics.in/', 'title': 'Best Data Science & Big Data EdTech Company Hyderabad, India', 'description': 'Recognized & Rewarded by Times Of India as the Best Training Institute for the Data Science & Digital Marketing. Training Partner with IBM.', 'language': 'en-US'}),
 Document(page_content='¬†+91 9951666670¬†|  ¬†For Inquiry:¬† +91 9347804375¬†|¬†+91 6304126766', metadata={'source': 'https://www.innomatics.in/', 'title': 'Best Data Science & Big Data EdTech Company Hyderabad, India', 'description': 'Recognized & Rewarded by Times Of India as the Best Training Institute for the Data Science & Digital Marketing. Training Partner with IBM.', 'language': 'en-US'}),
 Document(page_content='HomeCoursesData ScienceIBM Certified Data scienceOnline Data SciencePre-Placement ProgramData Science Summer InternshipFull Stack DevelopmentDigital MarketingAmazon Web Services (AWS)

In [65]:
chunking[0]

Document(page_content='Best Data Science & Big Data EdTech Company Hyderabad, India', metadata={'source': 'https://www.innomatics.in/', 'title': 'Best Data Science & Big Data EdTech Company Hyderabad, India', 'description': 'Recognized & Rewarded by Times Of India as the Best Training Institute for the Data Science & Digital Marketing. Training Partner with IBM.', 'language': 'en-US'})

# HuggingFace --> Embeddings model

In [66]:
embeddings = HuggingFaceInferenceAPIEmbeddings(api_key = HF_token,
                                                model_name = "BAAI/bge-base-en-v1.5")

In [97]:
embeddings

HuggingFaceInferenceAPIEmbeddings(api_key=SecretStr('**********'), model_name='BAAI/bge-base-en-v1.5', api_url=None)

In [67]:
# storing embeddings vectors
vectore_store = Chroma.from_documents(chunking, embeddings)  # Chroma.from_documents --> responsible for processing documents or text data.

In [98]:
vectore_store

<langchain_community.vectorstores.chroma.Chroma at 0x7efca33b5e40>

# huggingface --> retrieving our model (HuggingFaceH4/zephyr-7b-alpha), which is fine-tuned model of mistral model.

In [68]:
model = HuggingFaceHub(repo_id = "HuggingFaceH4/zephyr-7b-alpha",
                       model_kwargs = {"temperature":0.5, "max_new_tokens":512, "max_length":64})

# step1 : Retriever

In [99]:
retriever = vectore_store.as_retriever(search_type = "mmr", search_kwarg={"k":5})

In [72]:
query = "which courses are providing by innomatics"

In [73]:
doc_rel = retriever.get_relevant_documents(query)
doc_rel

[Document(page_content='A leading training institute, Innomatics Research Labs offers courses in areas such as IBM Certified Data Science, Python, AI, Data Analytics, Full Stack Development, and Digital Marketing. Our practical training is designed meticulously to meet industry', metadata={'description': 'Recognized & Rewarded by Times Of India as the Best Training Institute for the Data Science & Digital Marketing. Training Partner with IBM.', 'language': 'en-US', 'source': 'https://www.innomatics.in/', 'title': 'Best Data Science & Big Data EdTech Company Hyderabad, India'}),
 Document(page_content='Read MoreDigital MarketingDigital Marketing Course at Innomatics is perfect for Entrepreneurs, Freelancers, Job seekers, Wanna be Entrepreneurs, working professionals who want to enhance their skill.', metadata={'description': 'Recognized & Rewarded by Times Of India as the Best Training Institute for the Data Science & Digital Marketing. Training Partner with IBM.', 'language': 'en-US', 

In [75]:
#retriever1 = vectore_store.as_retriever(search_type="mmr", search_kwarg={"k":6})

## step2 : Prompt Augmentation (system_prompt)

In [89]:
query = "which courses are providing by innomatics"

In [90]:
prompt = f"""
<|system|>
you are an AI assistant that follows instructions extremely well.
please be truthful and give direct answers.
</s>

<|user|>
{query}
</s>
<|assistant|>
"""

# step3 : Generation

In [100]:
qa = RetrievalQA.from_chain_type(llm=model, retriever=retriever, chain_type = "stuff")

In [92]:
response = qa(prompt)

In [93]:
response

{'query': '\n<|system|>\nyou are an AI assistant that follows instructions extremely well.\nplease be truthful and give direct answers.\n</s>\n\n<|user|>\nwhich courses are providing by innomatics\n</s>\n<|assistant|>\n',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nAmar student of the innomatics research labs .. I love the innomatics research lab because the mentors and trainers help me a lot in any query in the course. If anyone looking for data science in the market, I can say innomatics is one of the best\n\n+91-9951666670¬†/¬†+91-6304126766¬†¬†/¬†+91-9347804375ÓÅ∂For Course:¬†[email\xa0protected]¬© 2023 Innomatics Research Labs Pvt Ltd, All rights reserved.Enroll Here for the CourseEnquiry form\n\nRead MoreAdvantages of training at InnomaticsFlexible Online Live and Classroom Training100% Internship guarantee programDedicated Placement verticalMaster

In [94]:
print(response["result"])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Amar student of the innomatics research labs .. I love the innomatics research lab because the mentors and trainers help me a lot in any query in the course. If anyone looking for data science in the market, I can say innomatics is one of the best

+91-9951666670¬†/¬†+91-6304126766¬†¬†/¬†+91-9347804375ÓÅ∂For Course:¬†[email protected]¬© 2023 Innomatics Research Labs Pvt Ltd, All rights reserved.Enroll Here for the CourseEnquiry form

Read MoreAdvantages of training at InnomaticsFlexible Online Live and Classroom Training100% Internship guarantee programDedicated Placement verticalMastery level certifications that are globally acceptedBackup Classes & Lifetime LMS accessCompletely

DevRel @AI PlanetðŸ¥‘ ||  AI with Tarun- YouTubeðŸ¤—||  GDE in MLðŸš€ ||  GSoC'23 caMicroscope
Know more















About Me

Question: 
<|sys