In [1]:
!pip install -q unstructured
!pip install -q langchain-chroma
!pip install -q langchain
!pip install -q langchain-community
!pip install -q langchain-google-genai
!pip install -q faiss-cpu

In [2]:
!pip install -q numpy==1.24.4

In [3]:
!pip install --upgrade nltk==3.9.1



In [4]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
from google.colab import userdata
GOOGLE_API_KEY= userdata.get('GOOGLE_API_KEY')

In [7]:
import os
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [8]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-1.0-pro",convert_system_message_to_human=True)

In [10]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import MessagesPlaceholder

In [11]:
URLs = [
    "https://zepanalytics.com/",
    "https://zepanalytics.com/courses",
    "https://zepanalytics.com/bundle",
    "https://zepanalytics.com/projects",
    "https://zepanalytics.com/blogs",
    "https://zepanalytics.com/virtual-internship",
    "https://zepanalytics.com/courses/python-programming-for-data-science-a-z",
    "https://zepanalytics.com/courses/microsoft-power-bi-a-complete-guide-2023-edition",
    "https://zepanalytics.com/courses/cnn-everything-about-convolution-neural-networks"

]

In [12]:
loader = UnstructuredURLLoader(urls = URLs)
data = loader.load()

In [13]:
data

[Document(metadata={'source': 'https://zepanalytics.com/'}, page_content="Get Access to Unlimited Educational Resources. Everywhere, Everytime!\n\nPremium access to more than 1,000 resources ranging from courses, bootcamps e.t.c.\n\nGet Access\n\nLets Study Data Science, AI/ML and Gen AI. Enroll Now!!\n\nThe most intense self paced program with detailed teaching and 24/7 chat support\n\nKnow More\n\nPrevious Next\n\nWhy Choose Zep Analytics\n\nOn demand courses\n\nCourses according to industry demand and standards. Both Free and Paid. Well structured in-depth courses to learn topics effectively.\n\nUnlimited Access\n\nGet lifetime access to resources and courses and learn at your own pace.\n\nImmense Facilities\n\nGet avail with immense facilities like Blogging, resume discussion, mock interview, career counselling, etc.\n\nMentorship program\n\nGet mentored by various successful professionals in the industry for better career growth.\n\nTreasure of Resources\n\nGet access to various r

In [14]:
text_splitter = RecursiveCharacterTextSplitter(separators= '\n',
                                              chunk_size=1000,
                                              chunk_overlap=200)
splits = text_splitter.split_documents(data)

In [15]:
vectorstores = Chroma.from_documents(documents=splits, embedding= gemini_embeddings)
retriever = vectorstores.as_retriever()

In [16]:
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7fab1224d9f0>)

In [17]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question "
    "If you don't know the answer, say that you don't know."
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)


In [18]:
chat_prompt = ChatPromptTemplate(
    [
        ("system", system_prompt),
        ("human", "{input}")
    ]
)

In [19]:
question_answering_chain = create_stuff_documents_chain(model, chat_prompt)

In [20]:
rag_chain= create_retrieval_chain(retriever, question_answering_chain)

In [21]:
response = rag_chain.invoke({"input": "Can you tell the cources price for data science"})

In [22]:
response["answer"]

'The price for the course "Python Programming for Data Science: A-Z" is ₹1,999.00 ₹999.00.\nThe price for the course "CNN: Everything about Convolution Neural Networks" is ₹999.00 ₹499.00.\nThe price for the course "The Complete Deep Learning Guide" is ₹1,999.00 ₹999.00.'

In [23]:
from langchain.chains import create_history_aware_retriever

In [24]:
retriever_prompt = ("Given a chat history and the latest user question which might reference context in the chat history,"
    "formulate a standalone question which can be understood without the chat history."
    "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
    )

In [25]:
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
    ("system", retriever_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    ]
)
contextualize_q_prompt

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Given a chat history and the latest user question which might reference context in the chat history,formulate a standalone question which can be understood without the chat history.Do NOT answer the question, just reformulate it if needed and otherwise return it as is.')), MessagesPlaceholder(variable_name='chat_history'), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])

In [26]:
history_aware_retriever = create_history_aware_retriever(model, retriever, contextualize_q_prompt)

In [27]:
from langchain.chains import create_retrieval_chain

In [28]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [29]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}")
    ]
)

In [30]:
question_answer_chain = create_stuff_documents_chain(model, qa_prompt)

In [31]:
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [32]:
from langchain_core.messages import HumanMessage, AIMessage

In [33]:
chat_history= []

In [34]:
question1 = "Why should i choose Zep Analytics?"

In [35]:
message1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})

In [36]:
message1["answer"]

'I apologize, but the provided context does not contain the answer to your question about why you should choose Zep Analytics.'

In [37]:
chat_history.extend(
    [
        HumanMessage(content= question1),
        AIMessage(content=message1["answer"])
    ]
)

In [38]:
chat_history

[HumanMessage(content='Why should i choose Zep Analytics?'),
 AIMessage(content='I apologize, but the provided context does not contain the answer to your question about why you should choose Zep Analytics.')]

In [39]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [40]:
store = {}

In [41]:
def get_session_history(session_id: str)-> BaseChatMessageHistory:
  if session_id not in store:
    store[session_id]= ChatMessageHistory()
  return store[session_id]

In [42]:
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [43]:
conversational_rag_chain.invoke(
    {"input": "Why should i choose Zep Analytics?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'I cannot answer this question because the provided context does not give information on why Zep Analytics should be chosen.'

In [44]:
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='Why should i choose Zep Analytics?'), AIMessage(content='I cannot answer this question because the provided context does not give information on why Zep Analytics should be chosen.')])}

In [45]:
conversational_rag_chain.invoke(
    {"input": "Can give the summary of reviews from the students"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'Students have given positive reviews for Zep Analytics, praising the course structure, clear explanations, and real-life examples used in the data analytics concepts. They also appreciate the affordable course fee and the support provided by Satyajit Sir throughout the learning journey. Overall, students recommend the course to anyone looking to transition into the data science field.'

In [46]:
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='Why should i choose Zep Analytics?'), AIMessage(content='I cannot answer this question because the provided context does not give information on why Zep Analytics should be chosen.'), HumanMessage(content='Can give the summary of reviews from the students'), AIMessage(content='Students have given positive reviews for Zep Analytics, praising the course structure, clear explanations, and real-life examples used in the data analytics concepts. They also appreciate the affordable course fee and the support provided by Satyajit Sir throughout the learning journey. Overall, students recommend the course to anyone looking to transition into the data science field.')])}

In [47]:
for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: Why should i choose Zep Analytics?

AI: I cannot answer this question because the provided context does not give information on why Zep Analytics should be chosen.

User: Can give the summary of reviews from the students

AI: Students have given positive reviews for Zep Analytics, praising the course structure, clear explanations, and real-life examples used in the data analytics concepts. They also appreciate the affordable course fee and the support provided by Satyajit Sir throughout the learning journey. Overall, students recommend the course to anyone looking to transition into the data science field.



In [48]:
conversational_rag_chain.invoke(
    {"input": "give me the details for Python Programming for Data Science?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'**Python Programming for Data Science**\n\n**Course Overview:**\n\n* Learn the fundamentals of Python programming from scratch\n* Explore essential data science libraries like Pandas, NumPy, Matplotlib, and Seaborn\n* Master data analysis, visualization, and machine learning algorithms\n* Gain hands-on experience through practical exercises and code notebooks\n\n**Course Content:**\n\n* Introduction to Python\n* Data Structures and Algorithms\n* Data Manipulation with Pandas\n* Data Analysis with NumPy\n* Data Visualization with Matplotlib and Seaborn\n* Machine Learning with Python\n* Object-Oriented Programming\n\n**Benefits:**\n\n* **Comprehensive:** Covers all essential Python concepts and data science techniques\n* **Hands-On:** Provides practical exercises and code notebooks for better understanding\n* **Beginner-Friendly:** Suitable for individuals with no prior programming experience\n* **Affordable:** Offered at a reasonable cost\n* **Instructor Support:** Get support from ex

**Python Programming for Data Science**

**Course Overview:**

* Learn the fundamentals of Python programming from scratch
* Explore essential data science libraries like Pandas, NumPy, Matplotlib, and Seaborn
* Master data analysis, visualization, and machine learning algorithms
* Gain hands-on experience through practical exercises and code notebooks

**Course Content:**

* Introduction to Python
* Data Structures and Algorithms
* Data Manipulation with Pandas
* Data Analysis with NumPy
* Data Visualization with Matplotlib and Seaborn
* Machine Learning with Python
* Object-Oriented Programming

**Benefits:**

* **Comprehensive:** Covers all essential Python concepts and data science techniques
* **Hands-On:** Provides practical exercises and code notebooks for better understanding
* **Beginner-Friendly:** Suitable for individuals with no prior programming experience
* **Affordable:** Offered at a reasonable cost
* **Instructor Support:** Get support from experienced instructor Satyajit Pattnaik throughout the learning journey

**Target Audience:**

* Individuals aspiring to enter the field of data science
* Students pursuing data science or analytics programs
* Professionals looking to enhance their data analysis skills
* Anyone interested in learning Python for data science

**Course Duration:**

* 6 hours of video content

**Course Fee:**

* ₹999.00 (discounted from ₹1,999.00)

In [49]:
conversational_rag_chain.invoke(
    {"input": "give me the course content?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'**Python Programming for Data Science Course Content:**\n\n**Module 1: Introduction to Python**\n\n* Python basics and setup\n* Data types and operators\n* Control flow and functions\n\n**Module 2: Data Structures and Algorithms**\n\n* Lists, tuples, sets, and dictionaries\n* Data manipulation techniques\n* Algorithms for sorting and searching\n\n**Module 3: Data Manipulation with Pandas**\n\n* Introduction to Pandas\n* DataFrames and Series\n* Data cleaning and transformation\n\n**Module 4: Data Analysis with NumPy**\n\n* Introduction to NumPy\n* Numerical operations and functions\n* Statistical analysis\n\n**Module 5: Data Visualization with Matplotlib and Seaborn**\n\n* Introduction to Matplotlib and Seaborn\n* Creating charts and graphs\n* Customizing visualizations\n\n**Module 6: Machine Learning with Python**\n\n* Introduction to machine learning\n* Supervised and unsupervised learning\n* Model evaluation and selection\n\n**Module 7: Object-Oriented Programming**\n\n* Object-ori

**Python Programming for Data Science Course Content:**

**Module 1: Introduction to Python**

* Python basics and setup
* Data types and operators
* Control flow and functions

**Module 2: Data Structures and Algorithms**

* Lists, tuples, sets, and dictionaries
* Data manipulation techniques
* Algorithms for sorting and searching

**Module 3: Data Manipulation with Pandas**

* Introduction to Pandas
* DataFrames and Series
* Data cleaning and transformation

**Module 4: Data Analysis with NumPy**

* Introduction to NumPy
* Numerical operations and functions
* Statistical analysis

**Module 5: Data Visualization with Matplotlib and Seaborn**

* Introduction to Matplotlib and Seaborn
* Creating charts and graphs
* Customizing visualizations

**Module 6: Machine Learning with Python**

* Introduction to machine learning
* Supervised and unsupervised learning
* Model evaluation and selection

**Module 7: Object-Oriented Programming**

* Object-oriented concepts
* Classes and objects
* Inheritance and polymorphism

**Bonus Module:**

* Hands-on projects and case studies