In [1]:
import langchain

In [2]:
print(langchain.__version__)

0.3.12


In [6]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [7]:
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [8]:
from langchain_groq import ChatGroq

In [10]:
llm = ChatGroq(model_name = "llama-3.2-90b-vision-preview")

In [11]:
llm_response = llm.invoke("Tell me a joke")

In [12]:
llm_response

AIMessage(content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?" \n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 55, 'prompt_tokens': 39, 'total_tokens': 94, 'completion_time': 0.205055146, 'prompt_time': 0.004730418, 'queue_time': 0.005557032, 'total_time': 0.209785564}, 'model_name': 'llama-3.2-90b-vision-preview', 'system_fingerprint': 'fp_9c2a937c92', 'finish_reason': 'stop', 'logprobs': None}, id='run-730ac4c1-8820-4576-aaad-4edf3b5055eb-0', usage_metadata={'input_tokens': 39, 'output_tokens': 55, 'total_tokens': 94})

In [13]:
from langchain_core.output_parsers import StrOutputParser

In [14]:
output_parser = StrOutputParser()

In [15]:
output_parser.invoke(llm_response)

'A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?" \n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."'

In [16]:
chain = llm | output_parser

In [17]:
chain.invoke('tell me a joke')

'A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?" \n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."'

In [18]:
from typing import List
from pydantic import BaseModel, Field

class MobileReview(BaseModel):
    mobile_rating:str = Field(description="Name and model of the phone")
    rating:str = Field(description="Overall rating out of 5")
    pros:str = Field(description="List of positive aspect")
    cons:str = Field(description="List of negetive aspect")
    summary:str = Field(description="Brief summary of the review")


In [19]:
review_text = """
Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous,
colors pop like crazy. Camera's insane too, especially at night - my Insta game's never been
stronger. Battery life's solid, lasts me all day no problem.

Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung.
Also, still getting used to the new button layout, keep hitting Bixby by mistake.

Overall, I'd say it's a solid 4 out of 5. Great phone, but a few annoying quirks keep it from
being perfect. If you're due for an upgrade, definitely worth checking out!
"""


In [20]:
structured_llm = llm.with_structured_output(MobileReview)
output = structured_llm.invoke(review_text)
output

MobileReview(mobile_rating='Galaxy S21', rating='4', pros='gorgeous screen, insane camera, battery life', cons='pricey, no charger, button layout', summary='A solid 4 out of 5, great phone but a few annoying quirks')

In [23]:
output.pros

'gorgeous screen, insane camera, battery life'

In [24]:
from langchain_core.prompts import ChatPromptTemplate

In [26]:
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
prompt.invoke({"topic":"programing"})

ChatPromptValue(messages=[HumanMessage(content='Tell me a short joke about programing', additional_kwargs={}, response_metadata={})])

In [27]:
chain = prompt|llm|output_parser

In [28]:
chain.invoke({"topic":'programming'})

'Why do programmers prefer dark mode? \n\nBecause light attracts bugs.'

In [29]:
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

chain = prompt|llm|output_parser

In [31]:
res = chain.invoke({"topic": "programming"})

In [32]:
print(res)

Why do programmers prefer dark mode? 

Because light attracts bugs.


In [33]:
from langchain_core.messages import HumanMessage, SystemMessage

In [34]:
system_message = SystemMessage(content="You are helpful assistang that tells joks")
HumanMessage_message = HumanMessage(content="Tell me about programming")

In [36]:
result = llm.invoke([system_message, HumanMessage_message])

In [37]:
print(result)

content="Programming is like a joke. Here's one:\n\nWhy do programmers prefer dark mode?\n\nBecause light attracts bugs.\n\nBut seriously, programming is the process of writing instructions that a computer can understand and execute. It's like writing a recipe for your favorite dish, but instead of using ingredients and cooking techniques, you use code and algorithms.\n\nThere are many different programming languages, each with its own strengths and weaknesses. Some popular ones include:\n\n1. Python: Known for its simplicity and readability, Python is a great language for beginners.\n2. Java: A versatile language used for Android app development, web development, and more.\n3. JavaScript: Used for client-side scripting on the web, JavaScript is also popular for mobile and desktop app development.\n4. C++: A powerful language with a steep learning curve, C++ is used for operating systems, games, and other high-performance applications.\n\nProgrammers use various tools and techniques to

In [38]:
template = ChatPromptTemplate([
    ("system","You are a helpful assistant that tells jokes"),
    ("human", "Tell me about {topic}")
])

In [39]:
prompt_value = template.invoke(
    {
        "topic": "programming"
    }
)

In [41]:
llm.invoke(prompt_value)

AIMessage(content='Programming - it\'s a real "byte" to chew, isn\'t it?. But seriously, programming is the process of designing, writing, testing, and maintaining the source code of computer programs. It involves using programming languages like Java, Python, or C++ to instruct computers what to do.\n\nImagine you\'re writing a recipe for your favorite dish. You need to list the ingredients, the steps to follow, and the desired outcome. Programming is similar, but instead of ingredients and cooking steps, you\'re working with variables, loops, and functions to create a software program that solves a problem or performs a specific task.\n\nThere are many types of programming, including:\n\n1. **Web development**: Building websites and web applications using languages like HTML, CSS, and JavaScript.\n2. **Mobile app development**: Creating apps for Android and iOS devices using languages like Java, Swift, or Kotlin.\n3. **Game development**: Building games for PCs, consoles, or mobile d

In [43]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document

In [44]:
text_spliter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 200,
    length_function = len
)

In [46]:
docx_loader = Docx2txtLoader("D:\GenAI\Project\GenAI-RAGchatbot\docs\GreenGrow Innovations_ Company History.docx")
documents = docx_loader.load()

In [47]:
print(len(documents))

1


In [48]:
splits = text_spliter.split_documents(documents)

In [49]:
print(f"Split the documents into {len(splits)} chunks.")

Split the documents into 2 chunks.


In [50]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [73]:
def load_document(folder_path:str)-> List[Document]:
    documents = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if filename.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif filename.endswith(".docx"):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents

In [74]:
folder_path = "D:\GenAI\Project\GenAI-RAGchatbot\docs"
documents = load_document(folder_path)

In [75]:
print(f"loaded {len(documents)} documents from the folder.")

loaded 5 documents from the folder.


In [77]:
print(f"loaded {len(documents)} documents from the folder.")
splits = text_spliter.split_documents(documents)
print(f"length of chunk {len(splits)}")

loaded 5 documents from the folder.
length of chunk 8


In [78]:
document_embeddings = embeddings.embed_documents([split.page_content for split in splits])

In [80]:
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
embedding_function = SentenceTransformerEmbeddings(model_name = "all-MiniLM-L6-v2")
document_embeddings = embedding_function.embed_documents([split.page_content for split in splits])

  embedding_function = SentenceTransformerEmbeddings(model_name = "all-MiniLM-L6-v2")


In [81]:
documents[0]

Document(metadata={'source': 'D:\\GenAI\\Project\\GenAI-RAGchatbot\\docs\\Company_ GreenFields BioTech.docx'}, page_content='Company: GreenFields BioTech\n\nHeadquarters: GreenFields BioTech is headquartered in Zurich, Switzerland. Known for its groundbreaking research in sustainable agriculture and biotechnology, the company has strategically positioned itself in Zurich, a city recognized for its leadership in scientific research and innovation. This location provides GreenFields BioTech with an ideal environment to collaborate with leading academic institutions and industry experts, driving forward its mission to create eco-friendly farming solutions.')

In [82]:
from langchain_chroma import Chroma

In [104]:
collection_name = "my_collection"
vectorstore = Chroma.from_documents(collection_name=collection_name, documents=splits, embedding=embeddings, persist_directory='./chroma_db')

In [105]:
query  = "When was GreeenGrow Innovations founded?"
search_results = vectorstore.similarity_search(query, k=2)

In [106]:
search_results

[Document(metadata={'source': 'D:\\GenAI\\Project\\GenAI-RAGchatbot\\docs\\GreenGrow Innovations_ Company History.docx'}, page_content="The company's breakthrough came in 2018 with the introduction of the EcoHarvest System, an integrated solution that combined smart irrigation, soil monitoring, and automated harvesting techniques. This system caught the attention of large-scale farmers across the United States, propelling GreenGrow to national prominence.\n\n\n\nToday, GreenGrow Innovations employs over 200 people and has expanded its operations to include offices in California and Iowa. The company continues to focus on developing sustainable agricultural technologies, with ongoing projects in vertical farming, drought-resistant crop development, and AI-powered farm management systems.\n\n\n\nDespite its growth, GreenGrow remains committed to its original mission of promoting sustainable farming practices. The company regularly partners with universities and research institutions to a

In [115]:
retriever = vectorstore.as_retriever(search_kwargs = {"k":2})

In [116]:
retriever.invoke("When was GreeenGrow Innovations founded?")

[Document(metadata={'source': 'D:\\GenAI\\Project\\GenAI-RAGchatbot\\docs\\GreenGrow Innovations_ Company History.docx'}, page_content="The company's breakthrough came in 2018 with the introduction of the EcoHarvest System, an integrated solution that combined smart irrigation, soil monitoring, and automated harvesting techniques. This system caught the attention of large-scale farmers across the United States, propelling GreenGrow to national prominence.\n\n\n\nToday, GreenGrow Innovations employs over 200 people and has expanded its operations to include offices in California and Iowa. The company continues to focus on developing sustainable agricultural technologies, with ongoing projects in vertical farming, drought-resistant crop development, and AI-powered farm management systems.\n\n\n\nDespite its growth, GreenGrow remains committed to its original mission of promoting sustainable farming practices. The company regularly partners with universities and research institutions to a

In [117]:
template = """ 
Answer the question based only on the following context:
{context}

Question:{question}

Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

In [118]:
from langchain.schema.runnable import RunnablePassthrough
rag_chain = (
    {"context":retriever, "question":RunnablePassthrough()} | prompt
)

In [123]:
rag_chain.invoke("When was GreenGrow Innovations founded?")

ChatPromptValue(messages=[HumanMessage(content=" \nAnswer the question based only on the following context:\n[Document(metadata={'source': 'D:\\\\GenAI\\\\Project\\\\GenAI-RAGchatbot\\\\docs\\\\GreenGrow Innovations_ Company History.docx'}, page_content='GreenGrow Innovations was founded in 2010 by Sarah Chen and Michael Rodriguez, two agricultural engineers with a passion for sustainable farming. The company started in a small garage in Portland, Oregon, with a simple mission: to make farming more environmentally friendly and efficient.\\n\\n\\n\\nIn its early days, GreenGrow focused on developing smart irrigation systems that could significantly reduce water usage in agriculture. Their first product, the WaterWise Sensor, was launched in 2012 and quickly gained popularity among local farmers. This success allowed the company to expand its research and development efforts.\\n\\n\\n\\nBy 2015, GreenGrow had outgrown its garage origins and moved into a proper office and research facilit

In [124]:
def docs2str(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [126]:
rag_chain = (
    {"context": retriever | docs2str, "question": RunnablePassthrough()} |prompt
)

In [128]:
rag_chain = (
    {"context": retriever | docs2str, "question": RunnablePassthrough()}
    | prompt
    | llm
    |StrOutputParser()
)

In [130]:
question = "When was GreenGrow Innovations founded?"
response = rag_chain.invoke(question)
response

'2010'

In [131]:
from langchain_core.messages import HumanMessage, AIMessage
chat_history = []
chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=response)
])

In [132]:
chat_history

[HumanMessage(content='When was GreenGrow Innovations founded?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='2010', additional_kwargs={}, response_metadata={})]

In [133]:
from langchain_core.prompts import MessagesPlaceholder

In [136]:
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

In [137]:
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
       ( "human", "{input}")
    ]
)

In [138]:
contextualize_chain = contextualize_q_prompt | llm | StrOutputParser()

In [140]:
contextualize_chain.invoke({"input":"Where it is headquartered?", "chat_history": chat_history})

'Where is GreenGrow Innovations headquartered?'

In [145]:
from langchain.chains import create_history_aware_retriever
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [141]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [143]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are helpfull assistant. Use the following context to answer the user's question"),
        ("system", "Context: {context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}")

    ]
)

In [152]:
qa_chain = create_stuff_documents_chain(llm,qa_prompt, output_parser=output_parser)
rag_chain = create_retrieval_chain(history_aware_retriever, qa_chain)

In [153]:
rag_chain.invoke({"input":"Where it is headquartered?", "chat_history":chat_history})

{'input': 'Where it is headquartered?',
 'chat_history': [HumanMessage(content='When was GreenGrow Innovations founded?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='2010', additional_kwargs={}, response_metadata={})],
 'context': [Document(metadata={'source': 'D:\\GenAI\\Project\\GenAI-RAGchatbot\\docs\\GreenGrow Innovations_ Company History.docx'}, page_content="The company's breakthrough came in 2018 with the introduction of the EcoHarvest System, an integrated solution that combined smart irrigation, soil monitoring, and automated harvesting techniques. This system caught the attention of large-scale farmers across the United States, propelling GreenGrow to national prominence.\n\n\n\nToday, GreenGrow Innovations employs over 200 people and has expanded its operations to include offices in California and Iowa. The company continues to focus on developing sustainable agricultural technologies, with ongoing projects in vertical farming, drought-resistant crop de

In [154]:
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the following context to answer the user's question."),
    #  ("system", "Tell me joke on Programming"),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}")
])

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [155]:
rag_chain.invoke({"input": "Where it is headquartered?", "chat_history":chat_history})

{'input': 'Where it is headquartered?',
 'chat_history': [HumanMessage(content='When was GreenGrow Innovations founded?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='2010', additional_kwargs={}, response_metadata={})],
 'context': [Document(metadata={'source': 'D:\\GenAI\\Project\\GenAI-RAGchatbot\\docs\\GreenGrow Innovations_ Company History.docx'}, page_content="The company's breakthrough came in 2018 with the introduction of the EcoHarvest System, an integrated solution that combined smart irrigation, soil monitoring, and automated harvesting techniques. This system caught the attention of large-scale farmers across the United States, propelling GreenGrow to national prominence.\n\n\n\nToday, GreenGrow Innovations employs over 200 people and has expanded its operations to include offices in California and Iowa. The company continues to focus on developing sustainable agricultural technologies, with ongoing projects in vertical farming, drought-resistant crop de

In [156]:
import sqlite3
from datetime import datetime

DB_NAME = "rag_app.db"

def get_db_connection():
    conn = sqlite3.connect(DB_NAME)
    conn.row_factory = sqlite3.Row
    return conn

In [157]:
def create_application_logs():
    conn = get_db_connection()
    conn.execute(
        ''' 
        CREATE TABLE IF NOT EXISTS application_logs
                    (id INTEGER PRIMARY KEY AUTOINCREMENT,
                     session_id TEXT,
                     user_query TEXT,
                     gpt_response TEXT,
                     model TEXT,
                     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)
        '''
    )
    conn.close()

In [158]:
def insert_application_logs(session_id, user_query, gpt_response, model):
    conn = get_db_connection()
    conn.execute('INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)',
                 (session_id, user_query, gpt_response, model))
    conn.commit()
    conn.close()

In [159]:
def get_chat_history(session_id):
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute('SELECT user_query, gpt_response FROM application_logs WHERE session_id = ? ORDER BY created_at', (session_id,))
    messages = []
    for row in cursor.fetchall():
        messages.extend([
            {"role": "human", "content": row['user_query']},
            {"role": "ai", "content": row['gpt_response']}
        ])
    conn.close()
    return messages

In [160]:
create_application_logs()

In [161]:
import uuid
session_id = str(uuid.uuid4)
chat_history = get_chat_history(session_id=session_id)
print(chat_history)
question1 = "When was GreenGrow Innovations founded?"
answer1 = rag_chain.invoke({"input": question1, "chat_history":chat_history})['answer']
insert_application_logs(session_id, question1, answer1, "gpt-4-o-mini")
print(f"Human: {question1}")
print(f"AI: {answer1}\n")

[]
Human: When was GreenGrow Innovations founded?
AI: GreenGrow Innovations was founded in 2010 by Sarah Chen and Michael Rodriguez.



In [162]:
question2 = "Where it is headquartered?"
chat_history = get_chat_history(session_id=session_id)
print(chat_history)
answer2 = rag_chain.invoke({"input": question2, "chat_history":chat_history})['answer']
insert_application_logs(session_id, question2, answer2, "llama-3.2-90b-vision-preview")
print(f"Human: {question2}")
print(f"AI: {answer2}\n")

[{'role': 'human', 'content': 'When was GreenGrow Innovations founded?'}, {'role': 'ai', 'content': 'GreenGrow Innovations was founded in 2010 by Sarah Chen and Michael Rodriguez.'}]
Human: Where it is headquartered?
AI: The provided context does not specify the exact location of GreenGrow Innovations' headquarters. However, it mentions that the company has offices in California and Iowa.



In [163]:
session_id = str(uuid.uuid4())
question = "What is GreenGrow"
chat_history = get_chat_history(session_id)
print(chat_history)
answer = rag_chain.invoke({"input": question, "chat_history":chat_history})['answer']
insert_application_logs(session_id, question, answer, "gpt-3.5-turbo")
print(f"Human: {question}")
print(f"AI: {answer}\n")

[]
Human: What is GreenGrow
AI: GreenGrow Innovations is a company that focuses on making farming more environmentally friendly and efficient. It was founded in 2010 by two agricultural engineers, Sarah Chen and Michael Rodriguez. The company started in a small garage in Portland, Oregon, and has since grown and developed innovative products to support sustainable agriculture, such as smart irrigation systems and soil health monitoring systems.

