<a href="https://colab.research.google.com/github/DenisVasil/LLM_examples/blob/main/LangChain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chat Models (čata modeļi)

In [1]:
!pip install langchain_google_genai

Collecting langchain_google_genai
  Downloading langchain_google_genai-2.0.9-py3-none-any.whl.metadata (3.6 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading langchain_google_genai-2.0.9-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Installing collected packages: filetype, langchain_google_genai
Successfully installed filetype-1.2.0 langchain_google_genai-2.0.9


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

from google.colab import userdata
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)


result = model.invoke("What is 81 divided by 9")

print("Full result:")
print(result)
print("Context:")
print(result.content)

Full result:
content='81 divided by 9 is 9.' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-175648f4-fe16-49a2-86ea-d6516608aca8-0' usage_metadata={'input_tokens': 10, 'output_tokens': 11, 'total_tokens': 21, 'input_token_details': {'cache_read': 0}}
Context:
81 divided by 9 is 9.


In [None]:
# Basic Conversation

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from google.colab import userdata

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)

messages = [
    SystemMessage(content="Solve the following math problem"),
    HumanMessage(content="What is 81 divided by 9?")
]

result = model.invoke(messages)

print(f"Answer from AI: {result.content}")

messages = [
    SystemMessage(content="Solve the following math problem"),
    HumanMessage(content="What is 81 divided by 9?"),
    AIMessage(content=f"{result.content}"),
    HumanMessage(content="Divide this result by 3")
]

result = model.invoke(messages)
print(f"New answer from AI: {result.content}")


Answer from AI: 81 divided by 9 is 9.
New answer from AI: 9 divided by 3 is 3.


In [None]:
from langchain.memory.chat_memory import InMemoryChatMessageHistory

history = InMemoryChatMessageHistory()
history.add_message({"role": "user", "content": "Hello"})
history.add_message({"role": "assistant", "content": "Hi there!"})

print(history.messages)

[{'role': 'user', 'content': 'Hello'}, {'role': 'assistant', 'content': 'Hi there!'}]


In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, AIMessage # Import necessary classes

memory = ConversationBufferMemory()
memory.chat_memory.add_message(HumanMessage(content="Hello")) # Use HumanMessage
memory.chat_memory.add_message(AIMessage(content="Hi there!")) # Use AIMessage

print(memory.load_memory_variables({}))

{'history': 'Human: Hello\nAI: Hi there!'}


  memory = ConversationBufferMemory()


In [None]:
# chat
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from google.colab import userdata

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)


chat_history = []

system_message = SystemMessage(content="You are helpful AI assistant")
chat_history.append(system_message)

# chat loop
while True:
    query = input("You: ")
    if query.lower() == "exit":
        break
    chat_history.append(HumanMessage(content=query))

    result = model.invoke(chat_history)
    response = result.content
    chat_history.append(AIMessage(content=response))
    print(f"AI response: {response}")

print("----- Message history ------")
print(chat_history)

You: How many birds are there in Latvia?
AI response: It's impossible to give an exact number of birds in Latvia at any given moment.  Bird populations are dynamic and constantly changing due to:

* **Migration:**  Latvia is on a major migration route, so the number of birds present fluctuates dramatically throughout the year.  Millions of birds pass through during spring and autumn.
* **Breeding Cycles:** Bird numbers increase during breeding seasons as chicks hatch and fledge.
* **Food Availability:** Bird populations are affected by the availability of food resources, which can vary seasonally and annually.
* **Predation and other environmental factors:** Weather conditions, disease, and predator populations all impact bird numbers.

While a precise count isn't feasible, ornithologists and conservation organizations in Latvia monitor bird populations through various surveys and studies.  These provide estimates of population sizes for different species and help track long-term trend

In [None]:
!pip install duckdb



In [None]:
# Saving Chat History
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from google.colab import userdata

from langchain.memory.chat_memory import InMemoryChatMessageHistory
from langchain.schema import HumanMessage, AIMessage
import duckdb
import os

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)


# Constants
DB_PATH = "chat_history.db"
TABLE_NAME = "chat_history"
SESSION_ID = "user_session_new"  # This could be a username or unique ID

# Initialize DuckDB
con = duckdb.connect(DB_PATH)
con.execute(f"""
CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
    session_id TEXT,
    role TEXT,
    content TEXT
)
""")

# Helper Functions


def save_message_to_db(session_id, role, content):
    """Save a single message to the DuckDB database."""
    con.execute(f"""
    INSERT INTO {TABLE_NAME} (session_id, role, content)
    VALUES (?, ?, ?)
    """, [session_id, role, content])


def load_messages_from_db(session_id):
    """Load chat history for a specific session from the DuckDB database."""
    result = con.execute(f"""
    SELECT role, content FROM {TABLE_NAME} WHERE session_id = ? ORDER BY rowid
    """, [session_id]).fetchall()
    return [HumanMessage(content=row[1]) if row[0] == "user" else AIMessage(content=row[1]) for row in result]


# Initialize Chat Message History
print("Initializing DuckDB Chat Message History...")
messages = load_messages_from_db(SESSION_ID)
chat_history = InMemoryChatMessageHistory(messages=messages)

print("Chat History Initialized.")
print("Current Chat History:", [msg.content for msg in chat_history.messages])


print("Start chatting with the AI. Type 'exit' to quit.")

while True:
    human_input = input("User: ")
    if human_input.lower() == "exit":
        break

    # Save and process user message
    chat_history.add_user_message(human_input)
    save_message_to_db(SESSION_ID, "user", human_input)

    # Generate AI response
    ai_response = model.invoke(chat_history.messages)
    chat_history.add_ai_message(ai_response.content)
    save_message_to_db(SESSION_ID, "ai", ai_response.content)

    print(f"AI: {ai_response.content}")

# Close DuckDB connection when done
con.close()

In [None]:
!pip install -U langgraph

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from google.colab import userdata
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)

# Define a new graph
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    response = model.invoke(state["messages"])
    return {"messages": response}


# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)
config = {"configurable": {"thread_id": "abc123"}}

query = "Hi! I'm Bob."

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()  # output contains all messages in state


Hi Bob! It's nice to meet you. How can I help you today?


In [None]:
app = workflow.compile(checkpointer=memory)
config = {"configurable": {"thread_id": "abc123"}}

query = "What's my name?"

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


You told me your name is Bob.


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from google.colab import userdata
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)

# Define a new graph
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    response = model.invoke(state["messages"])
    return {"messages": response}


# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)
config = {"configurable": {"thread_id": "abc123"}}

while True:
    query = input("You: ")
    if query.lower() == "exit":
        break

    input_messages = [HumanMessage(query)]
    output = app.invoke({"messages": input_messages}, config)
    output["messages"][-1].pretty_print()  # output contains all messages in state


You: My name is Bob

Okay, Bob. How can I help you today?
You: What is my name?

Your name is Bob.
You: exit


# Prompt Templates (Veidnes Uzvednēm)

In [None]:
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template("Tell me a joke about {topic}")

prompt_template.invoke({"topic": "cats"})

StringPromptValue(text='Tell me a joke about cats')

In [None]:
from langchain_core.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate([
    ("system", "You are a helpful assistant"),
    ("user", "Tell me a joke about {topic}")
])

prompt_template.invoke({"topic": "cats"})
# prompt = prompt_template.invoke({"topic": "cats"})
# result = model.invoke(prompt)
# print(result.content)

ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant', additional_kwargs={}, response_metadata={}), HumanMessage(content='Tell me a joke about cats', additional_kwargs={}, response_metadata={})])

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage

prompt_template = ChatPromptTemplate([
    ("system", "You are a helpful assistant"),
    MessagesPlaceholder("msgs")
])

prompt_template.invoke({"msgs": [HumanMessage(content="hi!"), HumanMessage(content="How are you?")]})

ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant', additional_kwargs={}, response_metadata={}), HumanMessage(content='hi!', additional_kwargs={}, response_metadata={}), HumanMessage(content='How are you?', additional_kwargs={}, response_metadata={})])

In [None]:
# AIMessage, HumanMessage, SystemMessage

from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)


template = "Tell me a joke about {topic}"
prompt_template = ChatPromptTemplate.from_template(template)

print("------Prompt from template------")
prompt = prompt_template.invoke({"topic": "cats"})
print(prompt)

result = model.invoke(prompt)
print("\n------Result-------")
print(result.content)


------Prompt from template------
messages=[HumanMessage(content='Tell me a joke about cats', additional_kwargs={}, response_metadata={})]

------Result-------
Why was the cat sitting on the computer?  To keep an eye on the mouse!



In [None]:
# Create a ChatPromptTemplate using a template string
from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

template = "Tell me a joke about {topic}."
prompt_template = ChatPromptTemplate.from_template(template)

print("-----Prompt from Template-----")
prompt = prompt_template.invoke({"topic": "cats"})
print(prompt)

-----Prompt from Template-----
messages=[HumanMessage(content='Tell me a joke about cats.', additional_kwargs={}, response_metadata={})]


In [None]:
# Prompt with Multiple Placeholders

from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

template_multiple = """You are a helpful assistant.
Human: Tell me a {adjective} story about a {animal}.
Assistant:"""
prompt_multiple = ChatPromptTemplate.from_template(template_multiple)
prompt = prompt_multiple.invoke({"adjective": "funny", "animal": "panda"})
print("\n----- Prompt with Multiple Placeholders -----\n")
print(prompt)


----- Prompt with Multiple Placeholders -----

messages=[HumanMessage(content='You are a helpful assistant.\nHuman: Tell me a funny story about a panda.\nAssistant:', additional_kwargs={}, response_metadata={})]


In [None]:
# Prompt with System and Human Messages (Using Tuples)
from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

messages = [
    ("system", "You are a comedian who tells jokes about {topic}."),
    ("human", "Tell me {joke_count} jokes."),
]
prompt_template = ChatPromptTemplate.from_messages(messages)
prompt = prompt_template.invoke({"topic": "lawyers", "joke_count": 3})
print("\n----- Prompt with System and Human Messages (Tuple) -----\n")
print(prompt)


----- Prompt with System and Human Messages (Tuple) -----

messages=[SystemMessage(content='You are a comedian who tells jokes about lawyers.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Tell me 3 jokes.', additional_kwargs={}, response_metadata={})]


# Chains (Ķēdes)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

from google.colab import userdata
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)

prompt = ChatPromptTemplate.from_messages(
    [("user", "Tell me a {adjective} joke")],
)

chain = prompt | model | StrOutputParser()

chain.invoke({"adjective": "funny"})

"Why don't scientists trust atoms? \n\nBecause they make up everything!\n"

In [None]:
# Basic Chain

from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_core.messages import HumanMessage

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)

prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a comedian who tells jokes about {topic}."),
        ("human", "Tell me {joke_count} jokes."),
    ]
)
# StrOutputParser() ~ .content()
# LangChain expression language
chain = prompt_template | model | StrOutputParser()

result = chain.invoke({"topic": "fish", "joke_count": 3})

print(result)


1.  Why did the fish blush?  Because it saw the sea-bed!


2. What do you call a fish with no eyes?  Fsh!


3.  I went to a seafood disco last week...  It was legen-dairy!  (…and totally cod-awful, to be honest.  The DJ was a real sole-mate, though.)



In [None]:
# Extended Chain

from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda
from langchain_core.messages import HumanMessage

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)

prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a comedian who tells jokes about {topic}."),
        ("human", "Tell me {joke_count} jokes."),
    ]
)

uppercase_output = RunnableLambda(lambda x: x.upper())
count_words = RunnableLambda(lambda x: f"Word count {len(x.split())} \n {x}")

chain = prompt_template | model | StrOutputParser() | uppercase_output | count_words

result = chain.invoke({"topic": "fish", "joke_count": 3})

print(result)


Word count 43 
 1. WHY DID THE FISH BLUSH?  BECAUSE IT SAW THE OCEAN BOTTOM!

2. WHAT DO YOU CALL A FISH WITH NO EYES?  FSH!

3. I WENT TO A SEAFOOD DISCO LAST WEEK...  IT WAS LEGEN-DAIRY!  (…AND THERE WAS A REALLY GOOD COD SELECTION).



In [None]:
# Parallel
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)


# Define prompt template
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an expert product reviewer."),
        ("human", "List the main features of the product {product_name}."),
    ]
)


# Define pros analysis step
def analyze_pros(features):
    pros_template = ChatPromptTemplate.from_messages(
        [
            ("system", "You are an expert product reviewer."),
            (
                "human",
                "Given these features: {features}, list the pros of these features.",
            ),
        ]
    )
    return pros_template.format_prompt(features=features)


# Define cons analysis step
def analyze_cons(features):
    cons_template = ChatPromptTemplate.from_messages(
        [
            ("system", "You are an expert product reviewer."),
            (
                "human",
                "Given these features: {features}, list the cons of these features.",
            ),
        ]
    )
    return cons_template.format_prompt(features=features)


# Combine pros and cons into a final review
def combine_pros_cons(pros, cons):
    return f"Pros:\n{pros}\n\nCons:\n{cons}"


# Simplify branches with LCEL
pros_branch_chain = (
    RunnableLambda(lambda x: analyze_pros(x)) | model | StrOutputParser()
)

cons_branch_chain = (
    RunnableLambda(lambda x: analyze_cons(x)) | model | StrOutputParser()
)

# Create the combined chain using LangChain Expression Language (LCEL)
chain = (
    prompt_template
    | model
    | StrOutputParser()
    | RunnableParallel(branches={"pros": pros_branch_chain, "cons": cons_branch_chain})
    | RunnableLambda(lambda x: combine_pros_cons(x["branches"]["pros"], x["branches"]["cons"]))
)

# Run the chain
result = chain.invoke({"product_name": "MacBook Pro"})

# Output
print(result)


In [None]:
# Branching
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableBranch
from langchain_google_genai import ChatGoogleGenerativeAI

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)

# Define prompt templates for different feedback types
positive_feedback_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human",
         "Generate a thank you note for this positive feedback: {feedback}."),
    ]
)

negative_feedback_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human",
         "Generate a response addressing this negative feedback: {feedback}."),
    ]
)

neutral_feedback_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        (
            "human",
            "Generate a request for more details for this neutral feedback: {feedback}.",
        ),
    ]
)

escalate_feedback_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        (
            "human",
            "Generate a message to escalate this feedback to a human agent: {feedback}.",
        ),
    ]
)

# Define the feedback classification template
classification_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human",
         "Classify the sentiment of this feedback as positive, negative, neutral, or escalate: {feedback}."),
    ]
)

# Define the runnable branches for handling feedback
branches = RunnableBranch(
    (
        lambda x: "positive" in x,
        positive_feedback_template | model | StrOutputParser()  # Positive feedback chain
    ),
    (
        lambda x: "negative" in x,
        negative_feedback_template | model | StrOutputParser()  # Negative feedback chain
    ),
    (
        lambda x: "neutral" in x,
        neutral_feedback_template | model | StrOutputParser()  # Neutral feedback chain
    ),
    escalate_feedback_template | model | StrOutputParser()
)

# Create the classification chain
classification_chain = classification_template | model | StrOutputParser()

# Combine classification and response generation into one chain
chain = classification_chain | branches

# Run the chain with an example review
# Good review - "The product is excellent. I really enjoyed using it and found it very helpful."
# Bad review - "The product is terrible. It broke after just one use and the quality is very poor."
# Neutral review - "The product is okay. It works as expected but nothing exceptional."
# Default - "I'm not sure about the product yet. Can you tell me more about its features and benefits?"

review = "The product is okay. It works as expected but nothing exceptional."
result = chain.invoke({"feedback": review})

# Output the result
print(result)

# Document Loaders (Dokumentu Ielādētāji)

In [2]:
!pip install langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.17-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.34 (from langchain-community)
  Downloading langchain_core-0.3.34-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.18 (from langchain-community)
  Downloading langchain-0.3.18-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

file_path = "/content/diabetes.csv"

loader = CSVLoader(file_path=file_path)
data = loader.load()

for record in data[:2]:
    #print(type(record))
    print(record)

page_content='Pregnancies: 6
Glucose: 148
BloodPressure: 72
SkinThickness: 35
Insulin: 0
BMI: 33.6
DiabetesPedigreeFunction: 0.627
Age: 50
Outcome: 1' metadata={'source': '/content/diabetes.csv', 'row': 0}
page_content='Pregnancies: 1
Glucose: 85
BloodPressure: 66
SkinThickness: 29
Insulin: 0
BMI: 26.6
DiabetesPedigreeFunction: 0.351
Age: 31
Outcome: 0' metadata={'source': '/content/diabetes.csv', 'row': 1}


In [None]:
!pip install -qU pypdf

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/298.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m153.6/298.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("/content/invoice_1001329.pdf")
pages = []

async for page in loader.alazy_load():
    pages.append(page)
print(f"{pages[0].metadata}\n")
print(pages[0].page_content)

{'source': '/content/invoice_1001329.pdf', 'page': 0}

ExcelCult
India
Bill To
Ms.Santoshi
Mumbai, India
Santoshvarma0988@gmail.com
9999999999
 
  
Invoice no.
1001329
Date
5/4/2023
Description
Quantity
Unit price
Amount
Total
$2,200.00
Office Chair
2
$1,100.00
$2,200.00
System Generated
 


# Agents (Aģenti)

In [None]:
!pip install -qU duckduckgo-search langchain-community

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/2.5 MB[0m [31m20.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m40.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m48.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.agents import initialize_agent, Tool, AgentType
from langchain.agents import AgentExecutor
from langchain_community.tools import DuckDuckGoSearchRun


GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)

# Define the tool: DuckDuckGo search results
ddg_search = DuckDuckGoSearchRun()

# Set up the agent with the search tool
tools = [Tool(
    name="DuckDuckGoSearchRun",
    func=ddg_search.run,
    description="Searches the web using DuckDuckGo."
)]

# Define a simple prompt template for asking a question
prompt_template = "Search the web for information about {query}"

# Initialize the agent with a basic agent type
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Use the agent to interact and fetch search results
def get_search_results_using_agent(query: str):
    query_formatted = prompt_template.format(query=query)
    response = agent.run(query_formatted)
    return response

# Example usage
query = "latest news on AI advancements"
search_results = get_search_results_using_agent(query)
print(search_results)


In [None]:
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.agents import initialize_agent, Tool, AgentType
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer


GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)


from sklearn.datasets import load_iris

def load_data(query=None):  # Adding query parameter to make it compatible
    # Load Iris dataset from sklearn
    iris = load_iris()
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df['species'] = iris.target
    return df

# Function to clean data (simple imputation for missing values)
def clean_data(df: pd.DataFrame, query=None):
    imputer = SimpleImputer(strategy="mean")
    df_clean = df.copy()
    df_clean[:] = imputer.fit_transform(df)
    return df_clean

# Function to train a model (Random Forest)
def train_model(df: pd.DataFrame, target_column: str, query=None):
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a Random Forest model
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predict and evaluate
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Set up tools for the agent
tools = [
    Tool(
        name="Load Data",
        func=load_data,
        description="Loads the Iris dataset."
    ),
    Tool(
        name="Clean Data",
        func=clean_data,
        description="Cleans the dataset by handling missing values."
    ),
    Tool(
        name="Train Model",
        func=train_model,
        description="Trains a Random Forest model and returns the accuracy."
    )
]

# Initialize the agent with tools and LLM
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Automating a data science task
def automate_data_science_task():
    # Step 1: Load data
    print("Loading data...")
    df = load_data()

    # Step 2: Clean data
    print("Cleaning data...")
    df_clean = clean_data(df)

    # Step 3: Train model
    print("Training model...")
    accuracy = train_model(df_clean, target_column="species")

    return accuracy

# Run the agent
accuracy = automate_data_science_task()
print(f"Model Accuracy: {accuracy}")

In [None]:
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool

from langchain_google_genai import ChatGoogleGenerativeAI

from google.colab import userdata
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)


# Create a tool
@tool
def super_function(input: int) -> int:
    """Applies a magic function to an input."""
    return input + 2


tools = [super_function]


query = "what is the value of super_function(3)?"

langgraph_agent_executor = create_react_agent(model, tools)


messages = langgraph_agent_executor.invoke({"messages": [("human", query)]})
{
    "input": query,
    "output": messages["messages"][-1].content,
}

{'input': 'what is the value of super_function(3)?',
 'output': 'The value of `super_function(3)` is `{"output": 5}`.'}

# Tools (Rīki)

In [None]:
from langchain_core.tools import tool


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b


# Let's inspect some of the attributes associated with the tool.
print(multiply.name)
print(multiply.description)
print(multiply.args)

multiply
Multiply two numbers.
{'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}


In [None]:
!pip install langchain-experimental

In [None]:
from langchain_core.tools import Tool
from langchain_experimental.utilities import PythonREPL

python_repl = PythonREPL()
python_repl.run("print(1+1)")


'2\n'

# Invoice extractor (Rēķinu izvilkšanas lietotne)

In [3]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.2.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.2.0-py3-none-any.whl (298 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/298.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m153.6/298.7 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.7/298.7 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.2.0


In [4]:
from pypdf import PdfReader
pdf_doc = "/content/invoice_1001329.pdf"
pdf_reader = PdfReader(pdf_doc)
text = ""
for page in pdf_reader.pages:
    text += page.extract_text()
print(text)

ExcelCult
India
Bill To
Ms.Santoshi
Mumbai, India
Santoshvarma0988@gmail.com
9999999999
 
  
Invoice no.
1001329
Date
5/4/2023
Description
Quantity
Unit price
Amount
Total
$2,200.00
Office Chair
2
$1,100.00
$2,200.00
System Generated
 


In [6]:
import pandas as pd
import re
from pydantic import BaseModel, ValidationError
from pypdf import PdfReader
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata
from langchain.schema import HumanMessage

# Set up your Google API key
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("Google API key not found in userdata.")

# Initialize the LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.7,
)

# Define a Pydantic model for invoice data validation
class InvoiceData(BaseModel):
    Invoice_no: str
    Description: str
    Quantity: str
    Date: str
    Unit_price: str
    Amount: float
    Total: float
    Email: str
    Phone_number: str
    Address: str

# PDF file path
pdf_path = "/content/invoice_1001329.pdf"

# Extract text from PDF
print(f"Processing {pdf_path}...")
text = ""
pdf_reader = PdfReader(pdf_path)
for page in pdf_reader.pages:
    text += page.extract_text()

# Clean and prepare the text
text = text.replace("\n", " ").strip()

# Construct the prompt
prompt = f"""Extract all the following values: invoice no., Description, Quantity, date,
Unit price, Amount, Total, email, phone number, and address from this data:

{text}

Expected output (JSON format):
{{
    "Invoice_no": "1001329",
    "Description": "Office Chair",
    "Quantity": "2",
    "Date": "5/4/2023",
    "Unit_price": "1100.00",
    "Amount": 2200.00,
    "Total": 2200.00,
    "Email": "Santoshvarma0988@gmail.com",
    "Phone_number": "9999999999",
    "Address": "Mumbai, India"
}}
"""

# Debug: Print prompt to verify it
print("Prompt sent to LLM:\n", prompt)

# Initialize a DataFrame
df = pd.DataFrame(columns=[
    'Invoice_no', 'Description', 'Quantity', 'Date', 'Unit_price',
    'Amount', 'Total', 'Email', 'Phone_number', 'Address'
])

# Generate response from LLM
try:
    llm_response = llm.invoke([HumanMessage(content=prompt)])  # Use `invoke()` instead of calling the model directly
    llm_content = llm_response.content  # Access response content

    # Clean response: Remove triple backticks and JSON keyword
    cleaned_text = re.sub(r"```json\s*|\s*```", "", llm_content).strip()

    # Validate and parse JSON response using Pydantic
    invoice_data = InvoiceData.model_validate_json(cleaned_text)
    print("Validated Data:\n", invoice_data)

    # Convert Pydantic model to dict and append to DataFrame
    new_data = pd.DataFrame([invoice_data.model_dump()])

    # Append new data safely
    if df.empty:
        df = new_data
    else:
        df = pd.concat([df, new_data], ignore_index=True)

except ValidationError as e:
    print("Validation Error:\n", e)
except Exception as e:
    print(f"Error generating LLM response: {e}")

# Save to CSV
csv_filename = 'invoices_summary.csv'
df.to_csv(csv_filename, index=False)
print(f"CSV file created as '{csv_filename}'.")


Processing /content/invoice_1001329.pdf...
Prompt sent to LLM:
 Extract all the following values: invoice no., Description, Quantity, date,
Unit price, Amount, Total, email, phone number, and address from this data:

ExcelCult India Bill To Ms.Santoshi Mumbai, India Santoshvarma0988@gmail.com 9999999999      Invoice no. 1001329 Date 5/4/2023 Description Quantity Unit price Amount Total $2,200.00 Office Chair 2 $1,100.00 $2,200.00 System Generated

Expected output (JSON format):
{
    "Invoice_no": "1001329",
    "Description": "Office Chair",
    "Quantity": "2",
    "Date": "5/4/2023",
    "Unit_price": "1100.00",
    "Amount": 2200.00,
    "Total": 2200.00,
    "Email": "Santoshvarma0988@gmail.com",
    "Phone_number": "9999999999",
    "Address": "Mumbai, India"
}

Validated Data:
 Invoice_no='1001329' Description='Office Chair' Quantity='2' Date='5/4/2023' Unit_price='1100.00' Amount=2200.0 Total=2200.0 Email='Santoshvarma0988@gmail.com' Phone_number='9999999999' Address='Mumbai, I

In [7]:
import pandas as pd
import re
from pydantic import BaseModel, ValidationError
from pypdf import PdfReader
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata
from langchain.schema import HumanMessage

# Set up your Google API key
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("Google API key not found in userdata.")

# Initialize the LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.7,
)

# Define a Pydantic model for invoice data validation
class InvoiceData(BaseModel):
    Invoice_no: str
    Description: str
    Quantity: str
    Date: str
    Unit_price: str
    Amount: float
    Total: float
    Email: str
    Phone_number: str
    Address: str

# List of PDF file paths
pdf_files = [
    "/content/invoice_1001329.pdf",
    "/content/invoice_2001321.pdf",
    "/content/invoice_3452334.pdf",
    # Add more file paths as needed
]

# Initialize an empty DataFrame
df = pd.DataFrame(columns=[
    'Invoice_no', 'Description', 'Quantity', 'Date', 'Unit_price',
    'Amount', 'Total', 'Email', 'Phone_number', 'Address'
])

# Loop through PDF files
for pdf_path in pdf_files:
    try:
        print(f"Processing {pdf_path}...")

        # Extract text from PDF
        text = ""
        pdf_reader = PdfReader(pdf_path)
        for page in pdf_reader.pages:
            text += page.extract_text()

        # Clean and prepare the text
        text = text.replace("\n", " ").strip()

        # Construct the prompt
        prompt = f"""Extract all the following values: invoice no., Description, Quantity, Date,
        Unit price, Amount, Total, Email, Phone number, and Address from this data:

        {text}

        Expected output (JSON format):
        {{
            "Invoice_no": "1001329",
            "Description": "Office Chair",
            "Quantity": "2",
            "Date": "5/4/2023",
            "Unit_price": "1100.00",
            "Amount": 2200.00,
            "Total": 2200.00,
            "Email": "Santoshvarma0988@gmail.com",
            "Phone_number": "9999999999",
            "Address": "Mumbai, India"
        }}
        """

        # Send prompt to LLM
        llm_response = llm.invoke([HumanMessage(content=prompt)])
        llm_content = llm_response.content

        # Clean LLM response (removing markdown formatting)
        cleaned_text = re.sub(r"```json\s*|\s*```", "", llm_content).strip()

        # Validate and parse JSON response using Pydantic
        invoice_data = InvoiceData.model_validate_json(cleaned_text)
        print("Validated Data:\n", invoice_data)

        # Convert Pydantic model to dict and append to DataFrame
        new_data = pd.DataFrame([invoice_data.model_dump()])

        # Append data safely
        if df.empty:
            df = new_data
        else:
            df = pd.concat([df, new_data], ignore_index=True)

    except ValidationError as e:
        print(f"Validation Error in {pdf_path}:\n", e)
    except Exception as e:
        print(f"Error processing {pdf_path}: {e}")

# Save to CSV
csv_filename = 'multi_invoice_summary.csv'
df.to_csv(csv_filename, index=False)
print(f"CSV file created as '{csv_filename}'.")


Processing /content/invoice_1001329.pdf...
Validated Data:
 Invoice_no='1001329' Description='Office Chair' Quantity='2' Date='5/4/2023' Unit_price='1100.00' Amount=2200.0 Total=2200.0 Email='Santoshvarma0988@gmail.com' Phone_number='9999999999' Address='Mumbai, India'
Processing /content/invoice_2001321.pdf...
Validation Error in /content/invoice_2001321.pdf:
 2 validation errors for InvoiceData
Amount
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='$500.00', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/float_parsing
Total
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='$500.00', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/float_parsing
Processing /content/invoice_3452334.pdf...
Validation Error in /content/invoice_3452334.pdf:
 1 validation error for InvoiceData
  Invalid JSON: expected value at 