<a href="https://colab.research.google.com/github/DenisVasil/LLM_examples/blob/main/LangChain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chat Models (čata modeļi)

In [None]:
!pip install langchain_google_genai

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

from google.colab import userdata
# Read the Gemini API key through Colab's userdata helper
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)

# Send one plain-string prompt to the model
result = model.invoke("What is 81 divided by 9")

# First show the whole object returned by invoke(), then only its text
print("Full result:")
print(result)
# Label corrected: the value printed below is result.content
print("Content:")
print(result.content)

In [None]:
# Basic Conversation

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from google.colab import userdata

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)

# First turn: a system instruction followed by the user's question
messages = [
    SystemMessage(content="Solve the following math problem"),
    HumanMessage(content="What is 81 divided by 9?"),
]
result = model.invoke(messages)
print(f"Answer from AI: {result.content}")

# Second turn: replay the first exchange, append the model's own answer,
# then ask a follow-up that refers back to it
messages = [
    *messages,
    AIMessage(content=f"{result.content}"),
    HumanMessage(content="Divide this result by 3"),
]
result = model.invoke(messages)
print(f"New answer from AI: {result.content}")


In [None]:
from langchain.memory.chat_memory import InMemoryChatMessageHistory

# Build an in-memory history and record one exchange.
# add_user_message / add_ai_message are used instead of passing raw dicts to
# add_message, so the history holds message objects rather than plain dicts.
history = InMemoryChatMessageHistory()
history.add_user_message("Hello")
history.add_ai_message("Hi there!")

print(history.messages)

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, AIMessage # Import necessary classes

# Seed a ConversationBufferMemory with one human/AI exchange
memory = ConversationBufferMemory()
for seeded_message in (HumanMessage(content="Hello"), AIMessage(content="Hi there!")):
    memory.chat_memory.add_message(seeded_message)

# Show what the memory returns for an empty input dict
print(memory.load_memory_variables({}))

In [None]:
# chat
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from google.colab import userdata

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)

# Running transcript; the system message is always its first entry
chat_history = []

system_message = SystemMessage(content="You are helpful AI assistant")
chat_history.append(system_message)

# chat loop: every turn sends the whole transcript, so the model sees earlier turns
while True:
    # Trim surrounding whitespace so input such as "exit " still ends the session
    query = input("You: ").strip()
    if query.lower() == "exit":
        break
    if not query:
        # Blank line: nothing to send, so prompt again instead of calling the model
        continue
    chat_history.append(HumanMessage(content=query))

    result = model.invoke(chat_history)
    response = result.content
    chat_history.append(AIMessage(content=response))
    print(f"AI response: {response}")

print("----- Message history ------")
print(chat_history)

In [None]:
!pip install duckdb

In [None]:
# Saving Chat History
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from google.colab import userdata

from langchain.memory.chat_memory import InMemoryChatMessageHistory
from langchain.schema import HumanMessage, AIMessage
import duckdb
import os

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)

# Constants
DB_PATH = "chat_history.db"
TABLE_NAME = "chat_history"
SESSION_ID = "user_session_new"  # This could be a username or unique ID

# Open the DuckDB file and make sure the history table exists
con = duckdb.connect(DB_PATH)
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
    session_id TEXT,
    role TEXT,
    content TEXT
)
"""
con.execute(create_table_sql)

# Helper Functions


def save_message_to_db(session_id, role, content):
    """Insert one (session_id, role, content) row into the chat-history table.

    Values are bound as query parameters; only the table name is interpolated.
    """
    insert_sql = f"""
    INSERT INTO {TABLE_NAME} (session_id, role, content)
    VALUES (?, ?, ?)
    """
    con.execute(insert_sql, [session_id, role, content])


def load_messages_from_db(session_id):
    """Return the stored messages of *session_id* as message objects, ordered by rowid.

    Rows whose role is "user" become HumanMessage; every other role becomes AIMessage.
    """
    select_sql = f"""
    SELECT role, content FROM {TABLE_NAME} WHERE session_id = ? ORDER BY rowid
    """
    stored_rows = con.execute(select_sql, [session_id]).fetchall()

    restored = []
    for role, content in stored_rows:
        message_cls = HumanMessage if role == "user" else AIMessage
        restored.append(message_cls(content=content))
    return restored


# Initialize Chat Message History from whatever this session stored earlier
print("Initializing DuckDB Chat Message History...")
messages = load_messages_from_db(SESSION_ID)
chat_history = InMemoryChatMessageHistory(messages=messages)

print("Chat History Initialized.")
print("Current Chat History:", [msg.content for msg in chat_history.messages])


print("Start chatting with the AI. Type 'exit' to quit.")

# try/finally guarantees the DuckDB connection is closed even when the loop is
# interrupted (e.g. the cell is stopped) or a model/database call raises.
try:
    while True:
        human_input = input("User: ")
        if human_input.lower() == "exit":
            break

        # Record the user turn both in memory and in the database
        chat_history.add_user_message(human_input)
        save_message_to_db(SESSION_ID, "user", human_input)

        # Generate the AI response from the full history, then record it too
        ai_response = model.invoke(chat_history.messages)
        chat_history.add_ai_message(ai_response.content)
        save_message_to_db(SESSION_ID, "ai", ai_response.content)

        print(f"AI: {ai_response.content}")
finally:
    # Close DuckDB connection when done
    con.close()

In [None]:
!pip install -U langgraph

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from google.colab import userdata
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)

# Start a new graph whose state follows the MessagesState schema
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    """Invoke the chat model on the state's messages and return its reply under "messages"."""
    conversation = state["messages"]
    reply = model.invoke(conversation)
    return {"messages": reply}


# Wire the graph: START leads to the single "model" node
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Compile with an in-memory checkpointer and pick a thread id for this conversation
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)
config = {"configurable": {"thread_id": "abc123"}}

query = "Hi! I'm Bob."
input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)

# output contains all messages in state; print only the newest one
newest_message = output["messages"][-1]
newest_message.pretty_print()

In [None]:
# Recompile with the same checkpointer and reuse the same thread id as the previous cell
app = workflow.compile(checkpointer=memory)
config = {"configurable": {"thread_id": "abc123"}}

query = "What's my name?"
input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)

# Print only the newest message of the returned state
newest_message = output["messages"][-1]
newest_message.pretty_print()

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from google.colab import userdata
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)

# Start a new graph whose state follows the MessagesState schema
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    """Invoke the chat model on the state's messages and return its reply under "messages"."""
    conversation = state["messages"]
    reply = model.invoke(conversation)
    return {"messages": reply}


# Wire the graph: START leads to the single "model" node
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Compile with an in-memory checkpointer and pick a thread id for this conversation
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)
config = {"configurable": {"thread_id": "abc123"}}

# Keep reading user input until the user types "exit" (any letter case)
while (query := input("You: ")).lower() != "exit":
    input_messages = [HumanMessage(query)]
    output = app.invoke({"messages": input_messages}, config)
    # output contains all messages in state; print only the newest one
    newest_message = output["messages"][-1]
    newest_message.pretty_print()


# Prompt Templates (Veidnes Uzvednēm)

In [None]:
from langchain_core.prompts import PromptTemplate

# Build a string prompt template; {topic} is filled in when the template is invoked
prompt_template = PromptTemplate.from_template("Tell me a joke about {topic}")

# Bare expression: the notebook displays the value returned by invoke()
prompt_template.invoke({"topic": "cats"})

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Chat template made of (role, text) pairs; {topic} is filled in at invoke time
prompt_template = ChatPromptTemplate([
    ("system", "You are a helpful assistant"),
    ("user", "Tell me a joke about {topic}")
])

# Bare expression: the notebook displays the formatted prompt value
prompt_template.invoke({"topic": "cats"})
# Optional follow-up (needs a `model` defined in another cell): send the prompt and print the reply
# prompt = prompt_template.invoke({"topic": "cats"})
# result = model.invoke(prompt)
# print(result.content)

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage

# The placeholder named "msgs" is filled with the list of messages passed at invoke time
prompt_template = ChatPromptTemplate([
    ("system", "You are a helpful assistant"),
    MessagesPlaceholder("msgs")
])

# Bare expression: the notebook displays the prompt built from the two human messages
prompt_template.invoke({"msgs": [HumanMessage(content="hi!"), HumanMessage(content="How are you?")]})

In [None]:
# AIMessage, HumanMessage, SystemMessage

from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)

# Build a one-variable chat prompt and fill it in
template = "Tell me a joke about {topic}"
prompt_template = ChatPromptTemplate.from_template(template)
prompt = prompt_template.invoke({"topic": "cats"})

print("------Prompt from template------")
print(prompt)

# Send the formatted prompt to the model and show only the text of the reply
result = model.invoke(prompt)
print("\n------Result-------")
print(result.content)


In [None]:
# Create a ChatPromptTemplate using a template string
from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

# Build a one-variable chat prompt from a template string and fill it in
template = "Tell me a joke about {topic}."
prompt_template = ChatPromptTemplate.from_template(template)
prompt = prompt_template.invoke({"topic": "cats"})

print("-----Prompt from Template-----")
print(prompt)

In [None]:
# Prompt with Multiple Placeholders

from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

# One template string carrying two placeholders: {adjective} and {animal}
template_multiple = """You are a helpful assistant.
Human: Tell me a {adjective} story about a {animal}.
Assistant:"""
prompt_multiple = ChatPromptTemplate.from_template(template_multiple)

# Fill both placeholders in a single invoke call
placeholder_values = {"adjective": "funny", "animal": "panda"}
prompt = prompt_multiple.invoke(placeholder_values)

print("\n----- Prompt with Multiple Placeholders -----\n")
print(prompt)

In [None]:
# Prompt with System and Human Messages (Using Tuples)
from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

# (role, text) tuples; each text may carry its own placeholders
messages = [
    ("system", "You are a comedian who tells jokes about {topic}."),
    ("human", "Tell me {joke_count} jokes."),
]
prompt_template = ChatPromptTemplate.from_messages(messages)

# Values for the placeholders of both messages are supplied together
placeholder_values = {"topic": "lawyers", "joke_count": 3}
prompt = prompt_template.invoke(placeholder_values)

print("\n----- Prompt with System and Human Messages (Tuple) -----\n")
print(prompt)

# Chains (Ķēdes)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

from google.colab import userdata
# Read the API key through Colab's userdata helper
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY
)

# Single user message with one placeholder, {adjective}
prompt = ChatPromptTemplate.from_messages(
    [("user", "Tell me a {adjective} joke")],
)

# Pipe the steps together: prompt -> model -> output parser
chain = prompt | model | StrOutputParser()

# Bare expression: the notebook displays the value returned by the chain
chain.invoke({"adjective": "funny"})

In [None]:
# Basic Chain

from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_core.messages import HumanMessage

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)

# System + human messages, each with its own placeholder
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are a comedian who tells jokes about {topic}."),
    ("human", "Tell me {joke_count} jokes."),
])

# LangChain expression language: prompt -> model -> output parser
chain = prompt_template | model | StrOutputParser()

chain_inputs = {"topic": "fish", "joke_count": 3}
result = chain.invoke(chain_inputs)

print(result)


In [None]:
# Extended Chain

from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda
from langchain_core.messages import HumanMessage

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)

# System + human messages, each with its own placeholder
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are a comedian who tells jokes about {topic}."),
    ("human", "Tell me {joke_count} jokes."),
])

# Two post-processing steps applied to the parsed text
uppercase_output = RunnableLambda(lambda text: text.upper())
count_words = RunnableLambda(lambda text: f"Word count {len(text.split())} \n {text}")

# prompt -> model -> output parser -> upper-case -> prepend word count
chain = (
    prompt_template
    | model
    | StrOutputParser()
    | uppercase_output
    | count_words
)

chain_inputs = {"topic": "fish", "joke_count": 3}
result = chain.invoke(chain_inputs)

print(result)


In [None]:
# Parallel
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)

# First step of the pipeline: ask for the product's main features
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are an expert product reviewer."),
    ("human", "List the main features of the product {product_name}."),
])


# Define pros analysis step
def analyze_pros(features):
    """Return a formatted prompt asking the reviewer persona for the pros of *features*."""
    pros_messages = [
        ("system", "You are an expert product reviewer."),
        ("human", "Given these features: {features}, list the pros of these features."),
    ]
    pros_template = ChatPromptTemplate.from_messages(pros_messages)
    return pros_template.format_prompt(features=features)


# Define cons analysis step
def analyze_cons(features):
    """Return a formatted prompt asking the reviewer persona for the cons of *features*."""
    cons_messages = [
        ("system", "You are an expert product reviewer."),
        ("human", "Given these features: {features}, list the cons of these features."),
    ]
    cons_template = ChatPromptTemplate.from_messages(cons_messages)
    return cons_template.format_prompt(features=features)


# Combine pros and cons into a final review
def combine_pros_cons(pros, cons):
    """Return one review string: a "Pros:" section, a blank line, then a "Cons:" section."""
    sections = (f"Pros:\n{pros}", f"Cons:\n{cons}")
    return "\n\n".join(sections)


# Each branch turns the feature list into a prompt, queries the model, and parses the text
pros_branch_chain = (
    RunnableLambda(lambda features: analyze_pros(features))
    | model
    | StrOutputParser()
)
cons_branch_chain = (
    RunnableLambda(lambda features: analyze_cons(features))
    | model
    | StrOutputParser()
)

# Both branches receive the same input; their results are grouped under "branches"
parallel_step = RunnableParallel(
    branches={"pros": pros_branch_chain, "cons": cons_branch_chain}
)
merge_step = RunnableLambda(
    lambda x: combine_pros_cons(x["branches"]["pros"], x["branches"]["cons"])
)

# Create the combined chain using LangChain Expression Language (LCEL)
chain = prompt_template | model | StrOutputParser() | parallel_step | merge_step

# Run the chain
result = chain.invoke({"product_name": "MacBook Pro"})

# Output
print(result)


In [None]:
# Branching
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableBranch
from langchain_google_genai import ChatGoogleGenerativeAI

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)

# One prompt template per feedback type; each takes the {feedback} text
positive_feedback_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "Generate a thank you note for this positive feedback: {feedback}."),
])

negative_feedback_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "Generate a response addressing this negative feedback: {feedback}."),
])

neutral_feedback_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "Generate a request for more details for this neutral feedback: {feedback}."),
])

escalate_feedback_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "Generate a message to escalate this feedback to a human agent: {feedback}."),
])

# Template that asks the model to label the feedback with one of four classes
classification_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "Classify the sentiment of this feedback as positive, negative, neutral, or escalate: {feedback}."),
])

# Define the runnable branches for handling feedback.
# Each branch receives a dict with two keys:
#   "sentiment" - the label text produced by the classification chain
#   "feedback"  - the customer's original feedback text
# The label is lower-cased before matching so "Positive" and "positive" route alike.
# The response templates read only {feedback}, so the reply is written for the
# original review rather than for the classifier's label.
branches = RunnableBranch(
    (
        lambda x: "positive" in x["sentiment"].lower(),
        positive_feedback_template | model | StrOutputParser(),  # Positive feedback chain
    ),
    (
        lambda x: "negative" in x["sentiment"].lower(),
        negative_feedback_template | model | StrOutputParser(),  # Negative feedback chain
    ),
    (
        lambda x: "neutral" in x["sentiment"].lower(),
        neutral_feedback_template | model | StrOutputParser(),  # Neutral feedback chain
    ),
    # Default branch: used when none of the labels above is found
    escalate_feedback_template | model | StrOutputParser(),
)

# Create the classification chain
classification_chain = classification_template | model | StrOutputParser()

# Combine classification and response generation into one chain.
# The dict runs the classifier and, alongside it, passes the original feedback
# through unchanged, so the selected branch still has the review text.
chain = {
    "sentiment": classification_chain,
    "feedback": lambda x: x["feedback"],
} | branches

# Run the chain with an example review
# Good review - "The product is excellent. I really enjoyed using it and found it very helpful."
# Bad review - "The product is terrible. It broke after just one use and the quality is very poor."
# Neutral review - "The product is okay. It works as expected but nothing exceptional."
# Default - "I'm not sure about the product yet. Can you tell me more about its features and benefits?"

review = "The product is okay. It works as expected but nothing exceptional."
result = chain.invoke({"feedback": review})

# Output the result
print(result)

# Document Loaders (Dokumentu Ielādētāji)

In [None]:
!pip install langchain-community

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load every row of the CSV file as a document
file_path = "/content/diabetes.csv"
loader = CSVLoader(file_path=file_path)
data = loader.load()

# Print only the first two loaded records
preview = data[:2]
for record in preview:
    print(record)

In [None]:
!pip install -qU pypdf

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF one page at a time through the loader's async iterator
loader = PyPDFLoader("/content/invoice_1001329.pdf")
pages = []

# NOTE: a top-level `async for` is only valid where top-level await is supported
# (as in a notebook); in a plain script it must live inside an async function.
async for page in loader.alazy_load():
    pages.append(page)
# Show the metadata and the text of the first page only
print(f"{pages[0].metadata}\n")
print(pages[0].page_content)

# Agents (Aģenti)

In [None]:
!pip install -qU duckduckgo-search langchain-community

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.agents import initialize_agent, Tool, AgentType
from langchain.agents import AgentExecutor
from langchain_community.tools import DuckDuckGoSearchRun


GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)

# Define the tool: DuckDuckGo search results
ddg_search = DuckDuckGoSearchRun()

# Set up the agent with the search tool
tools = [Tool(
    name="DuckDuckGoSearchRun",
    func=ddg_search.run,
    description="Searches the web using DuckDuckGo."
)]

# Define a simple prompt template for asking a question (plain str.format template)
prompt_template = "Search the web for information about {query}"

# Initialize the agent with a basic agent type.
# initialize_agent takes the agent type through its `agent` parameter; the
# previous `agent_type=` keyword is not a parameter of initialize_agent and was
# forwarded as an extra keyword argument instead of selecting the agent.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Use the agent to interact and fetch search results
def get_search_results_using_agent(query: str):
    """Fill the search prompt with *query*, run the agent on it, and return the agent's answer."""
    agent_input = prompt_template.format(query=query)
    return agent.run(agent_input)

# Example usage: run one search through the agent and print what it returns
query = "latest news on AI advancements"
search_results = get_search_results_using_agent(query)
print(search_results)


In [None]:
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

from langchain.agents import initialize_agent, Tool, AgentType
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer


GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)


from sklearn.datasets import load_iris

def load_data(query=None):  # query is accepted but unused, so the function can be called with one argument
    """Return the sklearn Iris dataset as a DataFrame with a numeric 'species' target column."""
    iris = load_iris()
    frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    frame['species'] = iris.target
    return frame

# Function to clean data (simple imputation for missing values)
def clean_data(df: pd.DataFrame, query=None):
    """Return a copy of *df* whose values are replaced by the mean-imputed values.

    The imputer is fitted on every column of *df*; *query* is accepted but unused.
    """
    imputed_values = SimpleImputer(strategy="mean").fit_transform(df)
    df_clean = df.copy()
    df_clean[:] = imputed_values
    return df_clean

# Function to train a model (Random Forest)
def train_model(df: pd.DataFrame, target_column: str, query=None):
    """Train a random forest on an 80/20 split of *df* and return its test-set accuracy.

    *target_column* names the label column; all other columns are used as features.
    *query* is accepted but unused.
    """
    features = df.drop(columns=[target_column])
    labels = df[target_column]

    # Fixed random_state keeps the split reproducible
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(features_train, labels_train)

    # Score the predictions on the held-out part
    predictions = classifier.predict(features_test)
    return accuracy_score(labels_test, predictions)

# Set up tools for the agent.
# NOTE(review): clean_data and train_model take a DataFrame as their first
# argument; whether the agent can supply one is not shown here. In this cell
# the agent is only constructed - the pipeline further down calls the
# functions directly.
tools = [
    Tool(
        name="Load Data",
        func=load_data,
        description="Loads the Iris dataset."
    ),
    Tool(
        name="Clean Data",
        func=clean_data,
        description="Cleans the dataset by handling missing values."
    ),
    Tool(
        name="Train Model",
        func=train_model,
        description="Trains a Random Forest model and returns the accuracy."
    )
]

# Initialize the agent with tools and LLM.
# initialize_agent takes the agent type through its `agent` parameter; the
# previous `agent_type=` keyword is not a parameter of initialize_agent and was
# forwarded as an extra keyword argument instead of selecting the agent.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Automating a data science task
def automate_data_science_task():
    """Run load -> clean -> train in order, printing progress, and return the accuracy."""
    print("Loading data...")
    raw_frame = load_data()

    print("Cleaning data...")
    cleaned_frame = clean_data(raw_frame)

    print("Training model...")
    return train_model(cleaned_frame, target_column="species")

# Run the pipeline by calling the functions directly (the agent object is not invoked here)
accuracy = automate_data_science_task()
print(f"Model Accuracy: {accuracy}")

In [None]:
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool

from langchain_google_genai import ChatGoogleGenerativeAI

from google.colab import userdata
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Initialize Chat Model
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)


# Create a tool
# Returns its integer argument plus 2.
# NOTE(review): the docstring and the parameter name are left untouched -
# presumably the @tool decorator exposes them as the tool's description and
# argument schema; confirm before renaming `input` (which shadows the builtin).
@tool
def super_function(input: int) -> int:
    """Applies a magic function to an input."""
    return input + 2


# The agent gets exactly one tool
tools = [super_function]


# A question the model cannot answer without calling the tool
query = "what is the value of super_function(3)?"

# Build a prebuilt ReAct-style agent from the model and the tool list
langgraph_agent_executor = create_react_agent(model, tools)


# The input is a list of (role, text) pairs under the "messages" key
messages = langgraph_agent_executor.invoke({"messages": [("human", query)]})
# Bare expression: the notebook displays the question and the content of the last message
{
    "input": query,
    "output": messages["messages"][-1].content,
}

# Tools (Rīki)

In [None]:
from langchain_core.tools import tool


# Returns the product of its two integer arguments.
# NOTE(review): the docstring is left untouched - presumably the @tool decorator
# uses it as the tool's description (printed below via multiply.description); confirm.
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b


# Print the tool's name, description and argument spec, one per line
for tool_attribute in (multiply.name, multiply.description, multiply.args):
    print(tool_attribute)

In [None]:
!pip install langchain-experimental

In [None]:
from langchain_core.tools import Tool
from langchain_experimental.utilities import PythonREPL

# Create the REPL helper and run one Python statement passed as a string
python_repl = PythonREPL()
# Bare expression: the notebook displays the value returned by run()
python_repl.run("print(1+1)")


# Invoice extractor (Rēķinu izvilkšanas lietotne)

In [None]:
!pip install pypdf

In [None]:
from pypdf import PdfReader
# Read every page of the invoice PDF and print the concatenated text
pdf_doc = "/content/invoice_1001329.pdf"
pdf_reader = PdfReader(pdf_doc)
# `or ""` keeps the join working when a page yields no text, matching the
# extraction used in the multi-invoice cell of this notebook
text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
print(text)

In [None]:
import pandas as pd
import re
from pydantic import BaseModel, ValidationError
from pypdf import PdfReader
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata
from langchain.schema import HumanMessage

# Read the Google API key and stop early when it is missing
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("Google API key not found in userdata.")

# Initialize the LLM
llm_settings = {
    "model": "gemini-1.5-flash",
    "google_api_key": GOOGLE_API_KEY,
    "temperature": 0.7,
}
llm = ChatGoogleGenerativeAI(**llm_settings)

# Define a Pydantic model for invoice data validation
class InvoiceData(BaseModel):
    # Schema for one extracted invoice; every field is required.
    # The field names match the JSON keys shown in the prompt's expected output
    # and the column names of the summary DataFrame.
    Invoice_no: str
    Description: str
    Quantity: str
    Date: str
    Unit_price: str
    # Amount and Total are the only numeric (float) fields
    Amount: float
    Total: float
    Email: str
    Phone_number: str
    Address: str

# PDF file path
pdf_path = "/content/invoice_1001329.pdf"

# Extract the text of every page, then flatten it onto a single line
print(f"Processing {pdf_path}...")
pdf_reader = PdfReader(pdf_path)
text = "".join(page.extract_text() for page in pdf_reader.pages)
text = text.replace("\n", " ").strip()

# Construct the prompt: instructions, the invoice text, and an example of the JSON shape
prompt = f"""Extract all the following values: invoice no., Description, Quantity, date,
Unit price, Amount, Total, email, phone number, and address from this data:

{text}

Expected output (JSON format):
{{
    "Invoice_no": "1001329",
    "Description": "Office Chair",
    "Quantity": "2",
    "Date": "5/4/2023",
    "Unit_price": "1100.00",
    "Amount": 2200.00,
    "Total": 2200.00,
    "Email": "Santoshvarma0988@gmail.com",
    "Phone_number": "9999999999",
    "Address": "Mumbai, India"
}}
"""

# Debug: Print prompt to verify it
print("Prompt sent to LLM:\n", prompt)

# Start from an empty DataFrame that already has the output columns
invoice_columns = [
    'Invoice_no', 'Description', 'Quantity', 'Date', 'Unit_price',
    'Amount', 'Total', 'Email', 'Phone_number', 'Address',
]
df = pd.DataFrame(columns=invoice_columns)

# Ask the LLM, validate its answer, and add it to the DataFrame
try:
    llm_response = llm.invoke([HumanMessage(content=prompt)])
    llm_content = llm_response.content

    # Strip the Markdown code fence (```json ... ```) that may wrap the answer
    cleaned_text = re.sub(r"```json\s*|\s*```", "", llm_content).strip()

    # Validate and parse JSON response using Pydantic
    invoice_data = InvoiceData.model_validate_json(cleaned_text)
    print("Validated Data:\n", invoice_data)

    # One-row frame built from the validated model
    new_data = pd.DataFrame([invoice_data.model_dump()])

    # Replace the empty frame outright; only concatenate onto a non-empty one
    df = new_data if df.empty else pd.concat([df, new_data], ignore_index=True)

except ValidationError as e:
    print("Validation Error:\n", e)
except Exception as e:
    print(f"Error generating LLM response: {e}")

# Save to CSV (written even when extraction failed, in which case it holds only the header)
csv_filename = 'invoices_summary.csv'
df.to_csv(csv_filename, index=False)
print(f"CSV file created as '{csv_filename}'.")


In [None]:
import pandas as pd
import re
import json
from pydantic import BaseModel, field_validator
from typing import Optional
from pypdf import PdfReader
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata
from langchain.schema import HumanMessage

# Read the Google API key and stop early when it is missing
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Google API key not found in userdata.")

# Initialize the LLM
llm_settings = {
    "model": "gemini-1.5-flash",
    "google_api_key": GOOGLE_API_KEY,
    "temperature": 0.7,
}
llm = ChatGoogleGenerativeAI(**llm_settings)

# Define a Pydantic model with relaxed validation
class InvoiceData(BaseModel):
    # Schema for one extracted invoice. The first five fields are required;
    # the remaining ones default to None when the LLM omits them.
    Invoice_no: str
    Description: str
    Quantity: str
    Date: str
    Unit_price: str
    Amount: Optional[float] = None
    Total: Optional[float] = None
    Email: Optional[str] = None
    Phone_number: Optional[str] = None
    Address: Optional[str] = None

    # Convert currency values to float
    @field_validator("Amount", "Total", mode="before")
    @classmethod
    def parse_currency(cls, value):
        """Coerce a raw Amount/Total value to ``float`` before field validation.

        * ``None`` stays ``None``.
        * A string is searched for its first number; thousands separators
          (``,``) are removed and a directly preceding ``-`` is kept, so
          ``"$2,200.00"`` -> ``2200.0`` and ``"Rs. 500"`` -> ``500.0``.
          A string that contains no number becomes ``None``.
        * Any other value is passed to ``float()``, so a numeric ``0`` stays
          ``0.0`` instead of being turned into ``None``.

        Raises whatever ``float()`` raises for a value it cannot convert.
        """
        if value is None:
            return None
        if isinstance(value, str):
            # Either digits with optional separators and decimals, or a bare
            # ".5"-style fraction. The look-behind stops the dot of an
            # abbreviation such as "Rs." from being read as a decimal point.
            number = re.search(
                r"-?(?:\d[\d,]*(?:\.\d+)?|(?<![A-Za-z])\.\d+)", value
            )
            if number is None:
                return None
            value = number.group(0).replace(",", "")
        return float(value)

# List of PDF file paths
pdf_files = [
    "/content/invoice_1001329.pdf",
    "/content/invoice_2001321.pdf",
    "/content/invoice_3452334.pdf",
]

# Column order of the output CSV
invoice_columns = [
    "Invoice_no", "Description", "Quantity", "Date", "Unit_price",
    "Amount", "Total", "Email", "Phone_number", "Address"
]

# Validated invoices are collected as plain dicts and turned into a DataFrame
# once, after the loop, instead of concatenating onto an empty frame per file.
invoice_rows = []

# Loop through PDF files; a failure on one file is reported and the loop moves on
for pdf_path in pdf_files:
    try:
        print(f"Processing {pdf_path}...")

        # Extract text from PDF; `or ""` covers pages that yield no text
        pdf_reader = PdfReader(pdf_path)
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

        text = text.replace("\n", " ").strip()

        # Construct the prompt
        prompt = f"""Extract all the following values: invoice no., Description, Quantity, Date,
        Unit price, Amount, Total, Email, Phone number, and Address from this data:

        {text}

        Expected output (JSON format):
        {{
            "Invoice_no": "1001329",
            "Description": "Office Chair",
            "Quantity": "2",
            "Date": "5/4/2023",
            "Unit_price": "1100.00",
            "Amount": 2200.00,
            "Total": 2200.00,
            "Email": "Santoshvarma0988@gmail.com",
            "Phone_number": "9999999999",
            "Address": "Mumbai, India"
        }}
        """

        # Send prompt to LLM
        llm_response = llm.invoke([HumanMessage(content=prompt)])
        llm_content = llm_response.content

        # Extract JSON using regex: everything from the first "{" to the last "}"
        json_match = re.search(r"\{.*\}", llm_content, re.DOTALL)
        if not json_match:
            print(f"Error: Could not extract JSON from response for {pdf_path}. Skipping.")
            continue

        cleaned_text = json_match.group(0)

        # Ensure valid JSON format
        try:
            invoice_json = json.loads(cleaned_text)
        except json.JSONDecodeError:
            print(f"Error: LLM response is not valid JSON for {pdf_path}: {cleaned_text}")
            continue  # Skip this file

        # Validate and parse JSON response using Pydantic
        try:
            invoice_data = InvoiceData(**invoice_json)
        except Exception as e:
            print(f"Validation Error in {pdf_path}: {e}")
            continue

        print("Validated Data:\n", invoice_data)

        # Keep the validated record for the final DataFrame
        invoice_rows.append(invoice_data.model_dump())

    except Exception as e:
        print(f"Error processing {pdf_path}: {e}")

# Build the DataFrame once; with no rows it still carries the column headers
df = pd.DataFrame(invoice_rows, columns=invoice_columns)

# Save to CSV
csv_filename = "multi_invoice_summary.csv"
df.to_csv(csv_filename, index=False)
print(f"CSV file created as '{csv_filename}'.")
print(f"Total invoices processed: {len(df)}")