In [40]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.constants import START, END
from langgraph.graph import StateGraph, MessagesState
from pydantic import BaseModel, Field
from operator import add
from typing import TypedDict
from langgraph.checkpoint.memory import MemorySaver
from IPython.display import Markdown, Image, display
from dotenv import dotenv_values
from pypdf import PdfReader
import pandas as pd
import os

In [9]:
env_values = {**dotenv_values(".env.shared"), **dotenv_values(".env.secret")}

os.environ["LANGSMITH_TRACING"] = env_values["LANGSMITH_TRACING"]
os.environ["LANGSMITH_ENDPOINT"] = env_values["LANGSMITH_ENDPOINT"]
os.environ["LANGSMITH_API_KEY"] = env_values["LANGSMITH_API_KEY"]
os.environ["LANGSMITH_PROJECT"] = env_values["LANGSMITH_PROJECT"]
os.environ["OPENAI_API_KEY"] = env_values["OPENAI_API_KEY"]

## Summarization graph

In [10]:
PATH_TO_FILES = "./Data"

In [18]:
def read_csv_xlsx(path: str) -> str:
    if path.endswith(".xlsx"):
        df = pd.read_excel(path)
        return df.to_string(index=False)
    else:
        df = pd.read_csv(path)
        return df.to_string(index=False)

def read_pdf(path: str) -> str:
    with open(path, "rb") as f:
        reader = PdfReader(f)
        text = ""
        for page in reader.pages:
            text += page.extractText()

    return text

def read_docs():
    dirs = os.listdir(PATH_TO_FILES)
    text = ""
    for dir in dirs:
        files = os.listdir(os.path.join(PATH_TO_FILES, dir))
        for file in files:
            if not file.endswith(".pdf"):
                text += read_csv_xlsx(os.path.join(os.path.join(PATH_TO_FILES, dir), file))
            else:
                text += read_pdf(os.path.join(os.path.join(PATH_TO_FILES, dir), file))

    return text

In [20]:
test = read_docs()

In [13]:
llm = ChatOpenAI(model="gpt-4o", temperature=0.4, top_p=0.38)

In [26]:
summarization_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a travel assistant. Help in summarizing the employee profiles, the organizational rules and the travel requirements. Ignore any IDs"),
    ("human", "Summarize this {input}")
])

In [27]:
runnable_summarization = summarization_prompt | llm

In [30]:
output = runnable_summarization.invoke({'input': test}).content

In [31]:
Markdown(output)

### Employee Profiles Summary:

1. **Alice Doe**: 
   - Role: Senior Engineer
   - Travel History: USA, Germany, France (3 times)
   - Preferred Airlines: Lufthansa, Delta
   - Preferred Hotels: Marriott, Hilton
   - Meal Preference: Vegetarian
   - Seat Preference: Window
   - Visa Requirement: Schengen, USA

2. **Bob Smith**: 
   - Role: Project Manager
   - Travel History: India, UK (2 times)
   - Preferred Airlines: British Airways
   - Preferred Hotels: IHG, Hyatt
   - Meal Preference: Vegan
   - Seat Preference: Aisle
   - Visa Requirement: UK, India

3. **Carol Lee**: 
   - Role: Data Scientist
   - Travel History: Singapore, Japan (1 time)
   - Preferred Airlines: Singapore Airlines
   - Preferred Hotels: Marriott
   - Meal Preference: Non-Veg
   - Seat Preference: Aisle
   - Visa Requirement: Japan, Singapore

4. **David Wong**: 
   - Role: Cybersecurity Lead
   - Travel History: Canada, USA, Germany (4 times)
   - Preferred Airlines: Air Canada, Delta
   - Preferred Hotels: Hilton
   - Meal Preference: Kosher
   - Seat Preference: Window
   - Visa Requirement: Schengen, USA, Canada

5. **Eva Patel**: 
   - Role: Software Engineer
   - Travel History: Remote (0 trips)
   - Preferred Airlines: Emirates
   - Preferred Hotels: Hyatt
   - Meal Preference: Halal
   - Seat Preference: Window
   - Visa Requirement: Schengen

### Organizational Rules Summary:

1. **Budget and Travel Class**:
   - Max budget per trip per employee is $3,000, region dependent.
   - Business class flights are only for Director level and above.

2. **Approval and Booking**:
   - Approval from direct manager is required for travel abroad.
   - Employees cannot book flights exceeding the budget limit.
   - Visa approval must be completed before flight booking.

3. **Preferred Partners and Accommodations**:
   - Preferred airline partners are Lufthansa, Emirates, and Delta.
   - Hotels must be 4-star or higher for company bookings.

4. **Insurance**:
   - Travel insurance is mandatory for international trips.

### Travel Requests Summary:

1. **Alice Doe**: 
   - Destination: Berlin
   - Purpose: Conference
   - Dates: July 15-20, 2024
   - Budget Limit: $2,500
   - Status: Approved

2. **Bob Smith**: 
   - Destination: London
   - Purpose: Client Meeting
   - Dates: August 1-5, 2024
   - Budget Limit: $3,000
   - Status: Pending

3. **Carol Lee**: 
   - Destination: Tokyo
   - Purpose: Research
   - Dates: September 10-20, 2024
   - Budget Limit: $4,500
   - Status: Approved

4. **David Wong**: 
   - Destination: Toronto
   - Purpose: Internal Audit
   - Dates: October 5-15, 2024
   - Budget Limit: $3,500
   - Status: Rejected

5. **Eva Patel**: 
   - Destination: Dubai
   - Purpose: Business Expo
   - Dates: November 1-10, 2024
   - Budget Limit: $2,000
   - Status: Approved

### Creation of the summary graph

#### States

In [48]:
class SummaryGraphInput(TypedDict):
    summary: str
    text: str

class SummaryGraphOutput(TypedDict):
    summary: str


#### Utils

In [49]:
def read_csv_xlsx(path: str) -> str:
    if path.endswith(".xlsx"):
        df = pd.read_excel(path)
        return df.to_string(index=False)
    else:
        df = pd.read_csv(path)
        return df.to_string(index=False)

def read_pdf(path: str) -> str:
    with open(path, "rb") as f:
        reader = PdfReader(f)
        text = ""
        for page in reader.pages:
            text += page.extractText()

    return text

#### Prompts

In [66]:
summarization_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a travel assistant. Help in summarizing the employee profiles, the organizational rules and the travel requirements. Ignore any IDs. Do not provide the output in any font or styles. Provide the summary in a neat structured format."),
    ("human", "Summarize this {input}")
])

#### Nodes

In [67]:
def read_docs(state: SummaryGraphInput):
    PATH_TO_FILES = "./Data"
    dirs = os.listdir(PATH_TO_FILES)
    text = ""
    for dir in dirs:
        files = os.listdir(os.path.join(PATH_TO_FILES, dir))
        for file in files:
            if not file.endswith(".pdf"):
                text += read_csv_xlsx(os.path.join(os.path.join(PATH_TO_FILES, dir), file))
            else:
                text += read_pdf(os.path.join(os.path.join(PATH_TO_FILES, dir), file))

    return {'text': text}

In [68]:
def summarize(state: SummaryGraphInput):
    text = state['text']
    runnable_summarization = summarization_prompt | llm
    output = runnable_summarization.invoke({'input': text}).content
    return {'summary': output}

In [69]:
graph_summarizer_builder = StateGraph(SummaryGraphInput, output=SummaryGraphOutput)

graph_summarizer_builder.add_node("read documents", read_docs)
graph_summarizer_builder.add_node("summarize documents", summarize)

graph_summarizer_builder.add_edge(START, "read documents")
graph_summarizer_builder.add_edge("read documents", "summarize documents")
graph_summarizer_builder.add_edge("summarize documents", END)

memory = MemorySaver()
graph_summarizer = graph_summarizer_builder.compile()

In [59]:
display(Image(graph_summarizer.get_graph().draw_mermaid_png()))

ReadTimeout: HTTPSConnectionPool(host='mermaid.ink', port=443): Read timed out. (read timeout=10)

In [70]:
Markdown(graph_summarizer.invoke({}, config={'configurable': {'thread_id': "summarizer_2"}})['summary'])

**Employee Profiles:**

1. **Alice Doe**
   - Role: Senior Engineer
   - Travel History: USA, Germany, France (3x)
   - Preferred Airlines: Lufthansa, Delta
   - Preferred Hotels: Marriott, Hilton
   - Meal Preference: Vegetarian
   - Seat Preference: Window
   - Visa Requirement: Schengen, USA

2. **Bob Smith**
   - Role: Project Manager
   - Travel History: India, UK (2x)
   - Preferred Airlines: British Airways
   - Preferred Hotels: IHG, Hyatt
   - Meal Preference: Vegan
   - Seat Preference: Aisle
   - Visa Requirement: UK, India

3. **Carol Lee**
   - Role: Data Scientist
   - Travel History: Singapore, Japan (1x)
   - Preferred Airlines: Singapore Airlines
   - Preferred Hotels: Marriott
   - Meal Preference: Non-Veg
   - Seat Preference: Aisle
   - Visa Requirement: Japan, Singapore

4. **David Wong**
   - Role: Cybersecurity Lead
   - Travel History: Canada, USA, Germany (4x)
   - Preferred Airlines: Air Canada, Delta
   - Preferred Hotels: Hilton
   - Meal Preference: Kosher
   - Seat Preference: Window
   - Visa Requirement: Schengen, USA, Canada

5. **Eva Patel**
   - Role: Software Engineer
   - Travel History: Remote (0 trips)
   - Preferred Airlines: Emirates
   - Preferred Hotels: Hyatt
   - Meal Preference: Halal
   - Seat Preference: Window
   - Visa Requirement: Schengen

**Organizational Rules:**

1. Max budget per trip per employee: $3,000 (Region dependent)
2. Business class flights only for Director level & above
3. Approval from direct manager required for travel abroad
4. Preferred airline partners: Lufthansa, Emirates, Delta
5. Hotels must be 4-star or higher for company bookings
6. Travel insurance mandatory for international trips
7. Employees cannot book flights exceeding the budget limit
8. Visa approval must be completed before flight booking

**Travel Requirements:**

1. **Berlin (Alice Doe)**
   - Purpose: Conference
   - Dates: 2024-07-15 to 2024-07-20
   - Budget Limit: $2500
   - Approval Status: Approved

2. **London (Bob Smith)**
   - Purpose: Client Meeting
   - Dates: 2024-08-01 to 2024-08-05
   - Budget Limit: $3000
   - Approval Status: Pending

3. **Tokyo (Carol Lee)**
   - Purpose: Research
   - Dates: 2024-09-10 to 2024-09-20
   - Budget Limit: $4500
   - Approval Status: Approved

4. **Toronto (David Wong)**
   - Purpose: Internal Audit
   - Dates: 2024-10-05 to 2024-10-15
   - Budget Limit: $3500
   - Approval Status: Rejected

5. **Dubai (Eva Patel)**
   - Purpose: Business Expo
   - Dates: 2024-11-01 to 2024-11-10
   - Budget Limit: $2000
   - Approval Status: Approved