# Search over Orders Data Inside PDFs

In [1]:
import os
import getpass

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"var: ")

_set_env("OPENAI_API_KEY")

In [2]:
from openai import OpenAI

client = OpenAI()

print("Client created successfully!")

Client created successfully!


In [3]:
def create_search_order_db(store_name: str):
    """Creates a vector store for document storage."""
    vector_store = client.vector_stores.create(name=store_name)
    
    details = {
        "id": vector_store.id,
        "name": vector_store.name,
        "created_at": vector_store.created_at,
        "file_count": vector_store.file_counts.completed
    }
    
    print(f"Created vector store: {vector_store.name}")
    print(f"Vector Store ID: {vector_store.id}")
    
    return details

In [5]:
vector_store_db_details = create_search_order_db("order_details")

Created vector store: order_details
Vector Store ID: vs_6900fb3b2e00819192e1c4875b68ec5b


In [6]:
vector_store_db_details['id']

'vs_6900fb3b2e00819192e1c4875b68ec5b'

prompt to generate the fake orders synthetic data: 

```Create 10 pdf files with fake synthetic order information data to simulate a dataset.```

In [None]:
def upload_file_to_vector_store(file_path: str, vector_store_id: str):
    """Uploads a file to the vector store."""
    file_name = os.path.basename(file_path)
    
    try:
        # Upload file to OpenAI
        print(f"Uploading {file_name}...")
        file_response = client.files.create(
            file=open(file_path, 'rb'),
            purpose="assistants"
        )
        
        # Add file to vector store
        print(f"Adding to vector store...")
        client.vector_stores.files.create(
            vector_store_id=vector_store_id,
            file_id=file_response.id
        )
        
        print(f"✓ Successfully uploaded: {file_name}")
        return {"file": file_name, "status": "success", "file_id": file_response.id}
        
    except Exception as e:
        print(f"✗ Failed to upload {file_name}: {str(e)}")
        return {"file": file_name, "status": "failed", "error": str(e)}
    

import glob

order_data = glob.glob('./assets-resources/orders_data/*.pdf')

order_data

['./assets-resources/orders_data/SO-20251028-010-JEXQ7.pdf',
 './assets-resources/orders_data/SO-20251028-009-UT0CV.pdf',
 './assets-resources/orders_data/SO-20251028-005-1SDW4.pdf',
 './assets-resources/orders_data/SO-20251028-006-SV871.pdf',
 './assets-resources/orders_data/SO-20251028-004-SDBDW.pdf',
 './assets-resources/orders_data/SO-20251028-007-KRT05.pdf',
 './assets-resources/orders_data/SO-20251028-008-5C34D.pdf',
 './assets-resources/orders_data/SO-20251028-001-VBDIV.pdf',
 './assets-resources/orders_data/SO-20251028-003-J7YHL.pdf',
 './assets-resources/orders_data/SO-20251028-002-W5UZB.pdf']

Uploading the orders data to our database!

In [11]:
for file_path in order_data:
    upload_file_to_vector_store(file_path, vector_store_db_details['id'])

Uploading SO-20251028-010-JEXQ7.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-010-JEXQ7.pdf
Uploading SO-20251028-009-UT0CV.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-009-UT0CV.pdf
Uploading SO-20251028-005-1SDW4.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-005-1SDW4.pdf
Uploading SO-20251028-006-SV871.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-006-SV871.pdf
Uploading SO-20251028-004-SDBDW.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-004-SDBDW.pdf
Uploading SO-20251028-007-KRT05.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-007-KRT05.pdf
Uploading SO-20251028-008-5C34D.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-008-5C34D.pdf
Uploading SO-20251028-001-VBDIV.pdf...
Adding to vector store...
✓ Successfully uploaded: SO-20251028-001-VBDIV.pdf
Uploading SO-20251028-003-J7YHL.pdf...
Adding to vector store...
✓ Succe

In [12]:
def search_over_orders_data(query: str, orders_db_id: str, model: str = "gpt-5-mini", max_num_results: int = 3):
    """Query documents using file search in the Responses API."""
    
    instructions = """You are a helpful assistant that answers questions about orders
    only using the orders data in the vector store.
    """
    
    response = client.responses.create(
        input=query,
        model=model,
        instructions=instructions,
        tools=[{
            "type": "file_search",
            "vector_store_ids": [orders_db_id],
            "max_num_results": max_num_results
        }]
    )
    
    return response

output = search_over_orders_data("What was the order sent to Isla Murphy?", vector_store_db_details['id'])

In [19]:
output.output[-1].content[0].text

'I found two order receipts billed to Isla Murphy:\n\n- Order SO-20251028-006-SV871 — Order Date: 2025-09-14, Status: Refunded (Payment: Apple Pay). Items: Desk LED Lamp ×2, NVMe SSD 1TB ×1, HDMI 2.1 Cable 2m ×3, Mechanical Keyboard 75% ×1, 27" 4K Monitor ×2. Subtotal 964.97, Tax 221.94, Shipping 4.99, TOTAL 1,191.90. Billing address: Isla Murphy, 22 Harbour View, Limerick, Ireland. This is shown on the receipt SO-20251028-006-SV871.pdf .\n\n- Order SO-20251028-010-JEXQ7 — Order Date: 2025-08-30, Status: Paid (Payment: Apple Pay). Items: Mechanical Keyboard 75% ×3, NVMe SSD 1TB ×2, 27" 4K Monitor ×3. Subtotal 1,502.00, Tax 195.26, Shipping 4.99, Discount −10.00, TOTAL 1,692.25. Billing address: Isla Murphy, 108 Brookside, Athlone, Ireland. See receipt SO-20251028-010-JEXQ7.pdf .\n\nWhich of these orders did you mean, or would you like me to open the full receipt (PDF) for one of them?'

# Investment Suggestion

In [None]:
"""
Demo 2: Investment Suggestion Generator
Uses OpenAI Responses API with Code Interpreter to analyze data and generate investment suggestions.
"""

import os
from openai import OpenAI
from typing import Optional


class InvestmentAdvisor:
    """An AI investment advisor that uses the Responses API with Code Interpreter."""

    def __init__(self, api_key: str = None):
        """Initialize the Investment Advisor.

        Args:
            api_key: OpenAI API key. If not provided, reads from OPENAI_API_KEY env var.
        """
        self.client = OpenAI(api_key=api_key)
        self.conversation_id = None

    def get_investment_suggestion(
        self,
        query: str,
        model: str = "gpt-5-mini",
        use_code_interpreter: bool = True,
        stream: bool = True
    ) -> str:
        """Get investment suggestions based on user query.

        Args:
            query: Investment-related question or scenario
            model: OpenAI model to use
            use_code_interpreter: Whether to enable code interpreter for analysis
            stream: Whether to stream the response

        Returns:
            Investment suggestion text
        """
        instructions = """You are a professional investment advisor with expertise in:
        - Portfolio diversification and risk management
        - Market analysis and trends
        - Asset allocation strategies
        - Financial planning and retirement savings
        - Stock, bond, ETF, and mutual fund analysis

        Guidelines:
        - Provide data-driven insights when possible
        - Use code interpreter to perform calculations and create visualizations
        - Always include risk disclaimers
        - Explain your reasoning clearly
        - Consider the user's risk tolerance and investment timeline
        - Suggest diversified portfolios when appropriate

        IMPORTANT DISCLAIMER: Always remind users that this is educational information,
        not professional financial advice. Users should consult with licensed financial
        advisors before making investment decisions."""

        # Setup tools
        tools = []
        if use_code_interpreter:
            tools.append({
                "type": "code_interpreter",
                "container": {"type": "auto"}
            })

        # Create response
        if stream:
            return self._stream_response(query, instructions, tools, model)
        else:
            return self._sync_response(query, instructions, tools, model)

    def _stream_response(self, query: str, instructions: str, tools: list, model: str):
        """Stream the response with real-time output."""
        print("\n" + "=" * 60)
        print(f"Query: {query}")
        print("=" * 60)
        print("Investment Advisor: ", end="", flush=True)

        stream = self.client.responses.create(
            input=query,
            model=model,
            instructions=instructions,
            previous_response_id=self.conversation_id,
            tools=tools,
            stream=True
        )

        full_text = []
        for event in stream:
            if event.type == "response.output_text.delta":
                print(event.delta, end="", flush=True)
                full_text.append(event.delta)
            elif event.type == "response.output_item.added":
                if hasattr(event, 'item') and event.item.type == "code_interpreter_call":
                    print("\n\n[Running analysis...]", flush=True)
            elif event.type == "response.completed":
                self.conversation_id = event.response.id

        print("\n" + "=" * 60)
        return ''.join(full_text)

    def _sync_response(self, query: str, instructions: str, tools: list, model: str) -> str:
        """Get synchronous response."""
        response = self.client.responses.create(
            input=query,
            model=model,
            instructions=instructions,
            previous_response_id=self.conversation_id,
            tools=tools
        )

        self.conversation_id = response.id
        return response.output[-1].content[0].text

    def analyze_portfolio(
        self,
        portfolio_data: dict,
        model: str = "gpt-4o"
    ) -> str:
        """Analyze a portfolio and provide suggestions.

        Args:
            portfolio_data: Dictionary with portfolio information
            model: OpenAI model to use

        Returns:
            Analysis and suggestions
        """
        # Format portfolio data into a query
        query = f"""Please analyze this investment portfolio and provide suggestions:

Portfolio Details:
- Age: {portfolio_data.get('age', 'Not specified')}
- Risk Tolerance: {portfolio_data.get('risk_tolerance', 'Moderate')}
- Investment Timeline: {portfolio_data.get('timeline', 'Not specified')}
- Current Holdings: {portfolio_data.get('holdings', 'Not specified')}
- Investment Goals: {portfolio_data.get('goals', 'Not specified')}

Please provide:
1. Analysis of current allocation
2. Risk assessment
3. Diversification recommendations
4. Suggested adjustments (if any)
5. Expected return projections (with visualizations if helpful)"""

        return self.get_investment_suggestion(query, model=model, stream=True)

    def compare_investments(
        self,
        investment_options: list,
        criteria: str = "risk-adjusted returns",
        model: str = "gpt-4o"
    ) -> str:
        """Compare different investment options.

        Args:
            investment_options: List of investment options to compare
            criteria: Criteria for comparison
            model: OpenAI model to use

        Returns:
            Comparison analysis
        """
        options_str = "\n".join([f"- {option}" for option in investment_options])

        query = f"""Please compare these investment options based on {criteria}:

{options_str}

Provide:
1. Side-by-side comparison
2. Pros and cons of each option
3. Risk analysis
4. Historical performance (if applicable)
5. Recommendation based on different investor profiles
6. Use visualizations to illustrate key differences"""

        return self.get_investment_suggestion(query, model=model, stream=True)

    def market_outlook(
        self,
        sector: str = "general market",
        model: str = "gpt-4o"
    ) -> str:
        """Get market outlook and investment implications.

        Args:
            sector: Market sector to analyze
            model: OpenAI model to use

        Returns:
            Market analysis and outlook
        """
        query = f"""Provide a market outlook analysis for {sector}:

Please include:
1. Current market conditions
2. Key trends and drivers
3. Potential risks and opportunities
4. Investment strategies for current conditions
5. Sector-specific recommendations (if applicable)

Note: Focus on general principles and educational insights rather than specific stock picks."""

        return self.get_investment_suggestion(query, model=model, stream=True)

    def reset_conversation(self):
        """Reset the conversation history."""
        self.conversation_id = None
        print("Conversation reset.")


def main():
    """Demo application for investment suggestions."""
    print("=" * 60)
    print("Investment Suggestion Generator Demo")
    print("Using OpenAI Responses API with Code Interpreter")
    print("=" * 60)
    print("\n⚠️  DISCLAIMER: This is for educational purposes only.")
    print("Not professional financial advice. Consult licensed advisors.")
    print("=" * 60)

    # Initialize advisor
    advisor = InvestmentAdvisor()

    # Example 1: General investment question
    print("\n\n### Example 1: General Investment Advice")
    advisor.get_investment_suggestion(
        "I'm 30 years old with moderate risk tolerance. How should I allocate $10,000 for retirement?"
    )

    # Example 2: Portfolio analysis
    print("\n\n### Example 2: Portfolio Analysis")
    portfolio = {
        "age": 35,
        "risk_tolerance": "Moderate to Aggressive",
        "timeline": "25-30 years until retirement",
        "holdings": "60% stocks (S&P 500 index), 30% bonds, 10% cash",
        "goals": "Retirement savings, aiming for $2M by age 65"
    }
    advisor.analyze_portfolio(portfolio)

    # Example 3: Compare investment options
    print("\n\n### Example 3: Investment Comparison")
    advisor.compare_investments(
        investment_options=[
            "S&P 500 Index Fund (VOO)",
            "Total Bond Market Fund (BND)",
            "Real Estate Investment Trust (VNQ)",
            "Technology Sector ETF (XLK)"
        ],
        criteria="risk-adjusted returns for a 10-year investment horizon"
    )

    # Example 4: Market outlook
    print("\n\n### Example 4: Market Outlook")
    advisor.market_outlook(sector="technology sector")

    # Interactive mode
    print("\n\n" + "=" * 60)
    print("Interactive Mode - Ask your investment questions!")
    print("Commands: 'quit' to exit, 'reset' to start new conversation")
    print("=" * 60)

    while True:
        user_input = input("\nYour question: ").strip()

        if user_input.lower() in ['quit', 'exit', 'q']:
            print("\nThank you for using Investment Advisor!")
            break

        if user_input.lower() == 'reset':
            advisor.reset_conversation()
            continue

        if not user_input:
            continue

        try:
            advisor.get_investment_suggestion(user_input)
        except Exception as e:
            print(f"\nError: {e}")

    print("\n" + "=" * 60)
    print("Remember: Always consult with licensed financial advisors")
    print("before making investment decisions!")
    print("=" * 60)



main()