In [6]:
import os
from dotenv import load_dotenv
import sqlite3
import json
import re
from openai import OpenAI
from datetime import datetime

In [13]:
class DeepSeekProductExtractor:
    """Product and price extractor using DeepSeek API"""

    def __init__(self, api_key: str | None = None):
        """Initialize the DeepSeek-based product extractor"""
        # Set up DeepSeek API
        self.api_key = api_key or os.environ.get("DEEPSEEK_API_KEY")
        if not self.api_key:
            raise ValueError(
                "DeepSeek API key is required. Set DEEPSEEK_API_KEY environment variable or pass api_key parameter.")

        self.client = OpenAI(api_key=self.api_key, base_url="https://api.deepseek.com")
        self.model = "deepseek-chat"

        # Connect to database
        self.setup_database()

    # Adapter to convert datetime objects to ISO format strings
    def adapt_datetime(dt):
        return dt.isoformat()

    # Converter to parse ISO format strings back to datetime objects
    def convert_datetime(bytestring):
        return datetime.fromisoformat(bytestring.decode())

    # Register the adapter for datetime objects
    sqlite3.register_adapter(datetime, adapt_datetime)

    # Register the converter for 'DATETIME' type
    sqlite3.register_converter('DATETIME', convert_datetime)

    def setup_database(self):
        """Setup SQLite database for storing products"""
        self.conn = sqlite3.connect('products_deepseek.db', detect_types=sqlite3.PARSE_DECLTYPES)
        self.cursor = self.conn.cursor()

        # Create products table if it doesn't exist
        self.cursor.execute('''
        CREATE TABLE IF NOT EXISTS products (
            id INTEGER PRIMARY KEY,
            name TEXT,
            price REAL,
            currency TEXT,
            source_text TEXT,
            timestamp DATETIME
        )
        ''')
        self.conn.commit()

    def extract_products(self, text: str) -> list[dict[str, object]]:
        """Extract product names and prices from text using DeepSeek"""
        system_prompt = """
        Extract all products and their prices mentioned in the text.
        Return a JSON array where each item has the following format:
        {
            "name": "Product Name",
            "price": 123.45,
            "currency": "$"
        }

        Rules:
        1. Extract complete product names including brand and model
        2. Convert all prices to numeric values (no currency symbols in the price field)
        3. Identify the currency symbol used ($, €, £, etc.) and include it separately
        4. Return an empty array if no products with prices are detected
        5. Do not make up any information not present in the text
        6. Extract bangla text as well
        """

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": text},
                ],
                temperature=0.1,
                max_tokens=1000
            )

            response_text = response.choices[0].message.content

            # Parse the response JSON
            try:
                # Try direct JSON parsing
                result = json.loads(response_text)

                # Add source text to each product entry
                for product in result:
                    product["source_text"] = text

                return result

            except json.JSONDecodeError:
                # Fallback if response isn't valid JSON - look for code blocks
                json_match = re.search(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
                if json_match:
                    try:
                        result = json.loads(json_match.group(1))
                        for product in result:
                            product["source_text"] = text
                        return result
                    except:
                        pass

                print(f"Failed to parse DeepSeek response as JSON: {response_text}")
                return []

        except Exception as e:
            print(f"Error calling DeepSeek API: {e}")
            return []

    def store_products(self, products: list[dict[str, object]]):
        """Store extracted products in the database"""
        for product in products:
            self.cursor.execute(
                "INSERT INTO products (name, price, currency, source_text, timestamp) VALUES (?, ?, ?, ?, ?)",
                (product["name"],
                 product["price"],
                 product["currency"],
                 product["source_text"],
                 datetime.now())
            )
        self.conn.commit()

    def process_text(self, text: str) -> list[dict[str, object]]:
        """Process text to extract and store products"""
        products = self.extract_products(text)
        if products:
            self.store_products(products)
        return products

    def process_voice(self, audio_file_path: str):
        """
        Placeholder for voice processing functionality
        In a real implementation, this would use a speech-to-text API
        then pass the text to process_text()
        """
        print(f"Voice processing not implemented in this demo. Would process: {audio_file_path}")
        return []

    def get_stored_products(self, limit: int = 10) -> list[dict[str, object]]:
        """Retrieve stored products from database"""
        self.cursor.execute(
            "SELECT id, name, price, currency, source_text, timestamp FROM products ORDER BY timestamp DESC LIMIT ?",
            (limit,)
        )
        columns = ["id", "name", "price", "currency", "source_text", "timestamp"]
        return [dict(zip(columns, row)) for row in self.cursor.fetchall()]

    def close(self):
        """Close database connection"""
        self.conn.close()

In [14]:
# Demo usage
def run_deepseek_demo():
    """Run the extractor on two sample sentences and print what was stored.

    The API key is read from the environment (after loading a ``.env`` file),
    or can be passed explicitly: ``DeepSeekProductExtractor(api_key="your-api-key")``.
    """
    load_dotenv()  # Load environment variables from .env file

    # Only the constructor raises the "missing API key" ValueError, so keep the
    # handler this narrow; unrelated ValueErrors must not print the key hint.
    try:
        extractor = DeepSeekProductExtractor()
    except ValueError as e:
        print(f"Error: {e}")
        print("To run this demo, you need to provide a DeepSeek API key.")
        print("Set it as an environment variable: export DEEPSEEK_API_KEY=your-key-here")
        return

    try:
        # Sample texts
        sample_texts = [
            "amare 5kg chal den 1000 taka",
            "I bought a Samsung Galaxy S23 for $999 and AirPods Pro for $249"
        ]

        # Process each sample text
        for text in sample_texts:
            print(f"\nProcessing text: {text}")
            products = extractor.process_text(text)
            # ensure_ascii=False keeps Bangla text and currency symbols readable
            # instead of printing \uXXXX escapes.
            print(f"Extracted products: {json.dumps(products, indent=2, ensure_ascii=False)}")

        # Show all stored products
        stored_products = extractor.get_stored_products()
        print("\nStored products in database:")
        for product in stored_products:
            source = product['source_text'] or ""
            # Append the ellipsis only when the text was actually truncated.
            preview = source[:30] + ("..." if len(source) > 30 else "")
            print(
                f"ID: {product['id']} | {product['name']} - {product['currency']}{product['price']} | Source: '{preview}'")
    finally:
        # Always release the database connection, even if processing failed.
        extractor.close()

In [15]:
# Run the demo only when this code executes as the main module (which includes
# running the cell in a notebook kernel), not when it is imported.
if __name__ == "__main__":
    run_deepseek_demo()


Processing text: amare 5kg chal den 1000 taka
Extracted products: [
  {
    "name": "5kg chal",
    "price": 1000,
    "currency": "\u09f3",
    "source_text": "amare 5kg chal den 1000 taka"
  }
]

Processing text: I bought a Samsung Galaxy S23 for $999 and AirPods Pro for $249
Extracted products: [
  {
    "name": "Samsung Galaxy S23",
    "price": 999,
    "currency": "$",
    "source_text": "I bought a Samsung Galaxy S23 for $999 and AirPods Pro for $249"
  },
  {
    "name": "AirPods Pro",
    "price": 249,
    "currency": "$",
    "source_text": "I bought a Samsung Galaxy S23 for $999 and AirPods Pro for $249"
  }
]

Stored products in database:
ID: 9 | AirPods Pro - $249.0 | Source: 'I bought a Samsung Galaxy S23 ...'
ID: 8 | Samsung Galaxy S23 - $999.0 | Source: 'I bought a Samsung Galaxy S23 ...'
ID: 7 | 5kg chal - ৳1000.0 | Source: 'amare 5kg chal den 1000 taka...'
ID: 6 | AirPods Pro - $249.0 | Source: 'I bought a Samsung Galaxy S23 ...'
ID: 5 | Samsung Galaxy S23 - $999.0 |