In [2]:
from model import models, schemas
from utils import database
import os
import uuid
import sqlite3
from tqdm import tqdm


class BDDChunksSQLite:
    """
    A class to process reviews from a SQLite database and store them as chunks with embeddings.
    
    Each review is considered a single chunk.
    """

    def __init__(self):
        """
        Initialize the BDDChunksSQLite instance.

        Args:
            sqlite_db_path (str): Path to the SQLite database file.
            reviews_table (str): Name of the table containing reviews and restaurant information.
            embeddings_table (str): Name of the table where embeddings will be stored.
        """
        # self.sqlite_db_path = sqlite_db_path
        # self.reviews_table = reviews_table
        # self.embeddings_table = embeddings_table
        self.db = self.get_db()

    def get_db(self):
        return database.SessionLocal()
    

    def fetch_reviews_from_db(self) -> list[tuple[str, str]]:
        """
        Fetch reviews and their associated restaurant names from the SQLite database.

        Returns:
            list[tuple[str, str]]: A list of tuples containing restaurant names and reviews.
        """
        conn = sqlite3.connect(self.sqlite_db_path)
        cursor = conn.cursor()
        query = f"SELECT restaurant_name, review FROM {self.reviews_table};"
        cursor.execute(query)
        data = cursor.fetchall()
        conn.close()
        return data

    def generate_fake_embedding(self, text: str) -> list[float]:
        """
        Generate a fake embedding for a given text. Replace this with your embedding logic.

        Args:
            text (str): The text for which to generate an embedding.

        Returns:
            list[float]: A list representing the embedding vector.
        """
        # Example: Return the length of each word in the text as a fake embedding.
        return [len(word) for word in text.split()]

    def store_chunk_in_db(self, restaurant_name: str, chunk: str, embedding: list[float]) -> None:
        """
        Store a chunk and its embedding in the SQLite database.

        Args:
            restaurant_name (str): The name of the associated restaurant.
            chunk (str): The text chunk (in this case, the full review).
            embedding (list[float]): The embedding vector for the chunk.
        """
        conn = sqlite3.connect(self.sqlite_db_path)
        cursor = conn.cursor()

        # Ensure the embeddings table exists
        cursor.execute(f"""
        CREATE TABLE IF NOT EXISTS {self.embeddings_table} (
            id TEXT PRIMARY KEY,
            restaurant_name TEXT,
            chunk TEXT,
            embedding TEXT
        );
        """)

        embedding_str = ",".join(map(str, embedding))
        cursor.execute(
            f"INSERT INTO {self.embeddings_table} (id, restaurant_name, chunk, embedding) VALUES (?, ?, ?, ?);",
            (str(uuid.uuid4()), restaurant_name, chunk, embedding_str),
        )
        conn.commit()
        conn.close()

    def process_reviews(self) -> None:
        """
        Process each review as a single chunk, generate embeddings, and store them in the database.

        This method:
        1. Fetches reviews and restaurant names from the SQLite database.
        2. Generates embeddings for each review.
        3. Stores the reviews and embeddings in the SQLite database.
        """
        data = self.fetch_reviews_from_db()

        for restaurant_name, review in tqdm(data, desc="Processing Reviews"):
            embedding = self.generate_fake_embedding(review)  # Generate embedding for the review (chunk)
            self.store_chunk_in_db(restaurant_name, review, embedding)


# Example usage
sqlite_db_path = "path/to/your/database.sqlite"  # Path to your SQLite DB
reviews_table = "reviews"  # Table containing reviews
embeddings_table = "embeddings"  # Table to store embeddings

bdd_chunks = BDDChunksSQLite(sqlite_db_path, reviews_table, embeddings_table)
bdd_chunks.process_reviews()


In [1]:
import os
import uuid
import sqlite3
from tqdm import tqdm


class BDDChunksSQLite:
    """
    A class to process reviews from a SQLite database and store them as chunks with embeddings.
    
    Each review is considered a single chunk.
    """

    def __init__(self, sqlite_db_path: str, reviews_table: str, embeddings_table: str):
        """
        Initialize the BDDChunksSQLite instance.

        Args:
            sqlite_db_path (str): Path to the SQLite database file.
            reviews_table (str): Name of the table containing reviews and restaurant information.
            embeddings_table (str): Name of the table where embeddings will be stored.
        """
        self.sqlite_db_path = sqlite_db_path
        self.reviews_table = reviews_table
        self.embeddings_table = embeddings_table

    def fetch_reviews_from_db(self) -> list[tuple[str, str]]:
        """
        Fetch reviews and their associated restaurant names from the SQLite database.

        Returns:
            list[tuple[str, str]]: A list of tuples containing restaurant names and reviews.
        """
        conn = sqlite3.connect(self.sqlite_db_path)
        cursor = conn.cursor()
        query = f"SELECT restaurant_name, review FROM {self.reviews_table};"
        cursor.execute(query)
        data = cursor.fetchall()
        conn.close()
        return data

    def generate_fake_embedding(self, text: str) -> list[float]:
        """
        Generate a fake embedding for a given text. Replace this with your embedding logic.

        Args:
            text (str): The text for which to generate an embedding.

        Returns:
            list[float]: A list representing the embedding vector.
        """
        # Example: Return the length of each word in the text as a fake embedding.
        return [len(word) for word in text.split()]

    def store_chunk_in_db(self, restaurant_name: str, chunk: str, embedding: list[float]) -> None:
        """
        Store a chunk and its embedding in the SQLite database.

        Args:
            restaurant_name (str): The name of the associated restaurant.
            chunk (str): The text chunk (in this case, the full review).
            embedding (list[float]): The embedding vector for the chunk.
        """
        conn = sqlite3.connect(self.sqlite_db_path)
        cursor = conn.cursor()

        # Ensure the embeddings table exists
        cursor.execute(f"""
        CREATE TABLE IF NOT EXISTS {self.embeddings_table} (
            id TEXT PRIMARY KEY,
            restaurant_name TEXT,
            chunk TEXT,
            embedding TEXT
        );
        """)

        embedding_str = ",".join(map(str, embedding))
        cursor.execute(
            f"INSERT INTO {self.embeddings_table} (id, restaurant_name, chunk, embedding) VALUES (?, ?, ?, ?);",
            (str(uuid.uuid4()), restaurant_name, chunk, embedding_str),
        )
        conn.commit()
        conn.close()

    def process_reviews(self) -> None:
        """
        Process each review as a single chunk, generate embeddings, and store them in the database.

        This method:
        1. Fetches reviews and restaurant names from the SQLite database.
        2. Generates embeddings for each review.
        3. Stores the reviews and embeddings in the SQLite database.
        """
        data = self.fetch_reviews_from_db()

        for restaurant_name, review in tqdm(data, desc="Processing Reviews"):
            embedding = self.generate_fake_embedding(review)  # Generate embedding for the review (chunk)
            self.store_chunk_in_db(restaurant_name, review, embedding)


# Example usage
sqlite_db_path = "path/to/your/database.sqlite"  # Path to your SQLite DB
reviews_table = "reviews"  # Table containing reviews
embeddings_table = "embeddings"  # Table to store embeddings

bdd_chunks = BDDChunksSQLite(sqlite_db_path, reviews_table, embeddings_table)
bdd_chunks.process_reviews()


OperationalError: unable to open database file