In [None]:
# Ensure newer SQLite for ChromaDB compatibility.
# If pysqlite3 is importable, register it under the name 'sqlite3' so that later
# `import sqlite3` statements resolve to it; otherwise fall back to the stdlib module.
import sys

try:
    __import__('pysqlite3')
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    import sqlite3
    print(f"⚠️  Using system SQLite {sqlite3.sqlite_version} - may need pysqlite3-binary")
else:
    # Resolves to the module registered above; report the version it actually
    # bundles instead of a hard-coded number.
    import sqlite3
    print(f"✓ Using pysqlite3-binary for SQLite {sqlite3.sqlite_version}")

## Pluralsight Course: Building and Deploying RAG in Production

#### Demo: Basic RAG

In [None]:
# install the required libraries
!pip install -qqq llama-index llama-index-llms-openai llama-index-vector-stores-chroma


In [1]:
import os

# Load API key from environment (optionally from a .env file).
try:
    from dotenv import load_dotenv  # pip install python-dotenv (if not available)
    load_dotenv()
except ImportError:
    # python-dotenv is optional; rely on variables already set in the environment.
    pass
except Exception as exc:
    # Stay best-effort, but make the failure visible instead of hiding it.
    print(f"⚠️  Could not load .env file ({exc!r}); relying on the existing environment.")

# Fail fast with an actionable message when the key is missing or empty.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your shell or put it in a .env file."
    )

### Define Embedding and LLM Model

In [2]:
from llama_index.embeddings.openai import OpenAIEmbedding
# Define the embedding model. No arguments are passed, so OpenAIEmbedding's
# own defaults apply.
embed_model = OpenAIEmbedding()

In [3]:
from llama_index.llms.openai import OpenAI
# Define the LLM: the "gpt-4o" model with temperature set to 0.
llm = OpenAI(model="gpt-4o", temperature=0)

In [4]:
from llama_index.core import Settings

# Register the models created above on the global Settings object
# (the two assignments are independent of each other).
Settings.llm = llm
Settings.embed_model = embed_model

### Ingestion Pipeline

In [5]:
# Set the chunk size on the global Settings object.
# NOTE(review): presumably measured in tokens — confirm against the LlamaIndex docs.
Settings.chunk_size = 1024

In [7]:
# Import the directory reader and the token-based splitter.
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import TokenTextSplitter

# Load the files found under the data directory into `documents`.
# NOTE(review): the path is relative to the working directory and no `recursive`
# argument is passed — confirm whether files in subdirectories should be read.
documents = SimpleDirectoryReader("data/MovieWatchList/MovieWatchList").load_data()


In [8]:
# Define the chunking strategy: token-based splitting.
# Pass the globally configured chunk size explicitly; a splitter constructed
# without arguments uses its own default and does not read Settings.chunk_size.
text_splitter = TokenTextSplitter(chunk_size=Settings.chunk_size)

In [16]:
# Set up the vector database and the store that wraps it.
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Ephemeral client: a local, in-memory Chroma instance.
chroma_client = chromadb.EphemeralClient()
# Create the collection; get_or_create=True is passed so re-running the cell
# reuses an existing collection of the same name.
chroma_collection = chroma_client.create_collection(
    name="ps-foo-rag",
    get_or_create=True,
)
# Wrap the collection in a LlamaIndex vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)


Failed to send telemetry event ClientStartEvent: module 'chromadb' has no attribute 'get_settings'
Failed to send telemetry event ClientCreateCollectionEvent: module 'chromadb' has no attribute 'get_settings'
Failed to send telemetry event ClientCreateCollectionEvent: module 'chromadb' has no attribute 'get_settings'


In [15]:
# Check that the active sqlite3 module is new enough for ChromaDB.
import sqlite3

# ChromaDB requires SQLite >= 3.35.0
required_version = "3.35.0"
current_version = sqlite3.sqlite_version

print(f"Current SQLite version: {current_version}")
# sqlite3.version is deprecated since Python 3.12 and removed in 3.14, so read
# it defensively instead of raising AttributeError on newer interpreters.
print(f"Python sqlite3 module version: {getattr(sqlite3, 'version', 'n/a')}")

# Compare numerically using only the standard library, so this check does not
# depend on the third-party `packaging` package being installed.
required_parts = tuple(int(part) for part in required_version.split("."))
if sqlite3.sqlite_version_info < required_parts:
    print(f"❌ SQLite {current_version} is too old. ChromaDB needs >= {required_version}")
    # This cell only checks the version; it does not install anything.
    print("Install pysqlite3-binary to upgrade SQLite.")
else:
    print(f"✓ SQLite {current_version} is compatible with ChromaDB")

Current SQLite version: 3.46.1
Python sqlite3 module version: 2.6.0
✓ SQLite 3.46.1 is compatible with ChromaDB


In [12]:
# Fix SQLite version compatibility for ChromaDB
!pip install -qqq pysqlite3-binary packaging

# Now configure to use the newer SQLite
import sys
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# Verify the upgrade worked
import sqlite3
print(f"Updated SQLite version: {sqlite3.sqlite_version}")
print(f"Python sqlite3 module version: {sqlite3.version}")

# Now ChromaDB should work
import chromadb
print(f"✓ ChromaDB version: {chromadb.__version__}")
print("✓ ChromaDB imported successfully with updated SQLite")

Updated SQLite version: 3.46.1
Python sqlite3 module version: 2.6.0
✓ ChromaDB version: 1.1.0
✓ ChromaDB imported successfully with updated SQLite


In [17]:
# Build the ingestion pipeline: split documents into chunks, embed each chunk,
# and write the results to the vector store.
from llama_index.core.ingestion import IngestionPipeline

pipeline = IngestionPipeline(
    vector_store=vector_store,
    transformations=[text_splitter, embed_model],
)


In [18]:
# Run the ingestion pipeline over the loaded documents and report how many
# nodes (chunks) it returned.
nodes = pipeline.run(documents=documents)
print(f"number of nodes or chunks : {len(nodes)}")

Failed to send telemetry event CollectionAddEvent: module 'chromadb' has no attribute 'get_settings'


number of nodes or chunks : 4


### RAG Pipeline

In [19]:
from llama_index.core import VectorStoreIndex

# Build the index on top of the already-populated vector store, using the same
# embedding model that was used during ingestion.
vector_index = VectorStoreIndex.from_vector_store(embed_model=embed_model, vector_store=vector_store)

In [20]:
# Create a query engine from the vector index, using its default settings.
vector_query_engine = vector_index.as_query_engine()

In [24]:
# Send the documentation prompt to the query engine and print the answer.
# NOTE(review): the prompt contains the typo "taht"; it is kept because the
# recorded output was generated from this exact string.
response = vector_query_engine.query("Create markdown documentation taht describes this application and what it does and how to set it up. Please document classes using Mermaid")
print(response)

# Movie Watch List Application Documentation

## Overview

The Movie Watch List application is a web-based platform designed to help users manage and explore their movie watch lists. It integrates with The Movie Database (TMDB) API to fetch movie data and provides an interactive user interface using Razor Components.

## Features

- **Movie Data Integration**: Connects to TMDB API to retrieve movie information.
- **Interactive UI**: Utilizes Razor Components for a dynamic and responsive user experience.
- **Environment-Specific Configuration**: Supports different configurations for development and production environments.

## Setup Instructions

### Prerequisites

- .NET SDK installed on your machine.
- A valid TMDB API key.

### Configuration

1. **Clone the Repository**: Clone the application repository to your local machine.

2. **API Key Setup**: 
   - Navigate to the `appsettings.Development.json` file.
   - Replace `"YOUR_TMDB_API_KEY_HERE"` with your actual TMDB API key.

3. **B

***Sample Queries to test***

- When did Bar start his entrepreneurial journey?
- Who are the co-founders of the company?
- What is Bar's AI vision?
- Who is the CEO of company Foo, and what other companies has he started earlier?
