In [None]:
import sys

# Show which Python interpreter backs this kernel.
interpreter_version = sys.version
print(interpreter_version)

: 

In [None]:
# Ensure chromadb is importable in the environment backing this kernel.
# NOTE: the install below is unpinned, so a fresh environment gets whatever
# release pip resolves at run time; pin a version if reproducibility matters.
import importlib
import subprocess
import sys

try:
    import chromadb
except ImportError:
    # Install with the kernel's own interpreter so the package lands in the
    # environment this notebook is actually running in.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "chromadb"])
    # Packages installed after interpreter start-up can be invisible to the
    # import system's cached finders; refresh them before retrying the import.
    importlib.invalidate_caches()
    import chromadb


In [None]:
pip install chromadb

In [2]:
import os
import sys

# Absolute path of the parent of the current working directory; later cells
# import from `src...`, which is expected to live there.
PROJECT_ROOT = os.path.abspath("..")

# Guard the append so re-running this cell does not pile up duplicate
# sys.path entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# UdaPlay – RAG Pipeline Demonstration

This notebook demonstrates:

1. Loading and inspecting the `games.json` data
2. Initializing a persistent ChromaDB vector store via `VectorStoreManager`
3. Populating the vector store with embedded game data
4. Running semantic search queries
5. Verifying persistence across sessions


## 1. Setup and Imports

Load environment variables and import the necessary modules from the project.

In [3]:
import os
import json

from dotenv import load_dotenv

# Populate os.environ from a .env file, if python-dotenv finds one.
load_dotenv()

openai_key = os.getenv("OPENAI_API_KEY")
tavily_key = os.getenv("TAVILY_API_KEY")

# Report presence only; never print the key values themselves.
# Truthiness (rather than `is not None`) makes an empty entry such as
# `OPENAI_API_KEY=` show up as not set, since an empty key is unusable.
print("OPENAI_API_KEY set:", bool(openai_key))
print("TAVILY_API_KEY set:", bool(tavily_key))

OPENAI_API_KEY set: True
TAVILY_API_KEY set: True


## 2. Load and Inspect Game Data

We load the `games.json` file and inspect a few entries to confirm the structure.

In [4]:
# Location of the sample dataset, relative to the notebook's working directory.
data_path = os.path.join("..", "data", "games.json")
print("Data path:", data_path)

# Read the file as UTF-8 text, then parse the text as JSON.
with open(data_path, encoding="utf-8") as games_file:
    games = json.loads(games_file.read())

print("Number of games loaded:", len(games))
# Final expression: preview at most the first two entries.
games[:2]

Data path: ..\data\games.json
Number of games loaded: 1


[{'id': 'fifa-21',
  'title': 'FIFA 21',
  'developer': 'EA Vancouver; EA Romania',
  'publisher': 'Electronic Arts',
  'release_date': '2020-10-09',
  'platforms': ['PlayStation 4', 'Xbox One', 'PC', 'Nintendo Switch'],
  'genre': 'Sports',
  'description': 'FIFA 21 is a football simulation video game in the FIFA series.'}]

## 3. Initialize ChromaDB via VectorStoreManager

We now import and initialize the `VectorStoreManager`, which sets up a persistent ChromaDB instance and a `games` collection.

In [None]:
# Project-local import: resolves only once the project root has been added to
# sys.path by the earlier setup cell.
from src.vector_store_manager import VectorStoreManager

# Build the manager with its default arguments. The notebook text describes
# this as a persistent ChromaDB instance with a `games` collection.
# NOTE(review): the storage location is decided inside VectorStoreManager.
vsm = VectorStoreManager()
# Final expression: display the manager's client and collection as cell output.
vsm.client, vsm.collection



## 4. Populate the Vector Store with Game Data

We embed and insert the game documents into the ChromaDB collection using `populate_from_games`.

In [None]:
# Hand the loaded `games` records to the manager for insertion.
# NOTE(review): re-running this cell calls populate_from_games again; confirm
# that repeating it does not duplicate documents or re-embed unnecessarily.
vsm.populate_from_games(games)

# Document count reported by the collection after population; the persistence
# section compares against this number.
count = vsm.collection.count()
print("Number of documents in collection:", count)

## 5. Semantic Search Demonstration

We perform a semantic search query against the vector store to verify that embeddings and retrieval are working correctly.

In [None]:
# Free-text query to run against the vector store.
query = "football game"
# k=3 presumably caps the number of hits returned; confirm against
# VectorStoreManager.query.
results = vsm.query(query, k=3)

print("Query:", query)
# `results` exposes .keys(), so it is mapping-like; list its top-level fields.
print("Results keys:", results.keys())
# Final expression: display the full result object as cell output.
results

You should see the most relevant game(s) (e.g., FIFA 21) in the `documents` and `metadatas` fields above, confirming that semantic search is functional.

## 6. Persistence Verification

To verify persistence, you can:
1. **Stop the kernel** and restart it.
2. Re-run the setup cells (the `sys.path` and import cells) and then the code cell below to confirm that the document count remains the same.

Below is the code you should run after restarting the kernel to confirm persistence.

In [None]:
# After restarting the kernel, re-run the imports and this cell.
# The sys.path setup cell must run first so that `src` is importable.
from src.vector_store_manager import VectorStoreManager

# A fresh manager built with default arguments. This cell makes no populate
# call, so (assuming the constructor itself inserts nothing) any documents it
# reports were already in the store.
vsm_persist = VectorStoreManager()
persist_count = vsm_persist.collection.count()
# Compare this number by eye with the count printed after population.
print("Number of documents in collection after restart:", persist_count)

If the document count matches the earlier value, this confirms that the ChromaDB vector store is using persistent storage correctly.

## 7. (Optional) Agent Demo

Finally, we can demonstrate the full UdaPlay agent using the RAG pipeline and web search fallback.

In [None]:
# Project-local import; requires the project root on sys.path.
from src.agent_state import UdaPlayAgent

# The agent receives `vsm`, the manager created in section 3, so that cell
# must have been run in the current kernel session.
agent = UdaPlayAgent(vsm)

# Example question for the demo.
question = "Who developed FIFA 21?"
# NOTE(review): whether handle_query answers from the vector store or from a
# web-search fallback is decided inside the agent and is not visible here.
answer = agent.handle_query(question)
print("Question:", question)
print("\nAnswer:\n", answer)