In [2]:
from pathlib import Path
from uuid import uuid4
import json
import os
import sys
from dotenv import load_dotenv
from tqdm import tqdm
import qdrant_client
from qdrant_client.models import PointStruct, VectorParams, Distance
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load environment variables - with fallback and error handling
try:
    load_dotenv("/Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/code/.env")
    # Alternative: use relative path or allow custom path input
    # load_dotenv(Path.cwd().parent / "code" / ".env")
    
    QDRANT_URL = os.getenv("QDRANT_URL")
    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
    
    if not QDRANT_URL or not QDRANT_API_KEY:
        raise ValueError("QDRANT_URL or QDRANT_API_KEY environment variables not found")
    
    # Initialize Qdrant client and embedding model
    client = qdrant_client.QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
    print(f"Successfully connected to Qdrant at {QDRANT_URL}")
    
    model = SentenceTransformer("all-MiniLM-L6-v2")
    print(f"Loaded embedding model: all-MiniLM-L6-v2 (output dimension: {model.get_sentence_embedding_dimension()})")
except Exception as e:
    print(f"Error during initialization: {e}")
    sys.exit(1)

Successfully connected to Qdrant at https://3031677a-6463-44f9-ba66-42977581720e.us-east-1-0.aws.cloud.qdrant.io
Loaded embedding model: all-MiniLM-L6-v2 (output dimension: 384)


In [4]:
def ensure_collection(name):
    """Create collection if it doesn't exist."""
    try:
        collections = [c.name for c in client.get_collections().collections]
        if name not in collections:
            client.recreate_collection(
                collection_name=name,
                vectors_config=VectorParams(size=model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
            )
            print(f"Created collection: {name}")
        else:
            print(f"Collection {name} already exists")
    except Exception as e:
        print(f"Error ensuring collection {name}: {e}")
        raise

def embed_and_upload(chunks, collection_name):
    """Embed text chunks and upload them to Qdrant."""
    if not chunks:
        print(f"Warning: No chunks to upload to {collection_name}")
        return
    
    print(f"Processing {len(chunks)} chunks for collection {collection_name}...")
    ensure_collection(collection_name)
    
    # Process in batches to avoid memory issues with large datasets
    batch_size = 100
    for i in range(0, len(chunks), batch_size):
        batch = chunks[i:i+batch_size]
        texts = [chunk["text"] for chunk in batch]
        
        # Embed texts
        try:
            embeddings = model.encode(texts, show_progress_bar=True).tolist()
        except Exception as e:
            print(f"Error embedding batch starting at index {i}: {e}")
            continue
            
        points = []
        for chunk, embedding in zip(batch, embeddings):
            payload = chunk.get("metadata", {}).copy()
            payload["text"] = chunk["text"]
            points.append(
                PointStruct(
                    id=str(uuid4()),
                    vector=embedding,
                    payload=payload
                )
            )
        
        try:
            client.upload_points(collection_name=collection_name, points=points)
            print(f"Uploaded batch {i//batch_size + 1}/{(len(chunks)-1)//batch_size + 1} to {collection_name}")
        except Exception as e:
            print(f"Error uploading batch to {collection_name}: {e}")

def load_book_chunks(book_dir):
    """Load chunks from book JSON files."""
    chunks = []
    book_dir = Path(book_dir)
    if not book_dir.exists():
        print(f"Warning: Book directory {book_dir} does not exist")
        return chunks
        
    json_files = list(book_dir.glob("*.json"))
    print(f"Found {len(json_files)} book JSON files")
    
    for file in tqdm(json_files, desc="Loading book chunks"):
        try:
            with open(file, "r", encoding="utf-8") as f:
                book_data = json.load(f)
                for entry in book_data:
                    chunks.append({
                        "text": entry["text"],
                        "metadata": {
                            "author": entry.get("author", "Unknown"),
                            "book_title": entry.get("book_title", "Unknown"),
                            "publication_year": entry.get("publication_year", "Unknown"),
                            "doc_type": "Phyllis Schlafly Book",
                            "source_file": file.name
                        }
                    })
        except Exception as e:
            print(f"Error loading {file}: {e}")
    
    print(f"Loaded {len(chunks)} total book chunks")
    return chunks

def load_psr_chunks(path):
    """Load PSR chunks from JSON file."""
    path = Path(path)
    if not path.exists():
        print(f"Warning: PSR file {path} does not exist")
        return []
        
    try:
        with open(path, "r", encoding="utf-8") as f:
            chunks = json.load(f)
            print(f"Loaded {len(chunks)} PSR chunks from {path}")
            return chunks
    except Exception as e:
        print(f"Error loading PSR chunks from {path}: {e}")
        return []

def load_psc_chunks(psc_dir):
    """Load PSC chunks from JSON files."""
    chunks = []
    psc_dir = Path(psc_dir)
    if not psc_dir.exists():
        print(f"Warning: PSC directory {psc_dir} does not exist")
        return chunks
        
    json_files = list(psc_dir.glob("psc_*.json"))
    print(f"Found {len(json_files)} PSC JSON files")
    
    for file in tqdm(json_files, desc="Loading PSC chunks"):
        try:
            with open(file, "r", encoding="utf-8") as f:
                year_data = json.load(f)
                if "chunks" in year_data:
                    # Add source file metadata
                    for chunk in year_data["chunks"]:
                        if "metadata" not in chunk:
                            chunk["metadata"] = {}
                        chunk["metadata"]["source_file"] = file.name
                    
                    chunks.extend(year_data["chunks"])
                else:
                    print(f"Warning: No 'chunks' key in {file}")
        except Exception as e:
            print(f"Error loading {file}: {e}")
    
    print(f"Loaded {len(chunks)} total PSC chunks")
    return chunks

# Function to check if paths exist
def check_paths(paths):
    """Check if all paths exist and return valid ones."""
    valid_paths = {}
    for name, path in paths.items():
        path_obj = Path(path)
        if path_obj.exists():
            valid_paths[name] = path_obj
            print(f"✓ {name} path exists: {path}")
        else:
            print(f"✗ {name} path does not exist: {path}")
    return valid_paths

# Main execution
def main():
    # Define paths
    paths = {
        "book_path": "/Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/chunks/books",
        "psr_path": "/Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/chunks/psr_chunks.json",
        "psc_path": "/Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/chunks/psc_chunks"
    }
    
    # Check paths before proceeding
    valid_paths = check_paths(paths)
    
    # Load chunks from valid paths
    all_chunks = {}
    
    if "book_path" in valid_paths:
        book_chunks = load_book_chunks(valid_paths["book_path"])
        all_chunks["book_chunks"] = book_chunks
    
    if "psr_path" in valid_paths:
        psr_chunks = load_psr_chunks(valid_paths["psr_path"])
        all_chunks["psr_chunks"] = psr_chunks
    
    if "psc_path" in valid_paths:
        psc_chunks = load_psc_chunks(valid_paths["psc_path"])
        all_chunks["psc_chunks"] = psc_chunks
    
    # Upload all chunks to Qdrant
    for collection_name, chunks in all_chunks.items():
        print(f"\nProcessing collection: {collection_name}")
        embed_and_upload(chunks, collection_name)
        print(f"Completed upload to {collection_name}")

# # In a Jupyter notebook, you might want to run this directly rather than as a function
# # For production code, using the function is better
# if __name__ == "__main__":
#     main()
# else:
#     # When imported as a module or run in Jupyter
#     print("Script loaded and ready. Run main() to process and upload all chunks.")

In [5]:
main()

✓ book_path path exists: /Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/chunks/books
✓ psr_path path exists: /Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/chunks/psr_chunks.json
✓ psc_path path exists: /Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/chunks/psc_chunks
Found 3 book JSON files


Loading book chunks: 100%|██████████| 3/3 [00:00<00:00, 171.60it/s]

Loaded 693 total book chunks





Loaded 4621 PSR chunks from /Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/chunks/psr_chunks.json
Found 42 PSC JSON files


Loading PSC chunks: 100%|██████████| 42/42 [00:00<00:00, 119.66it/s]
  client.recreate_collection(


Loaded 24034 total PSC chunks

Processing collection: book_chunks
Processing 693 chunks for collection book_chunks...
Created collection: book_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.23s/it]


Uploaded batch 1/7 to book_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.15s/it]


Uploaded batch 2/7 to book_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.15s/it]


Uploaded batch 3/7 to book_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.12s/it]


Uploaded batch 4/7 to book_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.17it/s]


Uploaded batch 5/7 to book_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.36it/s]


Uploaded batch 6/7 to book_chunks


Batches: 100%|██████████| 3/3 [00:03<00:00,  1.01s/it]


Uploaded batch 7/7 to book_chunks
Completed upload to book_chunks

Processing collection: psr_chunks
Processing 4621 chunks for collection psr_chunks...
Created collection: psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.10it/s]


Uploaded batch 1/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.16s/it]


Uploaded batch 2/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.11it/s]


Uploaded batch 3/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.00s/it]


Uploaded batch 4/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.09it/s]


Uploaded batch 5/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.05it/s]


Uploaded batch 6/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.11it/s]


Uploaded batch 7/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.05it/s]


Uploaded batch 8/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.13s/it]


Uploaded batch 9/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.17s/it]


Uploaded batch 10/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.04s/it]


Uploaded batch 11/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.01s/it]


Uploaded batch 12/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Uploaded batch 13/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:05<00:00,  1.32s/it]


Uploaded batch 14/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:16<00:00,  4.05s/it]


Uploaded batch 15/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:07<00:00,  1.75s/it]


Uploaded batch 16/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:07<00:00,  1.93s/it]


Uploaded batch 17/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.10s/it]


Uploaded batch 18/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.03it/s]


Uploaded batch 19/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.04it/s]


Uploaded batch 20/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.03s/it]


Uploaded batch 21/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.01s/it]


Uploaded batch 22/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Uploaded batch 23/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Uploaded batch 24/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Uploaded batch 25/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.04it/s]


Uploaded batch 26/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.05s/it]


Uploaded batch 27/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.05s/it]


Uploaded batch 28/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.04it/s]


Uploaded batch 29/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.00it/s]


Uploaded batch 30/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.01it/s]


Uploaded batch 31/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.14it/s]


Uploaded batch 32/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.03it/s]


Uploaded batch 33/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.12s/it]


Uploaded batch 34/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.05s/it]


Uploaded batch 35/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Uploaded batch 36/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.12it/s]


Uploaded batch 37/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.07it/s]


Uploaded batch 38/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.00it/s]


Uploaded batch 39/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.05s/it]


Uploaded batch 40/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.08it/s]


Uploaded batch 41/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.04s/it]


Uploaded batch 42/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.07it/s]


Uploaded batch 43/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.08it/s]


Uploaded batch 44/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.05s/it]


Uploaded batch 45/47 to psr_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.12s/it]


Uploaded batch 46/47 to psr_chunks


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


Uploaded batch 47/47 to psr_chunks
Completed upload to psr_chunks

Processing collection: psc_chunks
Processing 24034 chunks for collection psc_chunks...
Created collection: psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  3.05it/s]


Uploaded batch 1/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.67it/s]


Uploaded batch 2/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.70it/s]


Uploaded batch 3/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.81it/s]


Uploaded batch 4/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  3.20it/s]


Uploaded batch 5/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.54it/s]


Uploaded batch 6/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.67it/s]


Uploaded batch 7/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.87it/s]


Uploaded batch 8/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  3.01it/s]


Uploaded batch 9/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  3.15it/s]


Uploaded batch 10/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  3.07it/s]


Uploaded batch 11/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.72it/s]


Uploaded batch 12/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.92it/s]


Uploaded batch 13/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s]


Uploaded batch 14/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Uploaded batch 15/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.67it/s]


Uploaded batch 16/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.82it/s]


Uploaded batch 17/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.63it/s]


Uploaded batch 18/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.71it/s]


Uploaded batch 19/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.62it/s]


Uploaded batch 20/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.50it/s]


Uploaded batch 21/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 22/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.76it/s]


Uploaded batch 23/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.98it/s]


Uploaded batch 24/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.07it/s]


Uploaded batch 25/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.04it/s]


Uploaded batch 26/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.73it/s]


Uploaded batch 27/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 28/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.69it/s]


Uploaded batch 29/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.63it/s]


Uploaded batch 30/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.59it/s]


Uploaded batch 31/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.54it/s]


Uploaded batch 32/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.61it/s]


Uploaded batch 33/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.69it/s]


Uploaded batch 34/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.71it/s]


Uploaded batch 35/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.78it/s]


Uploaded batch 36/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.70it/s]


Uploaded batch 37/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 38/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.71it/s]


Uploaded batch 39/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.33it/s]


Uploaded batch 40/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.34it/s]


Uploaded batch 41/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.30it/s]


Uploaded batch 42/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.32it/s]


Uploaded batch 43/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.26it/s]


Uploaded batch 44/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.93it/s]


Uploaded batch 45/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.89it/s]


Uploaded batch 46/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.85it/s]


Uploaded batch 47/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.83it/s]


Uploaded batch 48/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.16it/s]


Uploaded batch 49/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.40it/s]


Uploaded batch 50/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.70it/s]


Uploaded batch 51/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.94it/s]


Uploaded batch 52/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.84it/s]


Uploaded batch 53/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.60it/s]


Uploaded batch 54/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.85it/s]


Uploaded batch 55/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.60it/s]


Uploaded batch 56/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.77it/s]


Uploaded batch 57/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.89it/s]


Uploaded batch 58/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.72it/s]


Uploaded batch 59/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.76it/s]


Uploaded batch 60/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.44it/s]


Uploaded batch 61/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.50it/s]


Uploaded batch 62/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Uploaded batch 63/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.17it/s]


Uploaded batch 64/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.29it/s]


Uploaded batch 65/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.21it/s]


Uploaded batch 66/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.54it/s]


Uploaded batch 67/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.61it/s]


Uploaded batch 68/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.85it/s]


Uploaded batch 69/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 70/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.48it/s]


Uploaded batch 71/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.31it/s]


Uploaded batch 72/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.43it/s]


Uploaded batch 73/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 74/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Uploaded batch 75/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 76/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.84it/s]


Uploaded batch 77/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.56it/s]


Uploaded batch 78/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Uploaded batch 79/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.34it/s]


Uploaded batch 80/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.57it/s]


Uploaded batch 81/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.29it/s]


Uploaded batch 82/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.22it/s]


Uploaded batch 83/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.41it/s]


Uploaded batch 84/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.51it/s]


Uploaded batch 85/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.36it/s]


Uploaded batch 86/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.75it/s]


Uploaded batch 87/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.46it/s]


Uploaded batch 88/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.48it/s]


Uploaded batch 89/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.77it/s]


Uploaded batch 90/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Uploaded batch 91/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Uploaded batch 92/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.56it/s]


Uploaded batch 93/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.69it/s]


Uploaded batch 94/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.67it/s]


Uploaded batch 95/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.65it/s]


Uploaded batch 96/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.62it/s]


Uploaded batch 97/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.73it/s]


Uploaded batch 98/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.48it/s]


Uploaded batch 99/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.55it/s]


Uploaded batch 100/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.80it/s]


Uploaded batch 101/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.60it/s]


Uploaded batch 102/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.60it/s]


Uploaded batch 103/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.19it/s]


Uploaded batch 104/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.05s/it]


Uploaded batch 105/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.10it/s]


Uploaded batch 106/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.14it/s]


Uploaded batch 107/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.51it/s]


Uploaded batch 108/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.54it/s]


Uploaded batch 109/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.66it/s]


Uploaded batch 110/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.66it/s]


Uploaded batch 111/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.66it/s]


Uploaded batch 112/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.60it/s]


Uploaded batch 113/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.53it/s]


Uploaded batch 114/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.45it/s]


Uploaded batch 115/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 116/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.27it/s]


Uploaded batch 117/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.52it/s]


Uploaded batch 118/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.12s/it]


Uploaded batch 119/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.25it/s]


Uploaded batch 120/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.36it/s]


Uploaded batch 121/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.34it/s]


Uploaded batch 122/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.60it/s]


Uploaded batch 123/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.73it/s]


Uploaded batch 124/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.34it/s]


Uploaded batch 125/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.05s/it]


Uploaded batch 126/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.08s/it]


Uploaded batch 127/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.13it/s]


Uploaded batch 128/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.02s/it]


Uploaded batch 129/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.41it/s]


Uploaded batch 130/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.33it/s]


Uploaded batch 131/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.31it/s]


Uploaded batch 132/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.19it/s]


Uploaded batch 133/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.32it/s]


Uploaded batch 134/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.38it/s]


Uploaded batch 135/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.59it/s]


Uploaded batch 136/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.72it/s]


Uploaded batch 137/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.62it/s]


Uploaded batch 138/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.69it/s]


Uploaded batch 139/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Uploaded batch 140/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.85it/s]


Uploaded batch 141/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.88it/s]


Uploaded batch 142/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.90it/s]


Uploaded batch 143/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.79it/s]


Uploaded batch 144/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.62it/s]


Uploaded batch 145/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 146/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.50it/s]


Uploaded batch 147/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.56it/s]


Uploaded batch 148/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.66it/s]


Uploaded batch 149/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.84it/s]


Uploaded batch 150/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.69it/s]


Uploaded batch 151/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.00it/s]


Uploaded batch 152/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.58it/s]


Uploaded batch 153/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.55it/s]


Uploaded batch 154/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.13it/s]


Uploaded batch 155/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.44it/s]


Uploaded batch 156/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.36it/s]


Uploaded batch 157/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.53it/s]


Uploaded batch 158/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.65it/s]


Uploaded batch 159/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.44it/s]


Uploaded batch 160/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.02s/it]


Uploaded batch 161/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.44it/s]


Uploaded batch 162/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.57it/s]


Uploaded batch 163/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.30it/s]


Uploaded batch 164/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Uploaded batch 165/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.78it/s]


Uploaded batch 166/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.81it/s]


Uploaded batch 167/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.78it/s]


Uploaded batch 168/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 169/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.33it/s]


Uploaded batch 170/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.87it/s]


Uploaded batch 171/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.77it/s]


Uploaded batch 172/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.30it/s]


Uploaded batch 173/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 174/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.57it/s]


Uploaded batch 175/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.63it/s]


Uploaded batch 176/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.54it/s]


Uploaded batch 177/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.39it/s]


Uploaded batch 178/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.87it/s]


Uploaded batch 179/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.90it/s]


Uploaded batch 180/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.87it/s]


Uploaded batch 181/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.58it/s]


Uploaded batch 182/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.44it/s]


Uploaded batch 183/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 184/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.08it/s]


Uploaded batch 185/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.63it/s]


Uploaded batch 186/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.57it/s]


Uploaded batch 187/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.54it/s]


Uploaded batch 188/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Uploaded batch 189/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.02s/it]


Uploaded batch 190/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.09s/it]


Uploaded batch 191/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.33it/s]


Uploaded batch 192/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.52it/s]


Uploaded batch 193/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.58it/s]


Uploaded batch 194/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.48it/s]


Uploaded batch 195/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.41it/s]


Uploaded batch 196/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.49it/s]


Uploaded batch 197/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.74it/s]


Uploaded batch 198/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.92it/s]


Uploaded batch 199/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Uploaded batch 200/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 201/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.01s/it]


Uploaded batch 202/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.09s/it]


Uploaded batch 203/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.39it/s]


Uploaded batch 204/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.17it/s]


Uploaded batch 205/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.59it/s]


Uploaded batch 206/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.06s/it]


Uploaded batch 207/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.15s/it]


Uploaded batch 208/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.72it/s]


Uploaded batch 209/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.43it/s]


Uploaded batch 210/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.49it/s]


Uploaded batch 211/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 212/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Uploaded batch 213/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.03s/it]


Uploaded batch 214/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.13it/s]


Uploaded batch 215/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.09it/s]


Uploaded batch 216/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.11s/it]


Uploaded batch 217/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.11s/it]


Uploaded batch 218/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.01it/s]


Uploaded batch 219/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.72it/s]


Uploaded batch 220/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 221/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.78it/s]


Uploaded batch 222/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.09s/it]


Uploaded batch 223/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:04<00:00,  1.13s/it]


Uploaded batch 224/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.48it/s]


Uploaded batch 225/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Uploaded batch 226/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.53it/s]


Uploaded batch 227/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Uploaded batch 228/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.62it/s]


Uploaded batch 229/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.21it/s]


Uploaded batch 230/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.57it/s]


Uploaded batch 231/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.47it/s]


Uploaded batch 232/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.57it/s]


Uploaded batch 233/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.52it/s]


Uploaded batch 234/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.57it/s]


Uploaded batch 235/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.45it/s]


Uploaded batch 236/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.72it/s]


Uploaded batch 237/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.80it/s]


Uploaded batch 238/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.79it/s]


Uploaded batch 239/241 to psc_chunks


Batches: 100%|██████████| 4/4 [00:02<00:00,  1.86it/s]


Uploaded batch 240/241 to psc_chunks


Batches: 100%|██████████| 2/2 [00:01<00:00,  1.74it/s]


Uploaded batch 241/241 to psc_chunks
Completed upload to psc_chunks


In [6]:
# Check if processing completed successfully
for collection_name in ["book_chunks", "psr_chunks", "psc_chunks"]:
    try:
        info = client.get_collection(collection_name=collection_name)
        print(f"Collection '{collection_name}' contains {info.points_count} points")
    except Exception as e:
        print(f"Error checking collection '{collection_name}': {e}")

Collection 'book_chunks' contains 693 points
Collection 'psr_chunks' contains 4621 points
Collection 'psc_chunks' contains 24034 points
