In [2]:
import os
from dotenv import load_dotenv
import qdrant_client
from sentence_transformers import SentenceTransformer
import pandas as pd
from IPython.display import display, HTML

# Load environment variables
load_dotenv("/Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/code/.env")
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

# Initialize Qdrant client and embedding model
client = qdrant_client.QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
model = SentenceTransformer("all-MiniLM-L6-v2")
print(f"Connected to Qdrant at {QDRANT_URL}")
print(f"Using embedding model: all-MiniLM-L6-v2")

def semantic_search(query_text, collection_name, limit=5):
    """Perform semantic search on the collection."""
    # Embed the query
    query_vector = model.encode(query_text).tolist()
    
    # Search the collection
    search_results = client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=limit,
        with_payload=True
    )
    
    return search_results

def display_search_results(results, query):
    """Display search results in a formatted DataFrame."""
    rows = []
    for i, result in enumerate(results):
        # Extract metadata
        metadata = {}
        if "metadata" in result.payload:
            metadata = result.payload["metadata"]
        else:
            # If metadata is directly in payload
            for key in result.payload:
                if key != "text":
                    metadata[key] = result.payload[key]
        
        # Format metadata as string
        metadata_str = ", ".join([f"{k}: {v}" for k, v in metadata.items()])
        
        rows.append({
            "Rank": i+1,
            "Score": f"{result.score:.4f}",
            "Text": result.payload.get("text", "No text found"),
            "Metadata": metadata_str
        })
    
    df = pd.DataFrame(rows)
    
    # Add query info
    print(f"Query: \"{query}\"")
    print(f"Results: {len(results)} items\n")
    
    # Show the dataframe
    return df

def test_collections():
    """Test all collections with sample queries."""
    # Get all collections
    collections = [c.name for c in client.get_collections().collections]
    print(f"Found {len(collections)} collections: {', '.join(collections)}")
    
    # Define test queries
    test_queries = [
        "women's rights in America",
        "education policy and reform",
        "views on family values",
        "political activism",
        "conservative philosophy"
    ]
    
    # Run tests
    results = {}
    for collection in collections:
        print(f"\n===== Testing Collection: {collection} =====")
        collection_results = {}
        
        for query in test_queries:
            print(f"\nRunning query: \"{query}\"")
            search_results = semantic_search(query, collection, limit=3)
            
            if search_results:
                df = display_search_results(search_results, query)
                display(df)
                collection_results[query] = search_results
            else:
                print("No results found.")
        
        results[collection] = collection_results
    
    return results

def interactive_search():
    """Run an interactive search across all collections."""
    # Get all collections
    collections = [c.name for c in client.get_collections().collections]
    
    # Create a query function for Jupyter interactivity
    def run_search(query, collection="all", limit=5):
        print(f"Searching for: \"{query}\"")
        
        if collection == "all":
            all_results = {}
            for coll in collections:
                print(f"\n----- Results from {coll} -----")
                results = semantic_search(query, coll, limit=limit)
                if results:
                    df = display_search_results(results, query)
                    display(df)
                    all_results[coll] = results
                else:
                    print("No results found.")
            return all_results
        else:
            if collection in collections:
                results = semantic_search(query, collection, limit=limit)
                if results:
                    df = display_search_results(results, query)
                    display(df)
                    return results
                else:
                    print("No results found.")
            else:
                print(f"Collection '{collection}' not found. Available collections: {', '.join(collections)}")
    
    print("Interactive search ready!")
    print(f"Available collections: {', '.join(collections)}")
    print("\nUsage examples:")
    print("run_search('education reform')  # Search all collections")
    print("run_search('family values', collection='book_chunks', limit=3)  # Search specific collection")
    
    return run_search

# Function to check basic stats of collections
def collection_stats():
    """Get statistics about each collection."""
    collections = client.get_collections().collections
    
    stats = []
    for collection in collections:
        try:
            info = client.get_collection(collection_name=collection.name)
            
            # Get a sample point to analyze payload structure
            sample_payload = {}
            if info.points_count > 0:
                sample = client.scroll(
                    collection_name=collection.name,
                    limit=1,
                    with_payload=True,
                    with_vectors=False
                )[0]
                sample_payload = sample.payload
            
            stats.append({
                "Collection": collection.name,
                "Points": info.points_count,
                "Vector Size": info.config.params.vectors.size,
                "Distance": info.config.params.vectors.distance,
                "Payload Structure": list(sample_payload.keys()) if sample_payload else "Empty"
            })
        except Exception as e:
            stats.append({
                "Collection": collection.name,
                "Error": str(e)
            })
    
    return pd.DataFrame(stats)

# Run collection statistics first to ensure everything is there
print("Checking collection statistics...")
stats_df = collection_stats()
display(stats_df)

# Now run the testing function
print("\nStarting collection testing...")
test_results = test_collections()

# Set up the interactive search for further testing
print("\nSetting up interactive search...")
run_search = interactive_search()

Connected to Qdrant at https://3031677a-6463-44f9-ba66-42977581720e.us-east-1-0.aws.cloud.qdrant.io
Using embedding model: all-MiniLM-L6-v2
Checking collection statistics...


Unnamed: 0,Collection,Error
0,psr_chunks,'list' object has no attribute 'payload'
1,book_chunks,'list' object has no attribute 'payload'
2,psc_chunks,'list' object has no attribute 'payload'



Starting collection testing...
Found 3 collections: psr_chunks, book_chunks, psc_chunks

===== Testing Collection: psr_chunks =====

Running query: "women's rights in America"
Query: "women's rights in America"
Results: 3 items



  search_results = client.search(


Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.7235,"The Right To Be A Woman\nW om en’ s magazines,...","title: ['The Right to Be a Woman'], date: Nove..."
1,2,0.6791,Don't Stoop to Equality\nThe women’s liberatio...,"title: ['Time Is Running Out On E. R. A.', ""Do..."
2,3,0.6282,"women to executive positions), the United Nati...","title: ['The Radical Goals of the Feminists', ..."



Running query: "education policy and reform"
Query: "education policy and reform"
Results: 3 items



  search_results = client.search(


Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.6219,"Out across America, the mood of the people is ...","title: [""Let's Abolish the Department of Educa..."
1,2,0.5658,"VOL. 15, NO. 3, SECTION 1\nBOX 618, ALTON, ILL...","title: [""The Law Is On Your Side: Parents' and..."
2,3,0.5629,"VOL. 7, NO. 10, SECTION 1\nBox 618, ALTON, ILL...","title: ['How ERA Will Hurt Divorced Women', 'P..."


  search_results = client.search(



Running query: "views on family values"
Query: "views on family values"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.6108,"VOL. 21, NO. 7, SECTION 2\nBOX 618, ALTON, ILL...","title: [""The Family: Preserving America's Futu..."
1,2,0.5362,"VOL. 23, NO. 9, SECTION 2\nBOX 618, ALTON, ILL...","title: ['The Anti-Communists Were Right, After..."
2,3,0.5334,"in poverty, while in 1986 (the latest year for...","title: ['Just Say NO to Tax Increases'], date:..."



Running query: "political activism"


  search_results = client.search(


Query: "political activism"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.4899,"captured by the feminists, the think tanks tha...",title: ['Understanding Feminists and Their Fan...
1,2,0.4642,Why Liberals Hate “ Single Issues”\nby Congres...,"title: ['Single Issue Voters', 'Why Liberals H..."
2,3,0.4536,The classrooms were not the only source of act...,"title: [""The UN Women's Conference in Nairobi""..."



Running query: "conservative philosophy"


  search_results = client.search(


Query: "conservative philosophy"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.6395,"V O L I, NO. 9\nBox 618, ALTON, ILLINOIS 62002...","title: ['Show Your Colors Now!', 'Michigan Con..."
1,2,0.6196,The reality today is that American conservativ...,"title: [""America's Future Is Conservative""], d..."
2,3,0.5283,page three\nI strongly urge you to get this im...,title: ['The Feminists Continue Their War Agai...



===== Testing Collection: book_chunks =====

Running query: "women's rights in America"
Query: "women's rights in America"
Results: 3 items



  search_results = client.search(


Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.6176,Before the feminist movement burst on the scen...,"author: Phyllis Schlafly, book_title: Who Kill..."
1,2,0.6096,The Stop ERA women showed that ERA was a fraud...,"author: Phyllis Schlafly, book_title: Who Kill..."
2,3,0.5988,Congress and the President should direct their...,"author: Phyllis Schlafly, book_title: Who Kill..."



Running query: "education policy and reform"
Query: "education policy and reform"
Results: 3 items



  search_results = client.search(


Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.5468,in the s. T his change was best capsuled b...,"author: Phyllis Schlafly, book_title: The Supr..."
1,2,0.5253,"What will it mean to enforce Article 28, which...","author: Phyllis Schlafly, book_title: Who Kill..."
2,3,0.5024,How did it happen that our public schools sudd...,"author: Phyllis Schlafly, book_title: The Supr..."



Running query: "views on family values"
Query: "views on family values"
Results: 3 items



  search_results = client.search(


Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.5928,"Glenn T. Stanton, director of Global Family Fo...","author: Phyllis Schlafly, book_title: Who Kill..."
1,2,0.5599,“WHO KILLED THE AMERICAN FAMILY?”\t\t\tfamily....,"author: Phyllis Schlafly, book_title: Who Kill..."
2,3,0.5197,based on the judge’s opinion of the best inter...,"author: Phyllis Schlafly, book_title: The Supr..."


  search_results = client.search(



Running query: "political activism"
Query: "political activism"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.417,At the 1980 Republican National Convention in ...,"author: Phyllis Schlafly, book_title: How the ..."
1,2,0.4149,Feminist ideology teaches that one of the majo...,"author: Phyllis Schlafly, book_title: Who Kill..."
2,3,0.4074,"On January 22, 2014, the members of the entire...","author: Phyllis Schlafly, book_title: How the ..."



Running query: "conservative philosophy"


  search_results = client.search(


Query: "conservative philosophy"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.5927,Conservative morality fits the family of the s...,"author: Phyllis Schlafly, book_title: Who Kill..."
1,2,0.5607,The poll reports that conservatives are the l...,"author: Phyllis Schlafly, book_title: Who Kill..."
2,3,0.5335,Reagan’s election as President in 1980 profoun...,"author: Phyllis Schlafly, book_title: How the ..."



===== Testing Collection: psc_chunks =====

Running query: "women's rights in America"


  search_results = client.search(


Query: "women's rights in America"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.7723,"To the feminists, ""women's rights"" is defined ...",title: An Intelligent Candidate's Guide to the...
1,2,0.6815,"To the feminine or pro-family woman, ""women's ...",title: An Intelligent Candidate's Guide to the...
2,3,0.6266,"In the topsy-turvy modern lexicon, the premier...","title: Abortion Exploits Women, date: November..."



Running query: "education policy and reform"


  search_results = client.search(


Query: "education policy and reform"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.7194,It is a bafflement how any Congressman could t...,title: Let's Abolish the Department of Educati...
1,2,0.6252,The National Education Association has been cr...,title: The Primary Republican Mission Is to Cu...
2,3,0.6128,"Education Reform, Ted Kennedy-Style\n\n\tby Ph...","title: Education Reform, Ted Kennedy-Style, da..."



Running query: "views on family values"


  search_results = client.search(


Query: "views on family values"
Results: 3 items



Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.5927,"The 64-page report called ""The Family: Preserv...","title: White House Report on the Family, date:..."
1,2,0.5801,We each have the freedom to choose our own val...,"title: The Two-Class American Society, date: F..."
2,3,0.5615,These anti-family attitudes have established t...,"title: The Two-Class American Society, date: F..."



Running query: "political activism"
Query: "political activism"
Results: 3 items



  search_results = client.search(


Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.5618,"""Political advocacy"" is defined as attempting ...","title: Tax-Funded Lobbying, date: April 8, 198..."
1,2,0.5223,Here are some samples of the home-made signs t...,"title: Who Lied?, date: September 18, 2009, au..."
2,3,0.5058,Political reality is that the national Democra...,"title: What Happened to the Democratic Party?,..."



Running query: "conservative philosophy"
Query: "conservative philosophy"
Results: 3 items



  search_results = client.search(


Unnamed: 0,Rank,Score,Text,Metadata
0,1,0.5787,"Finally, in 1980, Ronald Reagan mainstreamed c...",title: Don't Turn the Clock Back to Rockefelle...
1,2,0.5743,Upstream: The Ascendance of American Conservat...,"title: What's Happened to Teaching History?, d..."
2,3,0.5713,we\n\nThen came Ronald Reagan who mainstreamed...,"title: Conservatism is Alive and Well, date: N..."



Setting up interactive search...
Interactive search ready!
Available collections: psr_chunks, book_chunks, psc_chunks

Usage examples:
run_search('education reform')  # Search all collections
run_search('family values', collection='book_chunks', limit=3)  # Search specific collection
