## Vector Search on PostgreSQL


### Prerequisites
  
- Generate embeddings - [generate_embeddings.ipynb](../common/generate_embeddings.ipynb) 
- Create table and ingest embeddings - [postgree_ingestion.ipynb](./postgree_ingestion.ipynb)

### Set environment variables

In [None]:
import os
from dotenv import load_dotenv


def _require_env(name):
    """Return the value of environment variable *name*.

    When the variable is unset or empty, print a message naming it and
    call ``exit()``.
    """
    value = os.getenv(name)
    if not value:
        print(f"{name} environment variable not set.")
        exit()
    return value


# Let python-dotenv populate the environment before anything is read from it.
load_dotenv()

# PostgreSQL connection settings (all mandatory).
pg_host = _require_env("POSTGRESQL_HOST")
pg_user = _require_env("POSTGRESQL_USERNAME")
pg_password = _require_env("POSTGRESQL_PASSWORD")
db_name = _require_env("POSTGRESQL_DATABASE")

# Azure OpenAI credentials and deployment details.
aoai_key = _require_env("AZURE_OPENAI_KEY")
aoai_endpoint = 'https://azure-openai-dnai.openai.azure.com'
aoai_api_version = '2023-08-01-preview'
aoai_embedding_deployed_model = 'embedding-ada'

# Names of the tables queried by the search cells below.
text_table_name = 'text_sample'
doc_table_name = 'doc_sample'
image_table_name = 'image_sample'

# Keyword arguments handed to psycopg2.connect(**postgresql_params).
postgresql_params = dict(
    host=pg_host,
    port="5432",
    dbname=db_name,
    user=pg_user,
    password=pg_password,
)




#### Simple vector search

In [None]:
import psycopg2 
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity

from psycopg2 import sql

# Free-text query to look up in the text table.
query = 'web hosting services'

# Point the OpenAI SDK at the Azure OpenAI resource configured above.
openai.api_type = "azure"
openai.api_key = aoai_key
openai.api_base = aoai_endpoint
openai.api_version = aoai_api_version

# Embed the query text with the deployed embedding model.
query_vector = get_embedding(query, engine=aoai_embedding_deployed_model)

# The table name is composed as a quoted identifier and the embedding is sent
# as a bound parameter instead of being spliced into the SQL text.
# NOTE(review): `<=>` is presumably pgvector's cosine-distance operator, so the
# ascending ORDER BY returns the closest rows first - confirm against the schema.
query_sql = sql.SQL(
    "SELECT title FROM {table} ORDER BY content_vector <=> %s LIMIT 5;"
).format(table=sql.Identifier(text_table_name))

connection = psycopg2.connect(**postgresql_params)
print("Connection established.")

try:
    # The cursor context manager closes the cursor on exit; the connection is
    # closed in `finally` so it is released even when the query fails.
    with connection.cursor() as cursor:
        # str(list_of_floats) yields the same '[x, y, ...]' literal the
        # original f-string embedded in the statement.
        cursor.execute(query_sql, (str(query_vector),))
        records = cursor.fetchall()
finally:
    connection.close()

# Each row holds a single column: the title.
for row in records:
    print(row[0])

### Cross column vector similarity search

In [None]:
## TODO

### Hybrid search

In [None]:
## TODO

#### Document search example

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
from azure.search.documents.models import Vector 
import pandas as pd 
from openai.embeddings_utils import get_embedding, cosine_similarity
import psycopg2 
import openai

# Question to match against the stored document chunks.
query = 'tools for software development'

# Authenticate the OpenAI SDK against the Azure OpenAI resource.
openai.api_type = "azure"
openai.api_version = aoai_api_version
openai.api_base = aoai_endpoint
openai.api_key = aoai_key

# Select up to 100 chunks together with their stored vectors; the similarity
# ranking itself is done client-side further down in this cell.
query_sql = (
    "\n"
    "    SELECT t1.chunk_content_vector as chunk_content_vector,\n"
    "           t1.chunk_content as chunk_content\n"
    f"    FROM {doc_table_name} t1\n"
    "    LIMIT 100;\n"
)
print("Query table")


# Convert the textual form of a stored vector into a list of Python floats
def to_double_precision_array(value):
    """Parse a bracketed, comma-separated vector string into a list of floats.

    Args:
        value: Text such as ``"[0.1, 0.2, 0.3]"``. Whitespace around the
            brackets and around individual numbers is ignored.

    Returns:
        The parsed numbers as a list of floats. An empty list is returned for
        an empty vector (``"[]"`` or ``""``) and for any non-string input.

    Raises:
        ValueError: If a comma-separated token is not a valid float.
    """
    if not isinstance(value, str):
        return []

    # Trim outer whitespace first so the bracket strip actually reaches the
    # brackets, then trim again for inputs like "[ ]".
    inner = value.strip().strip('[]').strip()
    if not inner:
        # ''.split(',') yields [''], and float('') raises; an empty vector
        # is simply an empty list.
        return []

    return [float(token) for token in inner.split(',')]


# Fetch the candidate rows. The connection is only needed for the fetch, so it
# is closed as soon as the rows and column names are in memory - also when the
# query raises.
connection = psycopg2.connect(**postgresql_params)
try:
    with connection.cursor() as cursor:
        cursor.execute(query_sql)
        records = cursor.fetchall()
        # Column names come from the cursor metadata of the executed query.
        column_names = [desc[0] for desc in cursor.description]
finally:
    connection.close()

# Build a DataFrame from the query results.
df_query_results = pd.DataFrame(records, columns=column_names)

# Embed the query text with the deployed embedding model.
query_vector = get_embedding(query, engine=aoai_embedding_deployed_model)

# Convert each stored vector from its textual form into a list of floats.
df_query_results['chunk_content_vector_array'] = (
    df_query_results['chunk_content_vector'].apply(to_double_precision_array)
)

# Score every chunk against the query embedding.
df_query_results["similarities"] = (
    df_query_results['chunk_content_vector_array']
    .apply(lambda x: cosine_similarity(x, query_vector))
)

# Sort the DataFrame by similarities in descending order (best match first).
df_query_results = df_query_results.sort_values(by="similarities", ascending=False)

# Display the results and similarities.
for _, row in df_query_results.iterrows():
    print("Content:", row["chunk_content"])
    print("Content Vector:", row["chunk_content_vector_array"])
    print("Similarity:", row["similarities"])
    print("\n")






#### Image search example

In [None]:
import psycopg2
import openai
from azure.search.documents.models import Vector  
from PIL import Image
import matplotlib.pyplot as plt

### <work in progress>
# Query text
query = 'flower with hand'

# Vectorize the query text using OpenAI's function or your preferred method
#query_vector = vectorize_text_com_vision(com_vision_endpoint, com_vision_key, query)
# NOTE(review): while the line above stays commented out, `query_vector` is
# whatever an earlier cell left behind (an embedding of a different query
# text). `vectorize_text_com_vision` is not defined in this notebook as seen.

# Search for similar image vectors in PostgreSQL.
# NOTE(review): this targets a table named `image_vectors`, while the setup
# cell defines image_table_name = 'image_sample', and it relies on a SQL
# function `cosine_similarity` - confirm both exist in the target database.
image_query_sql = """
    SELECT image_name
    FROM image_vectors
    WHERE cosine_similarity(image_vector, %s) > 0.7
"""

# Open the connection BEFORE executing, run the image query exactly once, and
# release the connection even when the query fails.
connection = psycopg2.connect(**postgresql_params)
try:
    with connection.cursor() as cursor:
        cursor.execute(image_query_sql, (query_vector,))
        results = cursor.fetchall()
finally:
    connection.close()

# Print or process the results (one image name per row).
for result in results:
    print(result[0])
