# Failing use of `ogbujipt.embedding_helper.PGvectorConnection`



## Initial setup and Imports

In [29]:
import os

# Connection settings for the PostgreSQL database used by this notebook.
# Every value can be overridden through the matching libpq environment
# variable; the fallbacks are the demo defaults this notebook has always used.
HOST = os.environ.get('PGHOST', 'localhost')
DB_NAME = os.environ.get('PGDATABASE', 'PGv')
PORT = int(os.environ.get('PGPORT', '5432'))  # Environment values are strings; keep PORT an int
USER = os.environ.get('PGUSER', 'oori')
# Demo-only fallback; set PGPASSWORD instead of editing a real password into the notebook
PASSWORD = os.environ.get('PGPASSWORD', 'example')

In [30]:
from ogbujipt.embedding.pgvector import DocDB
from sentence_transformers       import SentenceTransformer

e_model = SentenceTransformer('all-MiniLM-L6-v2')  # Embedding model; passed to DocDB.from_conn_params below as embedding_model

# Demo corpus: each list entry becomes one row when the data is inserted
pacer_copypasta = [
    (
        "Structure of visceral layer of Bowman's capsule is a glomerular capsule "
        "structure and a structure of epithelium."
    ),
]

## Connecting to the database

In [31]:
TABLE_NAME = 'embedding_test'
vDB = await DocDB.from_conn_params(
    embedding_model=e_model, 
    table_name=TABLE_NAME,
    user=USER,
    password=PASSWORD,
    db_name=DB_NAME,
    host=HOST,
    port=int(PORT)
)

## Create Tables

In [32]:
# Drop the table, then create it again, so that (presumably) each run of the
# notebook starts from an empty table rather than accumulating rows.
# NOTE(review): whether drop_table() tolerates a table that does not exist yet
# is not visible from this notebook; confirm against the DocDB implementation.
await vDB.drop_table()
await vDB.create_table()

## Inserting Data

In [33]:
for index, text in enumerate(pacer_copypasta):   # For each line in the copypasta
    await vDB.insert(                        # Insert the line into the table
        content=text,                            # The text to be embedded
        permission='public',                     # Permission metadata for access control
        title=f'Pacer Copypasta line {index}',   # Title metadata
        page_numbers=[1, 2, 3],                  # Page number metadata
        tags=['fitness', 'pacer', 'copypasta'],  # Tag metadata
    )

## Similarity search

In [34]:
k = 3  # Number of rows to request from each search

from pprint import pprint


def print_results(results):
    '''
    Print the raw search results, then the title, content and similarity of the first one

    Args:
        results: sequence of row-like objects supporting lookup by the keys
            'title', 'content' and 'cosine_similarity'

    An empty (or missing) result set is reported with a short message instead of
    failing on the results[0] lookup.
    '''
    print('RAW RETURN:')
    pprint(results)
    if not results:
        # Nothing to index into; results[0] would raise IndexError on an empty list
        print('\nNO RESULTS RETURNED')
        return
    top = results[0]
    print(f'\nRETURNED TITLE:\n"{top["title"]}"')
    print(f'RETURNED CONTENT:\n"{top["content"]}"')
    print(f'RETURNED COSINE SIMILARITY:\n{top["cosine_similarity"]:.2f}')

### Searching the table with a perfect match:

In [35]:
search_string = '[beep] A single lap should be completed each time you hear this sound.'
print(f'Semantic Searching data using search string:\n"{search_string}"\n')

sim_search = await vDB.search(query_string=search_string, limit=k)

print_results(sim_search)

Semantic Searching data using search string:
"[beep] A single lap should be completed each time you hear this sound."

RAW RETURN:
[<Record cosine_similarity=1.0135670066197815 title='Pacer Copypasta line 0' content="Structure of visceral layer of Bowman's capsule is a glomerular capsule structure and a structure of epithelium." permission='public' page_numbers=[1, 2, 3] tags=['fitness', 'pacer', 'copypasta']>]

RETURNED TITLE:
"Pacer Copypasta line 0"
RETURNED CONTENT:
"Structure of visceral layer of Bowman's capsule is a glomerular capsule structure and a structure of epithelium."
RETURNED COSINE SIMILARITY:
1.01


### Searching the table with a partial match:

In [36]:
search_string = 'straight'
print(f'Semantic Searching data using search string:\n"{search_string}"\n')

sim_search = await vDB.search(query_string=search_string, limit=k)

print_results(sim_search)

Semantic Searching data using search string:
"straight"

RAW RETURN:
[<Record cosine_similarity=1.0444792224311596 title='Pacer Copypasta line 0' content="Structure of visceral layer of Bowman's capsule is a glomerular capsule structure and a structure of epithelium." permission='public' page_numbers=[1, 2, 3] tags=['fitness', 'pacer', 'copypasta']>]

RETURNED TITLE:
"Pacer Copypasta line 0"
RETURNED CONTENT:
"Structure of visceral layer of Bowman's capsule is a glomerular capsule structure and a structure of epithelium."
RETURNED COSINE SIMILARITY:
1.04
