In [15]:
import pathlib
import textwrap
import os
import google.generativeai as genai 
import tqdm as notebook_tqdm
from IPython.display import display
from IPython.display import Markdown
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# Subscript access (rather than .get) makes a missing GEMINI_API_KEY fail here with a KeyError.
GOOGLE_API_KEY = os.environ["GEMINI_API_KEY"]

# Hand the key to the google.generativeai client library for all later calls.
genai.configure(api_key=GOOGLE_API_KEY)
def to_markdown(text):
  """Wrap ``text`` in a Markdown block quote for notebook display.

  Every '•' bullet character is rewritten as an indented Markdown list
  marker ('  *'), then each line -- blank lines included -- is prefixed
  with '> '. The result is returned wrapped in ``Markdown``.
  """
  bulleted = text.replace('•', '  *')
  # splitlines(True) keeps each line's own terminator, so joining the
  # prefixed pieces reproduces the input layout exactly.
  quoted = ''.join('> ' + line for line in bulleted.splitlines(True))
  return Markdown(quoted)


# The language model itself is instantiated in the cells that follow.

In [16]:
# Create a client handle for the 'gemini-1.5-flash' model.
language_model = genai.GenerativeModel('gemini-1.5-flash')
# Send a single prompt; the reply text is read from `response.text` below.
response = language_model.generate_content("What is the meaning of life?")
# Last expression of the cell, so the block-quoted Markdown is rendered as the cell output.
to_markdown(response.text)
# Alternative kept for reference: print the first candidate's first text part directly.
#print(response.candidates[0].content.parts[0].text)

> The meaning of life is a question that has been pondered by philosophers and theologians for centuries, and there is no single, universally accepted answer. It's a deeply personal question, and what gives life meaning can vary greatly from person to person. 
> 
> Here are some perspectives:
> 
> **Existentialist View:**  Life has no inherent meaning, and it is up to each individual to create their own purpose. This means finding meaning through their choices, actions, relationships, and experiences.
> 
> **Nihilistic View:**  Life has no meaning or purpose whatsoever. This perspective can lead to feelings of despair and apathy.
> 
> **Religious View:** Many religions believe that life has meaning and purpose given by a higher power. This purpose might involve serving God, achieving enlightenment, or living a virtuous life.
> 
> **Humanistic View:**  Focuses on human potential, values, and the pursuit of happiness. Meaning is found through personal growth, contributing to society, and experiencing the richness of life.
> 
> **Scientific View:**  From a scientific perspective, life's meaning is not a question that can be answered. The focus is on understanding the physical processes of life and the universe.
> 
> **Other perspectives:** Some people find meaning in:
> 
> * **Love and relationships:**  Building strong connections with others and experiencing love can be a powerful source of meaning.
> * **Creativity and self-expression:**  Expressing oneself through art, music, writing, or other forms of creativity can be deeply fulfilling.
> * **Work and contribution:**  Finding meaning in one's work, whether it's a job, a hobby, or volunteer work, can provide a sense of purpose.
> * **Nature and the universe:**  Connecting with nature and appreciating the vastness of the cosmos can evoke a sense of wonder and awe.
> 
> Ultimately, the meaning of life is a personal journey that each individual must undertake. There are no right or wrong answers, and what gives life meaning can change over time. The important thing is to actively seek out experiences and relationships that bring you joy, purpose, and fulfillment. 


In [21]:
import psycopg2
from pgvector.psycopg2 import register_vector
import numpy as np
from dotenv import load_dotenv


# Load .env before any setting is read from the environment below.
load_dotenv()
import os
from sentence_transformers import SentenceTransformer

# --- Database connection -----------------------------------------------------
# PORT comes from the environment/.env. It is printed AND used for the
# connection, so the value shown is always the one actually in effect
# (previously the connection ignored it and always used 5432).
print("PORT", os.environ["PORT"])
# NOTE(review): dbname/user/password/host are still hard-coded local defaults;
# move them to .env as well before sharing this notebook.
conn = psycopg2.connect(
    dbname="ai",
    user="postgres",
    password="postgres",
    host="127.0.0.1",
    port=int(os.environ["PORT"])
)
# Register the pgvector adapters imported above on this connection.
register_vector(conn)
dbschema = "public"
dbtable = "spaces_eicp"
cur = conn.cursor()

# --- Models --------------------------------------------------------------------
model_dir = "models/all-MiniLM-L6-v2"
main_model_dir = "models/gemini-1.5-flash"
language_model = genai.GenerativeModel('gemini-1.5-flash')
#language_model.save(main_model_dir)

# Use the on-disk copy of the embedding model when it exists; otherwise load it
# by name and save it to model_dir so the next run takes the first branch.
# (No placeholder assignment is needed: both branches bind embedding_model.)
if os.path.exists(model_dir):
    embedding_model = SentenceTransformer(model_dir)
else:
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    embedding_model.save(model_dir)

def get_embeddings(content):
    """Encode ``content`` with the notebook-level ``embedding_model``.

    The model is asked for a tensor result (``convert_to_tensor=True``) and
    that result is returned converted with ``.tolist()``.
    """
    return embedding_model.encode(content, convert_to_tensor=True).tolist()



PORT 5432


In [None]:
import numpy as np
from psycopg2.extras import execute_values
from psycopg2.extras import execute_values
import logging
import json
def get_top1_similar_docs(query_embedding, conn, schema, table, top_k=5, token_weight=0.01):
    """Rank every row of ``schema.table`` against a query embedding.

    Despite the historical name, this returns the ``top_k`` best matches
    (5 by default, which is what the function has always returned).

    Each stored embedding is L2-normalised and compared with the normalised
    query by dot product (i.e. cosine similarity). The score is then scaled
    by ``1 + token_weight * numtokens``.

    Args:
        query_embedding: 1-D sequence of numbers for the query.
        conn: DB-API connection whose ``cursor()`` is usable in a ``with`` block.
        schema: Schema name, interpolated into the SQL text as-is.
        table: Table name, interpolated into the SQL text as-is.
        top_k: Maximum number of rows to return.
        token_weight: Per-token multiplier applied to the similarity.

    Returns:
        A list of ``(pageno, context, tabletext, source, imagepath,
        weighted_similarity)`` tuples, best match first. Empty when the query
        vector has zero norm or no row could be scored.
    """
    query_embedding = np.asarray(query_embedding, dtype=float)
    query_norm = np.linalg.norm(query_embedding)
    if query_norm == 0:
        # Dividing by a zero norm would turn every score into NaN.
        logging.warning("Query embedding has zero norm; nothing can be ranked")
        return []
    query_embedding = query_embedding / query_norm

    # NOTE(review): schema/table are interpolated directly; they must come from
    # trusted notebook constants, never from user input.
    # The `with` block closes the cursor once the rows are fetched.
    with conn.cursor() as cur:
        cur.execute(f"""
            SELECT pageno, context, tabletext, source, imagepath, embedding, COALESCE(numtokens, 0)
            FROM {schema}.{table}
        """)
        rows = cur.fetchall()
    logging.debug("Fetched %d rows from %s.%s", len(rows), schema, table)

    results = []
    for row in rows:
        pageno, context, tabletext, source, imagepath, raw_embedding, numtokens = row
        try:
            # The column may arrive as JSON text or, once a vector adapter is
            # registered on the connection, as an array-like; accept both.
            if isinstance(raw_embedding, (str, bytes, bytearray)):
                raw_embedding = json.loads(raw_embedding)
            embedding = np.asarray(raw_embedding, dtype=float)
            if embedding.ndim != 1:
                raise ValueError(f"expected a 1-D embedding, got shape {embedding.shape}")

            norm = np.linalg.norm(embedding)
            if norm == 0:
                logging.warning(f"Skipping normalization for zero vector: {embedding}")
                continue

            similarity = np.dot(query_embedding, embedding / norm)

            # COALESCE already maps NULL to 0 in SQL; this guards other callers' rows.
            numtokens = numtokens if numtokens is not None else 0
            weighted_similarity = similarity * (1 + token_weight * numtokens)

            results.append((pageno, context, tabletext, source, imagepath, weighted_similarity))
        except Exception as e:
            # Best effort: one malformed row must not abort the whole ranking.
            logging.error(f"Error processing row {row}: {e}")

    # Highest weighted similarity first.
    results.sort(key=lambda x: x[-1], reverse=True)
    return results[:max(top_k, 0)]

question = "Explain the coefficients involved in Valve sizing calculations?"
embed = get_embeddings(question)
# Bind `res` before the lookup: if the call raises, later cells see an empty
# result instead of a NameError or a stale value left over from a previous run.
res = []
try:
    res = get_top1_similar_docs(query_embedding=embed, conn=conn,schema=dbschema, table=dbtable)
except Exception as e:
    # Report the failure in the cell output and carry on with the empty result.
    print("Exception: ", e)

#envelope=f"You are a friendly AI assitant who finds information for HR assistants, Engineers, sales teams and many more ... only using the given context {res} answer the question {question}"
#response = language_model.generate_content(envelope)
#print(response.text)
#to_markdown(response.text)


In [9]:
import ollama

# Request an embedding for a sample string from the 'nomic-embed-text' model via ollama.
# NOTE(review): this rebinds the notebook-level name `embed`, which the retrieval cell
# also assigns (there from get_embeddings) -- confirm the overlap is intended.
embed = ollama.embeddings(model='nomic-embed-text', prompt='This is example text')
# The result is indexed by 'embedding'; print how many components the vector has.
print(len(embed['embedding']))

768
