In [1]:
# https://github.com/weaviate/recipes/blob/main/weaviate-features/generative-search/generative_search_openai.ipynb

In [2]:
# Docs : https://weaviate.io/developers/weaviate/client-libraries/python

In [3]:
# Run `docker-compose up -d` for local deployment

In [4]:
import os
import sys
sys.path.append("../..")
from utils import load_documents,split_documents,calculate_chunk_ids

import weaviate

In [5]:
# OpenAI embedding model used as the default by get_embedding() for query vectors.
# Intended to line up with the collection's vectorizer settings
# (text2vec_openai with model="ada", model_version="002").
EMBEDDING_MODEL = "text-embedding-ada-002"

In [6]:

import weaviate, os

# Connect to your local Weaviate instance deployed with Docker.
# The OpenAI key is forwarded as a request header so Weaviate's OpenAI modules can use it.
client = weaviate.connect_to_local(
    headers={
        "X-OpenAI-Api-Key": os.environ["OPENAI_APIKEY"] # OpenAI API key read from the OPENAI_APIKEY env var (KeyError if unset)
    }
)

# Option 2
# Connect to your Weaviate Client Service cluster
# client = weaviate.connect_to_wcs(
#     cluster_url="WCS-CLUSTER-ID",                             # Replace with your WCS cluster ID
#     auth_credentials=weaviate.auth.AuthApiKey("WCS-API-KEY"), # Replace with your WCS API KEY - recommended: use env var
#     headers={
#         "X-OpenAI-Api-Key": os.getenv("OPENAI-API-KEY"),      # Replace with your inference API key - recommended: use env var
#     }
# )


I0000 00:00:1721828889.518331   10688 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache


In [7]:
client.is_ready()

True

In [8]:
# Note: in practice, you shouldn't rerun this cell, as it deletes your data
# in the "Chunk" collection, and then you need to re-import it again.
import weaviate.classes.config as wc

# Single place to change the collection name used by this cell.
COLLECTION_NAME = "Chunk"

# Start from a clean slate: drop the collection if it already exists.
if client.collections.exists(COLLECTION_NAME):
    client.collections.delete(COLLECTION_NAME)

client.collections.create(
    name=COLLECTION_NAME,

    # Vectorize objects with OpenAI (text2vec-openai, "ada" model, version "002")
    vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(
        model="ada",
        model_version="002",
        type_="text"
    ),

    # Enable the OpenAI generative model for this collection
    generative_config=wc.Configure.Generative.openai(
        model="gpt-3.5-turbo"  # Optional - Defaults to `gpt-3.5-turbo`
    ),

    properties=[ # defining properties (data schema) is optional
        # chunk_id is stored but excluded from the embedding (skip_vectorization=True)
        wc.Property(name="chunk_id", data_type=wc.DataType.TEXT, skip_vectorization=True),
        # context holds the chunk text
        wc.Property(name="context", data_type=wc.DataType.TEXT),
    ]
)

print(f"Successfully created collection: {COLLECTION_NAME}.")

Successfully created collection: Chunk.


## Preprocessing the PDFs

In [9]:
# Load the source documents through the project helper imported from utils
# (172 documents in the recorded run).
documents = load_documents()


In [10]:
len(documents)

172

In [11]:
# Split the loaded documents into chunks; each chunk exposes .metadata and .page_content.
chunks = split_documents(documents)

In [12]:
chunks[0].metadata,chunks[0].page_content

({'source': '../../data/Galapagos/pdfs/CnE_GUI-CE-027 Guidance on Declaration of Interest (1).pdf',
  'page': 0},
 'Do you need to\n(Also known as a Conflict of Interest)DECLARE AN INTEREST?\nYou may need to declare an interest where a potential conflict arises…\nA potential conflict of interest just means that your personal interests could conflict with your role and decisions at \nGalapagos. \nIt doesn’t mean that there is an actual conflict or that you’ve done anything wrong. In fact, conflicts are usually a \nresult of good things — like having good relationships through friendships or investments. \nBut they could also be perceived by someone else as impacting your judgment, or could harm the trust between \ncolleagues, and that’s why we need to be aware of them so we can take any steps we need to manage them. \nHere are some examples…\nWhat should I do if I think I might need to declare an interest?')

In [13]:
# Add an 'id' entry to the chunks' metadata; it is read back as chunk.metadata['id'] below.
# NOTE(review): this rebinds `chunks` to the helper's own output, so re-running the cell
# feeds already-processed chunks back in — confirm calculate_chunk_ids is safe to re-apply.
chunks = calculate_chunk_ids(chunks)

In [14]:
chunks[0].metadata['id']

'../../data/Galapagos/pdfs/CnE_GUI-CE-027 Guidance on Declaration of Interest (1).pdf:0:0'

## Uploading the data

In [15]:
# Convert every chunk into the row shape of the "Chunk" collection:
# the metadata id goes under "chunk_id" and the chunk text under "context".
data_json = [
    {
        "chunk_id": chunk.metadata['id'],
        "context": chunk.page_content,
    }
    for chunk in chunks
]

In [16]:

# Handle to the "Chunk" collection; later cells reuse it for retrieval.
chunk_collections = client.collections.get("Chunk")

# Upload all rows in a single request. For very large imports (e.g. a million objects),
# split the rows into smaller batches of roughly 100-1000 objects per insert instead.
response = chunk_collections.data.insert_many(data_json)

# Show the reported errors if there were any, otherwise confirm success.
print(response.errors if response.has_errors else "Insert complete.")

Insert complete.


## Refining Output

In [17]:
# User-message template. It has two placeholders, filled with str.format():
#   {question} - the user's question
#   {context}  - the retrieved text the answer must be based on
PROMPT_USER_TEMPLATE = """
Answer the question based only on the following context:

Question :
{question}


Context :
{context}

"""

# System message that sets the assistant's persona and the expected answer style.
SYSTEM_PROMPT = """
You are an ICF MCC certified coach who has a lot of experience with life coaching.
You are given certain context and a question. Use the context and output an answer that is precise and clear.

"""

In [18]:
# Sample question used to exercise the retrieval and answer steps below.
USER_QUERY = "How to behave in a meeting?"
# Number of nearest chunks to retrieve (passed as `limit` to the vector search).
# NOTE(review): the name is misspelled ("RETREIVER"); kept as-is because a later cell references it.
RETREIVER_K = 5

In [19]:
# Get text and embeddings
EMBEDDING_MODEL_ENDPOINT = "https://api.openai.com/v1/embeddings"
import os
import openai
from dotenv import load_dotenv
load_dotenv()


# Load environment variables
OPENAI_ORG = os.getenv('OPENAI_ORG')
OPENAI_APIKEY = os.getenv('OPENAI_APIKEY')

openai.organization = OPENAI_ORG
openai.api_key = OPENAI_APIKEY


openai_client = openai.OpenAI(api_key=OPENAI_APIKEY)
def get_embedding(text, model=EMBEDDING_MODEL):
    """Embed ``text`` with the OpenAI embeddings API and return the vector.

    Args:
        text: String to embed; newline characters are replaced by spaces
            before the request is sent.
        model: Name of the embedding model; defaults to ``EMBEDDING_MODEL``.

    Returns:
        The ``embedding`` field of the first item in the API response.
    """
    single_line = text.replace("\n", " ")
    api_result = openai_client.embeddings.create(input=[single_line], model=model)
    first_item = api_result.data[0]
    return first_item.embedding


In [20]:
# Embed the question, then fetch the RETREIVER_K nearest chunks by vector similarity.
# NOTE(review): no prompt is passed to `generate`, so this appears to act as plain retrieval,
# and the vectors requested via include_vector are not read by later cells — confirm both are intended.
query_vector = get_embedding(USER_QUERY)
response = chunk_collections.generate.near_vector(
    near_vector=query_vector,
    limit=RETREIVER_K,
    include_vector=True,
)

In [21]:
def combine_results(response):
    """Concatenate the ``context`` text of every retrieved object.

    Args:
        response: Object with an ``objects`` iterable whose items expose a
            ``properties`` mapping that contains a ``"context"`` string.

    Returns:
        All context strings in retrieval order, each followed by two newline
        characters; an empty string when there are no objects.
    """
    return "".join(obj.properties['context'] + "\n\n" for obj in response.objects)

In [22]:
# Merge the retrieved chunks' text into a single context string for the prompt.
related_context = combine_results(response)

In [23]:
related_context

'|   1ATTENDEES - Best meeting practices\n•Decide if you are joining the meeting\n\uf0fcRSVP (respond) to the meeting organizer as soon as possible\n•Prepare: read the agenda and the pre- work\n\uf0fccheck tasks from last meeting, prepare insights and /or questions\n•Be on time\n•Be present, participate and avoid distractions\n\uf0fcturn off notifications, put away your phone\n•Make sure you are heard, even if your opinion is less popular \n•Reflect: did I participate, and did I share my opinion?\n•Provide feedback to the meeting organizer for potential improvement\n•Read and review the minutes \n•Check the follow -up tasks and act on yours\nBefore\nDuring\nAfter\n\nTime Management tips for Effective Meetings\n☑Respect other people’s time\n|   1•respond to meeting invites as soon as you receive them\n•reschedule timely to resolve meeting conflicts\n•don’t be afraid to politely decline a meeting\n(are you needed there? ask for an agenda, align expectations)\n☑Save yourself some time\n•s

In [24]:
''' 
'|   1ATTENDEES - Best meeting practices
•Decide if you are joining the meeting
\uf0fcRSVP (respond) to the meeting organizer as soon as possible
•Prepare: read the agenda and the pre- work
\uf0fccheck tasks from last meeting, prepare insights and /or questions
•Be on time
•Be present, participate and avoid distractions
\uf0fcturn off notifications, put away your phone
•Make sure you are heard, even if your opinion is less popular 
•Reflect: did I participate, and did I share my opinion?
•Provide feedback to the meeting organizer for potential improvement
•Read and review the minutes 
•Check the follow -up tasks and act on yours
Before
During
After

Time Management tips for Effective Meetings
☑Respect other people’s time
|   1•respond to meeting invites as soon as you receive them
•reschedule timely to resolve meeting conflicts
•don’t be afraid to politely decline a meeting
(are you needed there? ask for an agenda, align expectations)
☑Save yourself some time
•share your calendar titles with your functional and TA teams
•book well in advance long, recurring, and team meetings
•respect time -zones and working hours, yours included!

|   1ORGANIZERS - Best meeting practices
•Decide if you really need the meeting and select correct meeting type
•Define a clear and timed agenda, including the meeting objective
•Use the Outlook Scheduling Assistant to plan at an appropriate time
•Carefully choose the attendees, share the agenda and materials in advance
•Start by introducing people, stating the objective, and showing the agenda
•Assign note -taker and timekeeper, but record tasks yourself
•Moderate and ensure a safe space for everyone to get the word
•End meeting by summarizing the follow -up tasks for all attendees
•Share with all attendees the follow -up tasks and the meeting minutes
•Reflect: was the meeting goal achieved, and was everyone heard?
•Ensure follow -up and plan next meeting if necessary
Before
During
After

Tips for engaging and inclusive meetings
☑Build trust and encourage participation
|   1•assign minutes -taker and time -keeper roles
•send the agenda (and pre -work) to the attendees in advance
•keep it focused and engaging
\uf0d8be well -prepared to retain attendees’ attention 
\uf0d8keep the meeting as short as possible
\uf0d8use polls, ratings, and whiteboards
\uf0d8establish ground rules at the start of the meeting
•has everyone’s point of view been heard?
\uf0d8ask questions to prompt discussion and listen actively

Calendar Man agement tips for Effective Meetings
|   1☑Organ ize your calendar
•state your working hours (e.g. 09:00-1 7:00)
•change default meeting duration to 25’ instead of 30’
•book lunch and travel time as OOO
•book focus time as ‘tentative’ to avoid blocking your entire agenda
☑Organi ze y our calenda r for OOO days
•add public holidays for all major GLPG sites (single action)
•enter your vacation on Outlook well in advance
•reschedule (or decline) meetings happening during your absence

'
'''

" \n'|   1ATTENDEES - Best meeting practices\n•Decide if you are joining the meeting\n\uf0fcRSVP (respond) to the meeting organizer as soon as possible\n•Prepare: read the agenda and the pre- work\n\uf0fccheck tasks from last meeting, prepare insights and /or questions\n•Be on time\n•Be present, participate and avoid distractions\n\uf0fcturn off notifications, put away your phone\n•Make sure you are heard, even if your opinion is less popular \n•Reflect: did I participate, and did I share my opinion?\n•Provide feedback to the meeting organizer for potential improvement\n•Read and review the minutes \n•Check the follow -up tasks and act on yours\nBefore\nDuring\nAfter\n\nTime Management tips for Effective Meetings\n☑Respect other people’s time\n|   1•respond to meeting invites as soon as you receive them\n•reschedule timely to resolve meeting conflicts\n•don’t be afraid to politely decline a meeting\n(are you needed there? ask for an agenda, align expectations)\n☑Save yourself some time

In [25]:
# Fill the user prompt template with the retrieved context and the question.
prompt_query = PROMPT_USER_TEMPLATE.format(context=related_context, question=USER_QUERY)

In [26]:
# Chat model that turns the question plus retrieved context into the final answer.
MODEL = "gpt-4o"

# The system message sets the persona; the user message carries question + context.
chat_messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": prompt_query},
]
response_openai = openai_client.chat.completions.create(model=MODEL, messages=chat_messages)

# Take the text of the first returned choice and the reported total token usage.
first_choice = response_openai.choices[0]
assistant_message = first_choice.message.content
tokens = response_openai.usage.total_tokens
assistant_message

"To behave effectively in a meeting:\n\n- **RSVP promptly** to the meeting organizer.\n- **Prepare by reading** the agenda and completing any pre-work, ensuring you check tasks from the last meeting and prepare insights or questions.\n- **Be punctual.**\n- **Be present and participate actively**, avoiding distractions by turning off notifications and putting away your phone.\n- **Ensure your voice is heard**, even if your opinion is less popular.\n- **Reflect on your participation** and whether you shared your opinion.\n- **Provide feedback** to the meeting organizer for improvement.\n- **Review the minutes** and follow up on assigned tasks.\n\nAdditionally, respect other people’s time by responding to invites promptly, and don't hesitate to reschedule or decline if necessary."

In [27]:
''' 
"To behave effectively in a meeting:

- **RSVP promptly** to the meeting organizer.
- **Prepare by reading** the agenda and completing any pre-work, ensuring you check tasks from the last meeting and prepare insights or questions.
- **Be punctual.**
- **Be present and participate actively**, avoiding distractions by turning off notifications and putting away your phone.
- **Ensure your voice is heard**, even if your opinion is less popular.
- **Reflect on your participation** and whether you shared your opinion.
- **Provide feedback** to the meeting organizer for improvement.
- **Review the minutes** and follow up on assigned tasks.
Additionally, respect other people’s time by responding to invites promptly, and don't hesitate to reschedule or decline if necessary."
'''

' \nTo behave effectively in a meeting:\n- RSVP to the meeting organizer promptly to confirm your attendance.\n- Prepare by reading the agenda, completing any pre-work, and reviewing tasks from the last meeting.\n- Arrive on time.\n- Be fully present and participate actively; turn off notifications and put away your phone to avoid distractions.\n- Ensure your voice is heard, even if your opinion is less popular.\n- Reflect on your participation and whether you shared your opinion.\n- Provide feedback to the meeting organizer for improvements.\n- Review the meeting minutes afterward.\n- Check and act on any follow-up tasks assigned to you.\n'