In [93]:
import os

# read local .env file
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) 

In [94]:
################################################
# 1. Import two libries
################################################


################################################
# 1.1 Import cohere
################################################
import cohere
co = cohere.Client(os.environ['COHERE_API_KEY'])

In [95]:
################################################
# 1.2 Import weaviate
################################################
import weaviate
auth_config = weaviate.auth.AuthApiKey(
    api_key=os.environ['WEAVIATE_API_KEY'])


In [96]:
#no ch
client = weaviate.Client(
    url=os.environ["WEAVIATE_URL"],
    auth_client_secret=auth_config,
    additional_headers={"X-Cohere-Api-Key": os.environ["COHERE_API_KEY"]},
    startup_period=30,  # Adjust to allow more time
)



In [98]:

import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file


import cohere
co = cohere.Client(os.environ['COHERE_API_KEY'])


import weaviate
auth_config = weaviate.auth.AuthApiKey(
    api_key=os.environ['WEAVIATE_API_KEY'])


client = weaviate.Client(
    url=os.environ['WEAVIATE_URL'],
    auth_client_secret=auth_config,
    additional_headers={
        "X-Cohere-Api-Key": os.environ['COHERE_API_KEY'],
    }
)

In [99]:
client.is_ready()

True

In [100]:
def dense_retrieval(query, 
                    client,
                    results_lang='en', 
                    properties = ["text", "title", "url", "views", "lang", "_additional {distance}"],
                    num_results=5):

    nearText = {"concepts": [query]}
    
    # To filter by language
    where_filter = {
    "path": ["lang"],
    "operator": "Equal",
    "valueString": results_lang
    }
    response = (
        client.query
        .get("Articles", properties)
        .with_near_text(nearText)
        .with_where(where_filter)
        .with_limit(num_results)
        .do()
    )

    result = response['data']['Get']['Articles']

    return result

In [101]:
def print_result(result):
    """ Print results with colorful formatting """
    for i,item in enumerate(result):
        print(f'item {i}')
        for key in item.keys():
            print(f"{key}:{item.get(key)}")
            print()
        print()

In [102]:
query = "What is the capital of Canada?"
dense_retrieval_results = dense_retrieval(query, client)
print_result(dense_retrieval_results)

KeyError: 'data'

In [105]:
#!/usr/bin/env python
# coding: utf-8

################################################
# ReRank Workflow
#
# Step 1: Import Libraries (cohere and weaviate)
# Step 2: Setup API keys and Client Connections
# Step 3: Perform Dense Retrieval
# Step 4: Perform ReRanking on Retrieved Results
################################################

# Import Required Libraries
import os
from dotenv import load_dotenv, find_dotenv
import cohere
import weaviate

# Load .env File for API Keys
_ = load_dotenv(find_dotenv()) 

################################################
# 1. Initialize Clients
################################################

# Initialize Cohere Client
co = cohere.Client(os.environ['COHERE_API_KEY'])

# Initialize Weaviate Client
auth_config = weaviate.auth.AuthApiKey(api_key=os.environ['WEAVIATE_API_KEY'])
client = weaviate.Client(
    url=os.environ['WEAVIATE_URL'],
    auth_client_secret=auth_config,
    additional_headers={
        "X-Cohere-Api-Key": os.environ['COHERE_API_KEY'],
    },
)

# Check Weaviate Connection
if client.is_ready():
    print("Weaviate is ready!")
else:
    raise Exception("Failed to connect to Weaviate.")

################################################
# 2. Define Dense Retrieval Function
################################################

def dense_retrieval(query, client, results_lang='en', properties=None, num_results=5):
    """Perform Dense Retrieval."""
    if properties is None:
        # Update properties to match your schema
        properties = ["name", "description", "_additional {distance}"]

    # Define nearText query
    near_text = {"concepts": [query]}
    where_filter = {
        "path": ["lang"],
        "operator": "Equal",
        "valueString": results_lang,
    }

    try:
        response = (
            client.query
            .get("Articles", properties)  # Replace "Articles" with your schema class
            .with_near_text(near_text)
            .with_where(where_filter)
            .with_limit(num_results)
            .do()
        )

        # Debugging response
        print("Raw Response from Weaviate:", response)

        # Extract results
        if "data" in response and "Get" in response["data"] and "Articles" in response["data"]["Get"]:
            return response["data"]["Get"]["Articles"]
        else:
            raise KeyError(f"Unexpected response structure: {response}")

    except Exception as e:
        print(f"Error during dense retrieval: {e}")
        return []

################################################
# 3. Define ReRank Function
################################################

def rerank_responses(query, responses, num_responses=10):
    """ReRank Responses using Cohere."""
    reranked_responses = co.rerank(
        model='rerank-english-v2.0',
        query=query,
        documents=responses,
        top_n=num_responses,
    )
    return reranked_responses

################################################
# 4. Utility Function to Print Results
################################################

def print_result(result):
    """Print results with formatting."""
    if not result:
        print("No results found.")
        return

    for i, item in enumerate(result):
        print(f"Result {i + 1}:")
        for key, value in item.items():
            print(f"  {key}: {value}")
        print()

################################################
# 5. Perform Workflow
################################################

# Example Query
query = "What is the capital of Canada?"

# Step 1: Perform Dense Retrieval
dense_retrieval_results = dense_retrieval(query, client)

# Step 2: Print Dense Retrieval Results
print("Dense Retrieval Results:")
print_result(dense_retrieval_results)

# Step 3: Extract Texts for ReRanking
texts = [result.get("description") for result in dense_retrieval_results]

# Step 4: Perform ReRanking
reranked_results = rerank_responses(query, texts)

# Step 5: Print ReRanked Results
print("ReRanked Results:")
for i, rerank_result in enumerate(reranked_results):
    print(f"Result {i + 1}:")
    print(f"  Document: {rerank_result.document}")
    print(f"  Relevance Score: {rerank_result.relevance_score}")
    print()


Weaviate is ready!
Raw Response from Weaviate: {'errors': [{'locations': [{'column': 82, 'line': 1}], 'message': 'Unknown argument "nearText" on field "Articles" of type "GetObjectsObj". Did you mean "nearObject" or "nearVector"?', 'path': None}, {'locations': [{'column': 141, 'line': 1}], 'message': 'Cannot query field "name" on type "Articles".', 'path': None}, {'locations': [{'column': 146, 'line': 1}], 'message': 'Cannot query field "description" on type "Articles".', 'path': None}]}
Error during dense retrieval: 'Unexpected response structure: {\'errors\': [{\'locations\': [{\'column\': 82, \'line\': 1}], \'message\': \'Unknown argument "nearText" on field "Articles" of type "GetObjectsObj". Did you mean "nearObject" or "nearVector"?\', \'path\': None}, {\'locations\': [{\'column\': 141, \'line\': 1}], \'message\': \'Cannot query field "name" on type "Articles".\', \'path\': None}, {\'locations\': [{\'column\': 146, \'line\': 1}], \'message\': \'Cannot query field "description" on

BadRequestError: status_code: 400, body: {'message': 'invalid request: list of documents must not be empty'}