In [1]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np 
import os 

In [2]:
client = QdrantClient(url=os.getenv("Qdrant_cluster_url"), 
                      api_key=os.getenv("Qdrant_API_KEY"))
client

<qdrant_client.qdrant_client.QdrantClient at 0x1d3f91c0cb0>

In [3]:
client.create_collection(
    collection_name="Fed_Speeches",
    vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE)
)

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `Fed_Speeches` already exists!"},"time":0.022515731}'

In [4]:
#Generate ID's 

import json
# Load the JSON data
with open('../data/embedding_JSON/FedSpeechMay1_with_embeddings.json', 'r') as file:
    data = json.load(file)

# Extract the vectors
vectors = [d['embeddings'] for d in data]

texts = [d['text'] for d in data]


# Generate ids
ids = [i for i in range(len(vectors))]

In [5]:
texts[0:3]

['May 1, 2024  Chair Powell’s Press Conference  PRELIMINARY  Transcript of Chair Powell’s Press Conference May 1, 2024 CHAIR POWELL',
 'My colleagues and I remain squarely focused on our dual mandate to promote maximum employment and stable prices for the American people',
 'The economy has made considerable progress toward our dual mandate objectives']

In [6]:
import ast

vectors = [[float(v) for v in ast.literal_eval(vector)] for vector in vectors]

In [7]:
points = [models.PointStruct(id=id, vector=vector, payload={'text': text}) for id, vector, text in zip(ids, vectors, texts)]

In [8]:
len(points)

304

In [9]:
len(vectors)

304

In [14]:
points[303].payload

{'text': 'Thank you very much'}

In [15]:
client.upsert(
    collection_name="Fed_Speeches",
    points=points,)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [20]:
client.retrieve(
    collection_name="Fed_Speeches",
    ids=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    )


[Record(id=0, payload={'text': 'May 1, 2024  Chair Powell’s Press Conference  PRELIMINARY  Transcript of Chair Powell’s Press Conference May 1, 2024 CHAIR POWELL'}, vector=None, shard_key=None),
 Record(id=1, payload={'text': 'My colleagues and I remain squarely focused on our dual mandate to promote maximum employment and stable prices for the American people'}, vector=None, shard_key=None),
 Record(id=2, payload={'text': 'The economy has made considerable progress toward our dual mandate objectives'}, vector=None, shard_key=None),
 Record(id=3, payload={'text': 'Inflation has eased substantially over the past year while the labor market has remained strong and that’s very good news'}, vector=None, shard_key=None),
 Record(id=4, payload={'text': 'But inflation is still too high, further progress in bringing it down is not assured, and the path forward is uncertain'}, vector=None, shard_key=None),
 Record(id=5, payload={'text': 'We are fully committed to returning inflation to our 2 pe

## Semantic Search

In [21]:
import openai

In [22]:
def get_embeddings(text):
    try:
        response = openai.Embedding.create(
            input=text,
            engine="text-embedding-3-small"  # Check the latest documentation for available engines
        )
        return response['data'][0]['embedding']  # Extracting the embedding vector
    except Exception as e:
        return str(e)  # Return error as string for debugging

In [23]:
search_text = 'inflation'
search_vector = get_embeddings(search_text)
client.search(
    collection_name="Fed_Speeches",
    query_vector=search_vector,
    limit = 10)


ValidationError: 3 validation errors for SearchRequest
vector.list[float]
  Input should be a valid list [type=list_type, input_value='\n\nYou tried to access ...ython/discussions/742\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/list_type
vector.NamedVector
  Input should be a valid dictionary or instance of NamedVector [type=model_type, input_value='\n\nYou tried to access ...ython/discussions/742\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/model_type
vector.NamedSparseVector
  Input should be a valid dictionary or instance of NamedSparseVector [type=model_type, input_value='\n\nYou tried to access ...ython/discussions/742\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/model_type

In [24]:
import openai
import qdrant_client

# Function to generate embeddings using OpenAI API
def get_embeddings(text):
    try:
        # Create an embedding using the specified engine
        response = openai.Embedding.create(
            input=text,
            engine="text-embedding-ada-002"  # Use the correct embedding engine for OpenAI version 1.31.0
        )
        # Extract and return the embedding vector (a list of floats)
        return response['data'][0]['embedding']
    except Exception as e:
        return str(e)  # Return error as string for debugging

# Initialize Qdrant client
qdrant = qdrant_client.QdrantClient(host="localhost", port=6333)  # Adjust host/port as needed

# Search query text
search_text = 'inflation'
search_vector = get_embeddings(search_text)

if isinstance(search_vector, list):  # Ensure the embedding is a valid list
    # Perform search using Qdrant
    try:
        search_results = qdrant.search(
            collection_name="Fed_Speeches",  # The name of your Qdrant collection
            query_vector=search_vector,      # The list of floats (embedding vector)
            limit=10                         # Number of top results to return
        )
        print(search_results)
    except Exception as e:
        print(f"Error during search: {str(e)}")
else:
    print(f"Error in embedding generation: {search_vector}")


Error during search: 3 validation errors for SearchRequest
vector.list[float]
  Input should be a valid list [type=list_type, input_value='\n\nYou tried to access ...ython/discussions/742\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/list_type
vector.NamedVector
  Input should be a valid dictionary or instance of NamedVector [type=model_type, input_value='\n\nYou tried to access ...ython/discussions/742\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/model_type
vector.NamedSparseVector
  Input should be a valid dictionary or instance of NamedSparseVector [type=model_type, input_value='\n\nYou tried to access ...ython/discussions/742\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/model_type
