In [1]:
import redis
from redis.commands.search.field import VectorField, TextField, TagField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType

# Redis connection details
redis_host = "localhost"
redis_port = "6379"
redis_password = ""

# Connect to the Redis server
conn = redis.Redis(
    host=redis_host, 
    port=redis_port,
    password=redis_password, 
    encoding='utf-8', 
    decode_responses=True
)

# Define the schema for the index
SCHEMA = [
    TagField("url"),
    TextField("title"), 
    TextField("description"),
    TextField("publish_date"),
    TextField("content"),
    VectorField("embedding", "HNSW", {
        "TYPE": "FLOAT32",
        "DIM": 1536, 
        "DISTANCE_METRIC": "COSINE"}
    ),
]

# Create an index
def create_index(conn, schema, index_name="posts"):
    try:
        conn.ft(index_name).create_index(
            fields=schema,
            definition=IndexDefinition(prefix=["post:"], index_type=IndexType.HASH)
        )
        print(f"‚úÖ Index '{index_name}' created successfully!")
    except Exception as e:
        print(f"‚ö†Ô∏è Index '{index_name}' already exists or cannot be created.")

# Delete an index
def delete_index(conn, index_name="posts"):
    try:
        conn.execute_command('FT.DROPINDEX', index_name)
        print(f"üóëÔ∏è Index '{index_name}' deleted successfully!")
    except Exception as e:
        print(f"‚ö†Ô∏è Failed to delete index '{index_name}': {e}")

# Delete all documents from an index
def delete_all_keys_from_index(conn, index_name="posts"):
    try:
        result = conn.execute_command('FT.SEARCH', index_name, '*', 'NOCONTENT')
        doc_ids = result[1::2]  # Extract document IDs
        
        if not doc_ids:
            print("üõë No documents found to delete.")
            return

        for doc_id in doc_ids:
            conn.delete(doc_id)
        print(f"üóëÔ∏è Deleted {len(doc_ids)} documents from index '{index_name}'.")

    except Exception as e:
        print(f"‚ö†Ô∏è Failed to delete keys from index '{index_name}': {e}")

# View index details
def view_index(conn, index_name="posts"):
    try:
        info = conn.execute_command('FT.INFO', index_name)
        for i in range(0, len(info), 2):
            print(f"{info[i]}: {info[i+1]}")
    except Exception as e:
        print(f"‚ö†Ô∏è Failed to retrieve index details: {e}")

# Insert a sample document
def insert_sample_document(conn):
    doc_id = "post:1"
    doc = {
        "url": "https://example.com/sample",
        "title": "Sample Blog Post",
        "description": "This is a test blog post stored in Redis.",
        "publish_date": "2024-06-15",
        "content": "Redis is a powerful in-memory database used for various applications.",
        "embedding": " ".join(["0.1"] * 1536)  # Mock embedding as space-separated values
    }
    conn.hset(doc_id, mapping=doc)
    print(f"‚úÖ Sample document inserted: {doc_id}")



In [4]:
view_index(conn)

index_name: posts
index_options: []
index_definition: ['key_type', 'HASH', 'prefixes', ['post:'], 'default_score', '1']
attributes: [['identifier', 'url', 'attribute', 'url', 'type', 'TAG', 'SEPARATOR', ','], ['identifier', 'title', 'attribute', 'title', 'type', 'TEXT', 'WEIGHT', '1'], ['identifier', 'description', 'attribute', 'description', 'type', 'TEXT', 'WEIGHT', '1'], ['identifier', 'publish_date', 'attribute', 'publish_date', 'type', 'TEXT', 'WEIGHT', '1'], ['identifier', 'content', 'attribute', 'content', 'type', 'TEXT', 'WEIGHT', '1'], ['identifier', 'embedding', 'attribute', 'embedding', 'type', 'VECTOR', 'algorithm', 'HNSW', 'data_type', 'FLOAT32', 'dim', 1536, 'distance_metric', 'COSINE', 'M', 16, 'ef_construction', 200]]
num_docs: 107
max_doc_id: 122
num_terms: 4662
num_records: 23527
inverted_sz_mb: 0.6109371185302734
vector_index_sz_mb: 6.2415924072265625
total_inverted_index_blocks: 4672
offset_vectors_sz_mb: 0.05615520477294922
doc_table_size_mb: 0.007564544677734375
s

In [7]:
redis_host = "localhost"
redis_port = "6379"
redis_password = ""

conn = redis.Redis(host=redis_host, 
                   port=redis_port, 
                   password=redis_password, 
                   encoding='utf-8', 
                   decode_responses=False)

p = conn.pipeline(transaction=False)

seen_urls = set()
for key in conn.keys("post:*"):
    url = conn.hget(key, "url").decode("utf-8")
    
    if url in seen_urls:
        conn.delete(key)  # Remove duplicate entry
        print(f"Deleted duplicate: {key}")
    else:
        seen_urls.add(url)


Deleted duplicate: b'post:6_3'
Deleted duplicate: b'post:5_8'
Deleted duplicate: b'post:4_2'
Deleted duplicate: b'post:0_1'
Deleted duplicate: b'post:5_11'
Deleted duplicate: b'post:0_10'
Deleted duplicate: b'post:0_8'
Deleted duplicate: b'post:5_6'
Deleted duplicate: b'post:4_11'
Deleted duplicate: b'post:8_7'
Deleted duplicate: b'post:1_3'
Deleted duplicate: b'post:6_1'
Deleted duplicate: b'post:1_0'
Deleted duplicate: b'post:9_2'
Deleted duplicate: b'post:1_7'
Deleted duplicate: b'post:6_5'
Deleted duplicate: b'post:1_2'
Deleted duplicate: b'post:5_13'
Deleted duplicate: b'post:4_7'
Deleted duplicate: b'post:1_1'
Deleted duplicate: b'post:8_4'
Deleted duplicate: b'post:6_0'
Deleted duplicate: b'post:2_2'
Deleted duplicate: b'post:0_4'
Deleted duplicate: b'post:6_9'
Deleted duplicate: b'post:6_13'
Deleted duplicate: b'post:9_0'
Deleted duplicate: b'post:6_8'
Deleted duplicate: b'post:6_4'
Deleted duplicate: b'post:9_1'
Deleted duplicate: b'post:8_3'
Deleted duplicate: b'post:5_10'
De