# 1. Connect to Redis
We will use Maven POM syntax to include the Jedis client in this notebook.

For more info, visit https://github.com/redis/jedis

Note that for Vector Similarity Search support, we need Jedis 4.2.1 and above.

In [None]:
%%loadFromPOM
<repository>
  <id>VSS-JAVA-DEMO</id>
  <url>https://github.com/redis/jedis</url>
</repository>

<dependency>
  <groupId>redis.clients</groupId>
  <artifactId>jedis</artifactId>
  <version>4.2.1</version>
</dependency>


In [None]:
// Importing some needed classes from the redis Java client

import java.util.Map;

import redis.clients.jedis.JedisPooled;
import redis.clients.jedis.UnifiedJedis;
import redis.clients.jedis.Protocol;
import redis.clients.jedis.search.*;
import redis.clients.jedis.CommandArguments;
import redis.clients.jedis.commands.ProtocolCommand;

// Connect to Redis.
// NOTE(review): "vecsim" is presumably the hostname of the Redis server in the
// demo environment - adjust host/port for your own setup.
String host = "vecsim";
int port = 6379;
// redis_conn is the shared client handle that every later cell passes around.
UnifiedJedis redis_conn = new JedisPooled(host, port);

// Some logging warnings may come up. Let's not worry about them in this notebook.

In [None]:
// Test connection: write a key, then read the same key back.
redis_conn.set("test", "Success!");
redis_conn.get("test");

# 2. Utility functions to load data into Redis
We'll be loading into a "hash" structure (a table-like structure)

In [None]:
// load articles into redis hash

import java.nio.ByteBuffer;

/**
 * Serializes a float array into the raw FLOAT32 blob used for vector fields
 * and KNN query parameters.
 *
 * Each float occupies 4 bytes, written in little-endian order. ByteBuffer
 * defaults to big-endian, which would make the server read every component
 * as a different float value, so the byte order is set explicitly.
 *
 * @param input the vector components; must not be null
 * @return a new array of input.length * 4 bytes (empty for an empty input)
 */
public byte[] floatToByte(float[] input) {
    byte[] ret = new byte[input.length * Float.BYTES];
    // Fully-qualified ByteOrder keeps this block independent of the cell's import list.
    ByteBuffer.wrap(ret)
              .order(java.nio.ByteOrder.LITTLE_ENDIAN)
              .asFloatBuffer()
              .put(input);
    return ret;
}

/**
 * Writes synthetic documents into Redis hashes named "doc:0", "doc:1", ...
 *
 * Document i gets two fields: the vector field (a blob produced by
 * floatToByte, where every component equals i / (dimension + i)) and a
 * "number" field holding i as text.
 *
 * @param client            connection used for the HSET calls
 * @param vector_field_name name of the hash field that stores the vector blob
 * @param dimension         number of components in each vector
 * @param amount            number of documents to write; non-positive values fall back to 300
 */
public void load_vectors(UnifiedJedis client, String vector_field_name, int dimension, int amount) {
    final int total = (amount > 0) ? amount : 300;
    int idx = 0;
    while (idx < total) {
        // Constant vector: all components share the value idx / (dimension + idx).
        float[] embedding = new float[dimension];
        java.util.Arrays.fill(embedding, (float) idx / (dimension + idx));

        String docKey = "doc:" + idx;
        // The vector goes through the binary overload; the number through the String overload.
        byte[] rawKey = docKey.getBytes();
        byte[] rawField = vector_field_name.getBytes();
        client.hset(rawKey, rawField, floatToByte(embedding));
        client.hset(docKey, "number", String.valueOf(idx));

        idx++;
    }
}

/**
 * Sends a FLUSHALL command through the given client.
 *
 * @param client connection on which the command is executed
 */
public void flushAll(UnifiedJedis client) {
    CommandArguments flushCommand = new CommandArguments(Protocol.Command.FLUSHALL);
    client.executeCommand(flushCommand);
}

/**
 * Prints one search result on a single line: its id followed by its properties.
 *
 * @param doc the search result document to print
 */
public void printDoc(Document doc) {
    StringBuilder line = new StringBuilder("id: ");
    line.append(doc.getId());
    line.append(", properties:");
    line.append(doc.getProperties());
    System.out.println(line.toString());
}


# 3. Utility Functions to Define Vector Indexes

In [None]:
// Utility Functions to Create Indexes on Vector field

/**
 * Creates the index "my_index" with a FLAT vector field plus a numeric "number" field.
 *
 * @param redis_conn        connection used to issue the index creation
 * @param vector_field_name name of the vector field to index
 * @param number_of_vectors used for both INITIAL_CAP and BLOCK_SIZE
 * @param vector_dimensions value of the DIM attribute
 * @param distance_metric   value of the DISTANCE_METRIC attribute
 */
public void create_flat_index (UnifiedJedis redis_conn, String vector_field_name, int number_of_vectors, int vector_dimensions, String distance_metric) {
    // Attributes of the FLAT vector field; components are declared as FLOAT32.
    Map<String, Object> flatParams = new HashMap<>();
    flatParams.put("TYPE", "FLOAT32");
    flatParams.put("DIM", vector_dimensions);
    flatParams.put("DISTANCE_METRIC", distance_metric);
    flatParams.put("INITIAL_CAP", number_of_vectors);
    flatParams.put("BLOCK_SIZE", number_of_vectors);

    Schema vectorOnly = new Schema().addFlatVectorField(vector_field_name, flatParams);
    Schema indexSchema = vectorOnly.addNumericField("number");

    redis_conn.ftCreate("my_index", IndexOptions.defaultOptions(), indexSchema);
}

/**
 * Creates the index "my_index" with an HNSW vector field plus a numeric "number" field.
 *
 * @param redis_conn        connection used to issue the index creation
 * @param vector_field_name name of the vector field to index
 * @param number_of_vectors value of the INITIAL_CAP attribute
 * @param vector_dimensions value of the DIM attribute
 * @param distance_metric   value of the DISTANCE_METRIC attribute
 * @param m                 value of the M attribute
 * @param ef                value of the EF_CONSTRUCTION attribute
 */
public void create_hnsw_index (UnifiedJedis redis_conn, String vector_field_name, int number_of_vectors, int vector_dimensions, String distance_metric, int m, int ef) {
    // Attributes of the HNSW vector field; components are declared as FLOAT32.
    Map<String, Object> hnswParams = new HashMap<>();
    hnswParams.put("TYPE", "FLOAT32");
    hnswParams.put("DIM", vector_dimensions);
    hnswParams.put("DISTANCE_METRIC", distance_metric);
    hnswParams.put("INITIAL_CAP", number_of_vectors);
    hnswParams.put("M", m);
    hnswParams.put("EF_CONSTRUCTION", ef);

    Schema vectorOnly = new Schema().addHNSWVectorField(vector_field_name, hnswParams);
    Schema indexSchema = vectorOnly.addNumericField("number");

    redis_conn.ftCreate("my_index", IndexOptions.defaultOptions(), indexSchema);
}

# 4. Load and Index data (HNSW Vector Index)

In [None]:
// Dataset and index parameters; later cells reuse these names.
int NUMBER_ARTICLES = 300;
String VECTOR_FIELD_NAME = "my_vector";
String DISTANCE_METRIC = "L2";
int DIMENSIONS = 100;

// Start clean, create the HNSW index, then load the documents.
flushAll(redis_conn);
// The trailing 40 and 200 are the m and ef arguments, which create_hnsw_index
// stores as the M and EF_CONSTRUCTION attributes.
create_hnsw_index(redis_conn,VECTOR_FIELD_NAME,NUMBER_ARTICLES,DIMENSIONS,DISTANCE_METRIC,40,200);
load_vectors(redis_conn,VECTOR_FIELD_NAME,DIMENSIONS,NUMBER_ARTICLES);
System.out.println(NUMBER_ARTICLES + " News Articles loaded and indexed");

# 5. A simple FT.SEARCH (without vector similarity)
### Get all documents with `number` field between 42 and 46 

FT.SEARCH QUERY = `@number:[42 46]`

In [None]:
// Plain numeric range filter on the "number" field; no vector clause involved.
// NOTE(review): setNoContent presumably makes the search return ids only - confirm against the Jedis Query API.
Query q = new Query("@number:[42 46]").setNoContent();
List<Document> docs = redis_conn.ftSearch("my_index", q).getDocuments();

// Report how many documents matched, then print each one.
System.out.println("Got " + docs.size() + " results.");
for (Document doc : docs) {
    printDoc(doc);
}


# 6. A simple FT.SEARCH (only vector similarity)
### Get the top 4 documents whose vector field is closest to [1.4e-30f, 1.4e-30f, ...]

FT.SEARCH QUERY = `*=>[KNN 4 @my_vector $QUERY_BLOB]`

In [None]:
// query for similarity

// Query vector: DIMENSIONS components, all set to 1.4e-30f.
// Later cells reuse `e` as their query vector.
float[] e = new float[DIMENSIONS];
for (int j = 0; j < e.length; j++) {
    e[j] = 1.4e-30f;
}

// K and the serialized query vector are passed as the $K and $BLOB query parameters.
// NOTE(review): "__my_vector_score" is presumably the default name of the KNN distance
// field, and `true` presumably means ascending order - confirm against the Jedis/RediSearch docs.
// dialect(2) is set per query because the KNN syntax requires dialect 2.
int K = 4;
Query q = new Query("*=>[KNN $K @my_vector $BLOB]").setSortBy("__my_vector_score", true).addParam("K", K).addParam("BLOB", floatToByte(e)).limit(0,K).dialect(2);

// Run the search against "my_index" and collect the matching documents.
List<Document> docs = redis_conn.ftSearch("my_index", q).getDocuments();

System.out.println("Got " + docs.size() + " results.");
for (Document doc : docs) {
    printDoc(doc);
}

### Same query, but naming the distance results and not returning the vector blobs

FT.SEARCH QUERY = `*=>[KNN 4 @my_vector $QUERY_BLOB AS distances]`

In [None]:
// Now without getting the vector blobs

// Same KNN query, but the distance is aliased as "distances" via the AS clause, and
// returnFields limits the output to "number" and "distances" (no vector blob).
// Reuses the query vector `e` defined in the previous similarity cell.
int K = 4;
Query q = new Query("*=>[KNN $K @my_vector $BLOB AS distances]").setSortBy("distances", true)
                                                                .returnFields("number", "distances")
                                                                .addParam("K", K)
                                                                .addParam("BLOB", floatToByte(e))
                                                                .limit(0,K)
                                                                .dialect(2);

// Run the search against "my_index" and collect the matching documents.
List<Document> docs = redis_conn.ftSearch("my_index", q).getDocuments();

System.out.println("Got " + docs.size() + " results.");
for (Document doc : docs) {
    printDoc(doc);
}

# 7. Config default dialect

VSS syntax is available only in dialect 2 of the FT.SEARCH command.

As of RediSearch 2.4.3, the default dialect is 1, so we needed to specify in the command that we want to use dialect 2.

Let's instead set the default dialect to 2, so we won't need to include it in every query.

In [None]:

// Set the DEFAULT_DIALECT option to 2 so the queries below can omit .dialect(2).
redis_conn.ftConfigSet("DEFAULT_DIALECT", "2");


# 8. A Hybrid Query FT.SEARCH (vector and non-vector search criteria)
### Get the top 5 documents whose `number` value is between 0 and 100

FT.SEARCH QUERY = `@number:[0 100]=>[KNN $K @my_vector $BLOB AS scores]`

In [None]:
// same query vector as before

// build query
// Hybrid query: the numeric filter @number:[0 100] is combined with a KNN clause
// whose distance is aliased as "scores". Reuses the query vector `e` from the
// earlier similarity cell. No .dialect(2) here - relies on the DEFAULT_DIALECT
// setting made in the preceding config cell.
int K = 5;
Query q = new Query("@number:[0 100]=>[KNN $K @my_vector $BLOB AS scores]").setSortBy("scores", true)
                                                                           .returnFields("number", "scores")
                                                                           .addParam("K", K)
                                                                           .addParam("BLOB", floatToByte(e))
                                                                           .limit(0,K);

// FT.SEARCH
List<Document> docs = redis_conn.ftSearch("my_index", q).getDocuments();

System.out.println("Got " + docs.size() + " results.");
for (Document doc : docs) {
    printDoc(doc);
}

# 9. Another Hybrid Query FT.SEARCH (vector and non-vector search criteria)
## Get top 5 articles with 
- `number` value is between 0 and 20

OR
- `number` value is between `indexsize-20` and `indexsize`


In [None]:
// query for similarity

// Hybrid query with an OR of two numeric ranges: [0 20] and [$x $y], where
// $x = NUMBER_ARTICLES - 20 and $y = NUMBER_ARTICLES are passed as query parameters.
// Reuses the query vector `e` from the earlier similarity cell.
int K = 5;
Query q = new Query("(@number:[0 20]|@number:[$x $y])=>[KNN $K @my_vector $BLOB AS scores]").setSortBy("scores", true)
                                                                                            .returnFields("number", "scores")
                                                                                            .addParam("K", K)
                                                                                            .addParam("x", NUMBER_ARTICLES - 20)
                                                                                            .addParam("y", NUMBER_ARTICLES)
                                                                                            .addParam("BLOB", floatToByte(e))
                                                                                            .limit(0,K);

// FT.SEARCH
List<Document> docs = redis_conn.ftSearch("my_index", q).getDocuments();

System.out.println("Got " + docs.size() + " results.");
for (Document doc : docs) {
    printDoc(doc);
}

## another way for the same results:
## Get top 5 articles with 
- `number` value is NOT between `21` and `indexsize-21` ("`(indexsize-20`" means don't include this value)


In [None]:
// query for similarity

// Same selection expressed as a negation: exclude documents whose number falls in
// [21, NUMBER_ARTICLES - 20). The "(" prefix marks the upper bound as exclusive, and
// the bound is concatenated into the query string rather than passed as a parameter.
// Reuses the query vector `e` from the earlier similarity cell.
int K = 5;
Query q = new Query("(-@number:[21 ("+ (NUMBER_ARTICLES - 20) +"])=>[KNN $K @my_vector $BLOB AS scores]").setSortBy("scores", true)
                                                                                                         .returnFields("number", "scores")
                                                                                                         .addParam("K", K)
                                                                                                         .addParam("BLOB", floatToByte(e))
                                                                                                         .limit(0,K);

// FT.SEARCH
List<Document> docs = redis_conn.ftSearch("my_index", q).getDocuments();

System.out.println("Got " + docs.size() + " results.");
for (Document doc : docs) {
    printDoc(doc);
}

# 10. Load and Index data (FLAT Vector Index)

In [None]:
// Re-declared with the same values as in the HNSW cell; DIMENSIONS and
// DISTANCE_METRIC are taken from that earlier cell, so it must have been run first.
int NUMBER_ARTICLES = 300;
String VECTOR_FIELD_NAME = "my_vector";

// Wipe the database, rebuild "my_index" as a FLAT index, then reload the same documents.
flushAll(redis_conn);
create_flat_index(redis_conn,VECTOR_FIELD_NAME,NUMBER_ARTICLES,DIMENSIONS,DISTANCE_METRIC);
load_vectors(redis_conn,VECTOR_FIELD_NAME,DIMENSIONS,NUMBER_ARTICLES);
System.out.println(NUMBER_ARTICLES + " News Articles loaded and indexed");

# 11. Another Hybrid Query FT.SEARCH (vector and non-vector search criteria)
### Get the top 5 documents whose `number` value is between 0 and 100

FT.SEARCH QUERY = `@number:[0 100]=>[KNN $K @my_vector $BLOB AS scores]`

In [None]:
// query as before

// build query
// Same hybrid query as in section 8, now executed against the FLAT index built in
// the previous cell. Reuses the query vector `e` from the earlier similarity cell.
int K = 5;
Query q = new Query("@number:[0 100]=>[KNN $K @my_vector $BLOB AS scores]").setSortBy("scores", true)
                                                                           .returnFields("number", "scores")
                                                                           .addParam("K", K)
                                                                           .addParam("BLOB", floatToByte(e))
                                                                           .limit(0,K);

// FT.SEARCH
List<Document> docs = redis_conn.ftSearch("my_index", q).getDocuments();

System.out.println("Got " + docs.size() + " results.");
for (Document doc : docs) {
    printDoc(doc);
}

In [None]:
// Close the connection; cells that use redis_conn cannot be re-run after this
// without re-running the connection cell first.
redis_conn.close();