In [7]:
import ast
import os
import struct

import numpy as np
import pandas as pd
import redis
from redis.commands.search.query import Query
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# Read the raw car listings from the local CSV file.
df = pd.read_csv("embeds_test.csv")

In [3]:
# Drop the "embeds" column by rebinding instead of mutating in place;
# errors="ignore" keeps this cell re-runnable once the column is already gone.
df = df.drop(columns=["embeds"], errors="ignore")

In [4]:
# Peek at the first three rows to check the remaining columns.
df.head(3)

Unnamed: 0,name,year,selling_price,km_driven,fuel
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol


In [6]:
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,year,selling_price,km_driven
count,50.0,50.0,50.0
mean,2013.2,488460.0,66285.76
std,4.759523,403968.9,45158.85196
min,2001.0,45000.0,2388.0
25%,2011.0,200000.0,29825.0
50%,2014.0,395000.0,64000.0
75%,2017.0,665000.0,96750.0
max,2019.0,2100000.0,175000.0


In [15]:
def preprocess_dataframe(df):
    """Label-encode 'fuel' and standardize the numeric feature columns.

    The input frame is not modified; all work happens on a copy.

    Args:
        df: DataFrame containing the columns 'year', 'selling_price',
            'km_driven' and 'fuel'.

    Returns:
        A tuple ``(processed, scaler, encoder)``: the transformed copy of
        ``df``, the fitted ``StandardScaler`` and the fitted ``LabelEncoder``.
    """
    feature_columns = ['year', 'selling_price', 'km_driven', 'fuel']
    fuel_encoder = LabelEncoder()
    feature_scaler = StandardScaler()

    # Work on a copy so the caller's frame is left untouched.
    processed = df.copy()

    # 'fuel' must be turned into integer codes before it can be scaled
    # together with the other feature columns.
    processed['fuel'] = fuel_encoder.fit_transform(processed['fuel'])
    processed[feature_columns] = feature_scaler.fit_transform(processed[feature_columns])

    return processed, feature_scaler, fuel_encoder

In [16]:
# Build the scaled feature frame and keep the fitted scaler and encoder returned with it.
df2, sc, le = preprocess_dataframe(df)

In [18]:
# Inspect the first rows of the preprocessed frame.
df2.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel
0,Maruti Swift Dzire VDI,0.169791,-0.096172,1.771933,-1.020589
1,Skoda Rapid 1.5 TDI Ambition,0.169791,-0.296217,1.201527,-1.020589
2,Honda City 2017-2020 EXi,-1.528115,-0.826338,1.648904,0.942082
3,Hyundai i20 Sportz Diesel,-0.679162,-0.6588,1.358109,-1.020589
4,Maruti Swift VXI BSIII,-1.315877,-0.896354,1.201527,0.942082


In [19]:
# Placeholder "vec" column filled with zeros; the later
# `df2["vec"] = df2.apply(vec, axis=1)` cell overwrites it with the feature lists.
df2["vec"] = np.zeros(len(df2.index))

In [27]:
def vec(row):
    """Return the row's four feature values as a list.

    The order is fixed: year, selling_price, km_driven, fuel.
    """
    feature_order = ("year", "selling_price", "km_driven", "fuel")
    return [row[column] for column in feature_order]

In [28]:
# Store each row's feature list (built by `vec`) in the "vec" column.
df2["vec"] = df2.apply(vec, axis=1)

In [30]:
# Connect to the hosted Redis instance. The password is read from the
# environment so that no secret is stored in the notebook; host and port can be
# overridden the same way and otherwise fall back to the original endpoint.
# NOTE(review): a password used to be committed in this cell — treat it as
# leaked and rotate it.
r = redis.Redis(
    host=os.environ.get(
        "REDIS_HOST",
        "redis-13948.c250.eu-central-1-1.ec2.cloud.redislabs.com",
    ),
    port=int(os.environ.get("REDIS_PORT", "13948")),
    password=os.environ["REDIS_PASSWORD"],  # KeyError if unset: fail early, not at first command
)

In [None]:
# Local alternative: running this cell rebinds `r` to a Redis server on localhost.
r = redis.Redis(host='localhost', port=6379)

In [34]:
def list_to_binary_floats(float_list):
    """Pack a sequence of numbers into raw float32 bytes.

    Every value is cast to a 32-bit float and the values are laid out
    back-to-back in the order given.
    """
    packed = np.array(float_list, dtype=np.float32)
    return packed.tobytes()

In [35]:
# Push every feature vector to Redis as a hash "vehicule:<index>" holding a
# single "vector" field. The writes are queued on a non-transactional pipeline
# and sent together instead of paying one network round trip per row.
with r.pipeline(transaction=False) as pipe:
    for i, features in df2["vec"].items():
        pipe.hset(f"vehicule:{i}", mapping={"vector": list_to_binary_floats(features)})
    pipe.execute()

In [36]:
# Nearest-neighbour lookup against the Redis vector index.
def redis_search(float_list, k=2):
    """Return the `k` stored vectors closest to `float_list`.

    Args:
        float_list: Query vector as a sequence of floats; it is packed to
            float32 bytes with `list_to_binary_floats` before being sent.
        k: Number of neighbours to request. Defaults to 2, the value that
            was previously hard-coded in the query string.

    Returns:
        The `docs` list of the search result, sorted by "score"; only the
        "score" field is requested for each document.

    Raises:
        ValueError: if `k` is smaller than 1.

    NOTE(review): the "idx1" index is not created anywhere in the visible
    notebook — presumably it already exists on the server with a vector
    field named "vector"; confirm before running against a fresh database.
    """
    k = int(k)
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Keep the packed bytes under their own name instead of rebinding the
    # list parameter to a value of a different type.
    query_blob = list_to_binary_floats(float_list)

    query = (
        Query(f"*=>[KNN {k} @vector $vec as score]")
        .sort_by("score")
        .return_fields("score")
        .paging(0, k)  # a KNN query yields at most k hits, so one page of k covers them all
        .dialect(2)
    )

    return r.ft("idx1").search(query, {"vec": query_blob}).docs

In [43]:
# Query with the vector of the row at position 17. The column is selected by
# name so the call does not silently break if the column order changes.
redis_search(df2["vec"].iloc[17])

[Document {'id': 'vehicule:17', 'payload': None, 'score': '0'},
 Document {'id': 'vehicule:39', 'payload': None, 'score': '0.104735270143'}]

In [44]:
# Show the original (unscaled) rows at positions 17 and 39, the two ids
# reported by the search result above.
print(
    df.iloc[17, :],
    "-------------------",
    df.iloc[39, :],
    sep="\n",
)

name             Hyundai i20 1.4 CRDi Asta
year                                  2012
selling_price                       500000
km_driven                            53000
fuel                                Diesel
Name: 17, dtype: object
-------------------
name             Tata Safari DICOR 2.2 LX 4x2
year                                     2011
selling_price                          425000
km_driven                               60000
fuel                                   Diesel
Name: 39, dtype: object


In [13]:
def delete_keys_with_prefix(prefix):
    """Delete every key whose name starts with `prefix`.

    The keyspace is walked with `scan_iter`, which handles the SCAN cursor
    internally, and the deletions are queued on a non-transactional pipeline
    so they are sent together once the scan has finished.

    Args:
        prefix: Leading part of the key names to remove. It is inserted into
            the SCAN match pattern as ``f"{prefix}*"`` without escaping, so
            glob characters in it act as wildcards.

    Returns:
        None.
    """
    with r.pipeline(transaction=False) as pipe:
        for key in r.scan_iter(match=f"{prefix}*"):
            pipe.delete(key)
        pipe.execute()

In [14]:
# Clean up: remove every key starting with "vehicule:", the prefix used when
# the vectors were pushed.
delete_keys_with_prefix("vehicule:")