In [43]:
import redis
from redis.commands.search.query import Query
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
import pickle

In [44]:
# Load the cleaned dataset; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv("data_cleaned.csv")

In [45]:
# Preview the first three rows to check the columns that were loaded.
df.head(3)

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,mileage,engine,max_power,seats
0,Maruti Swift Dzire VDI,2014,9900,145500,Diesel,Individual,Manual,4.27,1248.0,74.0,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,8140,120000,Diesel,Individual,Manual,4.73,1498.0,103.52,5.0
2,Honda City 2017-2020 EXi,2006,3476,140000,Petrol,Individual,Manual,5.65,1497.0,78.0,5.0


In [46]:
def preprocess_dataframe(df):
    """Ordinal-encode the categorical columns, then standard-scale the features.

    The input frame is not modified; all transformations are applied to a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``columns_to_categorize`` and
        ``columns_to_scale`` below.

    Returns
    -------
    tuple
        ``(df_copy, scaler, oe)``: the transformed copy, the fitted
        ``StandardScaler`` and the fitted ``OrdinalEncoder``, so the same
        transformations can be re-applied to new rows later.
    """
    # Work on a copy so the caller's frame keeps its raw values.
    df_copy = df.copy()

    # Categorical columns that are turned into numeric codes first.
    columns_to_categorize = ['fuel', 'seller_type', 'transmission']

    # Columns to standardise. Each column is listed exactly once: a repeated
    # name would make the scaler learn (and later expect) a duplicated feature.
    columns_to_scale = [
        'year', 'selling_price', 'km_driven', 'fuel', 'mileage',
        'engine', 'max_power', 'seats', 'seller_type', 'transmission',
    ]

    # Encode the categorical columns as ordinal codes.
    oe = OrdinalEncoder()
    df_copy[columns_to_categorize] = oe.fit_transform(df_copy[columns_to_categorize])

    # Standard-scale every feature, including the freshly encoded ones.
    scaler = StandardScaler()
    df_copy[columns_to_scale] = scaler.fit_transform(df_copy[columns_to_scale])

    return df_copy, scaler, oe

In [47]:
# df2 is the encoded and scaled copy of df; sc and oe are the fitted
# StandardScaler and OrdinalEncoder returned alongside it.
df2, sc, oe = preprocess_dataframe(df)

In [48]:
# Persist both fitted transformers, scaler first and encoder second,
# each pickled into its own file.
for target_path, fitted_obj in (('scaler.pkl', sc), ('ordinal.pkl', oe)):
    with open(target_path, 'wb') as handle:
        pickle.dump(fitted_obj, handle)

In [49]:
# Build one feature vector per row from every column after the first one.
# An existing "vec" column is dropped before the vectors are built, so
# re-running this cell does not nest the previous vectors inside the new ones.
df2["vec"] = (
    df2.drop(columns=["vec"], errors="ignore")
    .apply(lambda row: list(row.iloc[1:]), axis=1)
)

In [50]:
def list_to_binary_floats(float_list):
    """Pack a sequence of numbers into raw float32 bytes.

    The values are cast to 32-bit floats and their in-memory bytes are
    returned, i.e. four bytes per element.
    """
    as_float32 = np.asarray(float_list, dtype=np.float32)
    return as_float32.tobytes()

In [51]:
# Client for the Redis server running on this machine, port 6379.
r = redis.Redis(host='localhost', port=6379)

In [52]:
# Inspect the per-row feature vectors.
df2["vec"]

0       [0.004612112992954618, -0.24552897324943504, 1...
1       [0.004612112992954618, -0.34386886220197804, 0...
2       [-2.0644929897245996, -0.6044695679262171, 1.2...
3       [-1.0299404383658226, -0.5221099109284623, 1.0...
4       [-1.8058548518849054, -0.6388885290596071, 0.8...
                              ...                        
7902    [-0.2540260248467397, -0.40533129279731744, 0....
7903    [-1.8058548518849054, -0.6327422860000732, 0.8...
7904    [-1.2885785762055169, -0.3291178788590966, 0.8...
7905    [-0.2540260248467397, -0.4422087511545211, -0....
7906    [-0.2540260248467397, -0.4422087511545211, -0....
Name: vec, Length: 7907, dtype: object

In [53]:
# Store each row's vector in a Redis hash named "vehicule:<index label>",
# under the single field "vector" (packed float32 bytes).
# The commands are queued on a non-transactional pipeline and sent together,
# instead of paying one network round trip per row.
with r.pipeline(transaction=False) as pipe:
    for i, vec in df2["vec"].items():
        pipe.hset(f"vehicule:{i}", mapping={"vector": list_to_binary_floats(vec)})
    pipe.execute()

In [54]:
# function to perform a redis search, taking in a list of floats as input
def redis_search(float_list, k=2):
    """Run a K-nearest-neighbour vector search against the ``idx1`` index.

    Parameters
    ----------
    float_list : sequence of float
        Query vector; it is packed to float32 bytes before being sent.
    k : int, optional
        Number of nearest neighbours to request (default 2, the value that
        was previously hard-coded in the query).

    Returns
    -------
    list
        The matching documents, sorted by ``score``; each carries its id and
        the ``score`` field.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.

    Notes
    -----
    NOTE(review): the index ``idx1`` is not created in the visible cells, so
    it presumably has to exist in Redis already - confirm before a fresh run.
    """
    k = int(k)
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Keep the packed bytes under their own name rather than rebinding the
    # parameter to a different type.
    query_vector = list_to_binary_floats(float_list)

    query = (
        Query(f"*=>[KNN {k} @vector $vec as score]")
        .sort_by("score")
        .return_fields("score")
        .paging(0, k)  # KNN yields at most k hits, so the page size matches it
        .dialect(2)
    )

    query_params = {
        "vec": query_vector
    }

    return r.ft("idx1").search(query, query_params).docs

In [55]:
# Sanity check: search with the stored vector of the row at position 1127.
# Column -1 is the last column of df2, which is "vec" once it has been added.
redis_search(df2.iloc[1127,-1])

[Document {'id': 'vehicule:1127', 'payload': None, 'score': '0'},
 Document {'id': 'vehicule:6618', 'payload': None, 'score': '7.61151313782e-05'}]

In [56]:
# Show the raw (unscaled) query row next to the other row returned by the search.
print(df.iloc[1127, :], "-------------------", df.iloc[6618, :], sep="\n")

name             Renault KWID RXT Optional
year                                  2016
selling_price                         6820
km_driven                            11500
fuel                                Petrol
seller_type                     Individual
transmission                        Manual
mileage                               3.97
engine                               799.0
max_power                             53.3
seats                                  5.0
Name: 1127, dtype: object
-------------------
name             Renault KWID RXT
year                         2016
selling_price                6380
km_driven                   10000
fuel                       Petrol
seller_type            Individual
transmission               Manual
mileage                      3.97
engine                      799.0
max_power                    53.3
seats                         5.0
Name: 6618, dtype: object
