#  🌞 Solr hybrid search

Author: J Visbeen  
Target: practise  
Main Source: https://medium.com/@maithri.vm/from-keywords-to-meaning-embracing-semantic-fusion-in-apache-solrs-hybrid-search-paradigm-e7be29534ddd 

Connect to local instance

In [1]:
from pysolr import Solr
import json
from sentence_transformers import SentenceTransformer

model_name='bert-base-nli-mean-tokens'
model = SentenceTransformer(model_name)

solr_url = 'http://localhost:8983/solr/amazon_products'
solr = Solr(solr_url)



In [2]:
response = solr.ping()
response

'{\n  "responseHeader":{\n    "zkConnected":null,\n    "status":0,\n    "QTime":17,\n    "params":{\n      "q":"{!lucene}*:*",\n      "distrib":"false",\n      "df":"_text_",\n      "rows":"10",\n      "echoParams":"all",\n      "rid":"null-11"\n    }\n  },\n  "status":"OK"\n}'

Read and prepare data

In [4]:
import pandas as pd

df_categories = (
    pd.read_csv('../amazon_categories.csv')
    .dropna()
    .reset_index()
)
df_products = (
    pd.read_csv('../amazon_products_100000.csv')
    .dropna()
    .reset_index()
)

merged_df = pd.merge(df_products, df_categories, left_on='category_id', right_on='id', how='left')
merged_df.drop('category_id', axis=1)
merged_df

Unnamed: 0,index_x,asin,title,imgUrl,productURL,stars,reviews,price,listPrice,category_id,isBestSeller,boughtInLastMonth,index_y,id,category_name
0,0,B014TMV5YE,"Sion Softside Expandable Roller Luggage, Black...",https://m.media-amazon.com/images/I/815dLQKYIY...,https://www.amazon.com/dp/B014TMV5YE,4.5,0,139.99,0.00,104,False,2000,94,104,Suitcases
1,1,B07GDLCQXV,Luggage Sets Expandable PC+ABS Durable Suitcas...,https://m.media-amazon.com/images/I/81bQlm7vf6...,https://www.amazon.com/dp/B07GDLCQXV,4.5,0,169.99,209.99,104,False,1000,94,104,Suitcases
2,2,B07XSCCZYG,Platinum Elite Softside Expandable Checked Lug...,https://m.media-amazon.com/images/I/71EA35zvJB...,https://www.amazon.com/dp/B07XSCCZYG,4.6,0,365.49,429.99,104,False,300,94,104,Suitcases
3,3,B08MVFKGJM,Freeform Hardside Expandable with Double Spinn...,https://m.media-amazon.com/images/I/91k6NYLQyI...,https://www.amazon.com/dp/B08MVFKGJM,4.6,0,291.59,354.37,104,False,400,94,104,Suitcases
4,4,B01DJLKZBA,Winfield 2 Hardside Expandable Luggage with Sp...,https://m.media-amazon.com/images/I/61NJoaZcP9...,https://www.amazon.com/dp/B01DJLKZBA,4.5,0,174.99,309.99,104,False,400,94,104,Suitcases
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,B0BR617B8P,Mens Athletic Workout Shorts with Compression ...,https://m.media-amazon.com/images/I/61MFjWYxa5...,https://www.amazon.com/dp/B0BR617B8P,4.5,0,34.99,0.00,110,False,0,100,110,Men's Clothing
9996,9996,B07VWSP5HD,Men's Knitted Regular Fit Full Zip Cardigan Sw...,https://m.media-amazon.com/images/I/91Hm4RVlu8...,https://www.amazon.com/dp/B07VWSP5HD,4.4,0,49.99,59.99,110,False,0,100,110,Men's Clothing
9997,9997,B0CD6MBV8T,Men's Jacket Windproof Qulited Bomber Jackets ...,https://m.media-amazon.com/images/I/61sVoWGbg3...,https://www.amazon.com/dp/B0CD6MBV8T,4.6,0,46.98,0.00,110,False,0,100,110,Men's Clothing
9998,9998,B08XW98F22,Men's Dry Franchise Polo,https://m.media-amazon.com/images/I/51yJ8ZYPcs...,https://www.amazon.com/dp/B08XW98F22,4.4,0,46.87,55.00,110,False,0,100,110,Men's Clothing


Populate the database

In [5]:
amazon_products_array = []
for idx, row in merged_df.iterrows():
    amzon_product_obj = {
        'id': idx,
        'title': row['title'],
        'imageUrl': row['imgUrl'],
        'productUrl': row['productURL'],
        'stars': row['stars'],
        'reviews': row['reviews'],
        'price': row['price'],
        'listPrice': row['listPrice'],
        'categoryName': row['category_name'],
        'isBestSeller': row['isBestSeller'],
        'boughtInLastMonth': row['boughtInLastMonth'],
        'BERT_vector': model.encode(row['title'] + ' ' + row['category_name']).tolist()
    }
    amazon_products_array.append(amzon_product_obj)

solr.add(amazon_products_array)


'{\n  "responseHeader":{\n    "status":0,\n    "QTime":10378\n  }\n}'

Search the database

In [None]:
def hybrid_search(query):
    filter_queries = []
    filter_query="type:Video OR type:text"
    filter_queries.append(filter_query)
    user_signal = "leadership skills"

    embeded_query = model.encode([query])

    solr_response = solr.search(
        fl = ["id", "title", "imageUrl", "productUrl", "stars", "reviews", "price", "listPrice", "categoryName", "isBestSeller", "boughtInLastMonth"],
        q = "",
        fq ="",
        rq = "",
        rqq = "",
        rows = 10000)
    


In [1]:
from sentence_transformers import SentenceTransformer

def update_dense_index():
    try:
        model_name='bert-base-nli-mean-tokens'
        model = SentenceTransformer(model_name)
    except Exception as e:
        print('error occured')
        return
    

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
solr_response = solr.search(
    q='Wheel',
    rows=20,
    start=0,
    f='title',
    qf='title^5',
    defType='edismax',
    ps='7',
    wt='json'
)

for i in solr_response:
    print(i)

{'id': '843', 'title': ['Universal Luggage Wheel, Wheel Replacement Luggage Travel Suitcase Wheels, Caster Wheel Replacement Travel Suitcase Wheel, Plastic'], 'imageUrl': ['https://m.media-amazon.com/images/I/61LWNCSXqFL._AC_UL320_.jpg'], 'productUrl': ['https://www.amazon.com/dp/B0BHXPQ8LH'], 'stars': [0.0], 'reviews': [0], 'price': [21.03], 'listPrice': [0.0], 'categoryName': ['Suitcases'], 'isBestSeller': [False], 'boughtInLastMonth': [0], 'BERT_vector': [-0.37865886092185974, 0.24041537940502167, 0.5860229134559631, 0.7581703066825867, 0.5846679210662842, -0.5036181807518005, -0.46948376297950745, -0.00031031668186187744, -0.45815742015838623, -0.05881822109222412, -0.28661003708839417, 0.619077742099762, 0.22040985524654388, 0.5051527619361877, -0.1146472692489624, 0.12227371335029602, -1.1407380104064941, -0.15253187716007233, -0.14035724103450775, -0.5488552451133728, -0.23888957500457764, 0.08053553104400635, 0.31295931339263916, 0.09983271360397339, 0.38594481348991394, -0.045

In [15]:
solr_response.hits

148

In [17]:
embedding = model.encode(['Xbox games'])
solr_resp = solr.search(
    fl=['categoryName', 'title', 'stars'],
    q='{!knn f=BERT_vector topK=10}'+str([float(w) for w in embedding[0]]),
    rows=20)

for item in solr_resp:
    print(item)

SolrError: Solr responded with an error (HTTP 400): [Reason: only DenseVectorField is compatible with Knn Query Parser]