# Semantic Search - Rightmove - using Qdrant Vector Database

In [209]:
#!pip install sentence-transformers numpy pandas tqdm

In [212]:
from sentence_transformers import SentenceTransformer
import numpy as np
import json
import pandas as pd
from tqdm.notebook import tqdm

In [213]:
model = SentenceTransformer(
    "all-MiniLM-L6-v2", device="cpu"
)

In [214]:
# Read JSON lines into a DataFrame - note this is because the json is just rows of JSON, not a list of JSON
df = pd.read_json("properties.json", lines=True)
df.head()

Unnamed: 0,title,price,price_text,address,description,link
0,2 bedroom barn conversion for sale,245000,"£245,000","Dean, Shepton Mallet, BA4",A charming converted barn in a courtyard setti...,https://www.rightmove.co.uk/properties/1472220...
1,2 bedroom bungalow for sale,240000,"£240,000","Alnwick, Toothill, Swindon, Wiltshire, SN5",** THIS PROPERTY SUFFERS FROM SUBSIDENCE **OFF...,https://www.rightmove.co.uk/properties/1441520...
2,2 bedroom detached house for sale,240000,"£240,000","Renard Rise, Stonehouse, Gloucestershire, GL10",A spacious two bedroom detached coach house wi...,https://www.rightmove.co.uk/properties/1477211...
3,2 bedroom bungalow for sale,240000,"£240,000","Hill Street, Kingswood, Bristol, BS15",Public Notice - We advise that an offer has be...,https://www.rightmove.co.uk/properties/1456468...
4,3 bedroom cottage for sale,240000,"£240,000","Main Road, Woodford, GL13",Perfect for those looking for a character fami...,https://www.rightmove.co.uk/properties/1495099...


In [215]:
vectors = model.encode(
    [row.title + ". " + row.description for row in df.itertuples()],
    show_progress_bar=True,
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [216]:
vectors.shape

(12, 384)

In [217]:
np.save("startup_vectors.npy", vectors, allow_pickle=False)

In [218]:
"""!docker run -p 6333:6333 \
    -v $(pwd)/qdrant_storage:/qdrant/storage \
    qdrant/qdrant"""

'!docker run -p 6333:6333     -v $(pwd)/qdrant_storage:/qdrant/storage     qdrant/qdrant'

In [219]:
#!pip install qdrant-client

In [220]:
# Import client library
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

client = QdrantClient("http://localhost:6333")

In [202]:
client.recreate_collection(
    collection_name="property",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

True

In [203]:
fd = open("./properties.json")

# payload is now an iterator over startup data
payload = map(json.loads, fd)

# Load all vectors into memory, numpy array works as iterable for itself.
# Other option would be to use Mmap, if you don't want to load all data into RAM
vectors = np.load("./startup_vectors.npy")

In [204]:
vectors
payload

<map at 0x7efeda8faef0>

In [205]:
client.upload_collection(
    collection_name="property",
    vectors=vectors,
    payload=payload,
    ids=None,  # Vector ids will be assigned automatically
    batch_size=256,  # How many vectors will be uploaded in a single request?
)

# Client

In [206]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer


class NeuralSearcher:
    def __init__(self, collection_name):
        self.collection_name = collection_name
        # Initialize encoder model
        self.model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
        # initialize Qdrant client
        self.qdrant_client = QdrantClient("http://localhost:6333")
        
    def search(self, text: str):
        # Convert text query into vector
        vector = self.model.encode(text).tolist()
    
        # Use `vector` for search for closest vectors in the collection
        search_result = self.qdrant_client.search(
            collection_name=self.collection_name,
            query_vector=vector,
            query_filter=None,  # If you don't want any filters for now
            limit=5,  # 5 the most closest results is enough
        )
        # `search_result` contains found vector ids with similarity scores along with the stored payload
        # In this function you are interested in payload only
        payloads = [hit.payload for hit in search_result]
        return payloads

In [221]:
ns = NeuralSearcher("property")
res = ns.search(text="hot tub in a rural location")

In [222]:
res

[{'address': 'Brook Cottage, Lower Common, Aylburton, Lydney',
  'description': 'A charming detached one bedroom bungalow, situated within a picturesque semi-rural valley location with off road parking, a variety of county walks, income generating solar panels and close proximity to the A48 and greater travel networks. Offered with no onward chain.',
  'link': 'https://www.rightmove.co.uk/properties/148406387#/?channel=RES_BUY',
  'price': 230000,
  'price_text': '£230,000',
  'title': '1 bedroom bungalow for sale'},
 {'address': 'Chaingate Court, Glastonbury, BA6',
  'description': 'Situated in a secluded position and accessed via a gated entrance, this unique chalet style property features a charming courtyard garden and has the benefit of no onward chain. The property would make an excellent investment or suit those requiring a property with easy maintenance close\xa0...',
  'link': 'https://www.rightmove.co.uk/properties/148084376#/?channel=RES_BUY',
  'price': 237500,
  'price_tex

In [None]:
# https://qdrant.tech/documentation/tutorials/neural-search/