In [4]:
import pandas as pd
df = pd.read_csv('../../top_100_clubs.csv')
df = df[df['location'].notna()] # remove any NaN values as it blows up serialization
data = df.sample(100).to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

100

In [5]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

  from tqdm.autonotebook import tqdm, trange


In [6]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [7]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [8]:
# Create collection to store clubs
qdrant.recreate_collection(
    collection_name="top_clubs",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [9]:
# vectorize!
qdrant.upload_points(
    collection_name="top_clubs",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["text"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [10]:
user_prompt = "Can you give me the name of the 3 highest ranking clubs in Spain?"

In [11]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="top_clubs",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'Rank': 100, 'Club_Name': 'La Feria', 'location': 'Santiago, Chile', 'capacity': '400', 'Website': 'clublaferia.com', 'text': "If you've heard of one club in Chile it will undoubtedly be La Feria, a now-25-year-old venue that was once a large house in a bohemian neighbourhood. These days it is a hotbed of electronic music activity that boasts a Funktion-One sound system and sleek visual edge designed by Mexican designer Exme. In the last year, the club — which is run entirely on recycled energy — has opened a new space next to the main room that focuses on new local talent, while international guests have included the Martinez Brothers, Paco Osuna and Carl Cox. Thrust Publishing Ltd, Unit 3, 30-40 Underwood Street, London, N1 7JQ, United Kingdom."} score: 0.4202738555397836
{'Rank': 10, 'Club_Name': 'Laroc Club', 'location': 'Valinhos, Brazil', 'capacity': '5,000', 'Website': nan, 'text': 'laroc.club With its top-level DJ bookings, genuinely impressive open-air mainstage area, festiva

In [12]:
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]

In [13]:
# Now time to connect to the local large language model
from openai import OpenAI
client = OpenAI(
    base_url="http://127.0.0.1:8081/v1", # "http://<Your api-server IP>:port"
    api_key = "sk-no-key-required"
)
completion = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system", "content": "You are chatbot, a club specialist. Your top priority is to help guide users into selecting amazing clubs and guide them with their requests."},
        {"role": "user", "content": "Can you give me the name of the 3 highest ranking clubs in Spain?"},
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion.choices[0].message)