In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np

# Load MPNet model using sentence-transformers (or HuggingFace transformers)
mpnet_model = SentenceTransformer('all-MiniLM-L6-v2') 

In [2]:
from faker import Faker
fake_something = Faker()

In [3]:
fake_something.name(), fake_something.address()

('Joshua Murphy', 'PSC 8368, Box 2994\nAPO AP 43102')

In [4]:
# data
payload = []

for i in range(1000):
    payload.append(
        {
            "artist": fake_something.name(),
            "song": " ".join(fake_something.words()),
            "year": fake_something.year()
        }
    )
    
payload[:3]

[{'artist': 'Richard Simmons',
  'song': 'represent institution country',
  'year': '2015'},
 {'artist': 'Mr. Michael Green',
  'song': 'baby material source',
  'year': '1984'},
 {'artist': 'Valerie Mack', 'song': 'situation happen mean', 'year': '1995'}]

In [5]:
embeddings = []
for item in payload:
    text = f"{item['artist']} {item['song']}"
    embedding = mpnet_model.encode(text)
    embeddings.append(embedding)

embeddings = np.array(embeddings)

In [6]:
# Initialize Qdrant client
client = QdrantClient(host="localhost", port=6333)

my_collection = "first_collection"

# Check if collection exists
try:
    client.get_collection(collection_name=my_collection)
    print(f"Collection '{my_collection}' already exists. Skipping creation.")
except Exception as e:
    print(f"Collection '{my_collection}' does not exist. Creating a new one.")
    client.create_collection(
        collection_name=my_collection,
        vectors_config=models.VectorParams(size=384, distance=models.Distance.COSINE)
    )

# Generate random data for the collection
data = np.random.uniform(low=-1.0, high=1.0, size=(1000, 100))
index = list(range(len(payload)))

# Upsert data into the collection
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=embeddings.tolist(),
        payloads=payload
    )
)


Collection 'first_collection' already exists. Skipping creation.


UnexpectedResponse: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Wrong input: Vector dimension error: expected dim: 100, got 384"},"time":0.010217958}'

In [None]:
# time for semantic search

life_song = np.random.uniform(low=-1.0, high=1.0, size=(100)).tolist()
life_song[:5]

# search for semantically similar songs to life_song? 

In [None]:
# query

query = 'what song is from the year 1976?'
query_embedding = mpnet_model.encode(query)

# Perform semantic search in Qdrant using the generated embedding
search_result = client.search(
    collection_name=my_collection,
    query_vector=query_embedding.tolist(),  # Query vector (embedding of the search term)
    limit=10  # Limit the results to top 10 similar items
)

# Print search results
for hit in search_result:
    print(f"ID: {hit.id}, Score: {hit.score}, Payload: {hit.payload}")


In [7]:
import ollama
from sentence_transformers import SentenceTransformer
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http import models
from faker import Faker

# Initialize Sentence-Transformer (all-MiniLM-L6-v2) model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate sample data (could be anything you want to index in Qdrant)
fake = Faker()
payload = [{"song": fake.sentence(), "artist": fake.name(), "year": fake.year()} for _ in range(1000)]

# Generate embeddings for each piece of data
embeddings = []
for item in payload:
    text = f"{item['artist']} {item['song']}"
    embedding = embedding_model.encode(text)
    embeddings.append(embedding)

embeddings = np.array(embeddings)

# Initialize Qdrant client
client = QdrantClient(host="localhost", port=6333)

# Define collection name
my_collection = "song_collection"

# Check if collection exists, if not, create it
try:
    client.get_collection(collection_name=my_collection)
    print(f"Collection '{my_collection}' already exists. Skipping creation.")
except Exception as e:
    print(f"Collection '{my_collection}' does not exist. Creating a new one.")
    client.create_collection(
        collection_name=my_collection,
        vectors_config=models.VectorParams(size=384, distance=models.Distance.COSINE)
    )

# Upsert data (embedding vectors and payload) into the Qdrant collection
index = list(range(len(payload)))
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=embeddings.tolist(),
        payloads=payload
    )
)

# Now, perform a semantic search using a query
query = "What song was released in 1975?"
query_embedding = embedding_model.encode(query)

# Perform semantic search using Qdrant
search_result = client.search(
    collection_name=my_collection,
    query_vector=query_embedding.tolist(),
    limit=10  # Limit the results to top 10 similar items
)

# Print the search results
print("Search Results from Qdrant:")
for hit in search_result:
    print(f"ID: {hit.id}, Score: {hit.score}, Payload: {hit.payload}")

# Now, interact with Mistral 7B via Ollama for a query-based response
response = ollama.chat(model="mistral-7b", messages=[{"role": "user", "content": query}])

# Print the response from Mistral 7B (via Ollama)
print("\nResponse from Mistral 7B (via Ollama):")
print(response['text'])


Collection 'song_collection' already exists. Skipping creation.


  search_result = client.search(


Search Results from Qdrant:
ID: 827, Score: 0.39237708, Payload: {'song': 'Nothing city heart catch sing.', 'artist': 'Nicholas Mcbride', 'year': '2003'}
ID: 433, Score: 0.3497751, Payload: {'song': 'Song move fish their.', 'artist': 'Joshua Johnson', 'year': '1983'}
ID: 331, Score: 0.34297228, Payload: {'song': 'Song rock anything usually turn our.', 'artist': 'Eugene Benjamin', 'year': '2013'}
ID: 257, Score: 0.32144278, Payload: {'song': 'Leave place something song head.', 'artist': 'Connie Cooper', 'year': '1989'}
ID: 85, Score: 0.32036415, Payload: {'song': 'Seek sing win store.', 'artist': 'Amanda Ryan', 'year': '1988'}
ID: 691, Score: 0.31972313, Payload: {'song': 'Consider dream top admit happen.', 'artist': 'William Jones', 'year': '1994'}
ID: 119, Score: 0.31131765, Payload: {'song': 'Community through summer keep trip song.', 'artist': 'John Gomez', 'year': '2006'}
ID: 651, Score: 0.3063661, Payload: {'song': 'Cover table single.', 'artist': 'Connie Anderson', 'year': '2018'

ResponseError: model "mistral-7b" not found, try pulling it first (status code: 404)