In [103]:
# recommendations/services.py
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct, VectorParams

In [104]:
# Connection settings for the local Qdrant instance
QDRANT_HOST = 'localhost'
QDRANT_PORT = 6333

# Name of the pre-trained Sentence Transformers checkpoint used for embeddings
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

# Vector database client shared by the indexing and retrieval helpers
qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

# Embedding model shared by the indexing and retrieval helpers
transformer_model = SentenceTransformer(EMBEDDING_MODEL_NAME)



In [105]:
# Embedding helper built on the module-level Sentence Transformer model
def get_embeddings(text):
    """Encode ``text`` with ``transformer_model`` and return the result as plain Python lists.

    Args:
        text: The input handed unchanged to ``transformer_model.encode``.

    Returns:
        The encoder output converted via ``.tolist()``.
    """
    encoded = transformer_model.encode(text)
    return encoded.tolist()

# Index documents in Qdrant
def index_documents(documents):
    """Embed documents and upsert them into the "recommendations" collection.

    The collection is created on first use, with its vector size taken from
    the first embedding that was computed.

    Args:
        documents: Iterable of dicts. Each dict must provide "id" and "text";
            "metadata" is optional and defaults to an empty dict. ``None`` or
            an empty iterable is treated as "nothing to do".

    Returns:
        None. A status message is printed.
    """
    # Materialize once so generators work and emptiness can be checked safely.
    documents = [] if documents is None else list(documents)
    if not documents:
        # Without this guard, points[0] below raises IndexError on empty input.
        print("No documents to index")
        return

    points = [
        PointStruct(
            id=doc["id"],
            vector=get_embeddings(doc["text"]),
            # Retrieval reads payload["metadata"], so always store the key.
            payload={"text": doc["text"], "metadata": doc.get("metadata", {})},
        )
        for doc in documents
    ]

    # if collection does not exist, create it
    if not qdrant_client.collection_exists(collection_name="recommendations"):
        qdrant_client.create_collection(
            collection_name="recommendations",
            vectors_config=VectorParams(
                size=len(points[0].vector),
                distance='Cosine'
            )
        )

    qdrant_client.upsert(collection_name="recommendations", points=points)
    print("Documents indexed successfully")

In [96]:
# Nearest-neighbour lookup in the "recommendations" collection
def get_similar_documents(query, k=5):
    """Return up to ``k`` stored documents closest to ``query``.

    Args:
        query: Text to embed and search with.
        k: Maximum number of hits requested from Qdrant.

    Returns:
        A list of dicts with the keys "id", "text" and "metadata", in the
        order returned by the search call.
    """
    hits = qdrant_client.search(
        collection_name="recommendations",
        query_vector=get_embeddings(query),
        limit=k,
    )
    return [
        {
            "id": hit.id,
            "text": hit.payload["text"],
            "metadata": hit.payload["metadata"],
        }
        for hit in hits
    ]

In [124]:
# Build the generation prompt from retrieved context plus the question
def create_prompt(query):
    """Return a prompt that lists the retrieved context and then asks ``query``.

    Args:
        query: The user question; also used to retrieve the context.

    Returns:
        The prompt string: an introductory line, the numbered context, and
        the question.
    """
    retrieved_context = create_context(query)
    return f"Here is the information we already have:\n{retrieved_context}\nAnswer the following question: {query}\n"

# Numbered context block made from the retrieved documents
def create_context(query):
    """Return the texts of the 5 most similar documents as a numbered list.

    Args:
        query: Text used to retrieve similar documents.

    Returns:
        A string with one "<n>. <text>" line per document, each terminated by
        a newline; empty when nothing was retrieved.
    """
    similar_docs = get_similar_documents(query, k=5)
    numbered_lines = (
        f"{position}. {doc['text']}\n"
        for position, doc in enumerate(similar_docs, start=1)
    )
    return "".join(numbered_lines)

In [71]:
# Sample corpus: (city, descriptive sentence) pairs, expanded below into the
# {"id", "text", "metadata"} records expected by the indexing helper.
_CITY_FACTS = [
    ("Paris", "Paris is the capital of France and a major European city."),
    ("Berlin", "Berlin, the capital of Germany, is known for its art and nightlife."),
    ("Tokyo", "Tokyo is the capital of Japan and a mix of modern and traditional."),
]

# Ids are assigned sequentially starting at 1.
documents = [
    {"id": doc_id, "text": sentence, "metadata": {"city": city}}
    for doc_id, (city, sentence) in enumerate(_CITY_FACTS, start=1)
]

In [72]:
# Embed the example documents and upsert them into the "recommendations" collection.
index_documents(documents)

Documents indexed successfully


In [98]:
# Sanity check: retrieve the stored documents closest to a sample question (default k=5).
print(get_similar_documents("What is the capital of France?"))

[{'id': 1, 'text': 'Paris is the capital of France and a major European city.', 'metadata': {'city': 'Paris'}}, {'id': 2, 'text': 'Berlin, the capital of Germany, is known for its art and nightlife.', 'metadata': {'city': 'Berlin'}}, {'id': 3, 'text': 'Tokyo is the capital of Japan and a mix of modern and traditional.', 'metadata': {'city': 'Tokyo'}}]


In [99]:
# Show the full prompt (numbered retrieved context followed by the question).
print(create_prompt(query="What is the capital of France?"))

Here is the information we already have:
1. Paris is the capital of France and a major European city.
2. Berlin, the capital of Germany, is known for its art and nightlife.
3. Tokyo is the capital of Japan and a mix of modern and traditional.

Answer the following question: What is the capital of France?



In [1]:
# Generation of recommendations
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint name shared by the language model and its tokenizer
GENERATION_MODEL_NAME = "gpt2"

# Load the causal language model first, then the matching tokenizer
model = AutoModelForCausalLM.from_pretrained(GENERATION_MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(GENERATION_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [120]:
# Function to generate recommendations -- TEMPORARY
def generate_recommendations(prompt, max_new_tokens=100):
    """Generate a sampled continuation of ``prompt`` with the module-level model.

    Args:
        prompt: Text to continue. It is truncated to at most 512 tokens.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The decoded text of the single returned sequence (prompt followed by
        the continuation), with special tokens stripped.
    """
    # Let the tokenizer build the attention mask. The previous
    # `input_ids.ne(1)` treated token id 1 as padding, but for GPT-2 that id
    # is an ordinary vocabulary token, so real prompt tokens could be masked.
    encoded = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)

    # Generate text
    output = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        # GPT-2 has no dedicated pad token; reuse EOS for padding.
        pad_token_id=model.config.eos_token_id,
        # Bound only the continuation. A total `max_length` of 150 left no
        # room to generate once the prompt itself reached 150 tokens.
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        temperature=0.7,
        top_k=5,
        do_sample=True
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

In [125]:
# End-to-end run: build the prompt for a sample question, generate a continuation, and print it.
print(generate_recommendations(prompt=create_prompt(query="What is the capital of France?")))


Here is the information we already have:
1. Paris is the capital of France and a major European city.
2. Berlin, the capital of Germany, is known for its art and nightlife.
3. Tokyo is the capital of Japan and a mix of modern and traditional.

Answer the following question: What is the capital of France?
The answer is simple: Paris.
The capital of France is located in the city of Paris.
This city has been a part of France since the 17th Century.
It was established in 1783 by the French king Louis XIV. It has been a part of France since the 17th Century.
France was the first European nation to become a major power in the
