### 4. Deploy the vector DB on your own, and implement the `vector cosine similarity` without using a high-level library.
Vector Cosine Similarity
This utilizes the dot product identity, rearranged to solve for the cosine of the angle between the two vectors:
```
dot_product(a, b) = |a||b| × cos(θ)
cos(θ) = dot_product(a, b) / (|a||b|)
```

In [26]:
import requests
import numpy as np
from typing import List
from collections import defaultdict


class VectorDB:
    """Minimal in-memory vector store backed by an HTTP embedding endpoint.

    Each inserted text is embedded by POSTing to
    ``{base_url}/api/embeddings`` and stored in a plain dict keyed by the
    text itself. ``search`` is a brute-force cosine-similarity scan over
    every stored vector.
    """

    def __init__(
        self,
        embedding_model_name: str = "nomic-embed-text:v1.5",
        base_url: str = "http://localhost:11434",
        timeout: float = 60.0,
    ):
        """Configure the embedding endpoint and create an empty store.

        Args:
            embedding_model_name: Value sent as ``"model"`` in each
                embedding request.
            base_url: Root URL of the embedding server; ``/api/embeddings``
                is appended to it. NOTE(review): the default port and path
                look like a local Ollama server -- confirm.
            timeout: Seconds to wait for the embedding server before the
                request raises instead of hanging indefinitely.
        """
        self.base_url = base_url
        self.embedding_model_name = embedding_model_name
        self.timeout = timeout
        # Maps the inserted text to its embedding vector.
        self.vectors = {}
        # Not read or written by any method of this class; kept so code
        # that accesses the attribute from outside keeps working.
        self.ids = defaultdict(list)

    def get_embedding(self, text: str) -> List[float]:
        """Request the embedding of ``text`` from the configured server.

        Args:
            text: The string to embed; sent as ``"prompt"``.

        Returns:
            The value stored under ``"embedding"`` in the JSON response
            (expected to be a list of floats).

        Raises:
            requests.exceptions.Timeout: The server did not answer within
                ``self.timeout`` seconds.
            requests.exceptions.HTTPError: The server answered with a
                4xx/5xx status.
        """
        response = requests.post(
            f"{self.base_url}/api/embeddings",
            json={"model": self.embedding_model_name, "prompt": text},
            # Without a timeout, requests waits forever on a stalled server.
            timeout=self.timeout,
        )
        # Fail with the real HTTP error rather than a KeyError on the
        # missing "embedding" field of an error payload.
        response.raise_for_status()
        return response.json()["embedding"]

    def get_similarity_score(self, vector1: List[float], vector2: List[float]):
        """Return the cosine similarity of two equal-length vectors.

        Computed as ``dot(v1, v2) / (|v1| * |v2|)``. When either vector has
        zero magnitude (including empty vectors) the angle is undefined, so
        ``0.0`` is returned instead of ``nan``; ``nan`` scores would make
        the ordering produced by ``search`` unreliable.

        Args:
            vector1: First vector.
            vector2: Second vector; must have the same length as
                ``vector1`` (``np.dot`` raises ``ValueError`` otherwise).

        Returns:
            The similarity as a ``np.float64``.
        """
        denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if denominator == 0:
            return np.float64(0.0)
        return np.dot(vector1, vector2) / denominator

    def insert_vector(self, key: str) -> None:
        """Embed ``key`` and store the vector under that same text.

        Inserting a key that is already present overwrites its vector.
        """
        self.vectors[key] = self.get_embedding(key)

    def search(self, query: str, k: int = 3):
        """Return the ``k`` stored keys most similar to ``query``.

        Args:
            query: Text to embed and compare against every stored vector.
            k: Maximum number of results. Values ``<= 0`` yield an empty
                list.

        Returns:
            A list of ``(key, score)`` tuples sorted by descending cosine
            similarity; ties keep insertion order. Empty when the store is
            empty or ``k <= 0``.
        """
        # Nothing can be returned, so skip the embedding request entirely.
        if k <= 0 or not self.vectors:
            return []

        query_vector = self.get_embedding(query)
        scored = [
            (key, self.get_similarity_score(query_vector, vector))
            for key, vector in self.vectors.items()
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:k]


In [None]:
# Create a store using the default embedding model and server URL.
vdb = VectorDB()

# Embed and store two sample words; each call sends one embedding request.
vdb.insert_vector("dark")
vdb.insert_vector("light")


In [30]:
# Rank the stored words by cosine similarity to the query (k defaults to 3).
vdb.search("black")

[('dark', np.float64(0.658563901563752)),
 ('light', np.float64(0.46528477239453514))]