In [1]:
import csv
import json
import os
import pymongo
from dotenv import load_dotenv
import plotly.express as px
import math

load_dotenv()

# The connection string is read from the environment (populated by load_dotenv()).
# It embeds credentials, so never echo the value itself into the notebook output:
# saved cell outputs get committed and shared along with the code.
MDB_URI = os.getenv('MDB_URI')
print("MDB_URI is set" if MDB_URI else "MDB_URI is not set")

# Build one document per CSV row. Column names are supplied explicitly, so row 0
# comes back as an ordinary record and is skipped below (presumably it is the
# header line of animals.csv - confirm against the file).
animals = []
with open('animals.csv', newline='') as csvFile:
    reader = csv.DictReader(csvFile, fieldnames=['species', 'body_weight', 'brain_weight'])
    for i, row in enumerate(reader):
        if i == 0:
            continue
        # Parse the two weights once; each metric gets its own copy of the same
        # 2-d vector under 2d.raw.<metric>.
        vector = [float(row['body_weight']), float(row['brain_weight'])]
        row['_id'] = i  # row number doubles as the document id
        row['2d'] = {
            'raw': {
                'euclidean': list(vector),
                'dotproduct': list(vector),
                'cosine': list(vector)
            }
        }
        row['dimensions'] = 2
        animals.append(row)

client = pymongo.MongoClient(MDB_URI)
collection = client['test']['vectors']

# Remove the 2-dimensional documents from any previous run before re-inserting,
# so re-running the cell does not collide on the fixed _id values.
collection.delete_many({"dimensions": 2})
collection.insert_many(animals)

mongodb+srv://<user>:<password>@<cluster-host>/test


<pymongo.results.InsertManyResult at 0x7fa411bac500>

In [2]:
def dotProduct(a,b):
    """Return the dot product of two indexable numeric sequences as a float.

    Iterates over the positions of ``a`` and multiplies each element by the
    element of ``b`` at the same position. The accumulator starts at 0.0, so
    the result is a float even for integer inputs and 0.0 for empty input.
    Elements of ``b`` beyond ``len(a)`` are ignored; a ``b`` shorter than ``a``
    raises IndexError.
    """
    total = 0.0
    for idx in range(len(a)):
        total = total + a[idx] * b[idx]
    return total

In [3]:
# Query animal whose nearest neighbour is looked up under each metric.
# NOTE(review): the name `input` shadows the Python builtin; it is kept here only
# because other cells may refer to it - consider renaming across the notebook.
input = {"_id":2,"species":"Cow","body_weight":465.000,"brain_weight":423.000}

# Loop-invariant values: the query vector and the plot label for the query animal.
inputVector = [input['body_weight'],input['brain_weight']]
input['series']=input['species']

for algo in ["euclidean","cosine","dotproduct"]:
  # $search / knnBeta query against the vector stored for this metric, asking for
  # the single closest document (k=1) and excluding the query animal itself by _id.
  pipeline = [
    {
      "$search": {
        "index": "default",
        "knnBeta": {
          "vector": inputVector,
          "path": "2d.raw.{}".format(algo),
          "filter": {
            "compound": {
              "mustNot":[
                {
                  "equals":{
                    "value":input["_id"],
                    "path":"_id"
                  }
                }
              ]
            }
          },
          "k": 1
        }
      }
    },
    {
      "$project":{'body_weight':1,'brain_weight':1,"_id":1,"species":1}
    }
  ]

  results = list(collection.aggregate(pipeline))
  if not results:
    # Nothing came back for this metric; report it instead of failing on
    # results[0] with a bare IndexError, and carry on with the other metrics.
    print("No nearest neighbour returned for '{}' (path 2d.raw.{}); skipping plot".format(algo,algo))
    continue
  nearest = results[0]

  # The weights are converted with float() because the collection may hold them
  # as strings.
  nearestVector = [float(nearest['body_weight']),float(nearest['brain_weight'])]

  dot = dotProduct(inputVector,nearestVector)
  inputNorm = math.sqrt(dotProduct(inputVector,inputVector))
  nearestNorm = math.sqrt(dotProduct(nearestVector,nearestVector))
  normProduct = inputNorm*nearestNorm

  distances = {
    "dotproduct": dot,
    "euclidean": math.sqrt(math.pow(inputVector[0]-nearestVector[0],2)+math.pow(inputVector[1]-nearestVector[1],2)),
    # cos = a.b / (|a| * |b|). Both norms must sit in the denominator: without the
    # parentheses the expression evaluates as (a.b / |a|) * |b|.
    # Undefined (NaN) when either vector has zero length.
    "cosine": dot / normProduct if normProduct else float('nan')
  }

  nearest['series']=nearest['species']
  data = [input,nearest]

  # Every other animal goes into a shared "Others" series. This tags the dicts in
  # `animals` in place, as the notebook has always done.
  for a in animals:
    if a['_id'] != input['_id'] and a['_id'] != nearest['_id']:
      a['series'] = 'Others'
      data.append(a)

  fig = px.scatter(data,x="body_weight",y="brain_weight",hover_data=["species"],color="series",title="{} distance - {}".format(algo,distances[algo]))
  fig.show()
