In [24]:
import csv
import json
import os
import pymongo
from dotenv import load_dotenv
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import math

# python-dotenv: read key/value pairs from a local .env file into the process
# environment, so the os.getenv('MDB_URI') lookup in the loading cell can find
# the connection string without it being hard-coded in the notebook.
load_dotenv()

def dotProduct(a,b):
    """Return the dot product of two indexable sequences as a float.

    Every index of `a` is visited and `b` is read at the same position, so a
    shorter `b` raises IndexError while extra trailing elements of `b` are
    ignored. Two empty sequences give 0.0.
    """
    total = 0.0
    for idx in range(len(a)):
        total += a[idx] * b[idx]
    return total

def normalize(v):
    """Scale a vector to unit Euclidean length.

    Args:
        v: iterable of numbers. It is materialised once, so one-shot iterables
            (generators) are handled as well as lists and tuples.

    Returns:
        A new list of floats, each component divided by the vector's Euclidean
        magnitude. An empty input yields an empty list.

    Raises:
        ZeroDivisionError: if the magnitude is zero (e.g. every component is 0),
            because such a vector has no direction to preserve.
    """
    # Copy first: the components are traversed twice (magnitude, then scaling).
    components = list(v)
    if not components:
        return []

    sum_of_squares = 0.0
    for component in components:
        sum_of_squares += math.pow(component, 2)

    # Loop-invariant: compute the magnitude once instead of once per component.
    magnitude = math.sqrt(sum_of_squares)
    if magnitude == 0.0:
        raise ZeroDivisionError("cannot normalize a vector of magnitude zero")

    return [component / magnitude for component in components]

def vectorSearch(collection,inputVector,algo,normalized=True,excludeId=None,k=1):
  """Run a `$search` / `knnBeta` nearest-neighbour query and return the hits.

  Args:
    collection: object exposing `aggregate(pipeline)` (a pymongo collection in
      this notebook).
    inputVector: query vector as a list of numbers.
    algo: last segment of the searched field path, e.g. "euclidean", "cosine"
      or "dotproduct".
    normalized: when True the query vector is unit-normalized and the
      "2d.normalized.<algo>" field is searched; otherwise the vector is sent
      as given and "2d.raw.<algo>" is searched.
    excludeId: `_id` of the document to keep out of the results (normally the
      query document itself). When omitted, the notebook-level `input["_id"]`
      is used so existing four-argument calls behave as before.
    k: number of nearest neighbours requested from the index.

  Returns:
    A list with the documents produced by the aggregation.
  """
  if excludeId is None:
    # Legacy fallback: older callers rely on the global query document `input`
    # (which shadows the builtin). Pass excludeId explicitly in new code so the
    # function does not depend on cell execution order.
    excludeId = input["_id"]

  variant = "normalized" if normalized else "raw"
  queryVector = normalize(inputVector) if normalized else inputVector

  # Keep the excluded document out of its own neighbour list.
  excludeFilter = {
    "compound": {
      "mustNot": [
        {"equals": {"value": excludeId, "path": "_id"}}
      ]
    }
  }

  pipeline = [
    {
      "$search": {
        "index": "default",
        "knnBeta": {
          "vector": queryVector,
          "path": "2d.{}.{}".format(variant,algo),
          "filter": excludeFilter,
          "k": k
        }
      }
    }
  ]

  return list(collection.aggregate(pipeline))

In [25]:
MDB_URI = os.getenv('MDB_URI')
# Never echo the connection string itself: it embeds the database username and
# password, and notebook outputs are saved and shared together with the file.
print("MDB_URI is set" if MDB_URI else "MDB_URI is not set")

# Build one document per CSV data row. Each document keeps the original CSV
# columns and stores the (body_weight, brain_weight) pair, raw and
# unit-normalized, under one sub-field per similarity metric.
METRICS = ('euclidean','dotproduct','cosine')
animals = []
with open('animals.csv',newline='') as csvFile:
    reader = csv.DictReader(csvFile,fieldnames=['species','body_weight','brain_weight'])
    # Explicit fieldnames make DictReader yield the first line as data; that
    # line is skipped (presumably the CSV header row).
    next(reader,None)
    # Start at 1 so each _id is the 1-based position of the data row.
    for rowId,row in enumerate(reader,start=1):
        # Parse and normalize once per row instead of once per metric.
        weights = [float(row['body_weight']),float(row['brain_weight'])]
        unitWeights = normalize(weights)
        row['_id'] = rowId
        row['2d'] = {
            'raw':{metric:list(weights) for metric in METRICS},
            'normalized':{metric:list(unitWeights) for metric in METRICS}
        }
        row['dimensions'] = 2
        animals.append(row)

client = pymongo.MongoClient(MDB_URI)
collection = client['test']['vectors']

# Remove the 2-d documents left by a previous run so the cell can be re-executed
# without duplicate-_id errors, then load the freshly built ones.
collection.delete_many({"dimensions":2})
collection.insert_many(animals)

mongodb+srv://<user>:<password>@<cluster-host>/test


<pymongo.results.InsertManyResult at 0x7fccf118a0c0>

In [100]:
# Query document for the searches below: its body/brain weights form the query
# vector, and its _id is filtered out of the results inside vectorSearch.
# NOTE(review): this name shadows Python's built-in input(); it is kept as-is
# because vectorSearch and the plotting loop read this global by name.
input = {"_id":2,"species":"Cow","body_weight":465.000,"brain_weight":423.000}

def makeGraphData (algo,items,normalized=True):
  """Flatten stored documents into plot points.

  For each document in `items`, picks the vector stored under
  doc["2d"]["normalized" or "raw"][algo] and returns a list of
  {"species", "x", "y"} dicts, where x and y are the first and second
  components of that vector.
  """
  variant = "normalized" if normalized else "raw"
  return [
    {
      "species":doc["species"],
      "x":doc["2d"][variant][algo][0],
      "y":doc["2d"][variant][algo][1]
    }
    for doc in items
  ]

def _addSearchTraces(fig,algo,normalized,col):
  """Search for the nearest neighbour of `input` and plot the outcome in one subplot.

  Adds marker traces to row 1, column `col` of `fig`: the query point, its
  nearest neighbour (left out when the search returns nothing) and every other
  stored document. `normalized` selects both the searched field and the
  coordinates that are plotted.
  """
  legend = {
    "legendgroup":str(col),
    "legendgrouptitle":{"text":"Normalized" if normalized else "Raw"}
  }

  inputVector = [input['body_weight'],input['brain_weight']]
  # Plot the query in the same space as the stored points of this subplot.
  inputPoint = normalize(inputVector) if normalized else inputVector

  # Slice instead of indexing so an empty result set does not raise IndexError.
  nearestDocs = vectorSearch(collection,inputVector,algo,normalized=normalized)[:1]
  excludedIds = [input['_id']]+[doc['_id'] for doc in nearestDocs]
  otherDocs = list(collection.find({'_id':{"$nin":excludedIds}}))

  nearestPoints = makeGraphData(algo,nearestDocs,normalized=normalized)
  otherPoints = makeGraphData(algo,otherDocs,normalized=normalized)

  # Trace order (input, nearest, others) fixes the colour assigned to each role.
  traces = [
    go.Scatter(hoverinfo="text",hovertext=[input['species']],x=[inputPoint[0]],y=[inputPoint[1]],mode="markers",name="Input - "+input["species"],**legend)
  ]
  if nearestPoints:
    traces.append(
      go.Scatter(hoverinfo="text",hovertext=[d["species"] for d in nearestPoints],x=[d["x"] for d in nearestPoints],y=[d["y"] for d in nearestPoints],mode="markers",name="Nearest - "+nearestDocs[0]["species"],**legend)
    )
  traces.append(
    go.Scatter(hoverinfo="text",hovertext=[d["species"] for d in otherPoints],x=[d["x"] for d in otherPoints],y=[d["y"] for d in otherPoints],mode="markers",name="Others",**legend)
  )

  for trace in traces:
    fig.add_trace(trace,row=1,col=col)

# One figure per similarity metric: normalized vectors on the left, raw on the right.
for algo in ["euclidean","cosine","dotproduct"]:

  fig = make_subplots(rows=1,cols=2,subplot_titles=["Normalized Values","Raw Values"])

  _addSearchTraces(fig,algo,normalized=True,col=1)
  _addSearchTraces(fig,algo,normalized=False,col=2)

  layout = {
    # The figures are otherwise identical in structure; name the metric shown.
    "title":{"text":"Nearest neighbour - "+algo},
    # Normalized coordinates are unitless, so no kg/g suffix on the left subplot.
    "xaxis":{"title":"body_weight (normalized)"},
    "yaxis":{"title":"brain_weight (normalized)"},
    "xaxis2":{"title":"body_weight (kg)"},
    "yaxis2":{"title":"brain_weight (g)"}
  }
  fig.update_layout(layout)
  fig.show()