In [1]:
import numpy as np
import pandas as pd

import chromadb
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

import yaml
# Load the notebook settings. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open("../config.yml") as config_file:
    CONFIG = yaml.safe_load(config_file)

# Embedding model whose name is taken from the config file.
model = SentenceTransformerEmbeddings(model_name=CONFIG["EmbeddingModel"])

# Open the on-disk Chroma store (path is relative to the parent directory)
# and fetch the two existing collections used throughout the notebook.
client = chromadb.PersistentClient(path=f"../{CONFIG['ChromaDBPath']}")
movie_collection = client.get_collection(name="movies")
user_collection = client.get_collection(name="users")

# Tabular copies of the movie catalogue and the per-user watch history.
movie_df = pd.read_csv("../data/movies.csv")
users_df = pd.read_csv("../data/user_history.csv")

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
    # Spot-check: fetch the record stored under id "1" and display the
    # `userId` field of its first (only) metadata entry.
    (
        user_collection
        .get(ids="1")
        ["metadatas"][0]
        ["userId"]
    )

'1'

In [5]:
from langchain.vectorstores import Chroma

# LangChain vector-store wrapper over the "movies" collection, reusing the
# already-open Chroma client and the notebook's embedding model.
langchain_chroma = Chroma(collection_name="movies", embedding_function=model, client=client)

In [87]:
# Recommend movies for one user by searching the movie collection with the
# user's stored embedding vector.
userId = "3"
user = user_collection.get(ids=userId, include=["embeddings", "metadatas"])

# Fail with a clear message if the id is unknown, rather than with an opaque
# error from the vector search below.
if not user["ids"]:
    raise KeyError(f"userId {userId!r} not found in the 'users' collection")

# `get` returns one embedding per matched id (a list of vectors), whereas
# `similarity_search_by_vector` expects a single vector, so take the first.
user_embedding = user["embeddings"][0]
docs = langchain_chroma.similarity_search_by_vector(user_embedding, k=3)

# Print each hit's metadata as "key: value" lines, separated by a rule.
for doc in docs:
    for key, value in doc.metadata.items():
        print(key, value, sep=": ")
    print("-" * 20)

Cast: Semyon Treskunov, Konstantin Khabensky, Mikhail Yefremov
Director: Oksana Karas
Origin/Ethnicity: Russian
Release Year: 2016
Title: the good boy
Wiki Page: https://en.wikipedia.org/wiki/The_Good_Boy
genres: Children|Comedy|Drama
movieId: 167790
--------------------
Cast: Jennifer Lopez, Edward James Olmos
Director: Gregory Nava
Origin/Ethnicity: American
Release Year: 1997
Title: selena
Wiki Page: https://en.wikipedia.org/wiki/Selena_(film)
genres: Drama|Musical
movieId: 1487
--------------------
Cast: Tom Hollander, James Gandolfini, Chris Addison, Gina McKee
Director: Armando Iannucci
Origin/Ethnicity: British
Release Year: 2009
Title: in the loop
Wiki Page: https://en.wikipedia.org/wiki/In_the_Loop_(film)
genres: Comedy
movieId: 67997
--------------------


In [83]:
# Interpret the raw `timestamp` column as seconds since the Unix epoch.
users_df["timestamp"] = pd.to_datetime(users_df["timestamp"], unit="s")

# Watch history of the selected user, most recent first.
userMovies = (
    users_df.loc[users_df["userId"] == int(userId)]
    .sort_values("timestamp", ascending=False)
)
userMovies.head()

Unnamed: 0.1,Unnamed: 0,userId,movieId,timestamp
186,281,3,2424,2011-05-27 02:44:53
192,288,3,5048,2011-05-27 02:44:44
172,262,3,527,2011-05-27 02:44:35
190,286,3,3949,2011-05-27 02:44:05
174,264,3,688,2011-05-27 02:43:48


In [85]:
import ast

# Parse the movie ids stored as a string in the user's metadata. The first and
# last characters are stripped before evaluation -- presumably wrapping
# brackets or quotes; verify against how the "users" collection was populated.
movieIds = np.array(ast.literal_eval(user["metadatas"][0]["movieIds"][1:-1]))

# Catalogue rows for the user's five most recently rated movies.
# `isin` compares numerically, so the float `movieId` column matches the
# integer ids directly; the former per-row `int()` cast was unnecessary and
# would raise on any missing (NaN) movieId.
recent_movie_ids = userMovies["movieId"].iloc[:5].tolist()
movies = movie_df[movie_df["movieId"].isin(recent_movie_ids)]

movies.sort_values(by="Release Year", ascending=False)

Unnamed: 0.1,Unnamed: 0,Release Year,Title,Origin/Ethnicity,Director,Cast,Genre,Wiki Page,Plot,movieId,title,genres
2814,13516,2002,snow dogs,American,Brian Levant,"Cuba Gooding Jr., James Coburn, Nichelle Nichols",family,https://en.wikipedia.org/wiki/Snow_Dogs_(film),"Dr. Ted Brooks is a dentist in Miami, Florida....",5048.0,snow dogs,Adventure|Children|Comedy
2584,13111,2000,requiem for a dream,American,Darren Aronofsky,"Ellen Burstyn, Jared Leto, Jennifer Connelly, ...",drama,https://en.wikipedia.org/wiki/Requiem_for_a_Dream,"During the summer in Brighton Beach, Brooklyn,...",3949.0,requiem for a dream,Drama
2409,12802,1998,you've got mail,American,Nora Ephron,"Tom Hanks, Meg Ryan, Parker Posey, Jean Stapleton",rom com,https://en.wikipedia.org/wiki/You%27ve_Got_Mail,"Kathleen Kelly is involved with Frank Navasky,...",2424.0,you've got mail,Comedy|Romance
1998,11998,1995,operation dumbo drop,American,Simon Wincer,"Ray Liotta, Danny Glover, Denis Leary",family,https://en.wikipedia.org/wiki/Operation_Dumbo_...,"During the Vietnam War in 1968, Captain Sam Ca...",688.0,operation dumbo drop,Action|Adventure|Comedy|War
1772,11578,1993,schindler's list,American,Steven Spielberg,"Liam Neeson, Ralph Fiennes, Ben Kingsley, Caro...",drama,https://en.wikipedia.org/wiki/Schindler%27s_List,"In Kraków during World War II, the Germans hav...",527.0,schindler's list,Drama|War
