This Notebook uses Retrieval Augmented Generation (RAG) along with ChatGPT to recommend movies.

In [1]:
import pandas as pd

Building the Training Data

In [2]:
train = pd.read_csv('../data/train.csv')

In [3]:
train.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,movie_title,genres,avg_rating
0,259,255,4,874724710,My Best Friend's Wedding (1997),Romance,4.0
1,259,286,4,874724727,"English Patient, The (1996)","Romance, War",4.0
2,259,298,4,874724754,Face/Off (1997),"Action, Sci-Fi, Thriller",4.0
3,259,185,4,874724781,Psycho (1960),"Horror, Romance, Thriller",4.0
4,259,173,4,874724843,"Princess Bride, The (1987)","Action, Adventure, Romance",4.0


Using Qdrant Vector Database

In [4]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

  from tqdm.autonotebook import tqdm, trange


Set the embedding layer

In [5]:
encoder = SentenceTransformer("all-MiniLM-L6-v2")



Set the Qdrant database as an in memory database

In [6]:
client = QdrantClient(":memory:")

Define the database settings

In [7]:
client.recreate_collection(
    collection_name="movie_ratings",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
        distance=models.Distance.COSINE,
    ),
)

  client.recreate_collection(


True

Turn the data into more descriptive strings for better semantic searching

In [8]:
train['rating_descriptions'] = train.apply(lambda x: f"user {x.loc['user_id']} rated {x.loc['movie_title']} with a rating of {x.loc['rating']}", axis=1)

In [9]:
training_dict = train.sample(1000).to_dict(orient='records') # Sample 1000 records to speed up the process

Upload Training Data to vector database

In [10]:
client.upload_points(
    collection_name="movie_ratings",
    points=[
        models.PointStruct(
            id=idx, vector=encoder.encode(doc["rating_descriptions"]).tolist(), payload=doc
        )
        for idx, doc in enumerate(training_dict)
    ],
)

Search Vector Database for the query

In [11]:
hits = client.search(
    collection_name="movie_ratings",
    query_vector=encoder.encode("user 175 with a rating of 5").tolist(),
    limit=5,
)
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'user_id': 291, 'movie_id': 129, 'rating': 5, 'timestamp': 874805699, 'movie_title': 'Bound (1996)', 'genres': 'Romance, Thriller', 'avg_rating': 5.0, 'rating_descriptions': 'user 291 rated Bound (1996) with a rating of 5'} score: 0.74506054168057
{'user_id': 201, 'movie_id': 129, 'rating': 4, 'timestamp': 884114471, 'movie_title': 'Bound (1996)', 'genres': 'Romance, Thriller', 'avg_rating': 4.0, 'rating_descriptions': 'user 201 rated Bound (1996) with a rating of 4'} score: 0.7205623246198989
{'user_id': 658, 'movie_id': 129, 'rating': 3, 'timestamp': 875145750, 'movie_title': 'Bound (1996)', 'genres': 'Romance, Thriller', 'avg_rating': 3.0, 'rating_descriptions': 'user 658 rated Bound (1996) with a rating of 3'} score: 0.7159467178367238
{'user_id': 275, 'movie_id': 662, 'rating': 3, 'timestamp': 880315170, 'movie_title': 'Somewhere in Time (1980)', 'genres': 'Romance', 'avg_rating': 3.0, 'rating_descriptions': 'user 275 rated Somewhere in Time (1980) with a rating of 3'} score: 0.7

In [12]:
search_results = [hit.payload for hit in hits]

In [13]:
user_prompt = "I am user 175.  Please recommend me a movie that I would rate 5."

Use OpenAI to recommend a movie based on vector database search results

In [14]:
from dotenv import load_dotenv
from groq import Client
import os
dotenv_path = 'D:\\test\\LLM-Recommender-System-with-RAG\\key_api.env'  # Thay thế bằng đường dẫn thực tế
load_dotenv(dotenv_path)
api_key = os.getenv('GROQ_API_KEY')
client = Client(api_key=api_key)

In [15]:
response = client.chat.completions.create(
    model="llama3-8b-8192",
        messages= [
            { 'role':'system','content' : 'You are a movie recommender.  You help users find movies they would rate 5 stars.'},
            { 'role':'user','content' : user_prompt},
            { "role": "assistant", "content": str(search_results)},],
        # temperature=0,
        # max_tokens=512,
        # top_p=1,
        # frequency_penalty=0,
        # presence_penalty=0,
        )

In [16]:
response.choices[0].message.content

'I see you\'re a fan of romance and thrillers! Based on your preferences, I\'d like to recommend a movie that you might rate 5 stars.\n\nHave you seen "La La Land" (2016)? It\'s a romantic musical drama starring Ryan Gosling and Emma Stone as two aspiring artists who fall in love while chasing their dreams in Los Angeles. The movie features stunning visuals, beautiful music, and a poignant love story that explores the sacrifices one must make for their passion.\n\n"La La Land" has received widespread critical acclaim and has broken numerous box office records. It\'s a modern classic that has resonated with audiences worldwide, and I think you might find it a 5-star movie. Would you like to give it a try?'