In [1]:
import pandas as pd
import numpy as np
import os
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from dotenv import load_dotenv
# Load variables from a .env file into the process environment (python-dotenv);
# the POSTGRES_* settings read with os.getenv further down rely on this call.
load_dotenv()

True

In [2]:
from sqlalchemy.engine import URL

# Assemble the connection URL from its components instead of interpolating
# them into a string: URL.create escapes characters such as '@', ':' or '/'
# in the user name / password, which would otherwise corrupt the URI.
# An unset variable is passed through as None (component omitted) rather than
# being embedded as the literal text "None".
# render_as_string keeps `db_uri` a plain str, as it was before.
db_uri = URL.create(
    drivername="postgresql",
    username=os.getenv('POSTGRES_USER'),
    password=os.getenv('POSTGRES_PASSWORD'),
    host="localhost",
    port=5432,
    database=os.getenv('POSTGRES_DB'),
).render_as_string(hide_password=False)

engine = create_engine(db_uri)

# Session factory bound to the engine, plus the single session object that the
# rest of the notebook queries through.
Session = sessionmaker(bind=engine)
session = Session()

In [4]:

from server.api.models import Rating


def _rating_record(rating):
    """Flatten one Rating row into a plain dict; ids are converted to str."""
    return {
        'movie_id': str(rating.movie_id),
        'user_id': str(rating.user_id),
        'rating': rating.rating,
    }


# Fetch every rating row, then build the DataFrame in one go from the records.
ratings = session.query(Rating).all()
ratings_df = pd.DataFrame(list(map(_rating_record, ratings)))

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds

# User x movie rating matrix: one row per user_id, one column per movie_id.
# (user, movie) pairs without a rating are filled with 0.
m = ratings_df.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)

# svds requires a floating-point matrix. The pivot can come out as an integer
# dtype (every cell observed) or object dtype (e.g. Decimal values), so cast
# explicitly instead of relying on the NaN fill to have produced floats.
m_np = m.to_numpy(dtype=float)

# Number of latent factors: at most 50, and strictly below min(shape) as svds
# demands. Fail with a clear message when the matrix is too small to factorise.
k = min(50, min(m_np.shape) - 1)
if k < 1:
    raise ValueError(
        f"Rating matrix of shape {m_np.shape} is too small for a truncated SVD; "
        "at least 2 users and 2 movies are required."
    )

U, sigma, Vt = svds(m_np, k=k)
# svds returns the singular values as a 1-D array; expand to a diagonal matrix.
sigma = np.diag(sigma)

# Rank-k reconstruction of the rating matrix, labelled like the pivot table.
predicted_ratings = U @ sigma @ Vt
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=m.columns, index=m.index)

# Each row of Vt.T is one movie's latent-factor vector; compare movies pairwise
# by cosine similarity and label both axes with movie_id.
item_similarity = cosine_similarity(Vt.T)
item_similarity_df = pd.DataFrame(item_similarity, index=m.columns, columns=m.columns)

In [8]:
def similar_movies(movie_id, n=8):
    """Return the ids of the ``n`` movies most similar to ``movie_id``.

    Similarities are read from the ``movie_id`` column of the module-level
    ``item_similarity_df``; ids are returned most-similar first.

    Args:
        movie_id: Label of the query movie in ``item_similarity_df``.
        n: Maximum number of ids to return (default 8).

    Returns:
        A list of at most ``n`` labels, never containing ``movie_id`` itself.

    Raises:
        KeyError: if ``movie_id`` is not a column of ``item_similarity_df``.
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    scores = item_similarity_df[movie_id]
    # Exclude the query movie *before* ranking. Taking the top n + 1 and then
    # removing it assumes the movie always ranks there, which fails (ValueError
    # from list.remove) when its self-similarity is not among the highest
    # values, e.g. an all-zero factor vector or many ties at the top.
    candidates = scores.drop(labels=[movie_id], errors="ignore")
    return candidates.sort_values(ascending=False).head(n).index.tolist()

In [10]:
from server.api.models import Movie

# Look up the seed film by exact title, then ask the recommender for the ids of
# its nearest neighbours (ids are handled as strings throughout the notebook).
godfather_query = session.query(Movie).filter_by(title='The Godfather')
godfather_movie = godfather_query.first()
sim_movie_ids = similar_movies(str(godfather_movie.id))
sim_movie_ids

['09b93009-b702-451c-a15d-6704bcb01ae6',
 'bcb074b5-aedd-4995-bcce-3f83a5784595',
 '29bd4376-f336-43e9-9fd1-2fe4776a5380',
 '60918f5c-01ef-49a3-a00d-962383933427',
 '38c2331d-326b-49a8-9ba3-0af6adc44b7d',
 '983940f7-d48f-4af5-9d3e-1ea0a59b3bcb',
 '726843b9-adcc-4f4b-837b-d96b9cade424',
 'bc74fd84-8dd3-4990-9892-0ad4be1e7f00']

In [11]:
# An IN (...) filter gives no guarantee about row order, so index the fetched
# rows by their string id and re-emit them in the ranking order held in
# sim_movie_ids (most similar first). Ids with no matching row are skipped.
movies_by_id = {
    str(movie.id): movie
    for movie in session.query(Movie).filter(Movie.id.in_(sim_movie_ids)).all()
}
sim_movies = [
    movies_by_id[movie_id] for movie_id in sim_movie_ids if movie_id in movies_by_id
]
for sim_movie in sim_movies:
    print(sim_movie.title)

Casino
Taxi Driver
Goodfellas
The Godfather: Part II
Donnie Brasco
The Godfather: Part III
Katok i skripka
War of the Worlds 2: The Next Wave
