In [74]:
# Bootstrap Django before any of the model imports in the following cells.
# NOTE(review): presumably init() configures settings and calls django.setup() — confirm in setup_django.
import setup_django
setup_django.init()

In [75]:
import numpy as np
import pandas as pd
from django.contrib.auth import get_user_model
from django.db.models import F, Count, Sum, IntegerField
from movies.models import Movie
from ratings.models import Rating
import random

# Resolve the project's active user model instead of importing a concrete class.
User = get_user_model()

In [76]:
# Seed example: one rating of 5 or higher made by the user "rahul".
# .first() returns None when nothing matches, so fail here with a clear message
# rather than with an opaque AttributeError on the next line or in later cells.
rating_obj = Rating.objects.filter(user__username='rahul', value__gte=5).first()
assert rating_obj is not None, "No rating with value >= 5 found for user 'rahul'"
# Display the object this rating points at (generic relation).
rating_obj.content_object

<Movie: Toy Story (1995)>

In [77]:
# Pull the identifiers of the seed rating out into plain variables.
rated_movie = rating_obj.content_object
movie_id, value, user_id = rated_movie.id, rating_obj.value, rating_obj.user_id

In [78]:
# Sanity check: echo the seed rating's movie id, score and user id.
print(f"Movie {movie_id} was rated {value} by {user_id}")

Movie 1 was rated 5 by 1


In [79]:
# Call the rating-update task directly in this kernel; the recorded output
# shows that it reports its own elapsed time.
# NOTE(review): presumably refreshes per-movie rating aggregates — confirm in ratings.tasks.
from ratings.tasks import task_update_movie_ratings
task_update_movie_ratings()

Rating update took 0:00:03 (3.4308414459228516s)


In [80]:

# Ratings of the same object as the seed rating, scored at least as highly.
same_object_ratings = Rating.objects.filter(
    object_id=rating_obj.object_id,
    content_type=rating_obj.content_type,
    value__gte=rating_obj.value,
)

# Ids of the *other* users behind those ratings (the seed user is excluded).
other_user_ids = (
    same_object_ratings
    .exclude(user=rating_obj.user)
    .values_list('user_id', flat=True)
)
print(other_user_ids)

# Every rating of 4 or above made by those users.
highly_rated = Rating.objects.filter(user_id__in=other_user_ids, value__gte=4)
highly_rated

<RatingQuerySet [671, 654, 646, 623, 624, 607, 598, 597, 595, 584, 577, 562, 560, 537, 538, 542, 543, 534, 530, 518, '...(remaining elements truncated)...']>


<RatingQuerySet [<Rating: Rating object (43720994)>, <Rating: Rating object (43720993)>, <Rating: Rating object (43720996)>, <Rating: Rating object (43720970)>, <Rating: Rating object (43720990)>, <Rating: Rating object (43720988)>, <Rating: Rating object (43720987)>, <Rating: Rating object (43720986)>, <Rating: Rating object (43720985)>, <Rating: Rating object (43720984)>, <Rating: Rating object (43720982)>, <Rating: Rating object (43720981)>, <Rating: Rating object (43720980)>, <Rating: Rating object (43720975)>, <Rating: Rating object (43720974)>, <Rating: Rating object (43720973)>, <Rating: Rating object (43720972)>, <Rating: Rating object (43720969)>, <Rating: Rating object (43720968)>, <Rating: Rating object (43720967)>, '...(remaining elements truncated)...']>

In [81]:
# Collect the distinct users and distinct object ids behind `highly_rated`.
#
# Dicts keyed by id give O(1) membership checks and keep first-seen order,
# replacing the original `x not in list` scans, which were quadratic.
users_by_id = {}
movie_ids_seen = {}

# select_related('user') loads each rating's user in the same query, instead of
# one extra query per rating when `.user` is accessed.
# The loop variable is deliberately NOT called `rating_obj`: rebinding that name
# would silently change the seed rating that earlier cells read when re-run.
for rated in highly_rated.select_related('user'):
    users_by_id.setdefault(rated.user_id, rated.user)
    movie_ids_seen.setdefault(rated.object_id, None)

# Same shapes as before: a list of user objects and a list of object ids.
rec_users = list(users_by_id.values())
rec_movies = list(movie_ids_seen)

In [82]:
# Report how many users and how many distinct object ids were collected.
user_total = len(rec_users)
movie_total = len(set(rec_movies))
print(user_total, movie_total)

82 2631


In [84]:
import numpy as np

In [85]:
# Attribute tags describing two example movies
movie_1 = ['sci-fi', 'comedy', 'Stars latest action movie star']
movie_2 = ['historical fiction', 'drama', 'Stars latest action movie star']

# One user's preferences, expressed with the same kind of tags
user_a = ['comedy', 'sci-fi', 'Stars latest action movie star']
print(f"User has {len(user_a)} primary preferences")

# Size of the feature space: distinct tags across both movies
all_movie_features = {*movie_1, *movie_2}
num_unique_features = len(all_movie_features)
print(f"These movies have {num_unique_features} unique features.")

User has 3 primary preferences
These movies have 5 unique features.


In [86]:
# Three preference weights for the example user.
# NOTE(review): presumably one weight per entry of `user_a` — confirm the intended ordering.
my_preferences = np.array([0.98, 0.85, 0.75])

In [87]:
# Attribute scores for a first movie: strongly positive on all three attributes.
movie_attribute_scores = np.array([0.96, 0.99, 0.92])

# Attribute scores for a second movie: negative on the first two attributes.
movie_2_attribute_scores = np.array([-0.93, -0.45, 0.5])

In [88]:
# Score bounds for three features: every feature fully matched (+1 each)
# versus every feature fully mismatched (-1 each).
unit = np.ones(3, dtype=int)
max_score = np.sum(unit * unit)
min_score = np.sum(unit * -unit)
print(max_score, min_score)

3 -3


In [89]:
# First 15 movies from the project's custom `popular()` queryset method.
# NOTE(review): presumably ordered by popularity — confirm in the Movie queryset.
ids = Movie.objects.all().popular()[:15]

# Up to 5,000 user ids ranked by how many ratings each user has made.
# values('user_id') must come BEFORE annotate() so the Count is grouped per user.
# Annotating first groups per rating row, so every count is 1, the ordering is
# meaningless, and the same user id can appear many times.
most_active_users = (
    Rating.objects
    .values('user_id')
    .annotate(user_count=Count("user"))
    .order_by('-user_count')
    .values_list('user_id', flat=True)[:5_000]
)

# Active, non-negative ratings that those users gave to the selected movies,
# flattened to dicts with the rated object's id exposed as "movie".
# NOTE(review): object_id is not constrained by content_type here — confirm that
# only Movie ratings exist, or add a content_type filter.
ratings_qs = Rating.objects.filter(
    object_id__in=ids,
    user_id__in=most_active_users,
    active=True, value__gte=0
).annotate(movie=F("object_id")).values('user', 'value', 'movie')

# One row per rating, with columns: user, value, movie.
df = pd.DataFrame(ratings_qs)

In [94]:
# User x movie matrix of rating values; missing ratings are shown as blanks.
ratings_matrix = df.pivot_table(
    values='value',
    index='user',
    columns=['movie'],
    fill_value='',
)
# Show at most the first 30 users.
ratings_matrix.iloc[:30]

movie,1,110,260,296,356,480,527,589,858,1196,1198,1270,2571,2858
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5.0,,5.0,5.0,5.0,5.0,,,,5.0,,,5.0,
2,,4.0,,4.0,3.0,4.0,4.0,5.0,,,,,,
3,,4.0,,5.0,5.0,,3.0,,,,,,,4.0
4,,,5.0,5.0,5.0,5.0,,5.0,5.0,5.0,5.0,5.0,,
5,,,,,4.0,,,,3.0,,,,,
6,,,,,,,,,,,,,1.0,
7,3.0,5.0,5.0,,3.0,4.0,,3.0,,5.0,5.0,3.0,,
8,,4.0,4.0,4.0,4.0,,5.0,4.0,5.0,4.0,4.0,4.0,5.0,5.0
9,4.0,,,,,,5.0,,,,,,5.0,
10,,,,,,,,,,4.0,4.0,,5.0,


In [91]:
# All-ones vectors over five features: every feature is fully matched.
movie_1 = np.ones(5, dtype=int)
user_1 = np.ones(5, dtype=int)
# Element-wise product summed up (a dot product): the maximum score for 5 features.
np.multiply(user_1, movie_1).sum()

5