In [3]:
import sys
import os
import django
from pathlib import Path

# Manually set the root directory and the settings module
root_dir = "C:/Users/USER/Documents/recommender/src"
settings_module = "cfehome.settings"

os.environ.setdefault("DJANGO_SETTINGS_MODULE", settings_module)
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Add the root directory to sys.path
if root_dir not in sys.path:
    sys.path.insert(0, root_dir)
    print(f"'{root_dir}' successfully added to sys.path")

# Setup Django
try:
    django.setup()
    print("Django setup successfully")
except Exception as e:
    print(f"Error setting up Django: {e}")

# Now, access the settings and perform your operations
from django.conf import settings
import pandas as pd

# Ensure that the DATA_DIR setting is available
try:
    ratings_path = settings.DATA_DIR / "ratings_small.csv"
    if ratings_path.exists():
        print(f"Found ratings file at: {ratings_path}")
    else:
        print(f"Ratings file not found at: {ratings_path}")
except AttributeError as e:
    print(f"Error accessing DATA_DIR: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")



'C:/Users/USER/Documents/recommender/src' successfully added to sys.path
Django setup successfully
Found ratings file at: C:\Users\USER\Documents\recommender\src\data\ratings_small.csv


In [5]:
df = pd.read_csv(ratings_path)

In [6]:
df.head(n=10)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151
6,1,1287,2.0,1260759187
7,1,1293,2.0,1260759148
8,1,1339,3.5,1260759125
9,1,1343,2.0,1260759131


In [9]:
from django.contrib.auth import get_user_model

User = get_user_model()

In [11]:
current_users = User.objects.all().values_list('id', flat=True)
rating_users = df['userId'].tolist()

In [34]:
missing_user_ids = set(rating_users) - set(current_users)
missing_user_ids

{4,
 6,
 7,
 9,
 10,
 11,
 13,
 14,
 15,
 16,
 18,
 19,
 20,
 21,
 22,
 24,
 25,
 26,
 27,
 28,
 29,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185,
 186,
 187,
 188,
 189,
 190,
 191,
 193,
 194,
 195,
 196,
 197,
 198,
 199,
 200,
 201,
 202,
 2

In [13]:
for uid in missing_user_ids:
    User.objects.create(
        id=uid,
        username=f"missing-user-{uid}"
    )

In [19]:
import math
from decimal import Decimal

In [21]:
df['value'] = df['rating'].apply(lambda x: math.ceil(Decimal(x)))
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,value
0,1,31,2.5,1260759144,3
1,1,1029,3.0,1260759179,3
2,1,1061,3.0,1260759182,3
3,1,1129,2.0,1260759185,2
4,1,1172,4.0,1260759205,4


In [22]:
df['user_id'] = df['userId']
df['object_id'] = df['movieId']

In [23]:
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,value,user_id,object_id
0,1,31,2.5,1260759144,3,1,31
1,1,1029,3.0,1260759179,3,1,1029
2,1,1061,3.0,1260759182,3,1,1061
3,1,1129,2.0,1260759185,2,1,1129
4,1,1172,4.0,1260759205,4,1,1172


In [24]:
cols = ['user_id', 'value', 'object_id']
transformed_df = df.copy()[cols]

In [26]:
rating_records = transformed_df.to_dict('records')

In [27]:
from ratings.models import Rating
qs = Rating.objects.all()
qs.delete()    

(32, {'ratings.Rating': 32})

In [28]:
from django.contrib.contenttypes.models import ContentType

ctype = ContentType.objects.get(app_label='movies', model='movie')

In [32]:
new_ratings = []
for r in rating_records:
    r['content_type'] = ctype
    new_ratings.append(
    Rating(**r)
    )
Rating.objects.bulk_create(new_ratings, ignore_conflicts=True, batch_size=1000)

[<Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: Rating object (None)>,
 <Rating: 

In [33]:
from ratings.tasks import task_update_movie_ratings

task_update_movie_ratings()

Rating update took 0:05:35 (335.8502697944641s)
