In [1]:
import pandas as pd
import numpy as np
import json
import os
import time
from datetime import datetime
from keras.models import load_model

import tensorflow as tf

In [2]:

# datasets needed to create the final dataset (they are not present in the /recommender repository)
movies_with_context_path = 'data/movies_with_context.csv'
added_imdb_context_path = 'data/added_imdb_context_with_ratings.csv'


data_path = 'data/'
recsys_movies_context_data_path = 'data/recsys_movies_context_data.csv'
recsys_ratings_only_userid_movieid_path = 'data/recsys_ratings_only_userid_movieid.csv'

scaler_path = "data/transform_data/25m_added_imdb_context_scaler.pkl"
actor_label_encoder_path = "data/transform_data/actor_label_encoder.pkl"
directors_label_encoder_path = "data/transform_data/directors_label_encoder.pkl"
holiday_label_encoder_path = "data/transform_data/holiday_label_encoder.pkl"
titleType_label_encoder_path = "data/transform_data/titleType_label_encoder.pkl"

model_path = "model/arch5_25m_added_imdb_context_trained.keras"

In [3]:
ratings = pd.read_csv(recsys_ratings_only_userid_movieid_path)
ratings

Unnamed: 0,userId,movieId
0,1,296
1,1,306
2,1,307
3,1,665
4,1,899
...,...,...
24983464,162541,50872
24983465,162541,55768
24983466,162541,56176
24983467,162541,58559


In [4]:
user_id = 1
user_df = ratings[ratings['userId'] == user_id]

# Extract only unique movie_ids
unique_movie_ids = user_df['movieId'].unique()
unique_movie_ids

movies = pd.read_csv(recsys_movies_context_data_path)
print('movies.length: ', len(movies))
print('unique_movie_ids.length: ', len(unique_movie_ids))
print('diff: ', len(movies) - len(unique_movie_ids))

# get row from movies, where movieId is in NOT unique_movie_ids
non_rated_movies = movies[~movies['movieId'].isin(unique_movie_ids)]
non_rated_movies

movies.length:  58923
unique_movie_ids.length:  70
diff:  58853


Unnamed: 0,movieId,movieYear,titleType,isAdult,runtimeMinutes,directors,actor,genreAction,genreAdult,genreAdventure,...,genreMystery,genreNews,genreReality-tv,genreRomance,genreSci-fi,genreShort,genreSport,genreThriller,genreWar,genreWestern
0,1,1995,movie,0,81,nm0005124,nm0000741,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,2,1995,movie,0,104,nm0002653,nm0001372,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,3,1995,movie,0,101,nm0222043,nm0025908,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,4,1995,movie,0,124,nm0001845,nm0001365,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,5,1995,movie,0,106,nm0796124,nm0003028,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58918,209157,2018,movie,0,100,nm1415482,nm10312234,0,0,0,...,0,0,0,0,0,0,0,0,0,0
58919,209159,2001,movie,0,73,nm0142504,nm1546270,0,0,0,...,0,0,0,0,0,0,0,0,0,0
58920,209163,2018,movie,0,97,nm2520391,nm0468514,0,0,0,...,0,0,0,0,0,0,0,0,0,0
58921,209169,2001,tvMovie,0,237,nm0003022,nm0121700,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [14]:
def is_date_in_interval(date, start_date, end_date, holiday_name):
    if holiday_name == 'new_years': 
        if date.month == 12:
            if date.day >= start_date.day:
                return True

        elif date.month == 1:
            if date.day <= end_date.day:
                return True
        return False
    else:
        if date.month >= start_date.month and date.month <= end_date.month:
            if date.day >= start_date.day and date.day <= end_date.day:
                return True
        return False


def find_holiday(date, holiday_dates):
    for holiday_name, interval in holiday_dates.items():
        start_date = datetime.strptime(interval['start'], '%m-%d')
        end_date = datetime.strptime(interval['end'], '%m-%d')
        if is_date_in_interval(date, start_date, end_date, holiday_name):
            return holiday_name
    return 'no_holiday'

In [15]:
try:
    time_context = []

    ts = time.time()
    date_now = datetime.fromtimestamp(ts)

    """ Create a week day value from timestamp
    0: Monday
    1: Tuesday
    2: Wednesday
    3: Thursday
    4: Friday
    5: Saturday
    6: Sunday
    """
    day = datetime.fromtimestamp(ts).isoweekday()
    time_context.append(day)


    """ Create a isWeekday value from day column
    0: false / weekend
    1: true  / weekday
    """
    if day == 6 or day == 7:
        time_context.append(0)
    else:
        time_context.append(1)


    """ Create a season value
    1: Spring	
    2: Summer
    3: Fall
    4: Winter
    """
    month = date_now.month
    if month >= 3 and month <= 5:
        time_context.append(1)
    elif month >= 6 and month <= 8:
        time_context.append(2)
    elif month >= 9 and month <= 11:
        time_context.append(3)
    else:
        time_context.append(4)
        

    """ Create a partOfDay value
    1 - Morning
    2 - Afternoon
    3 - Evening
    4 - Night
    """
    hour = date_now.hour
    if hour >= 5 and hour < 12:
        time_context.append(1)
    elif hour >= 12 and hour < 17:
        time_context.append(2)
    elif hour >= 17 and hour < 21:
        time_context.append(3)
    else:
        time_context.append(4)


    with open('data/holidays.json', 'r') as json_file:
        holidays = json.load(json_file)
    time_context.append(find_holiday(date_now, holidays))


    # values = [1, 2, 3, 4, 5]
    non_rated_movies_copy = non_rated_movies.copy()

    # # Assign values to new columns using .loc
    non_rated_movies_copy.loc[:, 'day'] = time_context[0]
    non_rated_movies_copy.loc[:, 'isWeekday'] = time_context[1]
    non_rated_movies_copy.loc[:, 'season'] = time_context[2]
    non_rated_movies_copy.loc[:, 'partOfDay'] = time_context[3]
    non_rated_movies_copy.loc[:, 'holiday'] = time_context[4]

    print('time_context: ', time_context)

except Exception as e:
    print('\nERROR while creating time context in main.py')
    print(e)
    quit()

time_context:  [5, 1, 1, 4, 'no_holiday']


In [16]:
non_rated_movies_copy

Unnamed: 0,movieId,movieYear,titleType,isAdult,runtimeMinutes,directors,actor,genreAction,genreAdult,genreAdventure,...,genreShort,genreSport,genreThriller,genreWar,genreWestern,day,isWeekday,season,partOfDay,holiday
0,1,1995,movie,0,81,nm0005124,nm0000741,0,0,1,...,0,0,0,0,0,5,1,1,4,no_holiday
1,2,1995,movie,0,104,nm0002653,nm0001372,0,0,1,...,0,0,0,0,0,5,1,1,4,no_holiday
2,3,1995,movie,0,101,nm0222043,nm0025908,0,0,0,...,0,0,0,0,0,5,1,1,4,no_holiday
3,4,1995,movie,0,124,nm0001845,nm0001365,0,0,0,...,0,0,0,0,0,5,1,1,4,no_holiday
4,5,1995,movie,0,106,nm0796124,nm0003028,0,0,0,...,0,0,0,0,0,5,1,1,4,no_holiday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58918,209157,2018,movie,0,100,nm1415482,nm10312234,0,0,0,...,0,0,0,0,0,5,1,1,4,no_holiday
58919,209159,2001,movie,0,73,nm0142504,nm1546270,0,0,0,...,0,0,0,0,0,5,1,1,4,no_holiday
58920,209163,2018,movie,0,97,nm2520391,nm0468514,0,0,0,...,0,0,0,0,0,5,1,1,4,no_holiday
58921,209169,2001,tvMovie,0,237,nm0003022,nm0121700,0,0,0,...,0,0,0,0,0,5,1,1,4,no_holiday


In [17]:
non_rated_movies_copy['userId'] = user_id

new_order = ['userId', 'movieId', 'day', 'isWeekday', 'season', 'partOfDay', 'holiday', 'movieYear', 'titleType',
                'isAdult', 'runtimeMinutes', 'directors', 'actor', 'genreAction', 'genreAdult', 'genreAdventure',
                'genreAnimation', 'genreBiography', 'genreChildren', 'genreComedy', 'genreCrime', 'genreDocumentary',
                'genreDrama', 'genreFamily', 'genreFantasy', 'genreFilm-noir', 'genreHistory', 'genreHorror',
                'genreImax', 'genreMusic', 'genreMusical', 'genreMystery', 'genreNews', 'genreReality-tv',
                'genreRomance', 'genreSci-fi', 'genreShort', 'genreSport', 'genreThriller', 'genreWar', 'genreWestern']

non_rated_movies_copy = non_rated_movies_copy[new_order]
non_rated_movies_copy

Unnamed: 0,userId,movieId,day,isWeekday,season,partOfDay,holiday,movieYear,titleType,isAdult,...,genreMystery,genreNews,genreReality-tv,genreRomance,genreSci-fi,genreShort,genreSport,genreThriller,genreWar,genreWestern
0,1,1,5,1,1,4,no_holiday,1995,movie,0,...,0,0,0,0,0,0,0,0,0,0
1,1,2,5,1,1,4,no_holiday,1995,movie,0,...,0,0,0,0,0,0,0,0,0,0
2,1,3,5,1,1,4,no_holiday,1995,movie,0,...,0,0,0,1,0,0,0,0,0,0
3,1,4,5,1,1,4,no_holiday,1995,movie,0,...,0,0,0,1,0,0,0,0,0,0
4,1,5,5,1,1,4,no_holiday,1995,movie,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58918,1,209157,5,1,1,4,no_holiday,2018,movie,0,...,0,0,0,0,0,0,0,0,0,0
58919,1,209159,5,1,1,4,no_holiday,2001,movie,0,...,0,0,0,0,0,0,0,0,0,0
58920,1,209163,5,1,1,4,no_holiday,2018,movie,0,...,0,0,0,0,0,0,0,0,0,0
58921,1,209169,5,1,1,4,no_holiday,2001,tvMovie,0,...,0,0,0,1,0,0,0,0,0,0


In [18]:
data = non_rated_movies_copy.copy()
import joblib


# Load label encoders
actor_label_encoder = joblib.load(actor_label_encoder_path)
directors_label_encoder = joblib.load(directors_label_encoder_path)
holiday_label_encoder = joblib.load(holiday_label_encoder_path)
titleType_label_encoder = joblib.load(titleType_label_encoder_path)

# Load scaler
scaler = joblib.load(scaler_path)

data['actor'] = actor_label_encoder.transform(data['actor'])
data['directors'] = directors_label_encoder.transform(data['directors'])
data['holiday'] = holiday_label_encoder.transform(data['holiday'])
data['titleType'] = titleType_label_encoder.transform(data['titleType'])

# Scale data
new_data = scaler.transform(data)

new_data

array([[6.15229388e-06, 4.78077745e-06, 7.14285714e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [6.15229388e-06, 9.56155490e-06, 7.14285714e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [6.15229388e-06, 1.43423324e-05, 7.14285714e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [6.15229388e-06, 9.99961754e-01, 7.14285714e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [6.15229388e-06, 9.99990438e-01, 7.14285714e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [6.15229388e-06, 1.00000000e+00, 7.14285714e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

[]


In [25]:
import tensorflow as tf
tf.__version__

SyntaxError: invalid syntax (1411997622.py, line 2)

In [48]:
# model_path = "model/arch8_25m_added_imdb_context_max_abs_scaler_run2_trained.keras"
# model_path = "model/arch5_1m_movielens_added_imdb_context_trained.keras"
# model_path = "model/teraztu.keras"
# model_path = "model/arch_8_max_abs_2e_pc.keras"


nn_model = load_model(model_path, compile=True)
# nn_model.summary()

# new_model = tf.keras.models.load_model('model/test5.keras')
# new_model.summary()

predictions = nn_model.predict(new_data)

ValueError: Layer 'dense_6' expected 2 variables, but received 0 variables during loading. Expected: ['dense_6/kernel:0', 'dense_6/bias:0']

In [47]:
top_10 = np.argsort(predictions.flatten())[-10:][::-1]
top_10

array([42658, 48026, 38229, 33471, 40291, 36517, 46467, 19610, 49417,
       19464], dtype=int64)

In [43]:
print('max value:', max(predictions)[0])
print('index of max value:', np.argmax(predictions))
print('top movie:')
non_rated_movies_copy[non_rated_movies_copy['movieId'] == non_rated_movies_copy.iloc[42658]['movieId']]

max value: 4.233097
index of max value: 42658
top movie:


Unnamed: 0,userId,movieId,day,isWeekday,season,partOfDay,holiday,movieYear,titleType,isAdult,...,genreMystery,genreNews,genreReality-tv,genreRomance,genreSci-fi,genreShort,genreSport,genreThriller,genreWar,genreWestern
42728,1,170705,5,1,1,4,no_holiday,2001,tvMiniSeries,0,...,0,0,0,0,0,0,0,0,1,0


In [42]:
non_rated_movies_copy.iloc[42658]['movieId']

170705

In [55]:
top_k = 10

In [59]:
top_k_movie_indices = np.argsort(predictions.flatten())[-top_k:][::-1]
top_k_movie_ids = []
top_k_movies = []

for movie_index in top_k_movie_indices:
    # movie_index[all_movies['movieId'] == all_movies.iloc[movie_index]['movieId']]
    top_k_movie_ids.append(non_rated_movies_copy.iloc[movie_index]['movieId'])

for index, movie_id in enumerate(top_k_movie_ids):
    movie = movies[movies['movieId'] == movie_id]
    # movie
    # movie_info = f"{index + 1}. | {movie['title']}"
    # top_k_movies.append(movie_info)

movie

Unnamed: 0,movieId,movieYear,titleType,isAdult,runtimeMinutes,directors,actor,genreAction,genreAdult,genreAdventure,...,genreMystery,genreNews,genreReality-tv,genreRomance,genreSci-fi,genreShort,genreSport,genreThriller,genreWar,genreWestern
19534,101850,2004,tvMiniSeries,0,629,nm0994355,nm1655187,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
all_movies = pd.read_csv(movies_with_context_path)

In [8]:
all_movies[all_movies['title'] == 'Shawshank Redemption, The (1994)']

Unnamed: 0,movieId,title,movieYear,titleType,isAdult,runtimeMinutes,directors,actor,genreAction,genreAdult,...,genreMystery,genreNews,genreReality-tv,genreRomance,genreSci-fi,genreShort,genreSport,genreThriller,genreWar,genreWestern
314,318,"Shawshank Redemption, The (1994)",1994,movie,0,142,nm0001104,nm0000175,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
ratings[(ratings['userId'] == 9999) & (ratings['movieId'] == 318)]

Unnamed: 0,userId,movieId


In [None]:
9999