# Hybrid System

In [1]:
import pandas as pd
import numpy as np

from typing import Dict, Text
import joblib


In [2]:
df = pd.read_csv('data/master_data.zip', compression="zip")[["userId", "movieId", "rating"]]
#Movie names
movie_dict = joblib.load("data/movie_dict.pkl")

## Layer 1 - Most Similar User

In [3]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors  

In [4]:
# Built user-item matrix

pivot = df.pivot(
    index='userId',
    columns='movieId',
    values='rating'
).fillna(0)


In [5]:
# USER INPUT
#Movie ID - Rating
new_user_ratings = {4181: 4.5, 4188: 4.5, 4195:4.5, 4198:3.0, 4204:5.0}

In [6]:
pivot = pivot.append(new_user_ratings, ignore_index=True)

pivot.fillna(0, inplace=True)
df_sparse = csr_matrix(pivot.values)

model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(df_sparse)

distances, indices = model_knn.kneighbors(pivot.tail(1), n_neighbors=2)

most_similar_user = indices[0][1]


  pivot = pivot.append(new_user_ratings, ignore_index=True)


## Layer 2 - Recommendation System

In [7]:
import tensorflow as tf
import tensorflow_recommenders as tfrs

from sklearn.preprocessing import MinMaxScaler

In [8]:
# Model has to be re-built due to load issues

unique_movie_ids = joblib.load("data/unique_movie_ids.pkl")
unique_user_ids = joblib.load("data/unique_user_ids.pkl")

class ModelRanking(tf.keras.Model):

  def __init__(self):
    super().__init__()
    embedding_dims = 32

    # User embeddings
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dims)
    ])

    # Movie Embeddings
    self.movie_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_movie_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_movie_ids) + 1, embedding_dims)
    ])

    # Predictions
    self.ratings = tf.keras.Sequential([
      # multiple dense layers
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      # Ratings in output layer
      tf.keras.layers.Dense(1)
  ])

  def call(self, inputs):
    user_id, movie_id = inputs

    user_embed = self.user_embeddings(user_id)
    movie_embed = self.movie_embeddings(movie_id)

    return self.ratings(tf.concat([user_embed, movie_embed], axis=1))
  
  
class ModelMovielens(tfrs.models.Model):

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = ModelRanking()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features["user_id"], features["movie_id"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop("rating")
    rating_predictions = self(features)

    # Compute loss and metric
    return self.task(labels=labels, predictions=rating_predictions)

In [9]:
recommendation_model = ModelMovielens()
# Dummy input to reconstruct the model
recommendation_model({
      "user_id": np.array(["0"]),
      "movie_id": np.array(["0"])
  })

recommendation_model.load_weights('data/recommendation_model_weights.h5')

In [10]:
# Movies to predict
all_movies = np.array(list(movie_dict.keys()))

excluded_movies = np.array(joblib.load("data/excluded_movie_ids.pkl"))
exc_mask = np.isin(all_movies, excluded_movies, invert=True)
candidate_movies = all_movies[exc_mask]

watched_movies = np.array(list(new_user_ratings.keys()))
mask = np.isin(candidate_movies, watched_movies, invert=True)
candidate_movies = candidate_movies[mask]

In [11]:
recommendation_ratings = {}

movies_to_predict = list(candidate_movies.astype(str))
user_to_predict = str(most_similar_user)

for movie_id in movies_to_predict:
  #movie_name = movie_dict.get(int(movie_id))
  
  predicted_rating = recommendation_model({
      "user_id": np.array([user_to_predict]),
      "movie_id": np.array([movie_id])
  }).numpy()[0][0]
  
  recommendation_ratings[movie_id] = predicted_rating    


In [12]:
#Scale the values to be used in Layer 4

rec_values = np.array(list(recommendation_ratings.values())).reshape(-1, 1)

recommend_scaler = MinMaxScaler()
rec_scaled = recommend_scaler.fit_transform(rec_values)

recommendation_ratings = {key: value for key, value in zip(recommendation_ratings.keys(), rec_scaled.flatten())}

## Layer 3 - Demand Forecasting

In [13]:
from darts.models import TFTModel
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
forecast_model = TFTModel.load("data/forecasting/forecasting-model.pkl")

In [15]:
target_ts_scaled_dict = joblib.load("data/forecasting/target_ts_scaled_dict.pkl")

id_to_order = dict(zip(list(target_ts_scaled_dict.keys()), range(len(target_ts_scaled_dict.keys()))))
id_to_order = {str(key): value for key,value in id_to_order.items()}
order_to_id = {value:key for key,value in id_to_order.items()}

target_ts_scaled_dict = {str(key): value for key, value in target_ts_scaled_dict.items() if str(key) in recommendation_ratings}

covariate_ts_scaled_dict = joblib.load("data/forecasting/covariate_ts_scaled_dict.pkl")
covariate_ts_scaled_dict = {str(key): value for key, value in covariate_ts_scaled_dict.items() if str(key) in recommendation_ratings}

target_scaler = joblib.load("data/forecasting/target_scaler.pkl")

In [16]:
targets = [x[:-12] for x in target_ts_scaled_dict.values()]
covariates = list(covariate_ts_scaled_dict.values())

In [17]:
forecast_preds = forecast_model.predict(n=12, series=targets, past_covariates=covariates)
forecast_preds = dict(zip(list(target_ts_scaled_dict.keys()), forecast_preds))

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Predicting DataLoader 0: 100%|██████████| 139/139 [00:35<00:00,  3.92it/s]


In [18]:
temp_ts = [TimeSeries.from_values(np.array([None])) for _ in range(len(id_to_order))]

for i in target_ts_scaled_dict.keys():
    order = id_to_order[i]
    temp_ts[order] = forecast_preds[i]

inversed = target_scaler.inverse_transform(temp_ts, n_jobs=-1)

forecast_preds = {}
for n, i in enumerate(inversed):
    id_n = order_to_id[n]
    if id_n in target_ts_scaled_dict.keys():
        forecast_preds[id_n] = i


In [19]:
pred_arrays = [x.values() for x in forecast_preds.values()]
pred_arrays = np.concatenate(pred_arrays, axis=0).reshape(-1,)
pred_arrays =TimeSeries.from_values(pred_arrays)

aggregate_scaler = Scaler()
aggregate_scaler.fit(pred_arrays)

Scaler

In [20]:
#USER INPUT

chosen_month = 1

In [21]:
#Scale the values of the chosen month to be used in Layer 4

monthly_preds = {}
for key, value in forecast_preds.items():
    month_indexer = value.time_index.month.get_loc(chosen_month)
    monthly_preds[key] = value[month_indexer].values()[0][0]
    
scaled_preds = aggregate_scaler.transform(TimeSeries.from_values(np.array(list(monthly_preds.values()))))
scaled_preds = scaled_preds.values().reshape(-1,).tolist()

monthly_preds = {key: value for key, value in zip(monthly_preds.keys(), scaled_preds)}

## Layer 4 - Ensemble

In [22]:
def ensemble_algo (normalized_rating, normalized_forecast):
        highest = np.max([normalized_rating, normalized_forecast])
        output = highest + np.mean([normalized_rating, normalized_forecast])
        
        #Maximum possible value for output is 2 and minimum is 0. Values are normalized according to this.
        normalized_ensembled_output = output/2
        return normalized_ensembled_output

aggregated = {key: ensemble_algo(recommendation_ratings[key], monthly_preds[key]) for key in recommendation_ratings}


In [23]:
sort_aggregated = sorted(aggregated, key=aggregated.get, reverse=True)
sorted_movie_names = [movie_dict[int(ID)] for ID in sort_aggregated]

sorted_movie_names[:10]

['Seven (a.k.a. Se7en) (1995)',
 'Matrix, The (1999)',
 'WALLÂ·E (2008)',
 'Slumdog Millionaire (2008)',
 'Prometheus (2012)',
 'Star Trek (2009)',
 'Sin City (2005)',
 'X-Men: First Class (2011)',
 "Pan's Labyrinth (Laberinto del fauno, El) (2006)",
 'Death Proof (2007)']