In [None]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import create_engine
import os
import pandas as pd

# Flask application object (no routes are registered in this cell).
app = Flask(__name__)

# SQLite database file that stores the MovieLens tables.
engine = create_engine('sqlite:///recommender2.db', echo=False)

# Load every CSV of the dataset into a table named after the file
# (file name without its ".csv" suffix).
DATA_DIR = 'data/movies/ml-latest-small'
CSV_SUFFIX = '.csv'

for f in os.listdir(DATA_DIR):
    if not f.endswith(CSV_SUFFIX):
        continue
    table_name = f[:-len(CSV_SUFFIX)]
    data = pd.read_csv(os.path.join(DATA_DIR, f))
    # if_exists='replace': re-running this cell against an existing database
    # would otherwise raise ValueError because the table is already there.
    # index=True (the pandas default) is spelled out because the inference
    # code further down reads the resulting "index" column of `movies`.
    data.to_sql(table_name, engine, if_exists='replace', index=True)
    print(table_name)

In [None]:
# Movie IDs the user has already watched; the IDs are kept as strings here.
watched_movie_id_list = [
    '70286',
    '109487',
    '589',
]

In [None]:
import numpy as np

# ---------------------------------------------------------------------------
# Catalogue: all movies minus the ones the user has already watched
# ---------------------------------------------------------------------------

# The watched IDs arrive as strings, and pandas `isin` never matches a str
# against an int, so the IDs are converted before they are used as a filter.
# NOTE(review): assumes "movieId" is stored as integers in the database
# (it is loaded from CSV by pandas) -- confirm.
watched_movie_ids = [int(movie_id) for movie_id in watched_movie_id_list]

movie_id_unique = 'SELECT * FROM movies'
all_movies = pd.read_sql(movie_id_unique, engine)

# .copy() so the column added below lands on an independent frame instead of
# a slice of all_movies (avoids pandas' chained-assignment warning).
movies_not_watched = all_movies[~all_movies['movieId'].isin(watched_movie_ids)].copy()
# Constant user id (1) that is paired with every candidate movie at inference.
movies_not_watched['fake_id'] = np.ones(len(movies_not_watched), dtype=int)

# ---------------------------------------------------------------------------
# Ratings: every rating joined with its movie title, watched movies removed
# ---------------------------------------------------------------------------
query = 'SELECT "userId", ratings."movieId", movies.title, rating FROM ratings JOIN movies ON ratings."movieId" = movies."movieId";'
all_ratings = pd.read_sql(query, engine)

not_all_ratings = all_ratings[~all_ratings['movieId'].isin(watched_movie_ids)].copy()

# Dense 0..n-1 encoding of the movie ids that still have ratings.
movieindex = not_all_ratings['movieId'].unique().tolist()
dl_movie2movie_encoded = {movie_id: code for code, movie_id in enumerate(movieindex)}
dl_movie_encoded2movie = {code: movie_id for code, movie_id in enumerate(movieindex)}

not_all_ratings["movie"] = not_all_ratings["movieId"].map(dl_movie2movie_encoded)
not_all_ratings["rating"] = not_all_ratings["rating"].astype(np.float32)

# Dense 0..n-1 encoding of the user ids that still have ratings.
not_all_user_ids = not_all_ratings["userId"].unique().tolist()
dl_user2user_encoded = {user_id: code for code, user_id in enumerate(not_all_user_ids)}
dl_userencoded2user = {code: user_id for code, user_id in enumerate(not_all_user_ids)}

not_all_ratings["user"] = not_all_ratings["userId"].map(dl_user2user_encoded)

min_rating = not_all_ratings["rating"].min()
max_rating = not_all_ratings["rating"].max()
num_users = len(dl_user2user_encoded)
num_movies = len(dl_movie_encoded2movie)
print(
    "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_movies, min_rating, max_rating
    )
)

# ---------------------------------------------------------------------------
# Training data
# ---------------------------------------------------------------------------
# Shuffle once with a fixed seed, then build x / y from the *shuffled* frame
# so the 90/10 split below is a random split rather than a split in
# database order.
df = not_all_ratings.sample(frac=1, random_state=42)
x = df[["user", "movie"]].values

# Normalize the targets between 0 and 1. Makes it easy to train.
y = ((df["rating"] - min_rating) / (max_rating - min_rating)).values

# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val = x[:train_indices], x[train_indices:]
y_train, y_val = y[:train_indices], y[train_indices:]

# ---------------------------------------------------------------------------
# Inference input: one (fake user id, movie "index") row per unwatched movie
# ---------------------------------------------------------------------------
user_movie_array = movies_not_watched[['fake_id', 'index']]
# Plain int so it can be used directly as an Embedding input dimension.
max_movie_index = int(user_movie_array['index'].max())
that = user_movie_array.to_numpy()

EMBEDDING_SIZE = 50

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


class RecommenderNet(keras.Model):
    """Embedding-based model that scores (user, movie) pairs between 0 and 1.

    Args:
        num_users: Number of rows in the user embedding and user bias tables.
        num_movies: Stored on the instance. The movie tables are NOT sized by
            it; they are sized by the module-level ``max_movie_index + 1``,
            read when the model is constructed, so that the movie "index"
            values used at inference are valid lookups.
        embedding_size: Width of the user and movie embedding vectors.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, num_users, num_movies, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.num_users = num_users
        self.num_movies = num_movies
        self.embedding_size = embedding_size
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        # Movie tables cover every possible movie "index" value.
        self.movie_embedding = layers.Embedding(
            max_movie_index + 1,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.movie_bias = layers.Embedding(max_movie_index + 1, 1)

    def call(self, inputs):
        """Score each row of ``inputs``.

        Column 0 of ``inputs`` is used as the user lookup and column 1 as the
        movie lookup. Returns one sigmoid-squashed score per row.
        """
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        movie_vector = self.movie_embedding(inputs[:, 1])
        movie_bias = self.movie_bias(inputs[:, 1])
        # Row-wise dot product. tf.tensordot(..., 2) would contract the batch
        # axis as well and give a single scalar shared by every row, making
        # each prediction depend on whatever else is in the batch.
        dot_user_movie = tf.reduce_sum(
            user_vector * movie_vector, axis=1, keepdims=True
        )
        # Add all the components (including bias)
        x = dot_user_movie + user_bias + movie_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)


model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
)
# NOTE(review): no model.fit(...) call is visible before this point, so these
# scores come from freshly initialised weights -- confirm training happens.
ratings = model.predict(that).flatten()

# Positions (rows of `that`) of the ten highest scores, best first.
top_ratings_indices = ratings.argsort()[-10:][::-1]
# `that` has one row per row of movies_not_watched, in the same order, so the
# positions are resolved with iloc. Matching them against the "index" column
# would pick the wrong movies once watched rows have been filtered out.
movies_not_watched.iloc[top_ratings_indices]