In [1]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import create_engine
import os
import pandas as pd

app = Flask(__name__)

engine = create_engine('sqlite:///recommender2.db', echo=False)

for f in os.listdir('data/movies/ml-latest-small'):
    if f[-4:] == '.csv':
        data = pd.read_csv(f'data/movies/ml-latest-small/{f}')
        data.to_sql(f[:-4], engine)
        print(f[0:-4])

links
ratings
movies
tags


In [2]:
watched_movie_id_list = ['70286', '109487', '589']

In [3]:
movie_id_unique = 'SELECT * FROM movies'
all_movies = pd.read_sql(movie_id_unique, engine)

In [4]:
all_movies

Unnamed: 0,index,movieId,title,genres
0,0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,2,Jumanji (1995),Adventure|Children|Fantasy
2,2,3,Grumpier Old Men (1995),Comedy|Romance
3,3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...,...
9737,9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,9739,193585,Flint (2017),Drama
9740,9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [7]:
all_movies[45:60]

Unnamed: 0,index,movieId,title,genres
45,45,49,When Night Is Falling (1995),Drama|Romance
46,46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
47,47,52,Mighty Aphrodite (1995),Comedy|Drama|Romance
48,48,53,Lamerica (1994),Adventure|Drama
49,49,54,"Big Green, The (1995)",Children|Comedy
50,50,55,Georgia (1995),Drama
51,51,57,Home for the Holidays (1995),Drama
52,52,58,"Postman, The (Postino, Il) (1994)",Comedy|Drama|Romance
53,53,60,"Indian in the Cupboard, The (1995)",Adventure|Children|Fantasy
54,54,61,Eye for an Eye (1996),Drama|Thriller


In [5]:
query = 'SELECT "userId", ratings."movieId", movies.title, rating FROM ratings JOIN movies ON ratings."movieId" = movies."movieId";'
all_ratings = pd.read_sql(query, engine)
all_ratings

Unnamed: 0,userId,movieId,title,rating
0,1,1,Toy Story (1995),4.0
1,1,3,Grumpier Old Men (1995),4.0
2,1,6,Heat (1995),4.0
3,1,47,Seven (a.k.a. Se7en) (1995),5.0
4,1,50,"Usual Suspects, The (1995)",5.0
...,...,...,...,...
100831,610,166534,Split (2017),4.0
100832,610,168248,John Wick: Chapter Two (2017),5.0
100833,610,168250,Get Out (2017),5.0
100834,610,168252,Logan (2017),5.0


In [27]:
import numpy as np

movie_id_unique = 'SELECT * FROM movies'
all_movies = pd.read_sql(movie_id_unique, engine)

#remove the input movies from all_movies
movies_not_watched = all_movies[~all_movies['movieId'].isin(watched_movie_id_list)]
movies_not_watched.loc[:,'fake_id'] = np.ones(len(movies_not_watched), dtype =int)

#get all_ratings from sqlite
query = 'SELECT "userId", ratings."movieId", movies.title, rating FROM ratings JOIN movies ON ratings."movieId" = movies."movieId";'
all_ratings = pd.read_sql(query, engine)

#remove the watched movies from all_ratings
not_all_ratings = all_ratings[~all_ratings['movieId'].isin(watched_movie_id_list)]

#remove movieId and ratings of the watched movies???
movieindex = not_all_ratings['movieId'].unique().tolist()
dl_movie2movie_encoded = {x: i for i, x in enumerate(movieindex)}
dl_movie_encoded2movie = {i: x for i, x in enumerate(movieindex)}

not_all_ratings.loc[:,"movie"] = not_all_ratings["movieId"].map(dl_movie2movie_encoded)
not_all_ratings.loc[:,"rating"] = not_all_ratings["rating"].values.astype(np.float32)

#map userId from ?? to not_all_ratings ?? what do i do with the users??

not_all_user_ids = not_all_ratings["userId"].unique().tolist()
dl_user2user_encoded = {x: i for i, x in enumerate(not_all_user_ids)}
dl_userencoded2user = {i: x for i, x in enumerate(not_all_user_ids)}

not_all_ratings.loc[:,"user"] = not_all_ratings["userId"].map(dl_user2user_encoded)

min_rating = min(not_all_ratings["rating"])
max_rating = max(not_all_ratings["rating"])
num_users = len(dl_user2user_encoded)
num_movies = len(dl_movie_encoded2movie)
print(
        "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
            num_users, num_movies, min_rating, max_rating
        )
    )

#define training data
df = not_all_ratings.sample(frac=1, random_state=42)
x = not_all_ratings[["user", "movie"]].values

    # Normalize the targets between 0 and 1. Makes it easy to train.
y = not_all_ratings["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
    # Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
        x[:train_indices],
        x[train_indices:],
        y[:train_indices],
        y[train_indices:],
    )

#inference
user_movie_array = movies_not_watched[['fake_id','index']]
max_movie_index = user_movie_array['index'].max()
that = user_movie_array.to_numpy()

EMBEDDING_SIZE = 50

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers




class RecommenderNet(keras.Model):
        def __init__(self, num_users, num_movies, embedding_size, **kwargs):
            super(RecommenderNet, self).__init__(**kwargs)
            self.num_users = num_users
            self.num_movies = num_movies
            self.embedding_size = embedding_size
            self.user_embedding = layers.Embedding(
                num_users,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            self.user_bias = layers.Embedding(num_users, 1)
            self.movie_embedding = layers.Embedding(
            #change this line from num_movies to max_movie_index+1
                max_movie_index+1,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            #and this line (input_dim)
            self.movie_bias = layers.Embedding(max_movie_index+1, 1)

        def call(self, inputs):
            user_vector = self.user_embedding(inputs[:, 0])
            user_bias = self.user_bias(inputs[:, 0])
            movie_vector = self.movie_embedding(inputs[:, 1])
            movie_bias = self.movie_bias(inputs[:, 1])
            dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
            # Add all the components (including bias)
            x = dot_user_movie + user_bias + movie_bias
            # The sigmoid activation forces the rating to between 0 and 1
            print(x)
            
            tf.print(dot_user_movie) #decimals with positive and negative {-1,1}
            #tf.print(movie_vector) #arrays
            #tf.print(self.movie_embedding) #object

            
            return tf.nn.sigmoid(x)
            #return movie_bias

model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
    )
ratings = model.predict(that).flatten()

top_ratings_indices = ratings.argsort()[-10:][::-1]
movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Number of users: 610, Number of Movies: 9721, Min rating: 0.5, Max rating: 5.0
Tensor("recommender_net_12/add_1:0", shape=(None, 1), dtype=float32)
Tensor("recommender_net_12/add_1:0", shape=(None, 1), dtype=float32)
-0.0489782952
0.00749459397
0.0433911644
-0.0343981311
-0.029390268
0.0361666493
-0.000240749912
-0.017835008
0.0338442922
-0.0605057925
0.031400498
-0.0472784787
0.0164755434
0.0149922408
-0.0199857336
-0.0664265454
0.00958278216
-0.0327761695
0.0491008908
0.0273761582
0.0246188268
-0.0370448455
-0.00600406295
0.030539576
-0.0508691967
-0.0368220545
0.00409349287
0.0586487427
0.0392241925
0.0625276342
0.0137478514
-0.0086724842
0.00114126969
0.0201291
0.0159189235
-0.0156808775
0.0045783096
0.0623187907
0.0224859454
-0.00843438786
-0.0462597758
-0.0357153341
-0.00639643567
-0.0227868091
-0.048805926
-0.000861406559
-0.0301074497
0.0231671557
-0.0157805718
-0.0459921844
-0.00465418
0.0205843765
0.0445701368
-0.0123737194
0.0221072566
-0.0176441148
-0.0186315682
-0.0166421


Unnamed: 0,index,movieId,title,genres,fake_id
865,865,1140,Entertaining Angels: The Dorothy Day Story (1996),Drama,1
890,890,1187,Passion Fish (1992),Drama,1
932,932,1232,Stalker (1979),Drama|Mystery|Sci-Fi,1
946,946,1247,"Graduate, The (1967)",Comedy|Drama|Romance,1
951,951,1252,Chinatown (1974),Crime|Film-Noir|Mystery|Thriller,1
6895,6895,63436,Saw V (2008),Crime|Horror|Thriller,1
6904,6904,63876,Milk (2008),Drama,1
6909,6909,64034,"Boy in the Striped Pajamas, The (Boy in the St...",Drama|War,1
7048,7048,69224,Marius and Jeanette (Marius et Jeannette) (1997),Comedy|Drama|Romance,1
7059,7059,69469,Garfield's Pet Force (2009),Animation,1


In [16]:
tf.print(movie_vector)

NameError: name 'movie_vector' is not defined

In [17]:
def call(self, inputs):
            user_vector = self.user_embedding(inputs[:, 0])
            user_bias = self.user_bias(inputs[:, 0])
            movie_vector = self.movie_embedding(inputs[:, 1])
            movie_bias = self.movie_bias(inputs[:, 1])
            dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
            # Add all the components (including bias)
            x = dot_user_movie + user_bias + movie_bias
            # The sigmoid activation forces the rating to between 0 and 1
            print(x)
            
            tf.print(movie_vector)
            
            
            return tf.nn.sigmoid(x)

In [22]:
r = RecommenderNet(610, 9742, 100)
r

<__main__.RecommenderNet at 0x7fc0de9c82b0>

In [24]:
call(r, 610)

TypeError: 'int' object is not subscriptable

In [8]:
x

array([[   0,    0],
       [   0,    1],
       [   0,    2],
       ...,
       [ 609, 3118],
       [ 609, 1389],
       [ 609, 2870]])

In [9]:
x.shape

(100474, 2)

In [None]:
not_all_ratings['userId'], not_all_ratings['index']

In [12]:
x[20:40]

array([[ 0, 20],
       [ 0, 21],
       [ 0, 22],
       [ 0, 23],
       [ 0, 24],
       [ 0, 25],
       [ 0, 26],
       [ 0, 27],
       [ 0, 28],
       [ 0, 29],
       [ 0, 30],
       [ 0, 31],
       [ 0, 32],
       [ 0, 33],
       [ 0, 34],
       [ 0, 35],
       [ 0, 36],
       [ 0, 37],
       [ 0, 38],
       [ 0, 39]])

In [11]:
x[100000:100020]

array([[ 609, 2380],
       [ 609, 2986],
       [ 609, 2062],
       [ 609, 9612],
       [ 609, 5971],
       [ 609, 7795],
       [ 609, 3810],
       [ 609, 3446],
       [ 609, 9613],
       [ 609, 5973],
       [ 609, 4322],
       [ 609,  240],
       [ 609, 2386],
       [ 609, 1056],
       [ 609, 1292],
       [ 609, 1148],
       [ 609, 6024],
       [ 609, 1914],
       [ 609, 3082],
       [ 609, 3451]])

In [15]:
(inputs[:, 1])

NameError: name 'inputs' is not defined

In [None]:
มีอย่างนึงที่ต้องลองทำ คือการแยกไฟล์ออกมาจาก modelselect.py เพราะตอนนี้มันยุ่บยั่บมาก น่าจะต้องมีระเบียบวิธีการออกาไนซ์

In [None]:
ตกลงเราจะเอา genres bias ของเราไปคูณยังไงคะ

In [None]:
ตอนนี้รู้แล้วว่า x ก็คือ not_all_ratings['userId'], not_all_ratings['index'] ที่เราทำมาแทบตายน่ะแหละ ได้ใช้