In [4]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import create_engine
import os
import pandas as pd

app = Flask(__name__)

engine = create_engine('sqlite:///recommender2.db', echo=False)

for f in os.listdir('data/movies/ml-latest-small'):
    if f[-4:] == '.csv':
        data = pd.read_csv(f'data/movies/ml-latest-small/{f}')
        data.to_sql(f[:-4], engine)
        print(f[0:-4])

links
ratings
movies
tags


In [5]:
watched_movie_id_list = ['70286', '109487', '589']

In [3]:
movie_id_unique = 'SELECT * FROM movies'
all_movies = pd.read_sql(movie_id_unique, engine)

In [4]:
all_movies

Unnamed: 0,index,movieId,title,genres
0,0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,2,Jumanji (1995),Adventure|Children|Fantasy
2,2,3,Grumpier Old Men (1995),Comedy|Romance
3,3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...,...
9737,9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,9739,193585,Flint (2017),Drama
9740,9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [7]:
all_movies[45:60]

Unnamed: 0,index,movieId,title,genres
45,45,49,When Night Is Falling (1995),Drama|Romance
46,46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
47,47,52,Mighty Aphrodite (1995),Comedy|Drama|Romance
48,48,53,Lamerica (1994),Adventure|Drama
49,49,54,"Big Green, The (1995)",Children|Comedy
50,50,55,Georgia (1995),Drama
51,51,57,Home for the Holidays (1995),Drama
52,52,58,"Postman, The (Postino, Il) (1994)",Comedy|Drama|Romance
53,53,60,"Indian in the Cupboard, The (1995)",Adventure|Children|Fantasy
54,54,61,Eye for an Eye (1996),Drama|Thriller


In [5]:
query = 'SELECT "userId", ratings."movieId", movies.title, rating FROM ratings JOIN movies ON ratings."movieId" = movies."movieId";'
all_ratings = pd.read_sql(query, engine)
all_ratings

Unnamed: 0,userId,movieId,title,rating
0,1,1,Toy Story (1995),4.0
1,1,3,Grumpier Old Men (1995),4.0
2,1,6,Heat (1995),4.0
3,1,47,Seven (a.k.a. Se7en) (1995),5.0
4,1,50,"Usual Suspects, The (1995)",5.0
...,...,...,...,...
100831,610,166534,Split (2017),4.0
100832,610,168248,John Wick: Chapter Two (2017),5.0
100833,610,168250,Get Out (2017),5.0
100834,610,168252,Logan (2017),5.0


In [6]:
import numpy as np

movie_id_unique = 'SELECT * FROM movies'
all_movies = pd.read_sql(movie_id_unique, engine)

#remove the input movies from all_movies
movies_not_watched = all_movies[~all_movies['movieId'].isin(watched_movie_id_list)]
movies_not_watched.loc[:,'fake_id'] = np.ones(len(movies_not_watched), dtype =int)

#get all_ratings from sqlite
query = 'SELECT "userId", ratings."movieId", movies.title, rating FROM ratings JOIN movies ON ratings."movieId" = movies."movieId";'
all_ratings = pd.read_sql(query, engine)

#remove the watched movies from all_ratings
not_all_ratings = all_ratings[~all_ratings['movieId'].isin(watched_movie_id_list)]

#remove movieId and ratings of the watched movies???
movieindex = not_all_ratings['movieId'].unique().tolist()
dl_movie2movie_encoded = {x: i for i, x in enumerate(movieindex)}
dl_movie_encoded2movie = {i: x for i, x in enumerate(movieindex)}

not_all_ratings.loc[:,"movie"] = not_all_ratings["movieId"].map(dl_movie2movie_encoded)
not_all_ratings.loc[:,"rating"] = not_all_ratings["rating"].values.astype(np.float32)

#map userId from ?? to not_all_ratings ?? what do i do with the users??

not_all_user_ids = not_all_ratings["userId"].unique().tolist()
dl_user2user_encoded = {x: i for i, x in enumerate(not_all_user_ids)}
dl_userencoded2user = {i: x for i, x in enumerate(not_all_user_ids)}

not_all_ratings.loc[:,"user"] = not_all_ratings["userId"].map(dl_user2user_encoded)

min_rating = min(not_all_ratings["rating"])
max_rating = max(not_all_ratings["rating"])
num_users = len(dl_user2user_encoded)
num_movies = len(dl_movie_encoded2movie)
print(
        "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
            num_users, num_movies, min_rating, max_rating
        )
    )

#define training data
df = not_all_ratings.sample(frac=1, random_state=42)
x = not_all_ratings[["user", "movie"]].values

    # Normalize the targets between 0 and 1. Makes it easy to train.
y = not_all_ratings["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
    # Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
        x[:train_indices],
        x[train_indices:],
        y[:train_indices],
        y[train_indices:],
    )

#inference
user_movie_array = movies_not_watched[['fake_id','index']]
max_movie_index = user_movie_array['index'].max()
that = user_movie_array.to_numpy()

EMBEDDING_SIZE = 50

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class RecommenderNet(keras.Model):
        def __init__(self, num_users, num_movies, embedding_size, **kwargs):
            super(RecommenderNet, self).__init__(**kwargs)
            self.num_users = num_users
            self.num_movies = num_movies
            self.embedding_size = embedding_size
            self.user_embedding = layers.Embedding(
                num_users,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            self.user_bias = layers.Embedding(num_users, 1)
            self.movie_embedding = layers.Embedding(
            #change this line from num_movies to max_movie_index+1
                max_movie_index+1,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            #and this line (input_dim)
            self.movie_bias = layers.Embedding(max_movie_index+1, 1)

        def call(self, inputs):
            user_vector = self.user_embedding(inputs[:, 0])
            user_bias = self.user_bias(inputs[:, 0])
            movie_vector = self.movie_embedding(inputs[:, 1])
            movie_bias = self.movie_bias(inputs[:, 1])
            
            dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
            # Add all the components (including bias)
            x = dot_user_movie + user_bias + movie_bias
            # The sigmoid activation forces the rating to between 0 and 1
            print(x)
            return tf.nn.sigmoid(x)
            #return movie_bias

model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
    )
ratings = model.predict(that).flatten()

top_ratings_indices = ratings.argsort()[-10:][::-1]
movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Number of users: 610, Number of Movies: 9721, Min rating: 0.5, Max rating: 5.0
Tensor("recommender_net/add_1:0", shape=(None, 1), dtype=float32)
Tensor("recommender_net/add_1:0", shape=(None, 1), dtype=float32)


Unnamed: 0,index,movieId,title,genres,fake_id
3043,3043,4077,"With a Friend Like Harry... (Harry, un ami qui...",Drama|Thriller,1
3070,3070,4123,Ishtar (1987),Comedy,1
3947,3947,5560,À nous la liberté (Freedom for Us) (1931),Comedy|Musical,1
3950,3950,5564,Swimfan (2002),Thriller,1
3953,3953,5570,Thesis (Tesis) (1996),Drama|Horror|Thriller,1
5268,5268,8656,"Short Film About Killing, A (Krótki film o zab...",Crime|Drama,1
5275,5275,8685,"Miracle of Marcelino, The (Marcelino pan y vin...",Comedy|Drama,1
8379,8379,109578,Non-Stop (2014),Action|Mystery|Thriller,1
8383,8383,109687,Particle Fever (2013),Documentary,1


In [7]:
x

array([[   0,    0],
       [   0,    1],
       [   0,    2],
       ...,
       [ 609, 3118],
       [ 609, 1389],
       [ 609, 2870]])

In [8]:
x

array([[   0,    0],
       [   0,    1],
       [   0,    2],
       ...,
       [ 609, 3118],
       [ 609, 1389],
       [ 609, 2870]])

In [9]:
x.shape

(100474, 2)

In [None]:
not_all_ratings['userId'], not_all_ratings['index']

In [12]:
x[20:40]

array([[ 0, 20],
       [ 0, 21],
       [ 0, 22],
       [ 0, 23],
       [ 0, 24],
       [ 0, 25],
       [ 0, 26],
       [ 0, 27],
       [ 0, 28],
       [ 0, 29],
       [ 0, 30],
       [ 0, 31],
       [ 0, 32],
       [ 0, 33],
       [ 0, 34],
       [ 0, 35],
       [ 0, 36],
       [ 0, 37],
       [ 0, 38],
       [ 0, 39]])

In [11]:
x[100000:100020]

array([[ 609, 2380],
       [ 609, 2986],
       [ 609, 2062],
       [ 609, 9612],
       [ 609, 5971],
       [ 609, 7795],
       [ 609, 3810],
       [ 609, 3446],
       [ 609, 9613],
       [ 609, 5973],
       [ 609, 4322],
       [ 609,  240],
       [ 609, 2386],
       [ 609, 1056],
       [ 609, 1292],
       [ 609, 1148],
       [ 609, 6024],
       [ 609, 1914],
       [ 609, 3082],
       [ 609, 3451]])

In [15]:
(inputs[:, 1])

NameError: name 'inputs' is not defined

In [None]:
มีอย่างนึงที่ต้องลองทำ คือการแยกไฟล์ออกมาจาก modelselect.py เพราะตอนนี้มันยุ่บยั่บมาก น่าจะต้องมีระเบียบวิธีการออกาไนซ์

In [None]:
ตกลงเราจะเอา genres bias ของเราไปคูณยังไงคะ

In [None]:
ตอนนี้รู้แล้วว่า x ก็คือ not_all_ratings['userId'], not_all_ratings['index'] ที่เราทำมาแทบตายน่ะแหละ ได้ใช้

In [None]:
จะเขียนเพื่อทดลองว่า เวลาเอาค่า cosim_score ไปคูณกับ x แล้ว  จะทำให้ผล predict ออกมา เลือกเฉพาะประเภทหนังตามที่ input ไปเท่านั้นหรือไม่

In [None]:
ก็ต้องให้ลองปริ๊น recom_movies_title มาดูว่ามันมี genres อะไรบ้าง

In [1]:
def call(self, inputs):
            user_vector = self.user_embedding(inputs[:, 0])
            user_bias = self.user_bias(inputs[:, 0])
            movie_vector = self.movie_embedding(inputs[:, 1])
            movie_bias = self.movie_bias(inputs[:, 1])
            
            dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
            # Add all the components (including bias)
            x2 = dot_user_movie + user_bias + movie_bias
            # The sigmoid activation forces the rating to between 0 and 1
            print(x2)
            return tf.nn.sigmoid(x2)
            #return movie_bias

In [15]:
RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)

<__main__.RecommenderNet at 0x7f07041c8dc0>

In [11]:
num_users = len(dl_user2user_encoded)
num_movies = len(dl_movie_encoded2movie)

In [22]:
class RecommenderNet2(keras.Model):
        def __init__(self, num_users, num_movies, embedding_size, **kwargs):
            super(RecommenderNet2, self).__init__(**kwargs)
            self.num_users = num_users
            self.num_movies = num_movies
            self.embedding_size = embedding_size
            self.user_embedding = layers.Embedding(
                num_users,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            self.user_bias = layers.Embedding(num_users, 1)
            self.movie_embedding = layers.Embedding(
            #change this line from num_movies to max_movie_index+1
                max_movie_index+1,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            #and this line (input_dim)
            self.movie_bias = layers.Embedding(max_movie_index+1, 1)
            
        def call(self, inputs):
            user_vector = self.user_embedding(inputs[:, 0])
            user_bias = self.user_bias(inputs[:, 0])
            movie_vector = self.movie_embedding(inputs[:, 1])
            movie_bias = self.movie_bias(inputs[:, 1])
            
            dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
            # Add all the components (including bias)
            x2 = dot_user_movie + user_bias + movie_bias
            # The sigmoid activation forces the rating to between 0 and 1
            print(x2)
            print('papa')
            print(tf.nn.sigmoid(x2))
            return tf.nn.sigmoid(x2)
            #return movie_bias

In [16]:
model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
    )
ratings = model.predict(that).flatten()

top_ratings_indices = ratings.argsort()[-10:][::-1]
movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]

Tensor("recommender_net_4/add_1:0", shape=(None, 1), dtype=float32)
Tensor("recommender_net_4/Sigmoid:0", shape=(None, 1), dtype=float32)
Tensor("recommender_net_4/add_1:0", shape=(None, 1), dtype=float32)
Tensor("recommender_net_4/Sigmoid:0", shape=(None, 1), dtype=float32)


Unnamed: 0,index,movieId,title,genres,fake_id
624,624,791,"Last Klezmer: Leopold Kozlowski, His Life and ...",Documentary,1
1024,1024,1335,Blood Beach (1981),Horror|Mystery,1
1029,1029,1341,Burnt Offerings (1976),Horror,1
1054,1054,1371,Star Trek: The Motion Picture (1979),Adventure|Sci-Fi,1
8420,8420,110882,Locke (2013),Drama,1
8422,8422,111146,Alpha and Omega 3: The Great Wolf Games (2014),Action|Adventure|Animation|Children|Comedy,1
8435,8435,111680,At Middleton (2013),Comedy|Romance,1
8436,8436,111732,"Dance of Reality, The (Danza de la realidad, L...",Drama|Fantasy,1
8439,8439,111781,Mission: Impossible - Rogue Nation (2015),Action|Adventure|Thriller,1
9714,9714,188675,Dogman (2018),Crime|Drama,1


In [17]:
model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model

<__main__.RecommenderNet at 0x7f0704115190>

In [19]:
model2 = RecommenderNet2(num_users, num_movies, EMBEDDING_SIZE)
model2

TypeError: super(type, obj): obj must be an instance or subtype of type

In [23]:
model2 = RecommenderNet2(num_users, num_movies, EMBEDDING_SIZE)
model2

<__main__.RecommenderNet2 at 0x7f0704105a30>

In [25]:
model2.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
    )
ratings = model2.predict(that).flatten()
ratings
#top_ratings_indices = ratings.argsort()[-10:][::-1]
#movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]

Tensor("recommender_net2_1/add_1:0", shape=(None, 1), dtype=float32)
papa
Tensor("recommender_net2_1/Sigmoid:0", shape=(None, 1), dtype=float32)


array([0.504908  , 0.5169384 , 0.5228944 , ..., 0.51004094, 0.49802026,
       0.5072579 ], dtype=float32)

In [26]:
model2.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
    )

In [27]:
model2.predict(that).flatten()


Tensor("recommender_net2_1/add_1:0", shape=(None, 1), dtype=float32)
papa
Tensor("recommender_net2_1/Sigmoid:0", shape=(None, 1), dtype=float32)


array([0.504908  , 0.5169384 , 0.5228944 , ..., 0.51004094, 0.49802026,
       0.5072579 ], dtype=float32)

In [None]:
ทำไม call class ตอนสั่ง predict??
def call ที่สั่งให้ RecommenderNet2 print('papa') โดนเรียกตอน model2.predict

In [None]:
ได้ x2 มาเป็น tensor แล้วยังไงต่อ
จะรู้ได้ไงว่าตอน predict แล้วคูณ cosim_score เข้าไปแล้วมันจะผลไม่เหมือนเดิม
ก็ต้องมาดูตอน predict
ต้องเรียกมาดูทั้งหมดน่ะแหละ
top_ratings_indices = ratings.argsort()[-10:][::-1]
movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]

In [32]:
ratings.shape คือ shape ของ movies_not_watched น่ะแหละ

(9739,)

In [30]:
ratings.max()

0.5400332

In [31]:
ratings[ratings.argsort()[-10:]][::-1]

array([0.5400332 , 0.5388386 , 0.53870296, 0.53712237, 0.53705156,
       0.53669834, 0.5365959 , 0.53585166, 0.5355341 , 0.5353538 ],
      dtype=float32)

In [33]:
pd.DataFrame(ratings)

Unnamed: 0,0
0,0.504908
1,0.516938
2,0.522894
3,0.507467
4,0.500166
...,...
9734,0.511043
9735,0.510056
9736,0.510041
9737,0.498020


In [28]:
movies_not_watched.loc[:,'prediction'] = ratings
movies_not_watched.loc[:,'cosim_score'] =
movies_not_watched.loc[:,'prediction with genres bias']

highest_score = ratings[ratings.argsort()[-10:]][::-1]

recom_movie_titles = movies_not_watched.loc[movies_not_watched.loc[:,'prediction'].isin(highest_score)]
#movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]
recom_movie_titles

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Unnamed: 0,index,movieId,title,genres,fake_id,prediction
514,514,597,Pretty Woman (1990),Comedy|Romance,1,0.538839
516,516,600,Love and a .45 (1994),Action|Comedy|Crime,1,0.537122
533,533,628,Primal Fear (1996),Crime|Drama|Mystery|Thriller,1,0.540033
539,539,636,Frisk (1995),Drama,1,0.538703
2926,2926,3925,Stranger Than Paradise (1984),Comedy|Drama,1,0.537052
2930,2930,3929,"Bank Dick, The (1940)",Comedy,1,0.535534
7242,7242,73876,Undisputed II: Last Man Standing (2006),Action|Crime|Drama,1,0.536596
7250,7250,74228,Triangle (2009),Drama|Horror|Mystery|Thriller,1,0.535354
7256,7256,74450,Valentine's Day (2010),Comedy|Romance,1,0.536698
8701,8701,123200,Jim Jefferies: I Swear to God (2009),Comedy,1,0.535852


In [None]:
ต้อง map not_all_ratings['cosim_score'] กลับมาใน movies_not_watched อีกรอบ??
ไม่สิ แค่เอามาคูณกับ prediction

In [None]:
movies_not_watched_bias.loc[:,'prediction with genres bias'] = movies_not_watched_bias['prediction']*movies_not_watched_bias['cosim_score']
#movies_not_watched_bias
ratings2 = movies_not_watched_bias['prediction with genres bias']
highest_score = ratings2[ratings2.argsort()[-10:]][::-1]

recom_movie_titles = movies_not_watched_bias.loc[movies_not_watched_bias['prediction with genres bias'].isin(highest_score)]
#movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]
recom_movie_titles