In [2]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import create_engine
import os
import pandas as pd

app = Flask(__name__)

engine = create_engine('sqlite:///recommender2.db', echo=False)

for f in os.listdir('data/movies/ml-latest-small'):
    if f[-4:] == '.csv':
        data = pd.read_csv(f'data/movies/ml-latest-small/{f}')
        data.to_sql(f[:-4], engine)
        print(f[0:-4])

links
ratings
movies
tags


In [1]:
pwd

'/flask_jupyter'

In [3]:
watched_movie_id_list = ['70286', '109487', '589']


In [4]:
import numpy as np

movie_id_unique = 'SELECT * FROM movies'
all_movies = pd.read_sql(movie_id_unique, engine)

#remove the input movies from all_movies
movies_not_watched = all_movies[~all_movies['movieId'].isin(watched_movie_id_list)]
movies_not_watched.loc[:,'fake_id'] = np.ones(len(movies_not_watched), dtype =int)

#get all_ratings from sqlite
query = 'SELECT "userId", ratings."movieId", movies.title, rating FROM ratings JOIN movies ON ratings."movieId" = movies."movieId";'
all_ratings = pd.read_sql(query, engine)

#remove the watched movies from all_ratings
not_all_ratings = all_ratings[~all_ratings['movieId'].isin(watched_movie_id_list)]

#remove movieId and ratings of the watched movies???
movieindex = not_all_ratings['movieId'].unique().tolist()
dl_movie2movie_encoded = {x: i for i, x in enumerate(movieindex)}
dl_movie_encoded2movie = {i: x for i, x in enumerate(movieindex)}

not_all_ratings.loc[:,"movie"] = not_all_ratings["movieId"].map(dl_movie2movie_encoded)
not_all_ratings.loc[:,"rating"] = not_all_ratings["rating"].values.astype(np.float32)

#map userId from ?? to not_all_ratings ?? what do i do with the users??

not_all_user_ids = not_all_ratings["userId"].unique().tolist()
dl_user2user_encoded = {x: i for i, x in enumerate(not_all_user_ids)}
dl_userencoded2user = {i: x for i, x in enumerate(not_all_user_ids)}

not_all_ratings.loc[:,"user"] = not_all_ratings["userId"].map(dl_user2user_encoded)

min_rating = min(not_all_ratings["rating"])
max_rating = max(not_all_ratings["rating"])
num_users = len(dl_user2user_encoded)
num_movies = len(dl_movie_encoded2movie)
print(
        "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
            num_users, num_movies, min_rating, max_rating
        )
    )

#define training data
df = not_all_ratings.sample(frac=1, random_state=42)
x = not_all_ratings[["user", "movie"]].values

    # Normalize the targets between 0 and 1. Makes it easy to train.
y = not_all_ratings["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
    # Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
        x[:train_indices],
        x[train_indices:],
        y[:train_indices],
        y[train_indices:],
    )

#inference
user_movie_array = movies_not_watched[['fake_id','index']]
max_movie_index = user_movie_array['index'].max()
that = user_movie_array.to_numpy()

EMBEDDING_SIZE = 50

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class RecommenderNet(keras.Model):
        def __init__(self, num_users, num_movies, embedding_size, **kwargs):
            super(RecommenderNet, self).__init__(**kwargs)
            self.num_users = num_users
            self.num_movies = num_movies
            self.embedding_size = embedding_size
            self.user_embedding = layers.Embedding(
                num_users,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            self.user_bias = layers.Embedding(num_users, 1)
            self.movie_embedding = layers.Embedding(
            #change this line from num_movies to max_movie_index+1
                max_movie_index+1,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            #and this line (input_dim)
            self.movie_bias = layers.Embedding(max_movie_index+1, 1)

        def call(self, inputs):
            user_vector = self.user_embedding(inputs[:, 0])
            user_bias = self.user_bias(inputs[:, 0])
            movie_vector = self.movie_embedding(inputs[:, 1])
            movie_bias = self.movie_bias(inputs[:, 1])
            dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
            # Add all the components (including bias)
            x = dot_user_movie + user_bias + movie_bias
            # The sigmoid activation forces the rating to between 0 and 1
            return tf.nn.sigmoid(x)


model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
    )
ratings = model.predict(that).flatten()

top_ratings_indices = ratings.argsort()[-10:][::-1]
movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Number of users: 610, Number of Movies: 9721, Min rating: 0.5, Max rating: 5.0


Unnamed: 0,index,movieId,title,genres,fake_id
3169,3169,4265,Driven (2001),Action|Thriller,1
3185,3185,4293,Summer Rental (1985),Comedy,1
3193,3193,4305,Angel Eyes (2001),Romance|Thriller,1
3952,3952,5569,"Last House on the Left, The (1972)",Crime|Horror|Thriller,1
3967,3967,5591,Monkey Trouble (1994),Children|Comedy,1
4821,4821,7184,This Property is Condemned (1966),Drama|Romance,1
6688,6688,58156,Semi-Pro (2008),Comedy,1
6703,6703,58347,Penelope (2006),Comedy|Fantasy|Romance,1
8106,8106,100553,Frozen Planet (2011),Documentary,1
8126,8126,101362,Olympus Has Fallen (2013),Action|Thriller,1


In [5]:
top_ratings_indices

array([8126, 8106, 3169, 3185, 3967, 3193, 4821, 6703, 6688, 3952])

In [7]:
ratings[top_ratings_indices]

array([0.52255166, 0.52112556, 0.52022755, 0.5195387 , 0.5190718 ,
       0.51900285, 0.5189548 , 0.5189225 , 0.51872057, 0.5186978 ],
      dtype=float32)

In [8]:
ratings[3952]

0.5186978

In [None]:
อะ คือตรงนี้ ลิสท์ที่กลับมาจาก user input มันให้เลือกด้วยลิสท์ all_movies เพราะจะให้ไปเลือกจาก all_ratings ก็คงประหลาด
ถึงแม้ว่า movieId ใน all_ratings ก็มีเหมือนกัน แต่มันไม่ต่อกัน คือเลขอินเด็กซ์มันไม่สะท้อนถึง unique list
ที่มีจำนวนหนังทั้งหมด 9742 เรื่อง

มันเป็นการสร้างความสัมพันธ์ จะนึกเป็นก้อนคิวบ์สามมิติสองก้อน แล้วมีเส้นๆเชื่อมกันเป็นเน็ตเวิร์คก็ได้
แล้วแต่ละจุด ก็พล็อตด้วยเลขเว็คเตอร์ ซึ่งก็คือ embedding นั่นเอง
แล้วเส้นความสัมพันธ์ก็คือ weight
ว่าได้เท่าไหร่ ห่างจาก 0.5 มากน้อยแค่ไหน

เอา 10 อันดับที่ใกล้กับ 0.5 มากสุด
คือก็คือ ย้อนไปห้า เดินหน้าไปห้า ก็ได้เป็น
0.495 ถึง 0.505

ทำไมต้องเป็น 0.5
ก็คือระยะห่างระหว่าง 2 จุด
คือจุดว่า user คนนี้ให้ rating หนังเรื่องนี้เท่านี้ เต็ม 5.0
แล้ว user ของเราคนนี้จะให้เท่าไหร่

ยังไงก็ต้องมี user embedding ด้วย
เพราะมัน predict against person/identity

In [None]:
เสร็จก็เลยไปเพิ่มคอลัมน์ not_all_ratings['index'] เพื่อจะได้เรียกหนังเรื่องนึงได้ ตาม index ของมัน
แล้วก็ใส่ not_all_ratings['user'] ด้วย
แต่มันมีอยู่แล้วนี่หว่า??
แต่มันเริ่มต้นที่ 0-609 เลยต้องไป encode รอบนึงก่อน? ไรเงี้ยเหรอ?

เสร็จก็เอา 

In [10]:
not_all_ratings[80000:80020]

Unnamed: 0,userId,movieId,title,rating
80294,506,41571,Memoirs of a Geisha (2005),3.0
80295,506,45447,"Da Vinci Code, The (2006)",0.5
80296,506,45720,"Devil Wears Prada, The (2006)",3.0
80297,506,45880,Marie Antoinette (2006),3.0
80298,506,47629,The Queen (2006),4.0
80299,506,48997,Perfume: The Story of a Murderer (2006),3.5
80300,506,49286,"Holiday, The (2006)",1.0
80301,506,51834,Becoming Jane (2007),3.0
80302,506,52973,Knocked Up (2007),3.5
80303,506,52975,Hairspray (2007),3.0


In [14]:
not_all_ratings['movieId'] ไม่ซ้ำกันเลยซักกะบรรทัดหรือเปล่า ถึงแม้ว่าจะเป็นหนังเรื่องเดียวกัน
ไม่หนิ
สำหรับหนังเรื่อง the Davinci Code ทุกบรรทัดเป็น 45447

KeyError: "None of [Index(['Toy Story (1995)', 'Grumpier Old Men (1995)', 'Heat (1995)',\n       'Seven (a.k.a. Se7en) (1995)', 'Usual Suspects, The (1995)',\n       'From Dusk Till Dawn (1996)', 'Bottle Rocket (1996)',\n       'Braveheart (1995)', 'Rob Roy (1995)', 'Canadian Bacon (1995)',\n       ...\n       'The Magnificent Seven (2016)', 'Blair Witch (2016)', '31 (2016)',\n       'Arrival (2016)', 'Rogue One: A Star Wars Story (2016)', 'Split (2017)',\n       'John Wick: Chapter Two (2017)', 'Get Out (2017)', 'Logan (2017)',\n       'The Fate of the Furious (2017)'],\n      dtype='object', length=100474)] are in the [index]"

In [15]:
not_all_ratings.loc[not_all_ratings['movieId'] == 45447]

Unnamed: 0,userId,movieId,title,rating
3434,21,45447,"Da Vinci Code, The (2006)",3.5
7828,52,45447,"Da Vinci Code, The (2006)",3.5
8932,62,45447,"Da Vinci Code, The (2006)",3.5
9943,64,45447,"Da Vinci Code, The (2006)",2.5
16592,105,45447,"Da Vinci Code, The (2006)",4.0
17536,111,45447,"Da Vinci Code, The (2006)",4.0
25535,177,45447,"Da Vinci Code, The (2006)",3.5
30362,212,45447,"Da Vinci Code, The (2006)",3.5
31976,219,45447,"Da Vinci Code, The (2006)",2.5
32760,222,45447,"Da Vinci Code, The (2006)",3.0


In [4]:
import numpy as np

movie_id_unique = 'SELECT * FROM movies'
all_movies = pd.read_sql(movie_id_unique, engine)

#remove the input movies from all_movies
movies_not_watched = all_movies[~all_movies['movieId'].isin(watched_movie_id_list)]
movies_not_watched.loc[:,'fake_id'] = np.ones(len(movies_not_watched), dtype =int)

#get all_ratings from sqlite
query = 'SELECT "userId", ratings."movieId", movies.title, rating FROM ratings JOIN movies ON ratings."movieId" = movies."movieId";'
all_ratings = pd.read_sql(query, engine)

#remove the watched movies from all_ratings
not_all_ratings = all_ratings[~all_ratings['movieId'].isin(watched_movie_id_list)]

#remove movieId and ratings of the watched movies???
movieindex = not_all_ratings['movieId'].unique().tolist()
dl_movie2movie_encoded = {x: i for i, x in enumerate(movieindex)}
dl_movie_encoded2movie = {i: x for i, x in enumerate(movieindex)}

not_all_ratings.loc[:,"movie"] = not_all_ratings["movieId"].map(dl_movie2movie_encoded)
not_all_ratings.loc[:,"rating"] = not_all_ratings["rating"].values.astype(np.float32)

#map userId from ?? to not_all_ratings ?? what do i do with the users??

not_all_user_ids = not_all_ratings["userId"].unique().tolist()
dl_user2user_encoded = {x: i for i, x in enumerate(not_all_user_ids)}
dl_userencoded2user = {i: x for i, x in enumerate(not_all_user_ids)}

not_all_ratings.loc[:,"user"] = not_all_ratings["userId"].map(dl_user2user_encoded)

min_rating = min(not_all_ratings["rating"])
max_rating = max(not_all_ratings["rating"])
num_users = len(dl_user2user_encoded)
num_movies = len(dl_movie_encoded2movie)
print(
        "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
            num_users, num_movies, min_rating, max_rating
        )
    )

#define training data
df = not_all_ratings.sample(frac=1, random_state=42)
x = not_all_ratings[["user", "movie"]].values

    # Normalize the targets between 0 and 1. Makes it easy to train.
y = not_all_ratings["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
    # Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
        x[:train_indices],
        x[train_indices:],
        y[:train_indices],
        y[train_indices:],
    )

#inference
user_movie_array = movies_not_watched[['fake_id','index']]
max_movie_index = user_movie_array['index'].max()
that = user_movie_array.to_numpy()

EMBEDDING_SIZE = 50

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class RecommenderNet(keras.Model):
        def __init__(self, num_users, num_movies, embedding_size, **kwargs):
            super(RecommenderNet, self).__init__(**kwargs)
            self.num_users = num_users
            self.num_movies = num_movies
            self.embedding_size = embedding_size
            self.user_embedding = layers.Embedding(
                num_users,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            self.user_bias = layers.Embedding(num_users, 1)
            self.movie_embedding = layers.Embedding(
            #change this line from num_movies to max_movie_index+1
                max_movie_index+1,
                embedding_size,
                embeddings_initializer="he_normal",
                embeddings_regularizer=keras.regularizers.l2(1e-6),
            )
            #and this line (input_dim)
            self.movie_bias = layers.Embedding(max_movie_index+1, 1)

        def call(self, inputs):
            user_vector = self.user_embedding(inputs[:, 0])
            user_bias = self.user_bias(inputs[:, 0])
            movie_vector = self.movie_embedding(inputs[:, 1])
            movie_bias = self.movie_bias(inputs[:, 1])
            dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
            # Add all the components (including bias)
            x = dot_user_movie + user_bias + movie_bias
            # The sigmoid activation forces the rating to between 0 and 1
            return tf.nn.sigmoid(x)


model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
    )
ratings = model.predict(that).flatten()


top_ratings_indices = ratings.argsort()[-10:][::-1]
movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Number of users: 610, Number of Movies: 9721, Min rating: 0.5, Max rating: 5.0


Unnamed: 0,index,movieId,title,genres,fake_id
2664,2664,3566,"Big Kahuna, The (2000)",Comedy|Drama,1
2666,2666,3568,Smiling Fish and Goat on Fire (1999),Comedy|Romance,1
2667,2667,3569,"Idiots, The (Idioterne) (1998)",Comedy|Drama,1
2675,2675,3580,Up at the Villa (2000),Drama,1
7441,7441,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...,1
7447,7447,81383,Heartbreaker (L'Arnacoeur) (2010),Comedy|Romance,1
7453,7453,81537,Due Date (2010),Comedy,1
9280,9280,157699,Snowden (2016),Drama|Thriller,1
9305,9305,159077,The Meddler (2016),Comedy|Drama,1
9306,9306,159093,Now You See Me 2 (2016),Action|Comedy|Thriller,1


In [5]:
recom_movie_titles = movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]
recom_movie_titles

Unnamed: 0,index,movieId,title,genres,fake_id
2664,2664,3566,"Big Kahuna, The (2000)",Comedy|Drama,1
2666,2666,3568,Smiling Fish and Goat on Fire (1999),Comedy|Romance,1
2667,2667,3569,"Idiots, The (Idioterne) (1998)",Comedy|Drama,1
2675,2675,3580,Up at the Villa (2000),Drama,1
7441,7441,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...,1
7447,7447,81383,Heartbreaker (L'Arnacoeur) (2010),Comedy|Romance,1
7453,7453,81537,Due Date (2010),Comedy,1
9280,9280,157699,Snowden (2016),Drama|Thriller,1
9305,9305,159077,The Meddler (2016),Comedy|Drama,1
9306,9306,159093,Now You See Me 2 (2016),Action|Comedy|Thriller,1


In [71]:
recom_movie_titles = movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]
recom_movie_titles

Unnamed: 0,index,movieId,title,genres,fake_id,score
2664,2664,3566,"Big Kahuna, The (2000)",Comedy|Drama,1,0.514363
2666,2666,3568,Smiling Fish and Goat on Fire (1999),Comedy|Romance,1,0.51408
2667,2667,3569,"Idiots, The (Idioterne) (1998)",Comedy|Drama,1,0.532203
2675,2675,3580,Up at the Villa (2000),Drama,1,0.517897
7441,7441,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...,1,0.517811
7447,7447,81383,Heartbreaker (L'Arnacoeur) (2010),Comedy|Romance,1,0.524657
7453,7453,81537,Due Date (2010),Comedy,1,0.514751
9280,9280,157699,Snowden (2016),Drama|Thriller,1,0.510308
9305,9305,159077,The Meddler (2016),Comedy|Drama,1,0.522291
9306,9306,159093,Now You See Me 2 (2016),Action|Comedy|Thriller,1,0.514225


In [6]:
recom_movie_titles.shape

(10, 5)

In [9]:
recom_movie_titles[2:5]

Unnamed: 0,index,movieId,title,genres,fake_id
2667,2667,3569,"Idiots, The (Idioterne) (1998)",Comedy|Drama,1
2675,2675,3580,Up at the Villa (2000),Drama,1
7441,7441,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...,1


In [10]:
recom_movie_titles.describe().to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>index</th>\n      <th>movieId</th>\n      <th>fake_id</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>count</th>\n      <td>10.000000</td>\n      <td>10.00000</td>\n      <td>10.0</td>\n    </tr>\n    <tr>\n      <th>mean</th>\n      <td>6090.400000</td>\n      <td>73420.40000</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>std</th>\n      <td>3040.829646</td>\n      <td>67892.37843</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>min</th>\n      <td>2664.000000</td>\n      <td>3566.00000</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>25%</th>\n      <td>2669.000000</td>\n      <td>3571.75000</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>50%</th>\n      <td>7444.000000</td>\n      <td>81257.50000</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>75%</th>\n      <td>8823.250000</td>\n      <td>138658.50000</td>\n      

In [30]:
recom_movie_titles.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>index</th>\n      <th>movieId</th>\n      <th>title</th>\n      <th>genres</th>\n      <th>fake_id</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2664</th>\n      <td>2664</td>\n      <td>3566</td>\n      <td>Big Kahuna, The (2000)</td>\n      <td>Comedy|Drama</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2666</th>\n      <td>2666</td>\n      <td>3568</td>\n      <td>Smiling Fish and Goat on Fire (1999)</td>\n      <td>Comedy|Romance</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2667</th>\n      <td>2667</td>\n      <td>3569</td>\n      <td>Idiots, The (Idioterne) (1998)</td>\n      <td>Comedy|Drama</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2675</th>\n      <td>2675</td>\n      <td>3580</td>\n      <td>Up at the Villa (2000)</td>\n      <td>Drama</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>7441</th>\n      <td>7441</td>

In [32]:
ratings.shape

(9739,)

In [36]:
top_ratings_indices = ratings.argsort()[-10:][::-1]
top_ratings_indices

array([7447, 2664, 9306, 2666, 7441, 9305, 7453, 2667, 9280, 2675])

In [55]:
top_ratings_indices.shape

(10,)

In [21]:
ratings[top_ratings_indices]
#จะแปะ series นี้ ไปเป็นคอลัมน์ใหม่ใน recom_movie_titles 
#แล้วเอา fake_id ออก

array([0.53307277, 0.5326335 , 0.532324  , 0.53220296, 0.532197  ,
       0.5321621 , 0.5320879 , 0.5320722 , 0.53199345, 0.53191656],
      dtype=float32)

In [78]:
recom_movie_titles['index'][top_ratings_indices]

7447    7447
2664    2664
9306    9306
2666    2666
7441    7441
9305    9305
7453    7453
2667    2667
9280    9280
2675    2675
Name: index, dtype: int64

In [58]:
ratings

array([0.48825327, 0.48752755, 0.4993802 , ..., 0.5118847 , 0.50651115,
       0.5047595 ], dtype=float32)

In [61]:
movies_not_watched.shape

(9739, 5)

In [64]:
movies_not_watched.loc[:,'score']=ratings
movies_not_watched.loc[:,'score']

0       0.488253
1       0.487528
2       0.499380
3       0.485551
4       0.486138
          ...   
9737    0.517832
9738    0.503938
9739    0.511885
9740    0.506511
9741    0.504759
Name: score, Length: 9739, dtype: float32

In [79]:
movies_not_watched.loc[:,'score'][7447]

0.5246568

In [82]:
movies_not_watched.loc[:,'score'][9741]

0.5047595

In [83]:
movies_not_watched.loc[:,'score'][2664]

0.5143631

In [89]:
movies_not_watched.loc[:,'score'][0:9]

0    0.488253
1    0.487528
2    0.499380
3    0.485551
4    0.486138
5    0.491378
6    0.502781
7    0.490023
8    0.486196
Name: score, dtype: float32

In [75]:
movies_not_watched.loc[:,'score'].argsort() #อันนี้เหมือนจะถูก นอกนั้นเหมือนจะผิดหมด

0       2060
1       2072
2       2076
3       2057
4       2064
        ... 
9737    7441
9738    2666
9739    9306
9740    2664
9741    7447
Name: score, Length: 9739, dtype: int64

In [76]:
movies_not_watched.loc[:,'score'].argsort()[8000:8020]

8002    6895
8003    5931
8004    3107
8005    8411
8006     588
8007    2973
8008    4260
8009    7234
8010    1216
8011    8698
8012    2672
8013    9012
8014    1578
8015    1278
8016     536
8017    8898
8018    9072
8019    4816
8020    4004
8021    8891
Name: score, dtype: int64

In [43]:
ratings[top_ratings_indices].map(top_ratings_indices)


AttributeError: 'numpy.ndarray' object has no attribute 'map'

In [None]:
df['Good_Bad'] = (dfc.values[:,None]==arr).all(2).any(1).astype(int)

In [19]:
recom_movie_titles['index'][top_ratings_indices]

7447    7447
2664    2664
9306    9306
2666    2666
7441    7441
9305    9305
7453    7453
2667    2667
9280    9280
2675    2675
Name: index, dtype: int64

In [87]:
recom_movie_titles['index'][top_ratings_indices] = ratings[top_ratings_indices]
recom_movie_titles['index'][top_ratings_indices] #ก็แค่นั้นแหละ

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recom_movie_titles['index'][top_ratings_indices] = ratings[top_ratings_indices]


7447    0.533073
2664    0.532633
9306    0.532324
2666    0.532203
7441    0.532197
9305    0.532162
7453    0.532088
2667    0.532072
9280    0.531993
2675    0.531917
Name: index, dtype: float32

In [44]:
recom_movie_titles['score'] = ratings[top_ratings_indices] #ผิด ต้อง index 7447 ที่ได้  0.533073 ทำไงให้มัน reflect วะ
recom_movie_titles

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recom_movie_titles['score'] = ratings[top_ratings_indices]


Unnamed: 0,index,movieId,title,genres,fake_id,score
2664,2664,3566,"Big Kahuna, The (2000)",Comedy|Drama,1,0.533073
2666,2666,3568,Smiling Fish and Goat on Fire (1999),Comedy|Romance,1,0.532633
2667,2667,3569,"Idiots, The (Idioterne) (1998)",Comedy|Drama,1,0.532324
2675,2675,3580,Up at the Villa (2000),Drama,1,0.532203
7441,7441,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...,1,0.532197
7447,7447,81383,Heartbreaker (L'Arnacoeur) (2010),Comedy|Romance,1,0.532162
7453,7453,81537,Due Date (2010),Comedy,1,0.532088
9280,9280,157699,Snowden (2016),Drama|Thriller,1,0.532072
9305,9305,159077,The Meddler (2016),Comedy|Drama,1,0.531993
9306,9306,159093,Now You See Me 2 (2016),Action|Comedy|Thriller,1,0.531917


In [103]:
recom_movie_titles

Unnamed: 0,index,movieId,title,genres,fake_id,score
2664,0.532633,3566,"Big Kahuna, The (2000)",Comedy|Drama,1,0.514363
2666,0.532203,3568,Smiling Fish and Goat on Fire (1999),Comedy|Romance,1,0.51408
2667,0.532072,3569,"Idiots, The (Idioterne) (1998)",Comedy|Drama,1,0.532203
2675,0.531917,3580,Up at the Villa (2000),Drama,1,0.517897
7441,0.532197,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...,1,0.517811
7447,0.533073,81383,Heartbreaker (L'Arnacoeur) (2010),Comedy|Romance,1,0.524657
7453,0.532088,81537,Due Date (2010),Comedy,1,0.514751
9280,0.531993,157699,Snowden (2016),Drama|Thriller,1,0.510308
9305,0.532162,159077,The Meddler (2016),Comedy|Drama,1,0.522291
9306,0.532324,159093,Now You See Me 2 (2016),Action|Comedy|Thriller,1,0.514225


In [65]:
recom_movie_titles['index']

2664    2664
2666    2666
2667    2667
2675    2675
7441    7441
7447    7447
7453    7453
9280    9280
9305    9305
9306    9306
Name: index, dtype: int64

In [50]:
recom_movie_titles['index']
ratings[top_ratings_indices]

array([0.53307277, 0.5326335 , 0.532324  , 0.53220296, 0.532197  ,
       0.5321621 , 0.5320879 , 0.5320722 , 0.53199345, 0.53191656],
      dtype=float32)

In [51]:
ratings

array([0.48825327, 0.48752755, 0.4993802 , ..., 0.5118847 , 0.50651115,
       0.5047595 ], dtype=float32)

In [52]:
ratings.shape

(9739,)

In [66]:
recom_movie_titles.shape

(10, 6)

In [None]:
movies_not_watched['score'] = ratings


In [92]:
movies_not_watched['score'].max() #อันนี้ถุก มาถูกทางแล้ว อันอื่นน่าจะผิด 

0.5330727696418762

In [93]:
movies_not_watched['score'].idxmax() #ทำไมไม่ใช่ 7447?

7449

In [101]:
movies_not_watched['score'].argmax() #หา ind ของอันนี้ ทำไมได้แล้วล่ะ งง?

7447

In [96]:
movies_not_watched['title'][7449]

'Heartbeats (Les amours imaginaires) (2010)'

In [102]:
movies_not_watched['title'][7447] #ได้โรแมนติกคอมาดี้ออกมาเนี่ยนะ ใส่ terminator2, district9, interstellar เข้าไปเนี่ยนะ!!!

"Heartbreaker (L'Arnacoeur) (2010)"

In [None]:
#ต้องทำแบบนี้หลังจากได้ ratings มาสดๆ แต่ตอนนี้มันมั่วไปหมดแล้ว
movies_not_watched.loc[:,'score'] = ratings

top_ratings_indices = ratings.argsort()[-10:][::-1]

recom_movie_titles = movies_not_watched.loc[movies_not_watched['index'].isin(top_ratings_indices)]
# movies_not_watched ก็จะมี ratings อยู่แล้ว แล้ว recom_movie_titles ก็ sort ตามนั้น ไม่ใช่เอามาแปะทีหลังมันจะงง

In [68]:
movies_not_watched[8000:8020]

Unnamed: 0,index,movieId,title,genres,fake_id,score
8002,8002,97225,Hotel Transylvania (2012),Animation|Children|Comedy,1,0.482903
8003,8003,97230,Side by Side (2012),Documentary,1,0.494257
8004,8004,97285,Take Aim at the Police Van (Sono gosôsha wo ne...,Action|Crime|Mystery,1,0.483731
8005,8005,97304,Argo (2012),Drama|Thriller,1,0.496324
8006,8006,97306,Seven Psychopaths (2012),Comedy|Crime,1,0.493037
8007,8007,97328,Liberal Arts (2012),Comedy|Drama,1,0.501692
8008,8008,97470,Catch .44 (2011),Action|Drama|Thriller,1,0.484863
8009,8009,97643,[REC]³ 3 Génesis (2012),Horror|Thriller,1,0.50228
8010,8010,97665,Asterix & Obelix: God Save Britannia (Astérix ...,Adventure|Comedy,1,0.487063
8011,8011,97701,Paranormal Activity 4 (2012),Horror|IMAX,1,0.489603


In [27]:
recom_movie_titles.drop(columns='fake_id')

Unnamed: 0,index,movieId,title,genres
2664,2664,3566,"Big Kahuna, The (2000)",Comedy|Drama
2666,2666,3568,Smiling Fish and Goat on Fire (1999),Comedy|Romance
2667,2667,3569,"Idiots, The (Idioterne) (1998)",Comedy|Drama
2675,2675,3580,Up at the Villa (2000),Drama
7441,7441,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...
7447,7447,81383,Heartbreaker (L'Arnacoeur) (2010),Comedy|Romance
7453,7453,81537,Due Date (2010),Comedy
9280,9280,157699,Snowden (2016),Drama|Thriller
9305,9305,159077,The Meddler (2016),Comedy|Drama
9306,9306,159093,Now You See Me 2 (2016),Action|Comedy|Thriller


In [10]:
user_movie_ratings = pd.pivot_table(all_ratings, values='rating', index='userId', columns='movieId')
user_movie_ratings

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [11]:
user_movie_id_ratings_matrix = user_movie_ratings.fillna(0)
user_movie_id_ratings_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
number_of_genres = 10
m = NMF(n_components=number_of_genres)
m.fit(user_movie_id_ratings_matrix)

NMF(n_components=10)