## Non-Negative Matrix Factorization for Faster Recommendation

**Wikipedia:** Non-negative matrix factorization is a group of algorithms in multivariate analysis and linear algebra where a matrix V is factorized into (usually) two matrices W and H, with the property that all three matrices have no negative elements. This non-negativity makes the resulting matrices easier to inspect. Also, in applications such as processing of audio spectrograms or muscular activity, non-negativity is inherent to the data being considered. 

<img src="https://upload.wikimedia.org/wikipedia/commons/f/f9/NMF.png">

_Source: https://upload.wikimedia.org/wikipedia/commons/f/f9/NMF.png_

### Libraries

In [9]:
from surprise import SVD, SVDpp, NMF
from surprise import Dataset, accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise import Reader
import pandas as pd
import time
import numpy as np

### Dataset Preparation

In [92]:
ratings = pd.read_csv('data/ratings.csv')

ratings_dict = {'itemID': ratings.movie_id_ml,
                'userID': ratings.user_id,
                'rating': ratings.rating
               }

df = pd.DataFrame(ratings_dict)
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25)

### Top-15 Most Voted Movies

In [138]:
mat = np.zeros((max(ratings.user_id), max(ratings.movie_id_ml)))
ind = np.array(list(zip(list(ratings.user_id-1), list(ratings.movie_id_ml-1))))
mat[ind[:,0], ind[:,1]] = 1
movies_ = mat.sum(axis=0).argsort()+1
np.random.shuffle(movies_)
top15 = movies_[:15]

In [158]:
pd.read_csv('data/u.item.txt', delimiter='|', encoding = "ISO-8859-1", header=None)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,01-Jan-1995,,http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,Twelve Monkeys (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Twelve%20Monk...,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
7,8,Babe (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Babe%20(1995),0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
8,9,Dead Man Walking (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Dead%20Man%20...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,10,Richard III (1995),22-Jan-1996,,http://us.imdb.com/M/title-exact?Richard%20III...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [147]:
df_ML_movies = pd.read_csv('data/u.item.txt', delimiter='|', names=column_item, encoding = "ISO-8859-1") 

In [160]:
min(ratings.user_id)

1

In [148]:
df_ML_movies

Unnamed: 0,movie_id_ml,title,release,vrelease,url,unknown,action,adventure,animation,childrens,...,fantasy,noir,horror,musical,mystery,romance,scifi,thriller,war,western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,01-Jan-1995,,http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,Twelve Monkeys (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Twelve%20Monk...,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
7,8,Babe (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Babe%20(1995),0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
8,9,Dead Man Walking (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Dead%20Man%20...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,10,Richard III (1995),22-Jan-1996,,http://us.imdb.com/M/title-exact?Richard%20III...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [151]:
df_ML_movies.movie_id_ml.isin(top15)

0       False
1       False
2       False
3       False
4       False
        ...  
1677    False
1678    False
1679    False
1680    False
1681    False
Name: movie_id_ml, Length: 1682, dtype: bool

In [153]:
top15

array([1604,  217,  435, 1290,  855,  417, 1464,  858, 1411,   77, 1184,
        266,  353, 1171,  904])

In [154]:
df_ML_movies[df_ML_movies.movie_id_ml.isin(top15)]

Unnamed: 0,movie_id_ml,title,release,vrelease,url,unknown,action,adventure,animation,childrens,...,fantasy,noir,horror,musical,mystery,romance,scifi,thriller,war,western
76,77,"Firm, The (1993)",01-Jan-1993,,"http://us.imdb.com/M/title-exact?Firm,%20The%2...",0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
216,217,Bram Stoker's Dracula (1992),01-Jan-1992,,http://us.imdb.com/M/title-exact?Bram%20Stoker...,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
265,266,Kull the Conqueror (1997),29-Aug-1997,,http://us.imdb.com/M/title-exact?Kull+the+Conq...,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
352,353,Deep Rising (1998),30-Jan-1998,,http://us.imdb.com/M/title-exact?imdb-title-11...,0,1,0,0,0,...,0,0,1,0,0,0,1,0,0,0
416,417,"Parent Trap, The (1961)",01-Jan-1961,,http://us.imdb.com/M/title-exact?Parent%20Trap...,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
434,435,Butch Cassidy and the Sundance Kid (1969),01-Jan-1969,,http://us.imdb.com/M/title-exact?Butch%20Cassi...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
854,855,Diva (1981),01-Jan-1981,,http://us.imdb.com/M/title-exact?Diva%20(1981),0,1,0,0,0,...,0,0,0,0,1,1,0,1,0,0
857,858,Amityville: Dollhouse (1996),01-Jan-1996,,http://us.imdb.com/M/title-exact?Amityville:%2...,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
903,904,Ma vie en rose (My Life in Pink) (1997),26-Dec-1997,,http://us.imdb.com/M/title-exact?imdb-title-11...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1170,1171,Wild Reeds (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Roseaux%20sau...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [72]:
mat.sum(axis=0).argsort()

array([1681,  813, 1446, ...,   99,  257,   49])

array([ 97,   6,  55, 126, 173, 120, 299,   0, 287, 285, 293, 180,  99,
       257,  49])

In [77]:
mat.sum(axis=0)[0]

452.0

In [164]:
len(np.unique(ratings.user_id))

943

In [90]:
pd.read_csv('data/movies_cast_company.csv')

Unnamed: 0,movie_id_ml,title,release,url,unknown,action,adventure,animation,childrens,comedy,...,mystery,romance,scifi,thriller,war,western,movie_id,keyword,cast,company
0,1,toy story,1995,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,1,...,0,0,0,0,0,0,2445635,"['walkie-talkie', 'boy', 'slow-motion', 'villa...","[{""cast_id"": 193929, ""person_id"": 30260, ""cast...","[{""company_id"": 34, ""name"": ""Warner Home Video..."
1,2,goldeneye,1995,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,0,...,0,0,0,1,0,0,1923289,"['car-chase', 'good-versus-evil', '1990s', 'bl...","[{""cast_id"": 586283, ""person_id"": 83451, ""cast...","[{""company_id"": 19, ""name"": ""National Broadcas..."
2,3,four rooms,1995,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,0,...,0,0,0,1,0,0,1900170,"['number-in-title', 'title-directed-by-female'...","[{""cast_id"": 629008, ""person_id"": 89615, ""cast...","[{""company_id"": 11745, ""name"": ""Laurenfilm"", ""..."
3,4,get shorty,1995,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,1,...,0,0,0,0,0,0,1915485,"['actress', 'father-daughter-relationship', 'r...","[{""cast_id"": 1341029, ""person_id"": 184099, ""ca...","[{""company_id"": 19, ""name"": ""National Broadcas..."
4,5,copycat,1995,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,0,...,0,0,0,1,0,0,1788620,"['mother-son-relationship', 'san-francisco-cal...","[{""cast_id"": 643068, ""person_id"": 91412, ""cast...","[{""company_id"": 34, ""name"": ""Warner Home Video..."
5,7,twelve monkeys,1995,http://us.imdb.com/M/title-exact?Twelve%20Monk...,0,0,0,0,0,0,...,0,0,1,0,0,0,2455256,"['number-in-title', 'father-son-relationship',...","[{""cast_id"": 30433, ""person_id"": 5426, ""cast_n...","[{""company_id"": 160, ""name"": ""American Broadca..."
6,8,babe,1995,http://us.imdb.com/M/title-exact?Babe%20(1995),0,0,0,0,1,1,...,0,0,0,0,0,0,1706589,"['singing', '1990s', 'cat', 'based-on-novel', ...","[{""cast_id"": 555738, ""person_id"": 79746, ""cast...","[{""company_id"": 160, ""name"": ""American Broadca..."
7,9,dead man walking,1995,http://us.imdb.com/M/title-exact?Dead%20Man%20...,0,0,0,0,0,0,...,0,0,0,0,0,0,1811182,"['flashback', 'smoking', 'blockbuster', 'title...","[{""cast_id"": 24931, ""person_id"": 4509, ""cast_n...","[{""company_id"": 6, ""name"": ""Columbia Broadcast..."
8,12,the usual suspects,1995,http://us.imdb.com/M/title-exact?Usual%20Suspe...,0,0,0,0,0,0,...,0,0,0,1,0,0,2423463,"['father-son-relationship', 'mother-son-relati...","[{""cast_id"": 606673, ""person_id"": 86216, ""cast...","[{""company_id"": 5810, ""name"": ""LW Editora"", ""c..."
9,13,mighty aphrodite,1995,http://us.imdb.com/M/title-exact?Mighty%20Aphr...,0,0,0,0,0,1,...,0,0,0,0,0,0,2118082,"['human-relationship', 'horse', 'adultery', 's...","[{""cast_id"": 23915, ""person_id"": 4354, ""cast_n...","[{""company_id"": 11778, ""name"": ""Buena Vista In..."


### Training Matrix Factorization

In [33]:
start = time.time()
algo = NMF()
algo.fit(trainset)
predictions = algo.test(testset)
print("Test Set Error\n--------------")
accuracy.mae(predictions)
print("--------------\nFinished in {:.3f} sec.".format(time.time()-start))

# algo.pu -> User Matrix
# algo.qi -> Item Matrix

Test Set Error
--------------
MAE:  0.7569
--------------
Finished in 7.336 sec.
