In [1]:
import os

# Sanity check: show what sits in the current working directory so the
# 'ml-100k' data folder can be confirmed before anything is loaded from it.
print(os.listdir(os.curdir))

['.ipynb_checkpoints', 'ALS.ipynb', 'ml-100k']


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# User table: pipe-delimited; column names are supplied explicitly via `names=`.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(
    'ml-100k/u.user',
    sep='|',
    names=u_cols,
    encoding='latin-1',
)

In [4]:
# Preview the first five user rows to check the columns parsed as expected.
users.head(n=5)

Unnamed: 0,user_id,age,sex,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [5]:
# Ratings table: tab-delimited, one (user, movie, rating, timestamp) record per line.
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=r_cols,
    encoding='latin-1',
)

In [6]:
# Preview the first five rating rows.
ratings.head(n=5)

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [7]:
# Movie table: pipe-delimited. The first five columns are descriptive
# metadata; the remaining ones are per-genre flag columns.
i_cols = [
    'movie_id', 'movie_title', 'release date', 'video release date', 'IMDb URL',
] + [
    'unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
items = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    names=i_cols,
    encoding='latin-1',
)

In [8]:
# Preview the first five movie rows.
items.head(n=5)

Unnamed: 0,movie_id,movie_title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [9]:
# Build one flat table: ratings joined onto movie metadata, then onto the
# rater's demographics. Both joins are inner joins (the pandas default).
# The join keys are named explicitly so the merges cannot silently switch to
# a different (or multi-column) key if the frames ever share another column name.
dataset = (
    items
    .merge(ratings, on='movie_id')
    .merge(users, on='user_id')
)
dataset.head()

Unnamed: 0,movie_id,movie_title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Thriller,War,Western,user_id,rating,unix_timestamp,age,sex,occupation,zip_code
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,308,4,887736532,60,M,retired,95076
1,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,308,5,887737890,60,M,retired,95076
2,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,1,0,0,308,4,887739608,60,M,retired,95076
3,7,Twelve Monkeys (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Twelve%20Monk...,0,0,0,0,0,...,0,0,0,308,4,887738847,60,M,retired,95076
4,8,Babe (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Babe%20(1995),0,0,0,0,1,...,0,0,0,308,5,887736696,60,M,retired,95076


In [10]:
import sys
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import random

from sklearn.preprocessing import MinMaxScaler

import implicit

In [11]:
# Ratings as a sparse matrix with movies on the rows and users on the columns.
# The raw movie_id / user_id values are used directly as matrix coordinates.
sparse_item_user = sparse.csr_matrix(
    (
        dataset['rating'].astype(float),
        (dataset['movie_id'], dataset['user_id']),
    )
)

In [12]:
# Same ratings with the axes swapped: users on the rows, movies on the columns.
# The raw user_id / movie_id values are used directly as matrix coordinates.
sparse_user_item = sparse.csr_matrix(
    (
        dataset['rating'].astype(float),
        (dataset['user_id'], dataset['movie_id']),
    )
)

## Initialising ALS model

In [13]:
# ALS model configuration: 20 factors, regularization 0.1, 200 iterations.
model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=200,
)



In [14]:
# Scale every stored rating by a constant factor before training; the scaled
# item-by-user matrix is what gets passed to the model.
alpha_val = 15
data_conf = (alpha_val * sparse_item_user).astype('double')

In [15]:
# Train the ALS model on the scaled item-by-user matrix (movies on rows, users on columns).
model.fit(data_conf)

100%|██████████| 200.0/200 [00:08<00:00, 23.05it/s]


# Find Similar Items

### Finding the 5 most similar movies to Twelve Monkeys (movie_id = 7)

In [16]:
# Query the trained model for the movies most similar to one reference movie.
# In the printed results the reference movie itself appears first.
n_similar = 5
item_id = 7  # movie_id 7 is 'Twelve Monkeys (1995)'
similar = model.similar_items(item_id, n_similar)

In [17]:
# Translate each returned movie id into its title; the similarity score is
# unpacked but not printed.
for idx, score in similar:
    print(dataset.loc[dataset.movie_id == idx, 'movie_title'].iloc[0])

Twelve Monkeys (1995)
Star Trek: First Contact (1996)
Toy Story (1995)
Independence Day (ID4) (1996)
Rock, The (1996)


# Find User Recommendations

In [18]:
# Ask the model for recommendations for a single user; the user-by-item
# ratings matrix is passed alongside the user id.
user_id = 300
recommended = model.recommend(
    user_id,
    sparse_user_item,
)

In [19]:
# Two independent, empty accumulators: recommended titles and their scores.
movies, scores = [], []

In [20]:
# Start from empty lists on every run so that re-executing this cell does not
# append the same recommendations a second time.
movies, scores = [], []

# Each entry of `recommended` unpacks into (movie id, score); look the id up
# in the merged table to get a human-readable title.
for idx, score in recommended:
    movies.append(dataset.loc[dataset.movie_id == idx, 'movie_title'].iloc[0])
    scores.append(score)

In [21]:
# Tabulate each recommended title next to its score.
print(pd.DataFrame({"movies": movies, "scores": scores}))

                                              movies   scores:
0                                     Contact (1997)  0.958317
1                                  Saint, The (1997)  0.950846
2                                     Volcano (1997)  0.950283
3                             Picture Perfect (1997)  0.939612
4                        English Patient, The (1996)  0.903421
5                                Dante's Peak (1997)  0.901689
6                            Devil's Own, The (1997)  0.897593
7  Austin Powers: International Man of Mystery (1...  0.892555
8                                     Titanic (1997)  0.889475
9                                       Evita (1996)  0.876286


All of the recommendations listed above are for the user with user_id 300.