In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the rating records from the CSV file in the working directory.
ratings = pd.read_csv(filepath_or_buffer='ratings.csv')

In [3]:
ratings

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596
...,...,...,...,...
99995,880,476,3,880175444
99996,716,204,5,879795543
99997,276,1090,1,874795795
99998,13,225,2,882399156


In [4]:
# Remove the timestamp column; only user_id / movie_id are needed to build
# the interaction matrix further down.
# This cell rebinds `ratings`, so without `errors='ignore'` a second run
# (column already gone) would raise KeyError. With it the cell is idempotent.
ratings = ratings.drop(columns=['unix_timestamp'], errors='ignore')

In [5]:
ratings

Unnamed: 0,user_id,movie_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1
...,...,...,...
99995,880,476,3
99996,716,204,5
99997,276,1090,1
99998,13,225,2


In [6]:
# Number of distinct users that appear in the ratings table.
n_user = ratings['user_id'].nunique(dropna=True)
n_user

943

In [7]:
# Number of distinct movies that appear in the ratings table.
n_movies = ratings['movie_id'].nunique(dropna=True)
n_movies

1682

## Create Sparse User-Item Matrix

In [8]:
from scipy.sparse import csr_matrix

In [9]:
## alpha is the constant weight stored for every observed (user, movie) interaction (it is a fixed hyperparameter, not a random starting value)

In [10]:
# Value written into the sparse matrix for every rating row below.
# NOTE(review): the same value is used for all rows, so the `rating` column
# itself is ignored — confirm that treating every rating as equal is intended.
alpha = 40

In [11]:
# One `alpha` entry per rating row; these become the non-zero matrix values.
data = [alpha for _ in range(len(ratings))]

In [12]:
# Build the user x movie matrix in (values, (rows, cols)) form.
# The raw ids are used directly as indices, so the shape is inferred from the
# largest user_id / movie_id (row 0 and column 0 stay empty for 1-based ids).
sparse_user_item = csr_matrix(
    (
        data,
        (ratings['user_id'].to_numpy(), ratings['movie_id'].to_numpy()),
    )
)

In [13]:
sparse_user_item.shape

(944, 1683)

In [14]:
print(sparse_user_item)

  (1, 1)	40
  (1, 2)	40
  (1, 3)	40
  (1, 4)	40
  (1, 5)	40
  (1, 6)	40
  (1, 7)	40
  (1, 8)	40
  (1, 9)	40
  (1, 10)	40
  (1, 11)	40
  (1, 12)	40
  (1, 13)	40
  (1, 14)	40
  (1, 15)	40
  (1, 16)	40
  (1, 17)	40
  (1, 18)	40
  (1, 19)	40
  (1, 20)	40
  (1, 21)	40
  (1, 22)	40
  (1, 23)	40
  (1, 24)	40
  (1, 25)	40
  :	:
  (943, 739)	40
  (943, 756)	40
  (943, 763)	40
  (943, 765)	40
  (943, 785)	40
  (943, 794)	40
  (943, 796)	40
  (943, 808)	40
  (943, 816)	40
  (943, 824)	40
  (943, 825)	40
  (943, 831)	40
  (943, 840)	40
  (943, 928)	40
  (943, 941)	40
  (943, 943)	40
  (943, 1011)	40
  (943, 1028)	40
  (943, 1044)	40
  (943, 1047)	40
  (943, 1067)	40
  (943, 1074)	40
  (943, 1188)	40
  (943, 1228)	40
  (943, 1330)	40


In [15]:
# Dense NumPy-array copy of the user-item matrix, displayed for inspection.
# (Not referenced again in the cells shown in this notebook.)

csr_user_array = sparse_user_item.toarray()
csr_user_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0, 40, 40, ...,  0,  0,  0],
       [ 0, 40,  0, ...,  0,  0,  0],
       ...,
       [ 0, 40,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0, 40, ...,  0,  0,  0]], dtype=int32)

## Similarly, create the item-user sparse matrix

In [16]:
# Transpose to movie x user and convert back to CSR format
# (the original matrix is left unchanged).
sparse_item_user = sparse_user_item.transpose().tocsr()
sparse_item_user

<1683x944 sparse matrix of type '<class 'numpy.intc'>'
	with 100000 stored elements in Compressed Sparse Row format>

In [17]:
sparse_item_user.shape

(1683, 944)

In [18]:
# Dense NumPy-array copy of the item-user matrix, displayed for inspection.
csr_item_array = sparse_item_user.toarray()
csr_item_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0, 40, 40, ..., 40,  0,  0],
       [ 0, 40,  0, ...,  0,  0, 40],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]], dtype=int32)

## Creating train/test data

In [19]:
! pip install implicit



In [20]:
from implicit.evaluation import train_test_split

In [21]:
# Hold out 20% of the stored interactions for testing.
# `random_state` fixes the random split so that Restart & Run All reproduces
# the same train/test matrices (the value 42 itself is arbitrary).
train, test = train_test_split(
    sparse_user_item,
    train_percentage=0.8,
    random_state=42,
)

In [22]:
train,test

(<944x1683 sparse matrix of type '<class 'numpy.intc'>'
 	with 80080 stored elements in Compressed Sparse Row format>,
 <944x1683 sparse matrix of type '<class 'numpy.intc'>'
 	with 19920 stored elements in Compressed Sparse Row format>)

In [23]:
print(train)

  (1, 1)	40
  (1, 3)	40
  (1, 4)	40
  (1, 5)	40
  (1, 6)	40
  (1, 7)	40
  (1, 8)	40
  (1, 9)	40
  (1, 10)	40
  (1, 11)	40
  (1, 12)	40
  (1, 13)	40
  (1, 14)	40
  (1, 15)	40
  (1, 16)	40
  (1, 17)	40
  (1, 18)	40
  (1, 19)	40
  (1, 20)	40
  (1, 21)	40
  (1, 22)	40
  (1, 25)	40
  (1, 27)	40
  (1, 28)	40
  (1, 29)	40
  :	:
  (943, 625)	40
  (943, 672)	40
  (943, 685)	40
  (943, 717)	40
  (943, 720)	40
  (943, 721)	40
  (943, 722)	40
  (943, 724)	40
  (943, 732)	40
  (943, 756)	40
  (943, 765)	40
  (943, 785)	40
  (943, 794)	40
  (943, 796)	40
  (943, 816)	40
  (943, 824)	40
  (943, 825)	40
  (943, 831)	40
  (943, 928)	40
  (943, 941)	40
  (943, 1011)	40
  (943, 1047)	40
  (943, 1074)	40
  (943, 1188)	40
  (943, 1330)	40


## Building ALS model

In [24]:
import implicit

In [25]:
# ALS model with 100 latent factors, L2 regularization 0.1 and 20 iterations.
# `random_state` seeds the random factor initialisation so that repeated runs
# give the same recommendations (the value 42 itself is arbitrary).
model = implicit.als.AlternatingLeastSquares(
    factors=100,
    regularization=0.1,
    iterations=20,
    calculate_training_loss=False,
    random_state=42,
)
model

  check_blas_config()


<implicit.cpu.als.AlternatingLeastSquares at 0x1d24f426810>

In [26]:
## Train the model on the training split only (`train` from train_test_split);
## the held-out `test` matrix is not passed to the model here.
model.fit(train)

  0%|          | 0/20 [00:00<?, ?it/s]

## Generating recommendation for user_id

In [27]:
# User to generate recommendations for. The raw id is also used below as
# the row index into sparse_user_item.
user_id = 117

In [28]:
# Top-30 recommendations for the chosen user. The second argument is that
# user's row of the full user-item matrix.
model.recommend(
    user_id,
    sparse_user_item[user_id],
    N=30,
)

(array([ 294,  123,   64,   42,    3, 1011,  418,   21,  235, 1079,   22,
          79,  204,  183,  682,  100,  831,   89,  923,  250,  472,  234,
         230,  248,  343,  273,  125,  544,  165,   17]),
 array([1.3273053 , 1.1047777 , 1.0079306 , 0.9665003 , 0.9392998 ,
        0.92766786, 0.8937741 , 0.88883597, 0.88769007, 0.8795671 ,
        0.87540656, 0.83892107, 0.8276936 , 0.8069378 , 0.8018045 ,
        0.7813242 , 0.76528573, 0.7647131 , 0.7460707 , 0.74467945,
        0.715199  , 0.7139208 , 0.70412743, 0.69220793, 0.6620835 ,
        0.6602724 , 0.6520808 , 0.63974744, 0.63696694, 0.6326476 ],
       dtype=float32))

In [29]:
# Keep the top-10 result so it can be tabulated and joined with titles.
output_117 = model.recommend(
    user_id,
    sparse_user_item[user_id],
    N=10,
)

In [30]:
type(output_117)

tuple

In [31]:
# The recommendation result is a 2-tuple: element 0 holds the movie ids and
# element 1 the matching ALS scores. Put them side by side in a table.
output_df = pd.DataFrame(
    data={
        'movie_id': output_117[0],
        'als_score': output_117[1],
    }
)
output_df

Unnamed: 0,movie_id,als_score
0,294,1.327305
1,123,1.104778
2,64,1.007931
3,42,0.9665
4,3,0.9393
5,1011,0.927668
6,418,0.893774
7,21,0.888836
8,235,0.88769
9,1079,0.879567


### Get movies dataset to merge

In [32]:
# Load the movie metadata (titles, genres) from the CSV in the working directory.
movies = pd.read_csv(filepath_or_buffer='movie_genres.csv')

In [33]:
movies.head()

Unnamed: 0,movie_id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [34]:
# Keep only the id and title; the other columns are not needed for the merge.
movies = movies.loc[:, ['movie_id', 'movie title']]

In [35]:
# Attach a title to each recommended movie. The left join keeps every
# recommendation even if a title is missing.
output_df.merge(movies, how='left', on='movie_id')

Unnamed: 0,movie_id,als_score,movie title
0,294,1.327305,Liar Liar (1997)
1,123,1.104778,"Frighteners, The (1996)"
2,64,1.007931,"Shawshank Redemption, The (1994)"
3,42,0.9665,Clerks (1994)
4,3,0.9393,Four Rooms (1995)
5,1011,0.927668,2 Days in the Valley (1996)
6,418,0.893774,Cinderella (1950)
7,21,0.888836,Muppet Treasure Island (1996)
8,235,0.88769,Mars Attacks! (1996)
9,1079,0.879567,Joe's Apartment (1996)


## Generating recommendation for movie_id

In [36]:
# Movie to find neighbours for, and how many similar items to request.
item_id, n_similar = 11, 10

In [37]:
# Most similar items to `item_id`. As the output below shows, the query item
# itself comes back first with score 1.0.
similar = model.similar_items(item_id, N=n_similar)

In [38]:
similar

(array([ 11,  55, 195, 174, 423,  22,  89,  12,  99, 218]),
 array([1.        , 0.45690063, 0.34477362, 0.33461124, 0.33310303,
        0.32239783, 0.2836398 , 0.2800971 , 0.27688897, 0.2551469 ],
       dtype=float32))

In [39]:
similar[0]

array([ 11,  55, 195, 174, 423,  22,  89,  12,  99, 218])

In [40]:
# Tabulate the (ids, scores) tuple returned by similar_items.
similar_df = pd.DataFrame(
    data={
        'movie_id': similar[0],
        'score': similar[1],
    }
)

In [41]:
similar_df

Unnamed: 0,movie_id,score
0,11,1.0
1,55,0.456901
2,195,0.344774
3,174,0.334611
4,423,0.333103
5,22,0.322398
6,89,0.28364
7,12,0.280097
8,99,0.276889
9,218,0.255147


## Merge recommendation output with Movies Data

In [42]:
# Attach a title to each similar movie. The left join keeps every row of
# similar_df even if a title is missing.
merged_similar = similar_df.merge(movies, how='left', on='movie_id')

In [43]:
merged_similar

Unnamed: 0,movie_id,score,movie title
0,11,1.0,Seven (Se7en) (1995)
1,55,0.456901,"Professional, The (1994)"
2,195,0.344774,"Terminator, The (1984)"
3,174,0.334611,Raiders of the Lost Ark (1981)
4,423,0.333103,E.T. the Extra-Terrestrial (1982)
5,22,0.322398,Braveheart (1995)
6,89,0.28364,Blade Runner (1982)
7,12,0.280097,"Usual Suspects, The (1995)"
8,99,0.276889,Snow White and the Seven Dwarfs (1937)
9,218,0.255147,Cape Fear (1991)
