# ALS [Alternating Least Squares]

## Building a Recommendation System Using ALS on the MovieLens Dataset

Import the required libraries.

In [1]:
import pandas as pd

In [2]:

# Read the MovieLens ratings table from the CSV under ../hybrid/data.
ratings_csv = '../hybrid/data/ratings.csv'
ratings = pd.read_csv(ratings_csv)


In [3]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
ratings.shape

(100000, 4)

In [5]:
# Remove the timestamp column in place; it is not used by any cell shown
# in this notebook.
ratings.drop(columns=['unix_timestamp'], inplace=True)

In [6]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [7]:
ratings.describe()

Unnamed: 0,user_id,movie_id,rating
count,100000.0,100000.0,100000.0
mean,462.48475,425.53013,3.52986
std,266.61442,330.798356,1.125674
min,1.0,1.0,1.0
25%,254.0,175.0,3.0
50%,447.0,322.0,4.0
75%,682.0,631.0,4.0
max,943.0,1682.0,5.0


In [8]:
ratings['user_id'].nunique()

943

In [9]:
ratings['movie_id'].nunique()

1682

## Create Sparse User-Item Matrix

In [10]:
from scipy.sparse import csr_matrix

In [11]:
# Constant stored in every observed (user, movie) cell of the interaction
# matrix built from the ratings.
alpha = 40

In [12]:
ratings.shape[0]

100000

In [13]:
# Build the user x movie interaction matrix: every rated (user_id, movie_id)
# pair is stored with the same value, alpha. The ids are used directly as
# row / column indices.
# NOTE(review): the 'rating' column is not used here, so a 1-star and a
# 5-star rating are stored identically.
interaction_values = [alpha] * len(ratings)
row_col_index = (ratings['user_id'], ratings['movie_id'])
sparse_user_item = csr_matrix((interaction_values, row_col_index))

In [14]:
sparse_user_item

<944x1683 sparse matrix of type '<class 'numpy.int64'>'
	with 100000 stored elements in Compressed Sparse Row format>

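### Shape: 944x1683. The user and movie IDs start at 1 and are used directly as row/column indices, so the matrix has one extra, unused row and column at index 0 (943 users + 1, 1682 movies + 1).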

### Convert to Array

In [15]:
# Expand the sparse user x movie matrix into a dense array (unset cells
# become 0) so it can be inspected directly.
csr_user_array = sparse_user_item.toarray()

In [16]:
csr_user_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0, 40, 40, ...,  0,  0,  0],
       [ 0, 40,  0, ...,  0,  0,  0],
       ...,
       [ 0, 40,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0, 40, ...,  0,  0,  0]])

In [17]:
# Number of rows, number of columns, and the cell at row 1 / column 1.
(*csr_user_array.shape, csr_user_array[1, 1])

(944, 1683, 40)

In [18]:
# Largest value stored in row 1 of the dense user x movie array.
csr_user_array[1].max()

40

### A CSR (Compressed Sparse Row) matrix stores only the non-zero entries — here, the cells whose value is 40.

In [19]:
print(sparse_user_item)

  (1, 1)	40
  (1, 2)	40
  (1, 3)	40
  (1, 4)	40
  (1, 5)	40
  (1, 6)	40
  (1, 7)	40
  (1, 8)	40
  (1, 9)	40
  (1, 10)	40
  (1, 11)	40
  (1, 12)	40
  (1, 13)	40
  (1, 14)	40
  (1, 15)	40
  (1, 16)	40
  (1, 17)	40
  (1, 18)	40
  (1, 19)	40
  (1, 20)	40
  (1, 21)	40
  (1, 22)	40
  (1, 23)	40
  (1, 24)	40
  (1, 25)	40
  :	:
  (943, 739)	40
  (943, 756)	40
  (943, 763)	40
  (943, 765)	40
  (943, 785)	40
  (943, 794)	40
  (943, 796)	40
  (943, 808)	40
  (943, 816)	40
  (943, 824)	40
  (943, 825)	40
  (943, 831)	40
  (943, 840)	40
  (943, 928)	40
  (943, 941)	40
  (943, 943)	40
  (943, 1011)	40
  (943, 1028)	40
  (943, 1044)	40
  (943, 1047)	40
  (943, 1067)	40
  (943, 1074)	40
  (943, 1188)	40
  (943, 1228)	40
  (943, 1330)	40


### Create item-user sparse matrix

In [20]:
# Flip the user x movie matrix into movie x user layout and store the result
# in CSR format again.
sparse_item_user = sparse_user_item.transpose().tocsr()

In [21]:
sparse_item_user

<1683x944 sparse matrix of type '<class 'numpy.int64'>'
	with 100000 stored elements in Compressed Sparse Row format>

### Shape: 1683x944 — the transpose of the user-item matrix (1682 movies and 943 users, plus one unused row and column at index 0 because the IDs start at 1).

In [22]:
# Expand the sparse movie x user matrix into a dense array (unset cells
# become 0) so it can be inspected directly.
csr_item_array = sparse_item_user.toarray()

In [23]:
csr_item_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0, 40, 40, ..., 40,  0,  0],
       [ 0, 40,  0, ...,  0,  0, 40],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])

In [24]:
# Number of rows, number of columns, and the cell at row 1 / column 1.
(*csr_item_array.shape, csr_item_array[1, 1])

(1683, 944, 40)

In [25]:
print(sparse_item_user)

  (1, 1)	40
  (1, 2)	40
  (1, 5)	40
  (1, 6)	40
  (1, 10)	40
  (1, 13)	40
  (1, 15)	40
  (1, 16)	40
  (1, 17)	40
  (1, 18)	40
  (1, 20)	40
  (1, 21)	40
  (1, 23)	40
  (1, 25)	40
  (1, 26)	40
  (1, 38)	40
  (1, 41)	40
  (1, 42)	40
  (1, 43)	40
  (1, 44)	40
  (1, 45)	40
  (1, 49)	40
  (1, 54)	40
  (1, 56)	40
  (1, 57)	40
  :	:
  (1662, 782)	40
  (1663, 782)	40
  (1664, 782)	40
  (1664, 839)	40
  (1664, 870)	40
  (1664, 880)	40
  (1665, 782)	40
  (1666, 782)	40
  (1667, 782)	40
  (1668, 782)	40
  (1669, 782)	40
  (1670, 782)	40
  (1671, 787)	40
  (1672, 828)	40
  (1672, 896)	40
  (1673, 835)	40
  (1674, 840)	40
  (1675, 851)	40
  (1676, 851)	40
  (1677, 854)	40
  (1678, 863)	40
  (1679, 863)	40
  (1680, 863)	40
  (1681, 896)	40
  (1682, 916)	40


## Create train and test data

In [26]:
from implicit.evaluation import train_test_split

In [27]:

# Partition the stored movie x user interactions into a train and a test
# matrix, with train_percentage=0.8 going to training.
# NOTE(review): no seed is passed, so the split presumably differs between
# runs — confirm whether the installed `implicit` version accepts one.
# NOTE(review): `test` is not used by any later cell shown here.
train, test = train_test_split(sparse_item_user, train_percentage=0.8)

In [28]:
train

<1683x944 sparse matrix of type '<class 'numpy.int64'>'
	with 79914 stored elements in Compressed Sparse Row format>

In [29]:
test

<1683x944 sparse matrix of type '<class 'numpy.int64'>'
	with 20086 stored elements in Compressed Sparse Row format>

## Building ALS Model

In [30]:
import implicit

In [31]:
# If `implicit` is not installed, run this before the import above:  !pip install implicit

In [33]:

# ALS hyper-parameters, collected in one place and passed through unchanged.
als_params = dict(
    factors=100,
    regularization=0.1,
    iterations=20,
    calculate_training_loss=False,
)
model = implicit.als.AlternatingLeastSquares(**als_params)


In [34]:
model

<implicit.als.AlternatingLeastSquares at 0x7f97438596d0>

### Train the Model

In [35]:
# Fit the model on the training split, which is laid out movie x user
# (1683 x 944).
# NOTE(review): which orientation fit() expects depends on the `implicit`
# release (newer ones reportedly take user x item) — confirm against the
# installed version.
model.fit(train)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




## Generating recommendations for a user_id

In [37]:
user_id = 117

In [38]:
# Recommend movies for the chosen user; the full user x movie matrix (not
# just the training split) is passed as the user's interaction history.
model.recommend(user_id, sparse_user_item)

[(472, 1.0165672),
 (591, 0.9528069),
 (191, 0.9378897),
 (28, 0.91996574),
 (354, 0.87519),
 (294, 0.8659271),
 (69, 0.8521646),
 (825, 0.8492931),
 (831, 0.8246698),
 (269, 0.82336247)]

In [39]:
# Same call as before, but request 30 recommendations via N.
model.recommend(user_id, sparse_user_item, N=30)

[(472, 1.0165672),
 (591, 0.9528069),
 (191, 0.9378897),
 (28, 0.91996574),
 (354, 0.87519),
 (294, 0.8659271),
 (69, 0.8521646),
 (825, 0.8492931),
 (831, 0.8246698),
 (269, 0.82336247),
 (204, 0.8221911),
 (100, 0.81699705),
 (318, 0.8091815),
 (250, 0.80272377),
 (685, 0.80100846),
 (871, 0.7377837),
 (826, 0.72101486),
 (469, 0.71826744),
 (125, 0.71616775),
 (79, 0.7057934),
 (255, 0.70449966),
 (22, 0.70264304),
 (471, 0.6971425),
 (841, 0.69242173),
 (1033, 0.6800951),
 (1048, 0.66178155),
 (89, 0.6580378),
 (234, 0.65458477),
 (71, 0.649168),
 (273, 0.6378845)]

In [40]:
# Keep the default-length recommendation list for the chosen user (10
# (movie_id, score) pairs in the recorded output) for tabulation.
output = model.recommend(user_id, sparse_user_item)

In [41]:
output

[(472, 1.0165672),
 (591, 0.9528069),
 (191, 0.9378897),
 (28, 0.91996574),
 (354, 0.87519),
 (294, 0.8659271),
 (69, 0.8521646),
 (825, 0.8492931),
 (831, 0.8246698),
 (269, 0.82336247)]

In [42]:
# Tabulate the (movie_id, score) pairs so they can be joined with the
# movie titles on 'movie_id'.
output_df = pd.DataFrame(output, columns=['movie_id', 'als_score'])

In [43]:
output_df

Unnamed: 0,movie_id,als_score
0,472,1.016567
1,591,0.952807
2,191,0.93789
3,28,0.919966
4,354,0.87519
5,294,0.865927
6,69,0.852165
7,825,0.849293
8,831,0.82467
9,269,0.823362


# Load Movies Data

In [44]:
# Read the movie metadata (titles, release dates, genre flags) from the CSV
# under ../hybrid/data.
movies_csv = '../hybrid/data/movie_genres.csv'
movies = pd.read_csv(movies_csv)

In [45]:
movies.head()

Unnamed: 0,movie_id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [46]:
# Keep only the id and title columns; the remaining metadata columns are not
# needed for the title lookup.
title_columns = ['movie_id', 'movie title']
movies = movies[title_columns]

In [47]:
movies.head()

Unnamed: 0,movie_id,movie title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


# Merge recommendation output with Movies Data

In [48]:
# Attach the movie title to each recommendation; a left join keeps every
# recommended row even if no title matches.
merged = output_df.merge(movies, on='movie_id', how='left')

In [49]:
merged

Unnamed: 0,movie_id,als_score,movie title
0,472,1.016567,Dragonheart (1996)
1,591,0.952807,Primal Fear (1996)
2,191,0.93789,Amadeus (1984)
3,28,0.919966,Apollo 13 (1995)
4,354,0.87519,"Wedding Singer, The (1998)"
5,294,0.865927,Liar Liar (1997)
6,69,0.852165,Forrest Gump (1994)
7,825,0.849293,"Arrival, The (1996)"
8,831,0.82467,Escape from L.A. (1996)
9,269,0.823362,"Full Monty, The (1997)"


## Finding similar movies for a movie_id

In [50]:

# Movie to query (movie_id 11 is "Seven (Se7en) (1995)" in the recorded
# output) and the number of results to request.
item_id = 11
n_similar = 10

In [51]:

# Ask the fitted model for the n_similar items most similar to item_id.
# In the recorded output the first result is the query movie itself
# (score 1.0), so the list holds n_similar - 1 other movies.
similar = model.similar_items(item_id, n_similar)


In [52]:
similar

[(11, 1.0),
 (64, 0.290379),
 (470, 0.2863752),
 (691, 0.28583243),
 (570, 0.27686635),
 (771, 0.27666548),
 (22, 0.27325064),
 (195, 0.26999053),
 (200, 0.26863372),
 (1163, 0.2655313)]

In [53]:
type(similar)

list

In [54]:
similar[0]

(11, 1.0)

In [55]:
# Tabulate the (movie_id, score) pairs so they can be joined with the
# movie titles on 'movie_id'.
similar_df = pd.DataFrame(similar, columns=['movie_id', 'score'])

In [56]:
similar_df

Unnamed: 0,movie_id,score
0,11,1.0
1,64,0.290379
2,470,0.286375
3,691,0.285832
4,570,0.276866
5,771,0.276665
6,22,0.273251
7,195,0.269991
8,200,0.268634
9,1163,0.265531


# Merge similar-movies output with Movies Data

In [57]:
# Attach the movie title to each similar-movie row; a left join keeps every
# row even if no title matches.
merged_similar = similar_df.merge(movies, on='movie_id', how='left')

In [58]:
merged_similar

Unnamed: 0,movie_id,score,movie title
0,11,1.0,Seven (Se7en) (1995)
1,64,0.290379,"Shawshank Redemption, The (1994)"
2,470,0.286375,Tombstone (1993)
3,691,0.285832,Dark City (1998)
4,570,0.276866,Wyatt Earp (1994)
5,771,0.276665,Johnny Mnemonic (1995)
6,22,0.273251,Braveheart (1995)
7,195,0.269991,"Terminator, The (1984)"
8,200,0.268634,"Shining, The (1980)"
9,1163,0.265531,"Portrait of a Lady, The (1996)"
