# **Day-20 | Movie Recomendation System Using SVD**

# Importing the Basic Libraries

In [3]:
import numpy as np
import pandas as pd

# Importing & Parsing the Dataset as ratings & movie details

In [7]:
ratingData = pd.io.parsers.read_csv('ratings.dat',
    names = ['user_id', 'movie_id', 'rating', 'time'],
    engine = 'python', delimiter = '::')
movieData = pd.io.parsers.read_csv('movies.dat',
    names = ['movie_id', 'title', 'genre'],
    engine = 'python', delimiter = '::', encoding = 'latin-1')
print(ratingData)

        user_id  movie_id  rating       time
0             1      1193       5  978300760
1             1       661       3  978302109
2             1       914       3  978301968
3             1      3408       4  978300275
4             1      2355       5  978824291
...         ...       ...     ...        ...
218503     1325      3035       4  974778325
218504     1325      3036       4  974776814
218505     1325       900       4  974778676
218506     1325      2092       3  974777412
218507     1325       903       4  974778464

[218508 rows x 4 columns]


# Create the ratings matrix of shape (mxu)

In [8]:
ratingMatrix = np.ndarray(
    shape = (np.max(ratingData.movie_id.values), np.max(ratingData.user_id.values)),
    dtype = np.uint8)
ratingMatrix[ratingData.movie_id.values-1, ratingData.user_id.values-1] = ratingData.rating.values
print(ratingMatrix)

[[  5   9 108 ...   0  13   5]
 [ 39 205  65 ...  65   0   0]
 [128  24 103 ...  92  39 205]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0 225 ... 229   0   0]
 [  0   0   0 ...   0   0   4]]


# Subtract Mean off - Normalization

In [9]:
normalizedMatrix = ratingMatrix - np.asarray([(np.mean(ratingMatrix, 1))]).T
print(normalizedMatrix)

[[-53.28830189 -49.28830189  49.71169811 ... -58.28830189 -45.28830189
  -53.28830189]
 [-35.27773585 130.72226415  -9.27773585 ...  -9.27773585 -74.27773585
  -74.27773585]
 [ 59.01584906 -44.98415094  34.01584906 ...  23.01584906 -29.98415094
  136.01584906]
 ...
 [-27.72754717 -27.72754717 -27.72754717 ... -27.72754717 -27.72754717
  -27.72754717]
 [-28.23698113 -28.23698113 196.76301887 ... 200.76301887 -28.23698113
  -28.23698113]
 [-25.80226415 -25.80226415 -25.80226415 ... -25.80226415 -25.80226415
  -21.80226415]]


# Computing SVD

In [10]:
A = normalizedMatrix.T / np.sqrt(ratingMatrix.shape[0] - 1)
U, S, V = np.linalg.svd(A)

# Calculate cosine similiarity, sort by most similar and return the top N

In [23]:
def similar(ratingData, movie_id, top_n = 10):
  index = movie_id-1 #movie id starts from 1
  movie_row = ratingData[index, :]
  magnitude = np.sqrt(np.einsum('ij, ij -> i', ratingData, ratingData)) #Einstein Summation | traditional matrix multiplication and is equivalent to np.matmul(a,b)
  similarity = np.dot(movie_row, ratingData.T) / (magnitude[index] * magnitude)
  sort_indexes = np.argsort(-similarity) #Perform an indirectsort along the given axis (Last Axis)
  return sort_indexes[:top_n]

# Select K principal components to represent the movies, a movie_id to find recomendations and print the top_n results

In [28]:
k = 50
movie_id = 10
top_n = 5

sliced = V.T[:, :k] #Representative Data
indexes = similar(sliced, movie_id, top_n)

print('Recomendations for Movie {0}: \n'.format(
movieData[movieData.movie_id == movie_id].title.values[0]))
for id in indexes + 1:
  print(movieData[movieData.movie_id == id].title.values[0])

Recomendations for Movie GoldenEye (1995): 

GoldenEye (1995)
In the Line of Fire (1993)
Demolition Man (1993)
Hot Shots! Part Deux (1993)
Star Wars: Episode VI - Return of the Jedi (1983)
