# **Movie Recommendation System Using Singular Value Decomposition (SVD)**

## **Importing Libraries**

In [None]:
import numpy as np
import pandas as pd

## **Importing & Parsing the Dataset as Ratings and Movies Details**

In [None]:
# Both files are '::'-separated with no header row, so column names are supplied
# explicitly. A multi-character separator requires the python parsing engine.
# The public pd.read_csv entry point is used rather than the pd.io.parsers path.
ratingData = pd.read_csv(
    'ratings.dat',
    sep='::',
    header=None,
    names=['user_id', 'movie_id', 'rating', 'time'],
    engine='python')
# The movies file is decoded as latin1.
movieData = pd.read_csv(
    'movies.dat',
    sep='::',
    header=None,
    names=['movie_id', 'title', 'genre'],
    engine='python',
    encoding='latin1')
print(ratingData.head(5))
print(movieData.head(5))

## **Create the Rating Matrix of shape (m x u): m movies by u users**

In [None]:
# Missing-value count for every column
print(ratingData.isnull().sum())

id_columns = ('movie_id', 'user_id')

# Number of rows carrying a negative id, for each id column
for column in id_columns:
  print(ratingData[column].lt(0).sum())

# Largest id in each column before any cleaning
for column in id_columns:
  print(np.max(ratingData[column].values))

# Discard every row that contains a NaN
ratingData = ratingData.dropna()

# Keep only rows whose movie id and user id are both strictly positive
# (if any row is removed here, the data source should be investigated)
has_valid_ids = ratingData['movie_id'].gt(0) & ratingData['user_id'].gt(0)
ratingData = ratingData[has_valid_ids]

# Largest id in each column after cleaning
for column in id_columns:
  print(np.max(ratingData[column].values))

# Ids are 1-based, so id - 1 is the zero-based row/column position.
# Cast to int explicitly: the id columns can be float-typed (e.g. when the raw
# data contained NaNs before dropna), and float arrays cannot be used as indices.
movie_idx = ratingData.movie_id.values.astype(int) - 1
user_idx = ratingData.user_id.values.astype(int) - 1

# Allocate a zero-filled (movies x users) matrix so every (movie, user) pair
# without a rating is 0. np.zeros is required here: np.ndarray(shape=...) returns
# uninitialized memory, which would leave arbitrary values in the unrated cells.
ratingMatrix = np.zeros(
    shape=(movie_idx.max() + 1, user_idx.max() + 1),
    dtype=np.uint8)

# Scatter each rating into its (movie, user) cell.
ratingMatrix[movie_idx, user_idx] = ratingData.rating.values

print(ratingMatrix)

## **Subtract the Mean - Normalization**

In [None]:
# Mean of each row of ratingMatrix (one row per movie), shaped as a column
# vector so that it broadcasts across all user columns when subtracted.
rowMeans = np.mean(ratingMatrix, axis=1).reshape(-1, 1)
normalizedMatrix = ratingMatrix - rowMeans
print(normalizedMatrix)

## **Computing SVD**

In [None]:
# Scale the transposed (users x movies) centered matrix by sqrt(rows of ratingMatrix - 1).
scale = np.sqrt(ratingMatrix.shape[0] - 1)
A = normalizedMatrix.T / scale

# np.linalg.svd returns the right singular vectors already transposed, so the
# rows of V (not its columns) are the singular vectors; S holds the singular values.
U, S, V = np.linalg.svd(A)

## **Calculate Cosine Similarity, Sort by most Similar and return the Top N**

In [None]:
def similar(ratingData, movie_id, top_n):
  """Return the row indices of the `top_n` rows most cosine-similar to one row.

  Args:
    ratingData: 2-D numpy array with one feature vector per row. Row `i` is
      treated as the vector of the movie whose id is `i + 1`.
    movie_id: 1-based id of the query movie.
    top_n: number of indices to return.

  Returns:
    1-D array of at most `top_n` zero-based row indices, ordered from most to
    least similar. The query row itself is normally the first entry. Rows with
    zero magnitude have no defined cosine similarity and are ranked last; if the
    query row has zero magnitude, every row is undefined and indices come back
    in their original order.

  Raises:
    IndexError: if `movie_id` does not map to an existing row. This includes
      ids <= 0, which would otherwise silently wrap around to a row counted
      from the end of the array.
  """
  index = movie_id - 1
  if not 0 <= index < ratingData.shape[0]:
    raise IndexError(
        'movie_id {0} is out of range for {1} rows'.format(movie_id, ratingData.shape[0]))
  movie_row = ratingData[index, :]
  # 'ij, ij -> i' sums the element-wise product along each row, i.e. the squared
  # Euclidean norm of every row (a row-wise dot product, not a matrix product).
  magnitude = np.sqrt(np.einsum('ij, ij -> i', ratingData, ratingData))
  denominator = magnitude[index] * magnitude
  # Start at -inf so rows skipped by the division below sort to the end.
  similarity = np.full(magnitude.shape, -np.inf)
  # Divide only where the denominator is positive, avoiding 0/0 -> NaN.
  np.divide(np.dot(movie_row, ratingData.T), denominator,
            out=similarity, where=denominator > 0)
  # Indirect sort on the negated scores gives descending similarity; a stable
  # sort keeps tied rows in their original order so the result is deterministic.
  sort_indexes = np.argsort(-similarity, kind='stable')
  return sort_indexes[:top_n]

## **Select k principal components to Represent the Movies**
## **movie_id to find Recommendations**
## **top_n to print Top Results**

In [None]:
k = 50        # number of leading principal components used to represent each movie
movie_id = 2  # 1-based id of the movie to find recommendations for
top_n = 15    # number of results to print

# The rows of V are the right singular vectors, so V.T has one row per movie;
# its first k columns are that movie's coordinates on the top-k components.
sliced = V.T[:, :k]
indexes = similar(sliced, movie_id, top_n)

# Build the id -> title lookup once instead of scanning movieData per title.
# drop_duplicates keeps the first row per id, matching a "first match" lookup.
titles = movieData.drop_duplicates('movie_id').set_index('movie_id')['title']

print('Recommendations for Movie {0}: \n'.format(
 titles.get(movie_id, 'Unknown title (movie_id {0})'.format(movie_id))))
# similar() returns zero-based row indices; + 1 converts them back to movie ids.
for recommended_id in indexes + 1:
  # Ids with no entry in movieData get a placeholder instead of raising IndexError.
  print(titles.get(recommended_id, 'Unknown title (movie_id {0})'.format(recommended_id)))
