In [None]:
%matplotlib inline  
# To make data visualisations display in Jupyter Notebooks
import numpy as np   # linear algebra
import pandas as pd  # Data processing, Input & Output load
import matplotlib.pyplot as plt # Visuvalization & plotting
import seaborn as sns # Also for Data visuvalization

from sklearn.metrics.pairwise import cosine_similarity  # Compute cosine similarity between samples in X and Y.
from scipy import sparse  #  sparse matrix package for numeric data.
from scipy.sparse.linalg import svds # svd algorithm

import warnings   # To avoid warning messages in the code run
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")
# Remove pandas' display limits: no row cap, no column cap, auto-detected width.
for _display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)

In [None]:
# Load the two input tables from CSV files in the current working directory.
Rating = pd.read_csv('Ratings.csv')
# The movie details file is decoded as latin-1 instead of pandas' default UTF-8.
Movie_D = pd.read_csv('Movie details.csv',encoding='latin-1')  # movie details

In [None]:
# (rows, columns) of the ratings table.
Rating.shape

In [None]:
# Preview the first five rows of the ratings table.
Rating.head()

In [None]:
# Overwrite the ratings column labels positionally; later cells refer to
# 'user_id', 'movie_id' and 'rating' by these names.
Rating.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

In [None]:
# (rows, columns) of the movie details table.
Movie_D.shape

In [None]:
# Preview the first five rows of the movie details table.
Movie_D.head()

In [None]:
# Overwrite the movie-detail column labels positionally (24 names); later cells
# rely on 'movie_id' and 'movie_title'.
# NOTE(review): several labels carry leading/trailing spaces (e.g. 'Action ',
# ' Documentary ', 'video_release_date '), so they can only be selected with
# that exact spacing — confirm whether this is intentional before normalising.
Movie_D.columns = ['movie_id', 'movie_title', 'release_date', 'video_release_date ',
       'IMDb_URL', 'unknown', 'Action ', 'Adventure', 'Animation',
       'Childrens', 'Comedy ', 'Crime ', ' Documentary ', 'Drama',
       ' Fantasy', 'Film-Noir ', 'Horror ', 'Musical', 'Mystery',
       ' Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

In [None]:
# Attach the movie details to every rating (inner join on movie_id), then
# summarise the numeric columns of the combined table.
Movie_Rating = Rating.merge(Movie_D, on='movie_id')
Movie_Rating.describe()

In [None]:
# Number of distinct users and distinct movies present in the merged ratings.
n_users = len(Movie_Rating['user_id'].unique())
n_items = len(Movie_Rating['movie_id'].unique())
print(n_items, n_users)  # movies first, then users

In [None]:
# Average rating per movie title, kept as a one-column frame named 'rating';
# the preview shows the five titles with the highest mean.
Movie_Stats = Movie_Rating.groupby('movie_title')['rating'].mean().to_frame()
Movie_Stats.sort_values(by='rating', ascending=False).head()

In [None]:
# Number of ratings each movie title received, stored next to its mean rating;
# the preview shows the five most-rated titles.
Movie_Stats['Count_of_ratings'] = Movie_Rating.groupby('movie_title')['rating'].count()
Movie_Stats.sort_values(by='Count_of_ratings', ascending=False).head()

In [None]:
# Histogram (50 bins) of how many ratings each movie title received.
Movie_Stats['Count_of_ratings'].hist(bins=50)

In [None]:
# Joint plot of each title's mean rating (x) against its number of ratings (y).
sns.jointplot(x='rating', y='Count_of_ratings', data=Movie_Stats)

In [None]:
# Users-by-titles rating matrix: one row per user_id, one column per
# movie_title. Cells without a rating are NaN; several ratings landing in the
# same cell are averaged (pivot_table's default aggregation).
User_movie_Rating = pd.pivot_table(
    Movie_Rating,
    values='rating',
    index='user_id',
    columns='movie_title',
)
User_movie_Rating.head()

In [None]:
# (number of users, number of distinct movie titles) in the pivoted matrix.
User_movie_Rating.shape

In [None]:
# Ratings column of a single title: one entry per user, NaN where that user
# has no rating for it.
# NOTE(review): display.max_rows is set to None at the top of the notebook, so
# this prints every row.
User_movie_Rating['Air Force One (1997)']

In [None]:
# Correlate every title's ratings column with the ratings column of
# 'Air Force One (1997)' (corrwith's default, Pearson correlation).
reference_ratings = User_movie_Rating['Air Force One (1997)']
Similarity = User_movie_Rating.corrwith(reference_ratings)
Similarity.head()

In [None]:
# Wrap the correlations in a one-column frame and show the ten highest values.
corr_similar = Similarity.to_frame(name='Correlation')
corr_similar.sort_values(by='Correlation', ascending=False).head(10)

In [None]:
# Add each title's rating count next to its correlation (index-aligned join),
# then show the ten highest correlations together with their counts.
rating_counts = Movie_Stats['Count_of_ratings']
corr_similar_num_of_rating = corr_similar.join(rating_counts)
corr_similar_num_of_rating.sort_values(by='Correlation', ascending=False).head(10)

In [None]:
# Keep only titles with more than 50 ratings, then show the five highest
# correlations among them.
has_enough_ratings = corr_similar_num_of_rating['Count_of_ratings'] > 50
corr_similar_num_of_rating.loc[has_enough_ratings].sort_values(by='Correlation', ascending=False).head()

In [None]:
# Reduce the merged table to the three columns needed for the rating matrix.
Movie_cosine = Movie_Rating.loc[:, ['user_id', 'movie_id', 'rating']]
Movie_cosine.head()

In [None]:
# Build the sparse user-by-movie rating matrix R (rows = users, columns = movies).
#
# Fixes compared with the previous version of this cell:
#   * movie_id was passed as the row coordinate and user_id as the column
#     coordinate, producing a movie-by-user matrix even though the message
#     below says "user by movie" and the following cells compute
#     cosine_similarity(R.T) and use the result to index Movie_D.
#   * raw ids were used directly as matrix positions, so position i only lined
#     up with Movie_D.iloc[i] if ids happened to be 0-based and contiguous.
data = Movie_cosine['rating'].to_numpy()

# Row position = rank of the user_id among the sorted distinct user ids.
row, user_ids = pd.factorize(Movie_cosine['user_id'], sort=True)

# Column position = row position of the movie inside Movie_D, so that column j
# of R describes the movie stored at Movie_D.iloc[j]. Every movie_id here comes
# from the inner merge with Movie_D, so each one is found; get_indexer requires
# movie_id to be unique in Movie_D and raises otherwise.
col = pd.Index(Movie_D['movie_id']).get_indexer(Movie_cosine['movie_id'])

# An explicit shape keeps one column per Movie_D row even if the last movies
# have no ratings. Duplicate (user, movie) entries are summed by the COO -> CSR
# conversion.
R = sparse.coo_matrix((data, (row, col)), shape=(len(user_ids), len(Movie_D))).tocsr()
print('{0}x{1} user by movie matrix'.format(*R.shape))

In [None]:
# cosine_similarity compares the rows of its input pairwise, so passing the
# transpose of R yields a square matrix with one row/column per column of R.
find_similarities = cosine_similarity(R.transpose())
print(find_similarities.shape)

In [None]:
# Positions of all entries in similarity row 4, ordered from the smallest to
# the largest similarity value (argsort sorts ascending).
find_similarities[4].argsort()

In [None]:
# argsort sorts ascending, so the last five positions are the ones holding the
# five largest similarity values in row 4.
find_similarities[4].argsort()[-5:]

In [None]:
# Row at integer position 4 of the movie details table.
Movie_D.iloc[4]

In [None]:
# Look up, by integer position in Movie_D, the five positions with the largest
# similarity values in row 4.
# NOTE(review): this presumes position i of find_similarities refers to the
# same movie as Movie_D.iloc[i] — confirm against how R was built.
Movie_D.iloc[find_similarities[4].argsort()[-5:]]

In [None]:
# Replace missing ratings (NaN) with 0 so the matrix has no gaps.
# From here on a 0 stands for "no rating"; the last cell of the notebook
# selects unrated titles with `== 0.0`.
df_matrix = User_movie_Rating.fillna(0)

In [None]:
# (number of users, number of distinct movie titles) of the zero-filled matrix.
df_matrix.shape

In [None]:
# Zero-filled ratings of the user at integer position 2 (the third row).
df_matrix.iloc[2]

In [None]:
# Truncated SVD of the users-by-titles matrix: df_matrix ~= U @ diag(SIGMA) @ VT.
# svds is documented for ndarrays, sparse matrices and LinearOperators, so the
# DataFrame is converted to a float ndarray explicitly instead of relying on
# implicit coercion (which newer SciPy releases may reject).
# k=6 is svds' default number of singular values, spelled out here so the
# rank of the approximation is visible; the original note mentions k=20 as an
# alternative.
U, SIGMA, VT = svds(df_matrix.to_numpy(dtype=float), k=6)
U.shape, SIGMA.shape, VT.shape

In [None]:
# svds returns the singular values as a 1-D array; the reconstruction below
# needs them as a diagonal matrix. np.diag applied to a 2-D array extracts its
# diagonal again, so the conversion is guarded on ndim to keep this cell safe
# to re-run.
if SIGMA.ndim == 1:
    SIGMA = np.diag(SIGMA)
U.shape, SIGMA.shape, VT.shape

In [None]:
# Low-rank reconstruction of the rating matrix from the SVD factors.
reconstructed_df = np.dot(np.dot(U, SIGMA), VT)
# Carry over both axes of df_matrix so that predictions_df and df_matrix share
# the same user_id index and movie_title columns; without index= the rows
# would be relabelled 0..n-1 and label-based lookups would disagree between
# the two frames.
predictions_df = pd.DataFrame(
    reconstructed_df,
    index=df_matrix.index,
    columns=df_matrix.columns,
)
# Predicted scores for the user at integer position 2.
predictions_df.iloc[2]

In [None]:
# For the user at integer position 2: take the predicted scores of the titles
# whose stored rating is 0 (i.e. unrated after the zero fill) and show the five
# highest.
user_position = 2
predicted_scores = predictions_df.iloc[user_position]
is_unrated = df_matrix.iloc[user_position] == 0.0
predicted_scores[is_unrated].sort_values(ascending=False).head()