In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## **Importing libraries** <a class="anchor"  id="h2"></a>

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

# **Reading Dataset** <a class="anchor"  id="h3"></a>

In [3]:
ratings = pd.read_csv("/content/drive/MyDrive/ratings.csv")
movies = pd.read_csv("/content/drive/MyDrive/movies.csv")

In [4]:
ratings = ratings.merge(movies, on="movieId")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


# **High-Level Info of Data** <a class="anchor"  id="h4"></a>

In [39]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [6]:
ratings.tail()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
100831,610,160341,2.5,1479545749,Bloodmoon (1997),Action|Thriller
100832,610,160527,4.5,1479544998,Sympathy for the Underdog (1971),Action|Crime|Drama
100833,610,160836,3.0,1493844794,Hazard (2005),Action|Drama|Thriller
100834,610,163937,3.5,1493848789,Blair Witch (2016),Horror|Thriller
100835,610,163981,3.5,1493850155,31 (2016),Horror


We will check which movies that have the most number of reviews,

In [7]:
ratings.groupby('movieId')['rating'].count().sort_values(ascending=False).head()

movieId
356     329
318     317
296     307
593     279
2571    278
Name: rating, dtype: int64

In [8]:
ratings.shape

(100836, 6)

In [9]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100836 entries, 0 to 100835
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
 4   title      100836 non-null  object 
 5   genres     100836 non-null  object 
dtypes: float64(1), int64(3), object(2)
memory usage: 5.4+ MB


# **Train-Test Split** <a class="anchor"  id="h5"></a>

In [10]:
X_train, X_test = train_test_split(ratings, test_size = 0.30, random_state = 42)
print(X_train.shape)
print(X_test.shape)

(70585, 6)
(30251, 6)


# **Pivot Ratings into Movie Features** <a class="anchor"  id="h6"></a>

In [11]:
user_data = X_train.pivot(index = 'userId', columns = 'movieId', values = 'rating').fillna(0)
user_data.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,190183,190215,190219,191005,193565,193567,193571,193573,193583,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# **Create a Copy of Train and Test Dataset** <a class="anchor"  id="h7"></a>

In [12]:
dummy_train = X_train.copy()
dummy_test = X_test.copy()

dummy_train['rating'] = dummy_train['rating'].apply(lambda x: 0 if x > 0 else 1)
dummy_test['rating'] = dummy_test['rating'].apply(lambda x: 1 if x > 0 else 0)

In [13]:
dummy_train = dummy_train.pivot(index = 'userId', columns = 'movieId', values = 'rating').fillna(1)
dummy_test = dummy_test.pivot(index ='userId', columns = 'movieId', values = 'rating').fillna(0)

In [14]:
dummy_train.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,190183,190215,190219,191005,193565,193567,193571,193573,193583,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
dummy_test.head()

movieId,1,2,3,4,5,6,7,9,10,11,...,188751,188833,190207,190209,190213,190221,193579,193581,193585,193587
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# **User based Similarity Matrix** <a class="anchor"  id="h8"></a>

## **Using Cosine Similarity** <a class="anchor"  id="h9"></a>

In [16]:
user_similarity = cosine_similarity(user_data)
user_similarity[np.isnan(user_similarity)] = 0
print(user_similarity)
print("- "*10)
print(user_similarity.shape)

[[1.         0.02187332 0.02616746 ... 0.20265733 0.06448247 0.10962085]
 [0.02187332 1.         0.         ... 0.02643589 0.04391188 0.06522718]
 [0.02616746 0.         1.         ... 0.00702794 0.         0.00791189]
 ...
 [0.20265733 0.02643589 0.00702794 ... 1.         0.11435659 0.21421564]
 [0.06448247 0.04391188 0.         ... 0.11435659 1.         0.02017565]
 [0.10962085 0.06522718 0.00791189 ... 0.21421564 0.02017565 1.        ]]
- - - - - - - - - - 
(610, 610)


## **Predicting the User Ratings on the Movies** <a class="anchor"  id="h10"></a>

In [17]:
user_predicted_ratings = np.dot(user_similarity, user_data)
user_predicted_ratings

array([[8.46238500e+01, 3.19328270e+01, 1.99931412e+01, ...,
        3.23986173e-02, 2.83487901e-02, 2.53838538e-01],
       [2.39848830e+01, 1.05603707e+01, 2.56908336e+00, ...,
        4.93110601e-01, 4.31471776e-01, 8.65728295e-01],
       [5.14904707e+00, 2.33866375e+00, 1.48839427e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [1.07603761e+02, 4.93513807e+01, 2.10233901e+01, ...,
        6.69599786e-02, 5.85899813e-02, 6.35401762e-01],
       [6.34632400e+01, 3.01528688e+01, 1.33211581e+01, ...,
        0.00000000e+00, 0.00000000e+00, 1.78805134e-01],
       [7.70232663e+01, 3.15104674e+01, 1.12485516e+01, ...,
        3.45985955e-01, 3.02737711e-01, 8.33514069e-01]])

In [18]:
user_predicted_ratings.shape

(610, 8532)

Our objective is to provide movie recommendations that exclude the ones the user has already watched and rated. We are specifically interested in suggesting movies that the user has not yet rated, and we will disregard any movies that have already received a rating from the user.

To achieve this, we will rely on the dummy train matrix we constructed earlier, which helps us identify the movies that have been rated by the user and thus should be excluded from our recommendations.

In [19]:
user_final_ratings = np.multiply(user_predicted_ratings, dummy_train)
user_final_ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,190183,190215,190219,191005,193565,193567,193571,193573,193583,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,31.932827,0.0,0.970626,10.093745,38.040975,13.167573,2.783184,4.082196,44.852786,...,0.289181,0.068686,0.04579,0.036448,0.028349,0.024299,0.032399,0.032399,0.028349,0.253839
2,23.984883,10.560371,2.569083,0.086217,2.339004,10.162367,1.579197,0.439697,0.816889,10.166981,...,0.101113,0.095675,0.063783,0.554749,0.431472,0.369833,0.493111,0.493111,0.431472,0.865728
3,5.149047,2.338664,1.488394,0.078561,0.90456,3.632714,0.985177,0.364076,0.26701,3.47492,...,0.02052,0.013626,0.009084,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,52.191239,18.611688,9.047988,1.109513,5.883472,24.402422,9.353388,1.657141,1.837891,26.381719,...,0.231678,0.112741,0.07516,0.058497,0.045498,0.038998,0.051997,0.051997,0.045498,0.290216
5,0.0,40.407977,17.442226,1.963874,14.706799,33.708505,18.667692,4.239145,4.60487,47.211922,...,0.129298,0.191913,0.127942,0.0,0.0,0.0,0.0,0.0,0.0,0.215573


## **Top 5 Movie Recommendations for the User 42** <a class="anchor"  id="h11"></a>

In [20]:
user_final_ratings.iloc[42].sort_values(ascending = False)[0:5]

movieId
318    102.470986
593    102.239776
150    100.131445
457     83.206629
527     73.447521
Name: 43, dtype: float64

In [21]:
user_ratings = user_final_ratings.iloc[42]

top_movie_ids = user_ratings.sort_values(ascending=False).head(5).index

top_movies = movies[movies['movieId'].isin(top_movie_ids)][['movieId', 'title']]
print("Top 5 movie recommendations for User 42:")
for movie_id, movie_title in zip(top_movies['movieId'], top_movies['title']):
    print(f"Movie ID: {movie_id}, Title: {movie_title}")


Top 5 movie recommendations for User 42:
Movie ID: 150, Title: Apollo 13 (1995)
Movie ID: 318, Title: Shawshank Redemption, The (1994)
Movie ID: 457, Title: Fugitive, The (1993)
Movie ID: 527, Title: Schindler's List (1993)
Movie ID: 593, Title: Silence of the Lambs, The (1991)


# **Item-based Collaborative Filtering** <a class="anchor"  id="h12"></a>

In [22]:
movie_features = X_train.pivot(index = 'movieId', columns = 'userId', values = 'rating').fillna(0)
movie_features.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,0.0,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,0.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


# **Item based Similarity Matrix** <a class="anchor"  id="h13"></a>

## **Using Cosine Similarity** <a class="anchor"  id="h14"></a>

In [23]:
item_similarity = cosine_similarity(movie_features)
item_similarity[np.isnan(item_similarity)] = 0
print(item_similarity)
print("- "*10)
print(item_similarity.shape)

[[1.         0.29898457 0.22391203 ... 0.         0.         0.        ]
 [0.29898457 1.         0.20834743 ... 0.         0.         0.        ]
 [0.22391203 0.20834743 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         1.         0.        ]
 [0.         0.         0.         ... 1.         1.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]
- - - - - - - - - - 
(8532, 8532)


## **Predicting the User Ratings on the Movies** <a class="anchor"  id="h15"></a>

In [24]:
item_predicted_ratings = np.dot(movie_features.T, item_similarity)
item_predicted_ratings

array([[1.75597072e+02, 1.18996814e+02, 1.29388072e+02, ...,
        4.74230363e-01, 4.74230363e-01, 3.42846387e+00],
       [1.01373144e+01, 8.75828511e+00, 3.13529612e+00, ...,
        2.28577184e+00, 2.28577184e+00, 5.46468120e+00],
       [5.88907229e+00, 4.96539089e+00, 4.93902850e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [3.64620395e+02, 2.92352950e+02, 1.99538021e+02, ...,
        1.70295167e+00, 1.70295167e+00, 1.84763469e+01],
       [2.13118416e+01, 1.75784035e+01, 1.19482375e+01, ...,
        0.00000000e+00, 0.00000000e+00, 6.49862518e-01],
       [5.14826662e+02, 3.38270423e+02, 1.72250494e+02, ...,
        2.13641588e+01, 2.13641588e+01, 3.89590952e+01]])

In [25]:
item_predicted_ratings.shape

(610, 8532)

In [26]:
dummy_train.shape

(610, 8532)

# **Filtering the ratings only for the movies not already rated by the user for recommendation** <a class="anchor"  id="h16"></a>

In [27]:
item_final_ratings = np.multiply(item_predicted_ratings, dummy_train)
item_final_ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,190183,190215,190219,191005,193565,193567,193571,193573,193583,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,118.996814,0.0,23.133772,60.581957,132.048898,56.179239,60.386704,28.869752,132.895236,...,6.058141,0.692299,0.692299,0.47423,0.47423,0.47423,0.47423,0.47423,0.47423,3.428464
2,10.137314,8.758285,3.135296,0.298945,4.255284,7.992577,1.195321,2.39303,1.148259,6.640364,...,0.684336,0.231703,0.231703,2.285772,2.285772,2.285772,2.285772,2.285772,2.285772,5.464681
3,5.889072,4.965391,4.939029,0.765647,3.299535,6.348033,2.425237,2.854331,1.431,5.889038,...,0.196667,0.045509,0.045509,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,101.427959,65.684718,51.634564,27.39084,37.976397,91.963227,44.131866,36.627356,13.420183,74.746324,...,4.271284,1.438547,1.438547,0.648673,0.648673,0.648673,0.648673,0.648673,0.648673,4.084436
5,0.0,29.96663,20.025656,13.081626,18.572332,25.926644,19.728925,15.498188,6.83857,28.740817,...,0.910592,0.686795,0.686795,0.0,0.0,0.0,0.0,0.0,0.0,0.924673


# **Top 5 movie Recommendations for the User 42** <a class="anchor"  id="h17"></a>

In [28]:
item_final_ratings.iloc[42].sort_values(ascending = False)[0:5]

movieId
150    92.658292
2      90.487035
457    89.231329
344    88.659364
595    86.387734
Name: 43, dtype: float64

In [29]:
item_ratings = item_final_ratings.iloc[42]

top_movie_ids = item_ratings.sort_values(ascending=False).head(5).index

top_movies = movies[movies['movieId'].isin(top_movie_ids)][['movieId', 'title']]
print("Top 5 movie recommendations for User 42:")
for movie_id, movie_title in zip(top_movies['movieId'], top_movies['title']):
    print(f"Movie ID: {movie_id}, Title: {movie_title}")

Top 5 movie recommendations for User 42:
Movie ID: 2, Title: Jumanji (1995)
Movie ID: 150, Title: Apollo 13 (1995)
Movie ID: 344, Title: Ace Ventura: Pet Detective (1994)
Movie ID: 457, Title: Fugitive, The (1993)
Movie ID: 595, Title: Beauty and the Beast (1991)


## **Model based Collaborative Filtering** <a class="anchor"  id="h18"></a>

We will recommend items to users by analyzing movie ratings and identifying similarities between the ratings of a particular user and those of other users who have watched similar movies. For this purpose, we will employ a model-based collaborative filtering technique. This approach allows us to predict movie preferences for a specific user by discerning patterns in the preferences of multiple users based on their collective data.

In [30]:
filtered = ratings[ratings['movieId']==50]['title']
filtered.unique()

array(['Usual Suspects, The (1995)'], dtype=object)

## **Utility Matrix** <a class="anchor"  id="h19"></a>

A utility matrix comprises the complete set of user-movie preference data, typically represented as a matrix. The utility matrix tends to be sparse because no user would have rated or watched every movie in the entire list.

In [31]:
rating_crosstab = pd.pivot_table(data=ratings,
                                 values='rating',
                                 index='userId',
                                 columns='title', fill_value=0)
rating_crosstab.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0,...,0.0,0.0,0.0,0,0,0.0,0.0,0.0,4.0,0
2,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0,...,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0
3,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0,...,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0
4,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0,...,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0
5,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0,...,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0


In [32]:
rating_crosstab.shape

(610, 9719)

In [33]:
X = rating_crosstab.T
X.shape

(9719, 610)

## **Decomposing the Matrix** <a class="anchor"  id="h20"></a>

In [34]:
svd = TruncatedSVD(n_components=12, random_state=12)
resultant_matrix = svd.fit_transform(X)
resultant_matrix.shape

(9719, 12)

## **Correlation Matrix** <a class="anchor"  id="h21"></a>

In [35]:
corr_mat = np.corrcoef(resultant_matrix)
corr_mat.shape

(9719, 9719)

In [36]:
movies_names = rating_crosstab.columns
movies_list = list(rating_crosstab.columns)
usual_suspects = movies_list.index('Usual Suspects, The (1995)')
print(usual_suspects)

9119


In [37]:
corr_usual_suspects = corr_mat[usual_suspects]
corr_usual_suspects.shape

(9719,)

## **Recommending Top 10 Highly Correlated Movies** <a class="anchor"  id="h22"></a>

In [38]:
print('Recommended movie: ', movies_names[(corr_usual_suspects<1.0) & (corr_usual_suspects>0.95)][0:9])

Recommended movie:  Index(['Pulp Fiction (1994)', 'Seven (a.k.a. Se7en) (1995)',
       'Silence of the Lambs, The (1991)'],
      dtype='object', name='title')
