# Recommendation system based on collaborative filtering 

# Item-based collaborative filtering

In [1]:
import pandas as pd
import numpy as np
import warnings
import neattext.functions as nfx
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Movie catalogue; the zipped CSV is handed straight to pandas.
MOVIES_PATH = 'movies.csv.zip'
movies = pd.read_csv(MOVIES_PATH)

# Quick visual check of the loaded frame.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
34203,151697,Grand Slam (1967),Thriller
34204,151701,Bloodmoney (2010),(no genres listed)
34205,151703,The Butterfly Circus (2009),Drama
34206,151709,Zero (2015),Drama|Sci-Fi


In [4]:
# User ratings; the zipped CSV is handed straight to pandas.
RATINGS_PATH = 'ratings.csv.zip'
ratings = pd.read_csv(RATINGS_PATH)

# Quick visual check of the loaded frame.
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,169,2.5,1204927694
1,1,2471,3.0,1204927438
2,1,48516,5.0,1204927435
3,2,2571,3.5,1436165433
4,2,109487,4.0,1436165496
...,...,...,...,...
22884372,247753,49530,5.0,1430437962
22884373,247753,69481,3.0,1430437984
22884374,247753,74458,4.0,1430437968
22884375,247753,76093,5.0,1430437811


In [5]:
# Attach title and genres to every rating. The inner join on movieId keeps
# only ratings whose movie also appears in `movies`.
data = ratings.merge(movies, how='inner', on='movieId')

data

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,169,2.5,1204927694,Free Willy 2: The Adventure Home (1995),Adventure|Children|Drama
1,13,169,1.0,974868393,Free Willy 2: The Adventure Home (1995),Adventure|Children|Drama
2,14,169,3.0,845470321,Free Willy 2: The Adventure Home (1995),Adventure|Children|Drama
3,17,169,1.0,944991371,Free Willy 2: The Adventure Home (1995),Adventure|Children|Drama
4,68,169,1.0,1011092044,Free Willy 2: The Adventure Home (1995),Adventure|Children|Drama
...,...,...,...,...,...,...
22884372,247509,95581,4.0,1341630851,"Flying Fleet, The (1929)",Adventure|Drama|Romance
22884373,247534,111838,4.0,1429019913,"Man Named Rocca, A (Nommé La Rocca, Un) (Man C...",Drama
22884374,247662,137437,3.0,1436313628,Son of a Lion (2007),(no genres listed)
22884375,247670,145939,5.0,1446432935,Sandesham (1991),Children|Comedy


In [6]:
# One boolean per column: True if that column holds at least one missing value.
data.isna().any()

userId       False
movieId      False
rating       False
timestamp    False
title        False
genres       False
dtype: bool

In [7]:
# Keep only the columns the recommender needs (userId, rating, title).
# `drop` returns a new frame instead of mutating the merged one in place, and
# errors='ignore' makes the cell safe to re-run: a second execution finds the
# columns already gone and is a no-op instead of raising KeyError.
data = data.drop(columns=['timestamp', 'movieId', 'genres'], errors='ignore')

data

Unnamed: 0,userId,rating,title
0,1,2.5,Free Willy 2: The Adventure Home (1995)
1,13,1.0,Free Willy 2: The Adventure Home (1995)
2,14,3.0,Free Willy 2: The Adventure Home (1995)
3,17,1.0,Free Willy 2: The Adventure Home (1995)
4,68,1.0,Free Willy 2: The Adventure Home (1995)
...,...,...,...
22884372,247509,4.0,"Flying Fleet, The (1929)"
22884373,247534,4.0,"Man Named Rocca, A (Nommé La Rocca, Un) (Man C..."
22884374,247662,3.0,Son of a Lion (2007)
22884375,247670,5.0,Sandesham (1991)


In [27]:
# NOTE(review): after the previous cell `data` has the columns userId, rating
# and title, yet the output below lists user ids as string column labels under
# the name 'userId'. The step that reshaped `data` (presumably a pivot of
# ratings by title and user -- TODO confirm) is not in this notebook, and the
# execution count jumps from 7 to 27. Restore that step so the notebook
# survives Restart & Run All.
print(data.columns)


Index(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
       ...
       '245940', '246415', '246533', '246887', '246994', '247081', '247509',
       '247534', '247662', '247670'],
      dtype='object', name='userId', length=4607)


In [30]:
# Remove leading/trailing whitespace from every column label.
stripped_labels = data.columns.str.strip()
data.columns = stripped_labels


In [31]:
# Plain-text preview of the first five rows.
print(data.head(n=5))


userId                                               1   2   3   4   5   6  \
title                                                                        
"Great Performances" Cats (1998)                   NaN NaN NaN NaN NaN NaN   
#1 Cheerleader Camp (2010)                         NaN NaN NaN NaN NaN NaN   
#chicagoGirl: The Social Network Takes on a Dic... NaN NaN NaN NaN NaN NaN   
$ (Dollars) (1971)                                 NaN NaN NaN NaN NaN NaN   
$5 a Day (2008)                                    NaN NaN NaN NaN NaN NaN   

userId                                               7   8   9  10  ...  \
title                                                               ...   
"Great Performances" Cats (1998)                   NaN NaN NaN NaN  ...   
#1 Cheerleader Camp (2010)                         NaN NaN NaN NaN  ...   
#chicagoGirl: The Social Network Takes on a Dic... NaN NaN NaN NaN  ...   
$ (Dollars) (1971)                                 NaN NaN NaN NaN  ...   
$5 

In [32]:
# Show the frame as the cell output; unrated (title, user) pairs appear as NaN.
data

userId,1,2,3,4,5,6,7,8,9,10,...,245940,246415,246533,246887,246994,247081,247509,247534,247662,247670
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",,,,,,,,,,,...,,,,,,,,,,
#1 Cheerleader Camp (2010),,,,,,,,,,,...,,,,,,,,,,
#chicagoGirl: The Social Network Takes on a Dictator (2013),,,,,,,,,,,...,,,,,,,,,,
$ (Dollars) (1971),,,,,,,,,,,...,,,,,,,,,,
$5 a Day (2008),,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Škola princů (2010),,,,,,,,,,,...,,,,,,,,,,
أهواك (2015),,,,,,,,,,,...,,,,,,,,,,
キサラギ (2007),,,,,,,,,,,...,,,,,,,,,,
ドラゴンボール Ｚ あつまれ！ 悟空ワールド (1992),,,,,,,,,,,...,,,,,,,,,,


In [33]:
# Replace every missing rating with 0 so the matrix is fully numeric.
data = data.fillna(value=0)

data

userId,1,2,3,4,5,6,7,8,9,10,...,245940,246415,246533,246887,246994,247081,247509,247534,247662,247670
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#1 Cheerleader Camp (2010),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#chicagoGirl: The Social Network Takes on a Dictator (2013),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$ (Dollars) (1971),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$5 a Day (2008),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Škola princů (2010),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
أهواك (2015),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
キサラギ (2007),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ドラゴンボール Ｚ あつまれ！ 悟空ワールド (1992),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
# Store the zero-filled rating matrix in Compressed Sparse Row format.
data_sparse = csr_matrix(data.to_numpy())

data_sparse

<33647x4607 sparse matrix of type '<class 'numpy.float64'>'
	with 33647 stored elements in Compressed Sparse Row format>

In [35]:
# Nearest-neighbour index over the rows of the sparse matrix, compared by
# Euclidean distance.
model = NearestNeighbors(metric='euclidean')
model.fit(X=data_sparse)

In [36]:
# Query the model with row 2 of `data`, passed as a (1, n_columns) array.
query_vector = data.iloc[[2]].to_numpy()
distances, suggestions = model.kneighbors(query_vector)

In [37]:
# Distances from the query row to each returned neighbour.
distances

array([[0.        , 5.02493781, 5.02493781, 5.02493781, 5.02493781]])

In [38]:
# Row positions (into `data`) of the returned neighbours.
suggestions

array([[2, 4, 0, 3, 6]])

In [39]:
# Each row of `suggestions` holds the neighbour positions for one query;
# look them up in the index of `data` to print the matching labels.
for neighbour_positions in suggestions:
    print(data.index[neighbour_positions])

Index(['#chicagoGirl: The Social Network Takes on a Dictator (2013)',
       '$5 a Day (2008)', '"Great Performances" Cats (1998)',
       '$ (Dollars) (1971)', '$ellebrity (Sellebrity) (2012)'],
      dtype='object', name='title')


In [40]:
# Query the model with row 32 of `data`, passed as a (1, n_columns) array.
query_vector = data.iloc[[32]].to_numpy()
distances, suggestions = model.kneighbors(query_vector)

In [41]:
# Pull the rows of `data` at the neighbour positions of the single query
# and print them.
neighbour_positions = suggestions[0]
recommended_movies = data.iloc[neighbour_positions]
print(recommended_movies)


userId                           1    2    3    4    5    6    7    8    9  \
title                                                                        
Act of Valor (2012)            0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
10,000 BC (2008)               0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
...And Justice for All (1979)  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
Battle of the Year (2013)      0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
As Above, So Below (2014)      0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   

userId                          10  ...  245940  246415  246533  246887  \
title                               ...                                   
Act of Valor (2012)            0.0  ...     0.0     0.0     0.0     0.0   
10,000 BC (2008)               0.0  ...     0.0     0.0     0.0     0.0   
...And Justice for All (1979)  0.0  ...     0.0     0.0     0.0     0.0   
Battle of the Year (2013)      0.0  ...     0.0     0.0     0.0     0.0   
As 

NameError: name 'model' is not defined