In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity,euclidean_distances

In [4]:
df = pd.read_csv('Movie.csv')
df.shape

(8992, 3)

In [5]:
df.head()

Unnamed: 0,userId,movie,rating
0,3,Toy Story (1995),4.0
1,6,Toy Story (1995),5.0
2,8,Toy Story (1995),4.0
3,10,Toy Story (1995),4.0
4,11,Toy Story (1995),4.5


In [6]:
len(df.userId.unique())

4081

In [7]:
len(df.movie.unique())

10

In [8]:
len(df.rating.unique())

10

In [9]:
df.groupby('movie')['rating'].mean().sort_values(ascending=False)[:6]

movie
Toy Story (1995)       3.959323
Heat (1995)            3.836508
GoldenEye (1995)       3.427003
Sabrina (1995)         3.381429
Tom and Huck (1995)    3.352564
Jumanji (1995)         3.268398
Name: rating, dtype: float64

## Cosine_Similarity

### Item_Based

In [10]:
df1 = df.pivot_table(index='movie',columns='userId',values='rating')

In [11]:
df1.fillna(0,axis=1,inplace=True)

In [12]:
df1

userId,1,2,3,4,5,6,7,8,10,11,...,7105,7107,7108,7110,7113,7115,7116,7117,7119,7120
movie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Father of the Bride Part II (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,3.5,0.0,0.0,0.0
GoldenEye (1995),0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,2.5,...,2.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0
Grumpier Old Men (1995),0.0,4.0,0.0,0.0,0.0,3.0,3.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0
Heat (1995),0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,5.0,0.0,0.0
Jumanji (1995),3.5,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0
Sabrina (1995),0.0,0.0,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,4.0
Sudden Death (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
Tom and Huck (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Toy Story (1995),0.0,0.0,4.0,0.0,0.0,5.0,0.0,4.0,4.0,4.5,...,0.0,4.0,0.0,4.0,0.0,0.0,4.0,4.0,5.0,4.5
Waiting to Exhale (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
cos_similar = cosine_similarity(df1)

In [14]:
cos_similar.shape

(10, 10)

In [15]:
cos_similar[0]

array([1.        , 0.21599275, 0.45566216, 0.24736744, 0.27980393,
       0.39574604, 0.22645759, 0.16293622, 0.30863759, 0.16378738])

In [16]:
df1.index

Index(['Father of the Bride Part II (1995)', 'GoldenEye (1995)',
       'Grumpier Old Men (1995)', 'Heat (1995)', 'Jumanji (1995)',
       'Sabrina (1995)', 'Sudden Death (1995)', 'Tom and Huck (1995)',
       'Toy Story (1995)', 'Waiting to Exhale (1995)'],
      dtype='object', name='movie')

In [17]:
def recommended_movie(movie_name):
    if movie_name in df1.index:
        index = np.where(movie_name==df1.index)[0][0]
        similar = sorted(list(enumerate(cos_similar[index])),reverse=True,key=lambda x: x[1])[1:6]
        print(f'Recommended movie of {movie_name}')
        #print('Recommended movie of',movie_name)
        print('-'*40)
        for movie in similar:
            print(df1.index[movie[0]])
    else:
        print('Movie is not found!!!')

In [18]:
recommended_movie('Toy Story (1995)')

Recommended movie of Toy Story (1995)
----------------------------------------
Jumanji (1995)
Heat (1995)
GoldenEye (1995)
Grumpier Old Men (1995)
Sabrina (1995)


### User_Based

In [19]:
df2 = df.pivot_table(index='userId',columns='movie',values='rating')

In [20]:
df2.fillna(0,axis=1,inplace=True)

In [21]:
df2

movie,Father of the Bride Part II (1995),GoldenEye (1995),Grumpier Old Men (1995),Heat (1995),Jumanji (1995),Sabrina (1995),Sudden Death (1995),Tom and Huck (1995),Toy Story (1995),Waiting to Exhale (1995)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
4,0.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7115,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7116,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
7117,0.0,3.0,4.0,5.0,0.0,3.0,1.0,0.0,4.0,0.0
7119,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0


In [22]:
arr = cosine_similarity(df2)

In [23]:
df4 = pd.DataFrame(arr,index=df.userId.unique())
df4

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4071,4072,4073,4074,4075,4076,4077,4078,4079,4080
3,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.000000,0.707107,0.000000,0.000000,0.000000,0.000000,0.000000,0.553372
6,0.000000,1.000000,0.000000,0.000000,0.000000,0.390567,0.707107,0.615457,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.458831,0.000000,0.000000
8,0.000000,0.000000,1.000000,0.000000,0.000000,0.650945,0.000000,0.492366,1.000000,0.874157,...,0.000000,1.000000,0.000000,0.707107,0.000000,0.000000,0.752577,0.458831,1.000000,0.622543
10,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.615457,0.000000,0.388514,...,0.800000,0.000000,0.000000,0.000000,0.989949,0.000000,0.000000,0.619422,0.000000,0.000000
11,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.000000,0.707107,0.000000,0.000000,0.000000,0.000000,0.000000,0.553372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7044,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.658505,0.000000,0.000000,0.000000
7070,0.000000,0.000000,0.752577,0.000000,0.000000,0.489886,0.000000,0.370543,0.752577,0.657870,...,0.000000,0.752577,0.000000,0.532152,0.000000,0.658505,1.000000,0.345306,0.752577,0.468511
7080,0.000000,0.458831,0.458831,0.619422,0.000000,0.701884,0.567775,0.889532,0.458831,0.568212,...,0.344124,0.458831,0.000000,0.324443,0.648886,0.000000,0.345306,1.000000,0.458831,0.476071
7087,0.000000,0.000000,1.000000,0.000000,0.000000,0.650945,0.000000,0.492366,1.000000,0.874157,...,0.000000,1.000000,0.000000,0.707107,0.000000,0.000000,0.752577,0.458831,1.000000,0.622543


In [None]:
user = 4
tershold=0.5

In [25]:
df4[df4[4]>0.5][4].sort_values(ascending=False)[1:6]

4444    1.0
5736    1.0
3029    1.0
7100    1.0
1260    1.0
Name: 4, dtype: float64

In [38]:
df[(df.userId==5)|(df.userId==4337)]

Unnamed: 0,userId,movie,rating
2570,5,Jumanji (1995),3.0
5974,4337,Heat (1995),3.0
8409,4337,GoldenEye (1995),4.0


## Eucliden_Distance

### Item_Based

In [39]:
df1 = df.pivot_table(index='movie',columns='userId',values='rating')
df1.fillna(0,axis=1,inplace=True)

In [40]:
df1

userId,1,2,3,4,5,6,7,8,10,11,...,7105,7107,7108,7110,7113,7115,7116,7117,7119,7120
movie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Father of the Bride Part II (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,3.5,0.0,0.0,0.0
GoldenEye (1995),0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,2.5,...,2.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0
Grumpier Old Men (1995),0.0,4.0,0.0,0.0,0.0,3.0,3.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0
Heat (1995),0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,5.0,0.0,0.0
Jumanji (1995),3.5,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0
Sabrina (1995),0.0,0.0,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,4.0
Sudden Death (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
Tom and Huck (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Toy Story (1995),0.0,0.0,4.0,0.0,0.0,5.0,0.0,4.0,4.0,4.5,...,0.0,4.0,0.0,4.0,0.0,0.0,4.0,4.0,5.0,4.5
Waiting to Exhale (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
distance = euclidean_distances(df1)

In [45]:
def recommended_movie(movie_name):
    if movie_name in df1.index:
        index = np.where(movie_name==df1.index)[0][0]
        similar = sorted(list(enumerate(distance[index])),reverse=False,key=lambda x: x[1])[1:6]
        print(f'Recommended movie of {movie_name}')
        #print('Recommended movie of',movie_name)
        print('-'*40)
        for movie in similar:
            print(df1.index[movie[0]])
    else:
        print('Movie is not found!!!')

In [46]:
df1.index

Index(['Father of the Bride Part II (1995)', 'GoldenEye (1995)',
       'Grumpier Old Men (1995)', 'Heat (1995)', 'Jumanji (1995)',
       'Sabrina (1995)', 'Sudden Death (1995)', 'Tom and Huck (1995)',
       'Toy Story (1995)', 'Waiting to Exhale (1995)'],
      dtype='object', name='movie')

In [47]:
recommended_movie('Grumpier Old Men (1995)')

Recommended movie of Grumpier Old Men (1995)
----------------------------------------
Sudden Death (1995)
Tom and Huck (1995)
Waiting to Exhale (1995)
Father of the Bride Part II (1995)
Sabrina (1995)


### User_Based

In [48]:
df2 = df.pivot_table(index='userId',columns='movie',values='rating')
df2.fillna(0,axis=1,inplace=True)

In [51]:
dista = euclidean_distances(df2)

In [54]:
df3 = pd.DataFrame(dista,index=df.userId.unique())


In [None]:
user = 5
tershold=4

In [55]:
df3[df3[4]<4][5].sort_values()[1:6]

1255    6.204837
4798    6.244998
5677    6.344289
6715    6.344289
452     6.344289
Name: 5, dtype: float64

In [58]:
df[(df.userId==5)|(df.userId==5818)]

Unnamed: 0,userId,movie,rating
2570,5,Jumanji (1995),3.0
3532,5818,Jumanji (1995),3.0


## pearson_correlation_coefficient(Adj_cos_similarity)

### Item_Based

In [60]:
df1 = df.pivot_table(index='userId',columns='movie',values='rating')

In [62]:
df1.fillna(0,axis=1,inplace=True)

In [63]:
df1

movie,Father of the Bride Part II (1995),GoldenEye (1995),Grumpier Old Men (1995),Heat (1995),Jumanji (1995),Sabrina (1995),Sudden Death (1995),Tom and Huck (1995),Toy Story (1995),Waiting to Exhale (1995)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
4,0.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7115,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7116,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
7117,0.0,3.0,4.0,5.0,0.0,3.0,1.0,0.0,4.0,0.0
7119,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0


In [67]:
df2 = df1.corr(method='pearson')
arr1 = df2.values

In [74]:
def recommended_movie(movie_name):
    if movie_name in df2.index:
        index = np.where(movie_name==df2.index)[0][0]
        similar = sorted(list(enumerate(arr1[index])),reverse=False,key=lambda x: x[1])[1:6]
        print(f'Recommended movie of {movie_name}')
        #print('Recommended movie of',movie_name)
        print('-'*40)
        for movie in similar:
            print(df2.index[movie[0]])
    else:
        print('Movie is not found!!!')

In [75]:
df2.index

Index(['Father of the Bride Part II (1995)', 'GoldenEye (1995)',
       'Grumpier Old Men (1995)', 'Heat (1995)', 'Jumanji (1995)',
       'Sabrina (1995)', 'Sudden Death (1995)', 'Tom and Huck (1995)',
       'Toy Story (1995)', 'Waiting to Exhale (1995)'],
      dtype='object', name='movie')

In [76]:
recommended_movie('Jumanji (1995)')

Recommended movie of Jumanji (1995)
----------------------------------------
Toy Story (1995)
Sudden Death (1995)
Sabrina (1995)
Grumpier Old Men (1995)
Waiting to Exhale (1995)


### User_Based

In [77]:
df1 = df.pivot_table(index='movie',columns='userId',values='rating')

In [79]:
df1.fillna(0,axis=1,inplace=True)

In [81]:
df2=df1.corr(method='pearson')

In [82]:
df2

userId,1,2,3,4,5,6,7,8,10,11,...,7105,7107,7108,7110,7113,7115,7116,7117,7119,7120
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,-0.111111,-0.111111,-0.164581,1.000000,-0.211194,-0.166667,-0.265343,-0.111111,-0.158763,...,-0.111111,-0.111111,1.000000,6.666667e-01,-0.166667,-0.111111,-0.166206,-0.351364,-0.111111,0.478986
2,-0.111111,1.000000,-0.111111,-0.164581,-0.111111,0.276176,0.666667,0.563854,-0.111111,-0.158763,...,-0.111111,-0.111111,-0.111111,-1.666667e-01,-0.166667,-0.111111,-0.166206,0.351364,-0.111111,-0.217721
3,-0.111111,-0.111111,1.000000,-0.164581,-0.111111,0.601090,-0.166667,0.398015,1.000000,0.861858,...,-0.111111,1.000000,-0.111111,6.666667e-01,-0.166667,-0.111111,0.720224,0.351364,1.000000,0.566074
4,-0.164581,-0.164581,-0.164581,1.000000,-0.164581,-0.312826,-0.246871,0.484273,-0.164581,0.244763,...,0.775880,-0.164581,-0.164581,-2.468710e-01,0.987484,-0.164581,-0.246188,0.483275,-0.164581,-0.322494
5,1.000000,-0.111111,-0.111111,-0.164581,1.000000,-0.211194,-0.166667,-0.265343,-0.111111,-0.158763,...,-0.111111,-0.111111,1.000000,6.666667e-01,-0.166667,-0.111111,-0.166206,-0.351364,-0.111111,0.478986
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7115,-0.111111,-0.111111,-0.111111,-0.164581,-0.111111,-0.211194,-0.166667,-0.265343,-0.111111,-0.158763,...,-0.111111,-0.111111,-0.111111,-1.666667e-01,-0.166667,1.000000,0.609421,-0.351364,-0.111111,-0.217721
7116,-0.166206,-0.166206,0.720224,-0.246188,-0.166206,0.332115,-0.249308,0.132305,0.720224,0.576752,...,-0.166206,0.720224,-0.166206,4.155141e-01,-0.249308,0.609421,1.000000,0.035039,0.720224,0.299624
7117,-0.351364,0.351364,0.351364,0.483275,-0.351364,0.539420,0.395285,0.812868,0.351364,0.412401,...,0.175682,0.351364,-0.351364,7.314236e-18,0.527046,-0.351364,0.035039,1.000000,0.351364,0.137699
7119,-0.111111,-0.111111,1.000000,-0.164581,-0.111111,0.601090,-0.166667,0.398015,1.000000,0.861858,...,-0.111111,1.000000,-0.111111,6.666667e-01,-0.166667,-0.111111,0.720224,0.351364,1.000000,0.566074


In [None]:
user=8
thershold=0.7

In [86]:
df2[df2[8]>0.7][8].sort_values(ascending=False)[1:6]

userId
4831    0.992973
614     0.987937
2274    0.954622
5043    0.948926
5843    0.933903
Name: 8, dtype: float64

In [88]:
df[(df.userId==8)|(df.userId==5818)]

Unnamed: 0,userId,movie,rating
2,8,Toy Story (1995),4.0
3532,5818,Jumanji (1995),3.0
3727,8,Grumpier Old Men (1995),5.0
5205,8,Heat (1995),3.0
7445,8,GoldenEye (1995),4.0
