# Load the Libraries

In [41]:
# Importing Required Libraries
import pandas as pd
import numpy as np

# Load the DataSet

In [42]:
# Reading File - ratings :
Required_Columns = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
Ratings = pd.read_csv('ML_100K/u.data', sep='\t', names = Required_Columns, encoding='latin-1')
Ratings.head()

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [43]:
Unique_Users = Ratings.user_id.unique().shape[0]
print("No. of Unique Users:", Unique_Users)

Unique_Movies = Ratings.movie_id.unique().shape[0]
print("No. of Unique Movies:", Unique_Movies)

No. of Unique Users: 943
No. of Unique Movies: 1682


# Create Pivot Table for Users and Movies Based on Ratings

In [44]:
Data_Matrix = Ratings.pivot_table(index='user_id', columns='movie_id', values='rating')
Data_Matrix.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,


In [45]:
#Data Pre-processing the Dataset by Replacing the NaN with 0
Data_Matrix_Preprocessed = Data_Matrix.replace(np.nan,0)
Data_Matrix_Preprocessed.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Find Cosine Similatity for Users and Items

In [49]:
from sklearn.metrics.pairwise import pairwise_distances 
User_Similarity = pairwise_distances(Data_Matrix_Preprocessed, metric='cosine')
Item_Similarity = pairwise_distances(Data_Matrix_Preprocessed.T, metric='cosine')

Item_Similarity

array([[0.00000000e+00, 5.97617822e-01, 6.69755213e-01, ...,
        1.00000000e+00, 9.52816933e-01, 9.52816933e-01],
       [5.97617822e-01, 1.11022302e-16, 7.26930825e-01, ...,
        1.00000000e+00, 9.21700637e-01, 9.21700637e-01],
       [6.69755213e-01, 7.26930825e-01, 0.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 9.03124947e-01],
       ...,
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [9.52816933e-01, 9.21700637e-01, 1.00000000e+00, ...,
        1.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [9.52816933e-01, 9.21700637e-01, 9.03124947e-01, ...,
        1.00000000e+00, 1.00000000e+00, 0.00000000e+00]],
      shape=(1682, 1682))

# Using Formula for Users and Items To Calcuate the Score Value

In [52]:
def Predict_Score_Value(Ratings, Similarity, type='user'):
    if type == 'user':
        # Calculate Each User's Average Ratings
        Mean_User_Rating = Ratings.mean(axis=1).values # (Convert from Panda Series To NumPy Array)
        
        # Getting a Centered Value of Each User’s Ratings by Subtracting their Mean.(np.newaxis Used to Maintain Same Format as Mean_User_Rating)
        Rating_Difference = (Ratings - Mean_User_Rating[:, np.newaxis])
        
        # Score_Value = Reshapes the Vector to a Column + How Much Similar Users Deviate from their Mean / Divided by Total Similarity for Normalization.
        Score_Value = Mean_User_Rating[:, np.newaxis] + Similarity.dot(Rating_Difference) / np.array([np.abs(Similarity).sum(axis=1)]).T
    elif type == 'item':
        
        Score_Value = Ratings.dot(Similarity) / np.array([np.abs(Similarity).sum(axis=1)])
    return Score_Value

In [55]:
# Prediction Table
User_Prediction = Predict_Score_Value(Data_Matrix_Preprocessed, User_Similarity, type='user')
Item_Prediction = Predict_Score_Value(Data_Matrix_Preprocessed, Item_Similarity, type='item')

Item_Similarity

array([[0.00000000e+00, 5.97617822e-01, 6.69755213e-01, ...,
        1.00000000e+00, 9.52816933e-01, 9.52816933e-01],
       [5.97617822e-01, 1.11022302e-16, 7.26930825e-01, ...,
        1.00000000e+00, 9.21700637e-01, 9.21700637e-01],
       [6.69755213e-01, 7.26930825e-01, 0.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 9.03124947e-01],
       ...,
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [9.52816933e-01, 9.21700637e-01, 1.00000000e+00, ...,
        1.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [9.52816933e-01, 9.21700637e-01, 9.03124947e-01, ...,
        1.00000000e+00, 1.00000000e+00, 0.00000000e+00]],
      shape=(1682, 1682))

### As Per the User Based Filtering, First We Have to Find the Similarity Between Input Users and Others

In [9]:
#1. Select input user
Input_Item = 34

In [10]:
#2. Convert the User_Similarity_Table into DataFrame
Item_Similarity_Table = pd.DataFrame(Item_Similarity)
Item_Similarity_Table

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.000000,5.976178e-01,0.669755,0.545062,0.713286,0.883656,0.379021,0.518886,0.503712,0.726065,...,0.964613,1.0,1.000000,1.000000,0.964613,1.0,1.0,1.0,0.952817,0.952817
1,0.597618,1.110223e-16,0.726931,0.497429,0.681164,0.916437,0.616597,0.662998,0.744748,0.828918,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.921701,0.921701
2,0.669755,7.269308e-01,0.000000,0.675134,0.787043,0.893278,0.627079,0.799206,0.726331,0.841896,...,1.000000,1.0,1.000000,1.000000,0.967708,1.0,1.0,1.0,1.000000,0.903125
3,0.545062,4.974292e-01,0.675134,0.000000,0.665761,0.909692,0.510717,0.509764,0.580956,0.747439,...,1.000000,1.0,0.905978,0.905978,0.962391,1.0,1.0,1.0,0.943587,0.924782
4,0.713286,6.811638e-01,0.787043,0.665761,0.000000,0.962701,0.665231,0.740839,0.727552,0.944547,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,1.000000,0.905789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1678,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1679,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1680,0.952817,9.217006e-01,1.000000,0.943587,1.000000,1.000000,0.948502,0.917967,0.942640,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.000000,1.000000


In [54]:
#3. Find Similarity Item for Input_Item = 34 Using Cosine Table
Similar_Input_Item = Item_Similarity_Table[Input_Item].sort_values(ascending=True).head(5).index
Similar_Input_Item

Index([34, 77, 246, 1030, 794], dtype='int64')

In [12]:
#4.Convert in to list
Similar_Input_Item = list(Similar_Input_Item)
len(Similar_Input_Item)

5

In [13]:
#5. Using Similar_Item_Movieid_List, Select Movieid from Ratings Table
Similar_Item_Userid_List = []

for Similar_Item in Similar_Input_Item:
    Similar_Item_Userid = list(Ratings[Ratings['movie_id']==Similar_Item]['user_id'])
    Input_Item_Watched_Userid = Ratings[Ratings['movie_id'] == Input_Item]['user_id'].astype(int).tolist()

    Similar_Item_Userid_List.append(Similar_Item_Userid)

len(Similar_Item_Userid_List)

5

In [36]:
# List of Lists Will be Visible Here
#Similar_Item_Userid_List

In [20]:
#6.Convert all the List of Lists as Single List
import itertools
Similar_Item_Userid_Single_List = list(itertools.chain.from_iterable(Similar_Item_Userid_List))
len(Similar_Item_Userid_Single_List)

348

In [21]:
#7. Unique Movieid from the Single List
Unique_UserId_Similar_Item = set(Similar_Item_Userid_Single_List)
len(Unique_UserId_Similar_Item)

274

In [22]:
#8. Input Item Watched movie_list
Input_Item_Watched_Userid = Ratings[Ratings['movie_id'] == Input_Item]['user_id'].astype(int).tolist()
Input_Item_Watched_Userid

[286, 276, 94, 184, 1, 551, 297]

In [26]:
#9. Create a list which should have recom movieid to the input user
Recommend = []
for Per_Id in Unique_UserId_Similar_Item:
    if(Per_Id in Input_Item_Watched_Userid):
        pass
    else:
        Recommend.append(Per_Id)

len(Recommend)

267

In [18]:
#sorted(Recommend)

In [20]:
# Cross Checking 
#sorted(Input_Item_Watched_Userid)

In [21]:
# Cross Checking
#sorted(Input_User_Watched_Movieid)

In [27]:
# Checking the Common Movie List
list(set(Unique_UserId_Similar_Item) & set(Input_Item_Watched_Userid))

[1, 551, 297, 276, 184, 94, 286]

In [28]:
Item_Prediction = pd.DataFrame(Item_Prediction)
Item_Prediction

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.446278,0.475473,0.505938,0.443633,0.512667,0.547939,0.446243,0.463059,0.474916,0.515821,...,0.580579,0.576202,0.582478,0.582478,0.575717,0.588155,0.588155,0.588155,0.573107,0.566696
2,0.108544,0.132957,0.125589,0.124932,0.131178,0.129005,0.110883,0.122223,0.109599,0.121525,...,0.135490,0.136546,0.134829,0.134829,0.134108,0.134458,0.134458,0.134458,0.136576,0.137111
3,0.085685,0.091690,0.087643,0.089966,0.089658,0.089985,0.083492,0.089725,0.085188,0.088331,...,0.089770,0.090506,0.086261,0.086261,0.089201,0.084659,0.084659,0.084659,0.089768,0.090845
4,0.053693,0.059604,0.058114,0.058364,0.059356,0.061472,0.053374,0.058615,0.055905,0.060601,...,0.061349,0.061686,0.061195,0.061195,0.060693,0.057937,0.057937,0.057937,0.061673,0.062281
5,0.224739,0.229171,0.263280,0.226387,0.259973,0.296529,0.232710,0.237109,0.258581,0.275076,...,0.297628,0.295990,0.299922,0.299922,0.298188,0.302051,0.302051,0.302051,0.293373,0.294309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.092574,0.113870,0.110211,0.112040,0.112768,0.123140,0.098578,0.110839,0.098858,0.118579,...,0.123829,0.124430,0.120776,0.120776,0.121360,0.125056,0.125056,0.125056,0.123470,0.124327
940,0.164358,0.184894,0.196502,0.164884,0.195860,0.209652,0.162840,0.165606,0.171761,0.194536,...,0.217536,0.215515,0.219136,0.219136,0.216173,0.218583,0.218583,0.218583,0.216582,0.216819
941,0.032300,0.045024,0.042924,0.043223,0.047493,0.051077,0.032761,0.042646,0.039399,0.047421,...,0.052762,0.053042,0.052692,0.052692,0.051514,0.053028,0.053028,0.053028,0.051910,0.052280
942,0.157779,0.174095,0.189000,0.163514,0.186140,0.194151,0.164910,0.156970,0.167038,0.181295,...,0.197537,0.194479,0.198479,0.198479,0.197969,0.199793,0.199793,0.199793,0.197394,0.200031


In [32]:
Item_Prediction[Input_Item]

user_id
1      0.572414
2      0.139686
3      0.092378
4      0.062496
5      0.286815
         ...   
939    0.122120
940    0.217996
941    0.053046
942    0.198270
943    0.332126
Name: 34, Length: 943, dtype: float64

In [33]:
# Select Hightest Rated Movie From Recommend List which Would be Liked by Input User, Based on User prediction
Highest_Rated=[]

Input_Item_Predicted = pd.DataFrame(Item_Prediction[Input_Item])
Input_Item_Predicted_Transposed = Input_Item_Predicted.T
for Item in Recommend:
    Predicted_Rating = Input_Item_Predicted_Transposed[Item].values
    if(Predicted_Rating>=1):
        Highest_Rated.append(Item)

In [34]:
len(Highest_Rated)

4

In [35]:
# Checking the Common Movie List
list(set(Recommend) & set(Input_Item_Watched_Userid))

[]

In [37]:
def Item_Based(Input_Item, Item_Similarity, _Predictions, Similar_User_Count, Rating_Threshold):
        
    #2. Convert the User_Similarity_Table into DataFrame
    Item_Similarity_Table = pd.DataFrame(Item_Similarity)
    
    #3. Find Similarity Item for Input_Item = 34 Using Cosine Table
    Similar_Input_Item = Item_Similarity_Table[Input_Item].sort_values(ascending=True).head(5).index

    #4.Convert in to list
    Similar_Input_Item = list(Similar_Input_Item)
        
    #5. Using Similar_Item_Movieid_List, Select Movieid from Ratings Table
    Similar_Item_Userid_List = []
    
    for Similar_Item in Similar_Input_Item:
        Similar_Item_Userid = list(Ratings[Ratings['movie_id']==Similar_Item]['user_id'])
        Input_Item_Watched_Userid = Ratings[Ratings['movie_id'] == Input_Item]['user_id'].astype(int).tolist()
    
        Similar_Item_Userid_List.append(Similar_Item_Userid)
    
    #6.Convert all the List of Lists as Single List
    import itertools
    Similar_Item_Userid_Single_List = list(itertools.chain.from_iterable(Similar_Item_Userid_List))
        
    #7. Unique Movieid from the Single List
    Unique_UserId_Similar_Item = set(Similar_Item_Userid_Single_List)
        
    #8. Input Item Watched movie_list
    Input_Item_Watched_Userid = Ratings[Ratings['movie_id'] == Input_Item]['user_id'].astype(int).tolist()
    
    #9. Create a list which should have recom movieid to the input user
    Recommend = []
    for Per_Id in Unique_UserId_Similar_Item:
        if(Per_Id in Input_Item_Watched_Userid):
            pass
        else:
            Recommend.append(Per_Id)
    
    # Select Hightest Rated Movie From Recommend List which Would be Liked by Input User, Based on User prediction
    Highest_Rated=[]
    
    Input_Item_Predicted = pd.DataFrame(Item_Prediction[Input_Item])
    Input_Item_Predicted_Transposed = Input_Item_Predicted.T
    
    for Item in Recommend:
        Predicted_Rating = Input_Item_Predicted_Transposed[Item].values
        if(Predicted_Rating>=1):
            Highest_Rated.append(Item)
        
    return Highest_Rated

In [38]:
# def Item_Based(Input_User, User_Similarity, User_Predictions, Similar_User_Count, Rating_Threshold):
Recommended_Users = Item_Based(5, Item_Similarity, Item_Prediction, 5, 0.8)

In [39]:
len(Recommended_Users)

2

In [40]:
Recommended_Users

[416, 450]