# Item Based Recommendation System

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import warnings 
warnings.filterwarnings('ignore')

In [2]:
r_cols = ['user_id','movie_id','rating','unix_timestamp']
ratings = pd.read_csv('C:\Priya learning path\Hope_Artifiacial_Intelligence\Recommendation system\Collabrative Recommendation System\ml-100k/u.data',
                      sep='\t', names=r_cols, encoding='latin-1')

In [3]:
ratings

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596
...,...,...,...,...
99995,880,476,3,880175444
99996,716,204,5,879795543
99997,276,1090,1,874795795
99998,13,225,2,882399156


In [4]:
n_users = ratings.user_id.unique().shape[0]
n_items = ratings.movie_id.unique().shape[0]

In [5]:
print(f'No of users : {n_users}')
print(f'No of items : {n_items}')

No of users : 943
No of items : 1682


## Step 1 -  Create pivot table for users and movies based on rating

In [6]:
data_ma = ratings.pivot_table(index='user_id',columns='movie_id',values='rating')
data_ma.head(3)

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,


###  Replace all the NaN values to 0

In [7]:
data_matrix = data_ma.fillna(0)
data_matrix.head(3)

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Step 2 - Finding Cosine Similarity between users and items

In [8]:
from sklearn.metrics.pairwise import pairwise_distances

user_similarity = pairwise_distances(data_matrix, metric='cosine')
item_similarity = pairwise_distances(data_matrix.T, metric='cosine')

In [9]:
item_similarity

array([[0.        , 0.59761782, 0.66975521, ..., 1.        , 0.95281693,
        0.95281693],
       [0.59761782, 0.        , 0.72693082, ..., 1.        , 0.92170064,
        0.92170064],
       [0.66975521, 0.72693082, 0.        , ..., 1.        , 1.        ,
        0.90312495],
       ...,
       [1.        , 1.        , 1.        , ..., 0.        , 1.        ,
        1.        ],
       [0.95281693, 0.92170064, 1.        , ..., 1.        , 0.        ,
        1.        ],
       [0.95281693, 0.92170064, 0.90312495, ..., 1.        , 1.        ,
        0.        ]])

In [10]:
item_similarity.shape

(1682, 1682)

In [11]:
item_similarity_df = pd.DataFrame(item_similarity)
item_similarity_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.000000,0.597618,0.669755,5.450621e-01,0.713286,0.883656,0.379021,0.518886,0.503712,0.726065,...,0.964613,1.0,1.000000,1.000000,0.964613,1.0,1.0,1.0,0.952817,0.952817
1,0.597618,0.000000,0.726931,4.974292e-01,0.681164,0.916437,0.616597,0.662998,0.744748,0.828918,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.921701,0.921701
2,0.669755,0.726931,0.000000,6.751336e-01,0.787043,0.893278,0.627079,0.799206,0.726331,0.841896,...,1.000000,1.0,1.000000,1.000000,0.967708,1.0,1.0,1.0,1.000000,0.903125
3,0.545062,0.497429,0.675134,5.551115e-16,0.665761,0.909692,0.510717,0.509764,0.580956,0.747439,...,1.000000,1.0,0.905978,0.905978,0.962391,1.0,1.0,1.0,0.943587,0.924782
4,0.713286,0.681164,0.787043,6.657605e-01,0.000000,0.962701,0.665231,0.740839,0.727552,0.944547,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,1.000000,0.905789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,1.000000,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1678,1.000000,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1679,1.000000,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1680,0.952817,0.921701,1.000000,9.435867e-01,1.000000,1.000000,0.948502,0.917967,0.942640,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.000000,1.000000


## Step 3 - Predict the non filled rating for the users using formula (Helps to find the unwatched movies)
### Using formula for user and item we are calcuating the score value

In [12]:
def predict(ratings,similarity,type='user'):
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1)
        ratings_diff = (ratings - np.array(mean_user_rating)[:, np.newaxis])
        pred = np.array(mean_user_rating)[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred

In [13]:
user_prediction = predict(data_matrix,user_similarity,type='user')
item_prediction = predict(data_matrix, item_similarity, type='item')

In [14]:
user_prediction

array([[ 2.06532606,  0.73430275,  0.62992381, ...,  0.39359041,
         0.39304874,  0.3927712 ],
       [ 1.76308836,  0.38404019,  0.19617889, ..., -0.08837789,
        -0.0869183 , -0.08671183],
       [ 1.79590398,  0.32904733,  0.15882885, ..., -0.13699223,
        -0.13496852, -0.13476488],
       ...,
       [ 1.59151513,  0.27526889,  0.10219534, ..., -0.16735162,
        -0.16657451, -0.16641377],
       [ 1.81036267,  0.40479877,  0.27545013, ..., -0.00907358,
        -0.00846587, -0.00804858],
       [ 1.8384313 ,  0.47964837,  0.38496292, ...,  0.14686675,
         0.14629808,  0.14641455]])

In [15]:
user_pred = pd.DataFrame(user_prediction)
user_pred

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,2.065326,0.734303,0.629924,1.010669,0.640686,0.476150,1.784569,1.163032,1.513350,0.704478,...,0.394041,0.394434,0.393981,0.392972,0.393344,0.392272,0.394909,0.393590,0.393049,0.392771
1,1.763088,0.384040,0.196179,0.731538,0.225643,0.003892,1.493597,0.876153,1.108467,0.261991,...,-0.086942,-0.085491,-0.087137,-0.088158,-0.087298,-0.089288,-0.087468,-0.088378,-0.086918,-0.086712
2,1.795904,0.329047,0.158829,0.684154,0.173277,-0.035621,1.488230,0.835769,1.135426,0.236383,...,-0.134795,-0.133537,-0.135543,-0.136438,-0.135041,-0.137611,-0.136374,-0.136992,-0.134969,-0.134765
3,1.729951,0.293913,0.127741,0.644932,0.142143,-0.062261,1.437010,0.796249,1.096663,0.211789,...,-0.161413,-0.160220,-0.161542,-0.162586,-0.161634,-0.163877,-0.162283,-0.163080,-0.161442,-0.161248
4,1.796651,0.454474,0.354422,0.763130,0.359539,0.195987,1.547370,0.908904,1.292027,0.437954,...,0.101762,0.102405,0.101923,0.100839,0.101711,0.099951,0.102515,0.101233,0.101075,0.101201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,1.676950,0.346339,0.177518,0.689906,0.199740,0.003297,1.429565,0.830905,1.070986,0.262183,...,-0.092434,-0.091197,-0.092851,-0.093801,-0.092953,-0.094539,-0.092217,-0.093378,-0.092686,-0.092423
939,1.822346,0.419125,0.286430,0.715605,0.294442,0.106633,1.514591,0.853050,1.195304,0.359260,...,0.014060,0.014688,0.014123,0.013060,0.013669,0.011978,0.014065,0.013021,0.013639,0.013796
940,1.591515,0.275269,0.102195,0.624383,0.133762,-0.069553,1.320734,0.765529,1.035088,0.192697,...,-0.166179,-0.164981,-0.166278,-0.167392,-0.166679,-0.168486,-0.166217,-0.167352,-0.166575,-0.166414
941,1.810363,0.404799,0.275450,0.726616,0.281316,0.087068,1.550310,0.850057,1.205745,0.342987,...,-0.008362,-0.007757,-0.008225,-0.009218,-0.008232,-0.010138,-0.008009,-0.009074,-0.008466,-0.008049


In [16]:
item_prediction

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.446278,0.475473,0.505938,0.443633,0.512667,0.547939,0.446243,0.463059,0.474916,0.515821,...,0.580579,0.576202,0.582478,0.582478,0.575717,0.588155,0.588155,0.588155,0.573107,0.566696
2,0.108544,0.132957,0.125589,0.124932,0.131178,0.129005,0.110883,0.122223,0.109599,0.121525,...,0.135490,0.136546,0.134829,0.134829,0.134108,0.134458,0.134458,0.134458,0.136576,0.137111
3,0.085685,0.091690,0.087643,0.089966,0.089658,0.089985,0.083492,0.089725,0.085188,0.088331,...,0.089770,0.090506,0.086261,0.086261,0.089201,0.084659,0.084659,0.084659,0.089768,0.090845
4,0.053693,0.059604,0.058114,0.058364,0.059356,0.061472,0.053374,0.058615,0.055905,0.060601,...,0.061349,0.061686,0.061195,0.061195,0.060693,0.057937,0.057937,0.057937,0.061673,0.062281
5,0.224739,0.229171,0.263280,0.226387,0.259973,0.296529,0.232710,0.237109,0.258581,0.275076,...,0.297628,0.295990,0.299922,0.299922,0.298188,0.302051,0.302051,0.302051,0.293373,0.294309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.092574,0.113870,0.110211,0.112040,0.112768,0.123140,0.098578,0.110839,0.098858,0.118579,...,0.123829,0.124430,0.120776,0.120776,0.121360,0.125056,0.125056,0.125056,0.123470,0.124327
940,0.164358,0.184894,0.196502,0.164884,0.195860,0.209652,0.162840,0.165606,0.171761,0.194536,...,0.217536,0.215515,0.219136,0.219136,0.216173,0.218583,0.218583,0.218583,0.216582,0.216819
941,0.032300,0.045024,0.042924,0.043223,0.047493,0.051077,0.032761,0.042646,0.039399,0.047421,...,0.052762,0.053042,0.052692,0.052692,0.051514,0.053028,0.053028,0.053028,0.051910,0.052280
942,0.157779,0.174095,0.189000,0.163514,0.186140,0.194151,0.164910,0.156970,0.167038,0.181295,...,0.197537,0.194479,0.198479,0.198479,0.197969,0.199793,0.199793,0.199793,0.197394,0.200031


## Step 4 - Select the User Input
### As per User based filtering ,first have to find similarity between the input user and others

In [17]:
item_input = 34

In [18]:
item_sim_table = pd.DataFrame(item_similarity)
item_sim_table

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.000000,0.597618,0.669755,5.450621e-01,0.713286,0.883656,0.379021,0.518886,0.503712,0.726065,...,0.964613,1.0,1.000000,1.000000,0.964613,1.0,1.0,1.0,0.952817,0.952817
1,0.597618,0.000000,0.726931,4.974292e-01,0.681164,0.916437,0.616597,0.662998,0.744748,0.828918,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.921701,0.921701
2,0.669755,0.726931,0.000000,6.751336e-01,0.787043,0.893278,0.627079,0.799206,0.726331,0.841896,...,1.000000,1.0,1.000000,1.000000,0.967708,1.0,1.0,1.0,1.000000,0.903125
3,0.545062,0.497429,0.675134,5.551115e-16,0.665761,0.909692,0.510717,0.509764,0.580956,0.747439,...,1.000000,1.0,0.905978,0.905978,0.962391,1.0,1.0,1.0,0.943587,0.924782
4,0.713286,0.681164,0.787043,6.657605e-01,0.000000,0.962701,0.665231,0.740839,0.727552,0.944547,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,1.000000,0.905789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,1.000000,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1678,1.000000,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1679,1.000000,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1680,0.952817,0.921701,1.000000,9.435867e-01,1.000000,1.000000,0.948502,0.917967,0.942640,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.000000,1.000000


In [20]:
similar_item_input = item_sim_table[item_input].sort_values(ascending=True).head(5).index
similar_item_input

Index([34, 77, 246, 1030, 794], dtype='int64')

In [21]:
similar_item_input = list(similar_item_input)
similar_item_input

[34, 77, 246, 1030, 794]

## Step 6 - Similar items watched by the users
### Using similar_item_input, can select movie id from ratings table

In [22]:
similar_item_userid_list = []
for sim_item in similar_item_input:
    sim = list(ratings[ratings['movie_id']==sim_item]['user_id'])
    similar_item_userid_list.append(sim)

In [23]:
len(similar_item_userid_list)

5

In [24]:
similar_item_userid_list

[[286, 276, 94, 184, 1, 551, 297],
 [92,
  222,
  224,
  194,
  59,
  291,
  262,
  64,
  83,
  128,
  106,
  135,
  334,
  301,
  276,
  184,
  188,
  201,
  110,
  234,
  256,
  286,
  320,
  372,
  345,
  244,
  381,
  60,
  375,
  42,
  330,
  56,
  43,
  115,
  270,
  417,
  280,
  405,
  183,
  343,
  94,
  474,
  347,
  313,
  416,
  271,
  269,
  497,
  506,
  109,
  504,
  7,
  246,
  429,
  363,
  346,
  76,
  95,
  72,
  399,
  311,
  566,
  496,
  374,
  618,
  450,
  505,
  293,
  318,
  593,
  524,
  457,
  328,
  332,
  659,
  650,
  178,
  1,
  655,
  299,
  453,
  643,
  308,
  268,
  49,
  653,
  454,
  664,
  766,
  528,
  532,
  437,
  741,
  487,
  215,
  551,
  764,
  577,
  586,
  478,
  371,
  533,
  378,
  521,
  774,
  500,
  721,
  389,
  711,
  881,
  545,
  843,
  823,
  633,
  627,
  870,
  815,
  916,
  893,
  455,
  130,
  561,
  749,
  145,
  647,
  303,
  393,
  394,
  442,
  495,
  267,
  65,
  682,
  690,
  922,
  896,
  186,
  156,
  452,
  554,
  9

### Convert all the userid as single

In [25]:
import itertools
similar_item_userid_single_list = list(itertools.chain.from_iterable(similar_item_userid_list))

In [26]:
len(similar_item_userid_single_list)

348

### Unique movie_id from the list

In [53]:
unique_userid_similar_item = set(similar_item_userid_single_list)
len(unique_userid_similar_item)

274

In [54]:
unique_userid_similar_item

{1,
 6,
 7,
 13,
 18,
 26,
 27,
 32,
 38,
 42,
 43,
 49,
 50,
 56,
 58,
 59,
 60,
 63,
 64,
 65,
 69,
 70,
 72,
 73,
 76,
 77,
 79,
 83,
 85,
 89,
 92,
 94,
 95,
 99,
 102,
 104,
 106,
 109,
 110,
 113,
 115,
 116,
 128,
 130,
 135,
 139,
 145,
 150,
 152,
 156,
 174,
 176,
 178,
 183,
 184,
 186,
 188,
 189,
 193,
 194,
 195,
 201,
 212,
 214,
 215,
 221,
 222,
 224,
 232,
 234,
 243,
 244,
 246,
 256,
 262,
 267,
 268,
 269,
 270,
 271,
 276,
 279,
 280,
 286,
 287,
 291,
 293,
 294,
 295,
 296,
 297,
 299,
 301,
 303,
 305,
 308,
 311,
 313,
 314,
 318,
 320,
 323,
 327,
 328,
 330,
 332,
 334,
 339,
 342,
 343,
 344,
 345,
 346,
 347,
 354,
 359,
 361,
 363,
 367,
 371,
 372,
 374,
 375,
 376,
 378,
 381,
 385,
 387,
 389,
 392,
 393,
 394,
 399,
 405,
 416,
 417,
 429,
 432,
 433,
 435,
 436,
 437,
 439,
 442,
 445,
 450,
 452,
 453,
 454,
 455,
 457,
 463,
 467,
 468,
 470,
 473,
 474,
 477,
 478,
 486,
 487,
 490,
 495,
 496,
 497,
 498,
 500,
 503,
 504,
 505,
 506,
 521,
 523,

## Step 7 - Item input watched Users

In [29]:
item_input_watched_user_list = list(ratings[ratings['movie_id']==item_input]['user_id'])
len(item_input_watched_user_list)

7

In [55]:
item_input_watched_user_list

[286, 276, 94, 184, 1, 551, 297]

In [31]:
sorted(item_input_watched_user_list)


[1, 94, 184, 276, 286, 297, 551]

## Step 9 - Create a list that should have recommended users id to the item input

In [32]:
recommended_user_list = []
for per_id in unique_movieid_similar_user:
    if per_id in item_input_watched_user_list:
        pass
    else:
        recommended_user_list.append(per_id)

In [33]:
len(recommended_user_list)

267

In [34]:
recommended_user_list

[6,
 7,
 521,
 523,
 524,
 13,
 528,
 18,
 532,
 533,
 536,
 26,
 27,
 541,
 542,
 32,
 545,
 38,
 42,
 43,
 554,
 557,
 560,
 49,
 561,
 50,
 566,
 567,
 56,
 58,
 59,
 60,
 63,
 64,
 577,
 65,
 578,
 69,
 582,
 70,
 72,
 73,
 586,
 76,
 77,
 79,
 592,
 593,
 83,
 595,
 85,
 89,
 92,
 95,
 99,
 102,
 104,
 106,
 618,
 620,
 109,
 110,
 624,
 113,
 115,
 627,
 116,
 633,
 635,
 637,
 128,
 130,
 643,
 642,
 135,
 647,
 648,
 650,
 139,
 653,
 654,
 655,
 145,
 659,
 150,
 662,
 664,
 152,
 156,
 669,
 682,
 174,
 176,
 178,
 690,
 693,
 183,
 697,
 186,
 699,
 188,
 189,
 193,
 194,
 195,
 711,
 712,
 201,
 717,
 719,
 721,
 212,
 214,
 215,
 727,
 730,
 221,
 222,
 224,
 736,
 741,
 232,
 234,
 749,
 243,
 244,
 246,
 758,
 762,
 764,
 766,
 256,
 770,
 262,
 774,
 778,
 267,
 268,
 269,
 270,
 271,
 782,
 790,
 279,
 280,
 795,
 796,
 287,
 291,
 293,
 294,
 295,
 296,
 299,
 301,
 815,
 303,
 305,
 819,
 308,
 311,
 823,
 313,
 314,
 828,
 318,
 320,
 834,
 323,
 327,
 328,
 330,
 8

In [35]:
sorted(recommended_user_list)

[6,
 7,
 13,
 18,
 26,
 27,
 32,
 38,
 42,
 43,
 49,
 50,
 56,
 58,
 59,
 60,
 63,
 64,
 65,
 69,
 70,
 72,
 73,
 76,
 77,
 79,
 83,
 85,
 89,
 92,
 95,
 99,
 102,
 104,
 106,
 109,
 110,
 113,
 115,
 116,
 128,
 130,
 135,
 139,
 145,
 150,
 152,
 156,
 174,
 176,
 178,
 183,
 186,
 188,
 189,
 193,
 194,
 195,
 201,
 212,
 214,
 215,
 221,
 222,
 224,
 232,
 234,
 243,
 244,
 246,
 256,
 262,
 267,
 268,
 269,
 270,
 271,
 279,
 280,
 287,
 291,
 293,
 294,
 295,
 296,
 299,
 301,
 303,
 305,
 308,
 311,
 313,
 314,
 318,
 320,
 323,
 327,
 328,
 330,
 332,
 334,
 339,
 342,
 343,
 344,
 345,
 346,
 347,
 354,
 359,
 361,
 363,
 367,
 371,
 372,
 374,
 375,
 376,
 378,
 381,
 385,
 387,
 389,
 392,
 393,
 394,
 399,
 405,
 416,
 417,
 429,
 432,
 433,
 435,
 436,
 437,
 439,
 442,
 445,
 450,
 452,
 453,
 454,
 455,
 457,
 463,
 467,
 468,
 470,
 473,
 474,
 477,
 478,
 486,
 487,
 490,
 495,
 496,
 497,
 498,
 500,
 503,
 504,
 505,
 506,
 521,
 523,
 524,
 528,
 532,
 533,
 536,
 5

In [36]:
# Checking the common user list
sorted(list(set(item_input_watched_user_list) & (set(unique_movieid_similar_user))))

[1, 94, 184, 276, 286, 297, 551]

### Converting the user_prediction score values into Table

In [37]:
item_pred = pd.DataFrame(item_prediction)
item_pred

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.446278,0.475473,0.505938,0.443633,0.512667,0.547939,0.446243,0.463059,0.474916,0.515821,...,0.580579,0.576202,0.582478,0.582478,0.575717,0.588155,0.588155,0.588155,0.573107,0.566696
2,0.108544,0.132957,0.125589,0.124932,0.131178,0.129005,0.110883,0.122223,0.109599,0.121525,...,0.135490,0.136546,0.134829,0.134829,0.134108,0.134458,0.134458,0.134458,0.136576,0.137111
3,0.085685,0.091690,0.087643,0.089966,0.089658,0.089985,0.083492,0.089725,0.085188,0.088331,...,0.089770,0.090506,0.086261,0.086261,0.089201,0.084659,0.084659,0.084659,0.089768,0.090845
4,0.053693,0.059604,0.058114,0.058364,0.059356,0.061472,0.053374,0.058615,0.055905,0.060601,...,0.061349,0.061686,0.061195,0.061195,0.060693,0.057937,0.057937,0.057937,0.061673,0.062281
5,0.224739,0.229171,0.263280,0.226387,0.259973,0.296529,0.232710,0.237109,0.258581,0.275076,...,0.297628,0.295990,0.299922,0.299922,0.298188,0.302051,0.302051,0.302051,0.293373,0.294309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.092574,0.113870,0.110211,0.112040,0.112768,0.123140,0.098578,0.110839,0.098858,0.118579,...,0.123829,0.124430,0.120776,0.120776,0.121360,0.125056,0.125056,0.125056,0.123470,0.124327
940,0.164358,0.184894,0.196502,0.164884,0.195860,0.209652,0.162840,0.165606,0.171761,0.194536,...,0.217536,0.215515,0.219136,0.219136,0.216173,0.218583,0.218583,0.218583,0.216582,0.216819
941,0.032300,0.045024,0.042924,0.043223,0.047493,0.051077,0.032761,0.042646,0.039399,0.047421,...,0.052762,0.053042,0.052692,0.052692,0.051514,0.053028,0.053028,0.053028,0.051910,0.052280
942,0.157779,0.174095,0.189000,0.163514,0.186140,0.194151,0.164910,0.156970,0.167038,0.181295,...,0.197537,0.194479,0.198479,0.198479,0.197969,0.199793,0.199793,0.199793,0.197394,0.200031


In [38]:
### Checking score values for item_input = 34

In [39]:
item_pred[item_input]

user_id
1      0.572414
2      0.139686
3      0.092378
4      0.062496
5      0.286815
         ...   
939    0.122120
940    0.217996
941    0.053046
942    0.198270
943    0.332126
Name: 34, Length: 943, dtype: float64

## Step 10&11 - With the help of 'Threshold values' we can select the highest-rated user list 
### From the recommended user list select the highest rated user. Based on Item prediction

In [43]:
highest_rated = []
item_input_pre = pd.DataFrame(item_pred[item_input])
item_input_pred = item_input_pre.T
for userid in recommended_user_list:
    value = item_input_pred[userid].values
    if (value >= 1):
        highest_rated.append(userid)

In [45]:
item_input_pre

Unnamed: 0_level_0,34
user_id,Unnamed: 1_level_1
1,0.572414
2,0.139686
3,0.092378
4,0.062496
5,0.286815
...,...
939,0.122120
940,0.217996
941,0.053046
942,0.198270


In [46]:
item_input_pred

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
34,0.572414,0.139686,0.092378,0.062496,0.286815,0.457489,0.931267,0.130499,0.0566,0.45916,...,0.374875,0.089246,0.321919,0.08228,0.206321,0.12212,0.217996,0.053046,0.19827,0.332126


In [48]:
len(highest_rated)

4

In [49]:
highest_rated

[13, 655, 416, 450]

In [50]:
# Checking the common movie list
list(set(recommended_user_list) & (set(item_input_watched_user_list)))

[]

### Compiled all the steps in single function

In [56]:
def itembased(item_input,item_similarity,item_prediction,similar_item_count,threshold):
    #Convert the item_similarity into DataFrame
    item_sim_table=pd.DataFrame(item_similarity)
    #Find similarity user for 34 using cosine table
    similar_item_input= item_sim_table[item_input].sort_values(ascending=True).head(similar_item_count).index
    #Convert into list
    similar_item_input=list(similar_item_input) 
    #Using similar_user_input,can select movie id from ratings table
    similar_item_userid_list=[]
    for sim_item in similar_item_input:
        sim=list(ratings[ratings['movie_id']==sim_item]['user_id'])
        similar_item_userid_list.append(sim)
    #Converting as a whole list
    import itertools
    similar_item_userid_single_list=list(itertools.chain.from_iterable(similar_item_userid_list))
    #Unique userid from the list
    unique_userid_similar_item=set(similar_item_userid_single_list)
    #Item input watched user list
    item_input_watched_user_list=list(ratings[ratings['movie_id']==item_input]['user_id'].values)
    #Create a list which should have recom userid to the item input
    recommended_user_list=[]
    for per_id in unique_userid_similar_item:
        if per_id in item_input_watched_user_list:
            pass
        else:
            recommended_user_list.append(per_id)
    #From recommendation list selecting only hightest rated(predicted) value
    highest_rated = []
    item_input_pre = pd.DataFrame(item_pred[item_input])
    item_input_pred = item_input_pre.T
    for userid in recommended_user_list:
        value = item_input_pred[userid].values
        if (value >= 1):
            highest_rated.append(userid)
    highest_rated
    return highest_rated

In [81]:
# item_input,item_similarity,item_prediction,similar_item_count,threshold
Recommended_Users = itembased(600,item_similarity,item_pred,5,0.5)

In [82]:
len(Recommended_Users)

3

In [83]:
Recommended_Users

[13, 655, 450]