<a href="https://colab.research.google.com/github/Ali-Fartout/Getting-Ready-For-Project/blob/main/Collaboration_Filters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Requirements

In [1]:
! pip install kaggle
! mkdir ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d prajitdatta/movielens-100k-dataset

Downloading movielens-100k-dataset.zip to /content
100% 4.77M/4.77M [00:00<00:00, 32.9MB/s]



In [5]:
import zipfile
# Unpack the downloaded archive into the current working directory.
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile("/content/movielens-100k-dataset.zip", "r") as zip_ref:
    zip_ref.extractall()

# First Look

In [6]:
import pandas as pd
import numpy as np
#Column names for u.user, supplied explicitly through `names`
u_cols = ["user_id", "age", "sex", "occupation", "zip_code"]

#Read the pipe-delimited user file into a dataframe
users = pd.read_csv(
    "/content/ml-100k/u.user",
    names=u_cols,
    sep="|",
    encoding="latin-1",
)

users.head()

Unnamed: 0,user_id,age,sex,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [7]:
#Column names for u.item: five descriptive fields followed by the 0/1 category columns
i_cols = [
    "movie_id", "title", "release date", "video release date", "IMDb URL",
    "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
    "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
    "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
]

#Read the pipe-delimited item file into a dataframe
movies = pd.read_csv(
    "/content/ml-100k/u.item",
    names=i_cols,
    sep="|",
    encoding="latin-1",
)

movies.head()

Unnamed: 0,movie_id,title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [8]:
#Keep only the movie identifier and title columns
movies = movies[['movie_id', 'title']]

#Column names for the tab-separated u.data ratings file
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']

#Read the ratings file into a dataframe
ratings = pd.read_csv(
    '/content/ml-100k/u.data',
    names=r_cols,
    sep='\t',
    encoding='latin-1',
)

ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [9]:
#Drop the timestamp column. errors='ignore' keeps this cell re-runnable:
#without it a second run raises KeyError because the column is already gone.
ratings = ratings.drop(columns='timestamp', errors='ignore')

# Training and test data

In [10]:
#Import the train_test_split function
from sklearn.model_selection import train_test_split

#y holds the user_id labels used for stratification; X is an independent copy of ratings
y = ratings['user_id']
X = ratings.copy()

#Hold out 25% of the rows as test data, stratified on user_id, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

In [11]:
#Import the mean_squared_error function
from sklearn.metrics import mean_squared_error

#Root mean squared error (RMSE) between actual and predicted values
def rmse(y_true, y_pred):
    """Return the square root of mean_squared_error(y_true, y_pred)."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

In [12]:
#Baseline model: ignores both arguments and always predicts 3.
def baseline(user_id, movie_id):
    """Return a constant rating of 3.0 regardless of the user or movie."""
    constant_rating = 3.0
    return constant_rating

In [13]:
#RMSE of a model's predictions on the testing set
def score(cf_model):
    """Evaluate cf_model on X_test and return the RMSE of its predictions.

    cf_model is called as cf_model(user_id, movie_id) once per row of X_test,
    and the predictions are compared with the 'rating' column.
    """
    #Predict a rating for every (user, movie) pair in the testing dataset
    predictions = []
    for user, movie in zip(X_test['user_id'], X_test['movie_id']):
        predictions.append(cf_model(user, movie))
    y_pred = np.array(predictions)

    #Actual ratings given by the users in the test data
    y_true = np.array(X_test['rating'])

    return rmse(y_true, y_pred)
score(baseline)

1.2488234462885457

In [15]:
#Pivot the training ratings into a matrix with one row per user_id and one column per movie_id
r_matrix = X_train.pivot_table(index='user_id', columns='movie_id', values='rating')

r_matrix.head()

(943, 1641)

In [17]:
#Mean-rating model: predict the movie's mean rating from r_matrix
def cf_user_mean(user_id, movie_id):
    """Return the mean of the r_matrix column for movie_id, or 3.0 if it has none.

    user_id is accepted so the function matches the model(user_id, movie_id)
    call made by score(), but it is not used in the prediction.
    """
    #No column for this movie in r_matrix: default to a rating of 3.0
    if movie_id not in r_matrix:
        return 3.0

    #Mean of all the ratings present in the movie's column
    return r_matrix[movie_id].mean()

#Compute RMSE for the Mean model
score(cf_user_mean)

1.0300824802393536

In [18]:
#Dummy ratings matrix: same layout as r_matrix with every missing rating replaced by 0
r_matrix_dummy = r_matrix.fillna(0)

#Import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity

#Cosine similarity between every pair of rows of the dummy matrix,
#wrapped in a dataframe labelled by r_matrix's index on both axes
cosine_sim = pd.DataFrame(
    cosine_similarity(r_matrix_dummy, r_matrix_dummy),
    index=r_matrix.index,
    columns=r_matrix.index,
)

cosine_sim.head(10)

user_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.0,0.108361,0.046638,0.029577,0.245753,0.335853,0.344724,0.191582,0.057149,0.251979,0.235385,0.228356,0.304572,0.234445,0.129171,0.309772,0.188809,0.346046,0.100968,0.199532,0.166757,0.236226,0.297992,0.222576,0.220881,0.206157,0.106212,0.237319,0.128437,0.140023,0.0657,0.181805,0.03394,0.011083,0.02442,0.017731,0.14599,0.144916,0.054834,0.0751,...,0.088132,0.135066,0.115868,0.216639,0.248748,0.112567,0.192244,0.243273,0.164886,0.315299,0.057415,0.063335,0.444329,0.150487,0.210744,0.251917,0.054468,0.263734,0.251835,0.098013,0.229718,0.072834,0.065248,0.172213,0.198114,0.157317,0.211386,0.112948,0.273808,0.380418,0.257073,0.069412,0.231643,0.108093,0.176842,0.104799,0.232472,0.051528,0.129555,0.256333
2,0.108361,1.0,0.057613,0.130237,0.054918,0.190552,0.079399,0.076146,0.167992,0.147376,0.10071,0.08769,0.179865,0.170904,0.336956,0.082856,0.148535,0.134448,0.070199,0.05189,0.144626,0.025755,0.133631,0.10346,0.113986,0.262372,0.181119,0.045874,0.099802,0.178682,0.085454,0.168848,0.114961,0.168932,0.026121,0.081678,0.057475,0.02782,0.167157,0.191937,...,0.025587,0.236144,0.193923,0.212069,0.044199,0.049732,0.242911,0.0,0.0,0.074625,0.038895,0.260004,0.093701,0.255186,0.049829,0.309281,0.31149,0.148226,0.078822,0.175618,0.169203,0.0,0.194673,0.086257,0.03221,0.076731,0.22238,0.320522,0.010844,0.086376,0.136993,0.252887,0.255454,0.285193,0.232751,0.149088,0.102807,0.062386,0.109143,0.107686
3,0.046638,0.057613,1.0,0.139805,0.0,0.032485,0.043869,0.080968,0.022263,0.059925,0.025338,0.0,0.130786,0.027797,0.064841,0.03379,0.033218,0.0,0.072457,0.04126,0.153512,0.002765,0.022891,0.025828,0.031536,0.130869,0.0,0.046583,0.097863,0.07104,0.112275,0.106619,0.28692,0.11485,0.199781,0.15795,0.016675,0.038617,0.178786,0.316503,...,0.0412,0.132704,0.046466,0.080419,0.048187,0.03737,0.045587,0.0,0.011937,0.022697,0.0,0.231435,0.028872,0.031366,0.0,0.172285,0.234889,0.054099,0.02463,0.181877,0.009943,0.150564,0.242289,0.039911,0.077796,0.01287,0.011303,0.150248,0.0,0.011447,0.027402,0.0,0.17506,0.010343,0.105635,0.019052,0.127099,0.023917,0.060392,0.0
4,0.029577,0.130237,0.139805,1.0,0.0,0.04519,0.088586,0.199526,0.135013,0.026919,0.076747,0.078065,0.078496,0.08391,0.151148,0.014102,0.071625,0.032728,0.04687,0.063944,0.078067,0.073027,0.034276,0.10094,0.042499,0.133143,0.053838,0.120172,0.133272,0.131637,0.0,0.137936,0.239482,0.107528,0.102025,0.055604,0.155054,0.06245,0.175228,0.163922,...,0.088836,0.160588,0.037572,0.088923,0.079925,0.115111,0.120411,0.0,0.0,0.084925,0.0,0.122894,0.062253,0.0,0.0,0.133174,0.081929,0.109787,0.134332,0.126175,0.066107,0.110519,0.232643,0.121914,0.027958,0.0185,0.073114,0.15186,0.011766,0.069419,0.055392,0.049773,0.076549,0.139382,0.113886,0.0,0.130343,0.077357,0.15789,0.063911
5,0.245753,0.054918,0.0,0.0,1.0,0.176443,0.28186,0.132205,0.03879,0.1342,0.251593,0.092854,0.248074,0.223169,0.025404,0.2285,0.068526,0.231632,0.103015,0.198412,0.132421,0.30752,0.308427,0.106202,0.218025,0.060633,0.083527,0.299602,0.051691,0.094068,0.055939,0.056472,0.0,0.00337,0.011725,0.0,0.091807,0.18947,0.0,0.0,...,0.035732,0.030199,0.072539,0.114966,0.177732,0.037206,0.036854,0.220582,0.112036,0.252346,0.023278,0.0,0.243014,0.069952,0.132249,0.13472,0.0,0.183636,0.307767,0.074069,0.174632,0.016075,0.0,0.185434,0.07952,0.19972,0.100829,0.020942,0.237812,0.2872,0.183969,0.019305,0.073714,0.041807,0.081088,0.029743,0.188392,0.068342,0.055557,0.207259
6,0.335853,0.190552,0.032485,0.04519,0.176443,1.0,0.394725,0.143385,0.125126,0.372679,0.196718,0.140725,0.386684,0.290258,0.164953,0.31466,0.188439,0.431156,0.088296,0.138703,0.123537,0.150448,0.331849,0.219098,0.27431,0.16885,0.125905,0.132127,0.135272,0.113809,0.183697,0.094084,0.010371,0.0127,0.030929,0.055535,0.068317,0.091954,0.095617,0.083449,...,0.029816,0.121769,0.1152,0.174595,0.281924,0.12226,0.258604,0.274341,0.247464,0.310808,0.029241,0.094348,0.273167,0.08018,0.238146,0.251335,0.059512,0.148432,0.176555,0.067122,0.224236,0.069999,0.073177,0.096712,0.195236,0.286628,0.094991,0.169676,0.383691,0.331298,0.328643,0.070809,0.135806,0.17167,0.125446,0.086464,0.230566,0.095478,0.197307,0.185268
7,0.344724,0.079399,0.043869,0.088586,0.28186,0.394725,1.0,0.215861,0.121224,0.378723,0.305914,0.19663,0.444259,0.254973,0.091997,0.309837,0.08348,0.39767,0.147696,0.148335,0.210687,0.309957,0.28621,0.235292,0.250526,0.124554,0.077158,0.257359,0.124407,0.145055,0.132074,0.111642,0.06461,0.032914,0.054806,0.037804,0.185663,0.233275,0.029046,0.038124,...,0.074139,0.07411,0.084985,0.187129,0.32086,0.040994,0.139385,0.326799,0.275552,0.339253,0.039833,0.071376,0.345619,0.097803,0.210704,0.20544,0.04244,0.212374,0.288856,0.099363,0.255902,0.079612,0.078226,0.18314,0.097151,0.254555,0.117407,0.117471,0.411399,0.381967,0.339853,0.110866,0.096055,0.10469,0.126108,0.075012,0.270071,0.020036,0.236086,0.266571
8,0.191582,0.076146,0.080968,0.199526,0.132205,0.143385,0.215861,1.0,0.116173,0.169088,0.075552,0.156734,0.218887,0.134303,0.104316,0.201387,0.066766,0.151532,0.056013,0.199353,0.104119,0.372957,0.226112,0.141014,0.197471,0.151042,0.038605,0.176196,0.063708,0.295183,0.0,0.070059,0.07632,0.004673,0.048771,0.0,0.35288,0.09553,0.11057,0.035982,...,0.0,0.127711,0.037717,0.15303,0.126082,0.018572,0.158584,0.090707,0.0,0.234291,0.0,0.048066,0.195114,0.0,0.067687,0.101355,0.021363,0.164826,0.196006,0.141824,0.221637,0.022288,0.084872,0.142955,0.066823,0.265313,0.040194,0.103083,0.194046,0.253309,0.150048,0.064242,0.118297,0.053969,0.168057,0.095736,0.164157,0.076269,0.089871,0.210995
9,0.057149,0.167992,0.022263,0.135013,0.03879,0.125126,0.121224,0.116173,1.0,0.152694,0.021543,0.080805,0.092536,0.119426,0.064537,0.0669,0.174227,0.082271,0.060644,0.043167,0.095353,0.07559,0.130562,0.064851,0.14253,0.154271,0.111456,0.115107,0.071849,0.165986,0.0,0.154676,0.0,0.097811,0.031291,0.0,0.095367,0.02873,0.058041,0.090032,...,0.0,0.084623,0.147783,0.070184,0.099276,0.035745,0.06359,0.112475,0.055506,0.104294,0.077655,0.154191,0.104972,0.058339,0.035531,0.137848,0.038548,0.071025,0.085692,0.072362,0.155349,0.0,0.135191,0.0772,0.048232,0.105323,0.045408,0.092218,0.104536,0.113372,0.082819,0.0644,0.127051,0.069251,0.095673,0.0,0.131458,0.106763,0.089297,0.089583
10,0.251979,0.147376,0.059925,0.026919,0.1342,0.372679,0.378723,0.169088,0.152694,1.0,0.117907,0.170375,0.400139,0.204283,0.078609,0.373241,0.123725,0.36543,0.085606,0.165247,0.155444,0.168306,0.299478,0.241148,0.270917,0.107779,0.08,0.136591,0.087671,0.136034,0.188476,0.075794,0.014827,0.048418,0.039304,0.030984,0.067617,0.149506,0.046868,0.089477,...,0.090753,0.108465,0.168418,0.113553,0.303588,0.086593,0.198093,0.216891,0.217532,0.334196,0.094756,0.077472,0.2846,0.09631,0.206575,0.244755,0.066405,0.120441,0.192066,0.091844,0.239604,0.116753,0.12736,0.090578,0.090876,0.275763,0.139425,0.111322,0.359504,0.344987,0.279849,0.087828,0.131888,0.111841,0.094423,0.080883,0.255758,0.063461,0.169309,0.181031
