# **Collaborative Filtering based Recommender System using K Nearest Neighbor**


In [1]:
import numpy as np
import math

### Load and exploring dataset


In [6]:
import pandas as pd

In [19]:
rating_df = pd.read_csv("data/ratings.csv")

In [20]:
rating_df.head()

Unnamed: 0,user,item,rating
0,1889878,CC0101EN,3.0
1,1342067,CL0101EN,3.0
2,1990814,ML0120ENv3,3.0
3,380098,BD0211EN,3.0
4,779563,DS0101EN,3.0


The dataset contains three columns, `user id` (learner), `item id`(course), and `rating`(enrollment mode). 

Note that this matrix is presented as the dense or vertical form, and you may convert it to a sparse matrix using `pivot` :


In [21]:
rating_sparse_df = rating_df.pivot(index='user', columns='item', values='rating').fillna(0).reset_index().rename_axis(index=None, columns=None)
rating_sparse_df.head()

Unnamed: 0,user,AI0111EN,BC0101EN,BC0201EN,BC0202EN,BD0101EN,BD0111EN,BD0115EN,BD0121EN,BD0123EN,...,SW0201EN,TA0105,TA0105EN,TA0106EN,TMP0101EN,TMP0105EN,TMP0106,TMP107,WA0101EN,WA0103EN
0,2,0.0,3.0,0.0,0.0,3.0,2.0,0.0,2.0,2.0,...,0.0,2.0,0.0,3.0,0.0,2.0,2.0,0.0,3.0,0.0
1,4,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,...,0.0,2.0,0.0,0.0,0.0,2.0,2.0,0.0,2.0,2.0
2,5,2.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,2.0,...,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0
3,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,8,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Implementation : Using **Surprise** library  


Now we import required classes and methods


In [23]:
from surprise import KNNBasic
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy

Now, let's load our own course rating dataset:


In [25]:
# Save the rating dataframe to a CSV file
rating_df.to_csv("course_ratings.csv", index=False)

# Read the course rating dataset with columns user item rating
reader = Reader(
    line_format='user item rating', sep=',', skip_lines=1, rating_scale=(2, 3))

# Load the dataset from the CSV file
course_dataset = Dataset.load_from_file("course_ratings.csv", reader=reader)


We split it into trainset and testset:


In [26]:

#smaller_dataset, _ = train_test_split(course_dataset, test_size=.8)
trainset, testset = train_test_split(course_dataset, test_size=.3)
trainset

<surprise.trainset.Trainset at 0x21d62a027a0>

then check how many users and items we can use to fit a KNN model:


In [27]:
print(f"Total {trainset.n_users} users and {trainset.n_items} items in the trainingset")

Total 31394 users and 124 items in the trainingset


### Performing KNN-based collaborative filtering on the user-item interaction matrix


In [28]:
sim_options = {
    "name": "cosine",
    "user_based": False,  # compute  similarities between items
}
model = KNNBasic(min_k=5,max_k=100,sim_options=sim_options)
model.fit(trainset)
predictions = model.test(testset)
rmse = accuracy.rmse(predictions)
print('RMSE:',rmse)


Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.2040
RMSE: 0.20402561326945703
