In [113]:
!pip install scikit-surprise



In [122]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd

In [123]:
# Read the course feedback table from the CSV in the working directory.
data = pd.read_csv("feedbacks.csv")

# `read_csv` already returns a DataFrame, so display it directly as the
# cell's output instead of re-wrapping it in `pd.DataFrame(...)`.
data

Unnamed: 0,id,user_id,course_id,rating,content
0,4,4,1,3,pede malesuada in imperdiet et commodo vulputa...
1,5,5,2,4,a odio in hac habitasse platea dictumst maecen...
2,6,6,5,5,justo sollicitudin ut suscipit a feugiat et er...
3,7,7,2,5,morbi non lectus aliquam sit amet diam in magn...
4,8,8,1,5,proin risus praesent lectus vestibulum quam sa...
...,...,...,...,...,...
992,996,996,5,4,potenti nullam porttitor lacus at turpis donec...
993,997,997,1,4,aliquam augue quam sollicitudin vitae consecte...
994,998,998,5,4,vestibulum ac est lacinia nisi venenatis trist...
995,999,999,2,3,est et tempus semper est quam pharetra magna a...


In [154]:
# Reader configured with the rating bounds passed to Surprise (1 to 5).
reader = Reader(rating_scale=(1, 5))

# Hand Surprise only the three columns it consumes, in the order
# user / item / rating, and wrap them in a Dataset.
data_surprise = Dataset.load_from_df(
    df=data[['user_id', 'course_id', 'rating']],
    reader=reader,
)

In [166]:
# Hold out 20% of the ratings for evaluation; the fixed random_state keeps
# the split reproducible across runs.
trainset, testset = train_test_split(
    data_surprise,
    test_size=0.2,
    random_state=7,
)

# KNNBasic with cosine similarity; user_based=False selects item-item
# (course-to-course) similarities rather than user-user ones.
sim_options = dict(name='cosine', user_based=False)
recommendation_model = KNNBasic(sim_options=sim_options)

# Fit on the training portion only (kept as the last expression so the
# fitted estimator is echoed as the cell output).
recommendation_model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7c2f2535f610>

In [167]:
# Score every held-out (user, course, rating) triple with the fitted model.
predictions = recommendation_model.test(testset)

# Report both error metrics; each call prints its value, and the MAE is
# additionally echoed as the cell's output because it is the last expression.
accuracy.rmse(predictions, verbose=True)
accuracy.mae(predictions, verbose=True)

RMSE: 0.8191
MAE:  0.6890


0.6890276035131744

In [168]:
def get_top_recommendations(model, user_id, data, n=5, exclude_rated=False):
    """Return the ``n`` courses with the highest predicted rating for a user.

    Args:
        model: Object exposing ``predict(user_id, item_id)`` whose result has
            an ``est`` attribute (the predicted rating).
        user_id: Identifier of the user to score courses for; passed to
            ``model.predict`` unchanged.
        data: Table-like object indexable by column name (e.g. a DataFrame)
            with a ``'course_id'`` column. A ``'user_id'`` column is also
            required when ``exclude_rated`` is true.
        n: Maximum number of recommendations to return. ``None`` returns
            every candidate; zero or a negative value returns an empty list.
        exclude_rated: When true, courses that already appear in ``data`` for
            ``user_id`` are dropped from the candidates, so the user is not
            recommended something they have already rated. Defaults to
            ``False`` to keep the previous behaviour for existing callers.

    Returns:
        A list of ``{'course_id': ..., 'predicted_rating': ...}`` dicts sorted
        by predicted rating, highest first.
    """
    # Guard against negative n: a plain slice would silently return
    # "everything except the last |n|" instead of nothing.
    if n is not None and n <= 0:
        return []

    # Every distinct course in the table is a candidate.
    candidate_ids = set(data['course_id'])

    if exclude_rated:
        already_rated = {
            course
            for user, course in zip(data['user_id'], data['course_id'])
            if user == user_id
        }
        candidate_ids -= already_rated

    # Score each remaining candidate for this user.
    item_predictions = [
        {
            'course_id': item_id,
            'predicted_rating': model.predict(user_id, item_id).est,
        }
        for item_id in candidate_ids
    ]

    # Stable sort: ties keep the candidates' iteration order.
    item_predictions.sort(key=lambda x: x['predicted_rating'], reverse=True)

    return item_predictions[:n]

In [169]:
# Top five predicted courses for user 5, scored with the fitted KNN model.
get_top_recommendations(recommendation_model, 5, data, n=5)

[{'course_id': 1, 'predicted_rating': 4.053952321204517},
 {'course_id': 3, 'predicted_rating': 4.053952321204517},
 {'course_id': 4, 'predicted_rating': 4.053952321204517},
 {'course_id': 5, 'predicted_rating': 4.053952321204517},
 {'course_id': 2, 'predicted_rating': 4.0}]