In [271]:
!pip install scikit-surprise
!pip install pymysql
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


In [274]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy
from dotenv import load_dotenv
import os
import pandas as pd
import pymysql

In [275]:
# Load environment variables from .env file
load_dotenv()

# Get MySQL connection details from environment variables
mysql_host = os.getenv("MYSQL_HOST")
mysql_user = os.getenv("MYSQL_USER")
mysql_password = os.getenv("MYSQL_PASSWORD")
mysql_database = os.getenv("MYSQL_DATABASE")

In [276]:
# Connect to MySQL
connection = pymysql.connect(
    host=mysql_host,
    user=mysql_user,
    password=mysql_password,
    database=mysql_database
)


In [277]:
# Query data from MySQL
feedbacks = "SELECT * FROM feedbacks"
data = pd.read_sql(feedbacks, connection)
pd.DataFrame(data)

  data = pd.read_sql(feedbacks, connection)


Unnamed: 0,id,user_id,course_id,rating,content,created_at,updated_at,deleted_at
0,4,4,1,5,"Cras mi pede, malesuada in, imperdiet et, comm...",2022-12-24 19:47:21,2023-01-08 22:29:37,
1,5,5,1,5,Duis at velit eu est congue elementum. In hac ...,2023-04-13 08:47:47,2023-10-31 21:33:56,
2,6,6,1,5,Phasellus id sapien in sapien iaculis congue. ...,2023-04-04 19:24:03,2023-08-24 02:41:51,
3,7,7,1,3,"Lorem ipsum dolor sit amet, consectetuer adipi...",2023-03-18 06:14:35,2023-08-06 04:07:42,
4,8,8,1,3,"Lorem ipsum dolor sit amet, consectetuer adipi...",2023-10-15 21:15:56,2022-12-04 00:50:01,
...,...,...,...,...,...,...,...,...
2769,2773,996,5,3,Duis at velit eu est congue elementum. In hac ...,2023-02-03 11:54:47,2023-07-09 02:00:08,
2770,2774,997,5,4,Nullam molestie nibh in lectus. Pellentesque a...,2023-09-11 15:15:17,2023-08-03 10:56:38,
2771,2775,998,5,4,Aliquam quis turpis eget elit sodales sceleris...,2023-07-15 10:30:10,2023-05-24 18:38:49,
2772,2776,999,5,3,"Vivamus metus arcu, adipiscing molestie, hendr...",2023-07-28 19:08:56,2023-11-24 12:56:54,


In [278]:
# Create a Reader object
reader = Reader(rating_scale=(1, 5))

# Load the data into a Surprise Dataset
data_surprise = Dataset.load_from_df(data[['user_id', 'course_id', 'rating']], reader)

In [279]:
# Split the data into training and testing sets
trainset, testset = train_test_split(data_surprise, test_size=0.8, random_state=3)

# Use the KNNBasic collaborative filtering algorithm
sim_options = {'name': 'cosine', 'user_based': False}
recommendation_model = KNNBasic(sim_options=sim_options)

# Train the model on the training set
recommendation_model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7c2f17ebdae0>

In [280]:
# Evaluate the model on the test set
predictions = recommendation_model.test(testset)

# Evaluate accuracy
accuracy.rmse(predictions)
accuracy.mae(predictions)

RMSE: 0.9455
MAE:  0.7408


0.7408281997111679

In [282]:
def get_top_recommendations(model, user_id, data, n=5):
    # Get item predictions for the customer
    item_predictions = []
    for item_id in set(data['course_id']):
        predicted_rating = model.predict(user_id, item_id).est
        item_predictions.append({'course_id': item_id, 'predicted_rating': predicted_rating})

    # Sort predictions by predicted rating
    item_predictions.sort(key=lambda x: x['predicted_rating'], reverse=True)

    # Get top recommended items
    top_recommendations = item_predictions[:n]

    return top_recommendations

In [283]:
get_top_recommendations(model=recommendation_model, user_id=5, data=data, n=5)

[{'course_id': 1, 'predicted_rating': 5},
 {'course_id': 4, 'predicted_rating': 5},
 {'course_id': 2, 'predicted_rating': 4.012635379061372},
 {'course_id': 3, 'predicted_rating': 4.012635379061372},
 {'course_id': 5, 'predicted_rating': 4.012635379061372}]