In [1]:
import numpy as np
from scipy.io import loadmat  # this is the SciPy module that loads mat-files
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Read the MATLAB file once, then pull out the two matrices stored under
# the 'Y' and 'R' keys.
data = loadmat('rekomendacje_filmow/each_movie_data.mat')
data_Y, data_R = data['Y'], data['R']

In [3]:
# Display the 'Y' matrix loaded from the .mat file.
# NOTE(review): values look like ratings with 0 meaning "no rating" (zeros
# are dropped later when the long-form table is built) — confirm against
# the dataset description.
data_Y

array([[5, 4, 0, ..., 5, 0, 0],
       [3, 0, 0, ..., 0, 0, 5],
       [4, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [4]:
# Display the 'R' matrix loaded from the .mat file.
# NOTE(review): presumably a 0/1 indicator of which entries of Y are rated —
# confirm against the dataset description.
data_R

array([[1, 1, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [5]:
import random
import math

# Build a sparse vector of made-up ratings: one slot per row of data_R,
# zero everywhere except at evenly spaced positions.
my_ratings = np.zeros(data_R.shape[0])

# Spacing between the filled-in positions (roughly 20 of them overall).
# Clamped to at least 1: with fewer than 20 rows the floor would be 0 and
# any modulo/step by it would fail.
divider = max(1, math.floor(data_R.shape[0] / 20))

# Visit every divider-th index directly. The upper bound is len(my_ratings)
# (not len - 1) so the final index is eligible as well.
# NOTE: random is not seeded here, so the ratings differ on every run.
for x in range(0, len(my_ratings), divider):
    my_ratings[x] = random.randint(1, 5)

my_ratings

array([5., 0., 0., ..., 0., 3., 0.])

In [6]:
from surprise import Dataset
from surprise import Reader

# Wrap the Y matrix in a DataFrame (default integer row/column labels) so
# it can be reshaped into long form in the next step.
data_pd = pd.DataFrame(data_Y)
data_pd

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,933,934,935,936,937,938,939,940,941,942
0,5,4,0,0,4,4,0,0,0,4,...,2,3,4,0,4,0,0,5,0,0
1,3,0,0,0,3,0,0,0,0,0,...,4,0,0,0,0,0,0,0,0,5
2,4,0,0,0,0,0,0,0,0,0,...,0,0,4,0,0,0,0,0,0,0
3,3,0,0,0,0,0,5,0,0,4,...,5,0,0,0,0,0,2,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1678,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1679,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1680,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Reshape the wide matrix into one row per (row label, column label, value):
#   - stack() + reset_index() yields three columns, labelled iid / uid / r_ui
#   - entries whose value is 0 are filtered out
#   - columns are reordered to uid, iid, r_ui
# The original (pre-filter) integer index is kept on the surviving rows.
data_rows = (
    data_pd.stack()
    .reset_index()
    .set_axis(['iid', 'uid', 'r_ui'], axis=1)
    .loc[lambda frame: frame['r_ui'] != 0, ['uid', 'iid', 'r_ui']]
)
data_rows

Unnamed: 0,uid,iid,r_ui
0,0,0,5
1,1,0,4
4,4,0,4
5,5,0,4
9,9,0,4
...,...,...,...
1582273,862,1677,1
1583216,862,1678,3
1584159,862,1679,2
1585135,895,1680,3


In [8]:
from surprise import KNNWithMeans

# Item-based configuration: cosine similarity with user_based switched off.
sim_options = dict(name="cosine", user_based=False)

algo = KNNWithMeans(sim_options=sim_options)

In [9]:
from surprise import Dataset
from surprise import Reader

# Ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))

# Hand the long-form table to Surprise with columns in (uid, iid, r_ui)
# order, then train on every available rating (no hold-out split).
data = Dataset.load_from_df(data_rows.loc[:, ["uid", "iid", "r_ui"]], reader)
trainingSet = data.build_full_trainset()

algo.fit(trainingSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x1a0a7552b80>

In [10]:
# Estimated rating from `algo` for user id 2 on item id 0.
prediction = algo.predict(uid=2, iid=0)
prediction.est

3.34976355932551

In [11]:
from surprise import KNNWithMeans

# User-based configuration: cosine similarity with user_based switched on.
# This rebinds sim_options, so later cells that read it get these settings.
sim_options = dict(name="cosine", user_based=True)

algo_user_based = KNNWithMeans(sim_options=sim_options)

In [12]:
from surprise import KNNBasic

# Second model for comparison: KNNBasic built from whatever sim_options is
# currently bound (the user-based cosine settings, assuming the cells were
# run top to bottom).
algo_user_based_basic = KNNBasic(sim_options=sim_options)

In [13]:
from surprise import Dataset
from surprise import Reader

# Rebuild the Surprise dataset and full training set from the long-form
# table (ratings declared on a 1-5 scale, columns in uid, iid, r_ui order).
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data_rows.loc[:, ["uid", "iid", "r_ui"]], reader)
trainingSet = data.build_full_trainset()

# Train both user-based models on the same training set.
algo_user_based.fit(trainingSet)
algo_user_based_basic.fit(trainingSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1a0a7552a30>

In [18]:
# Estimated rating from `algo_user_based` for user id 1 on item id 0.
prediction = algo_user_based.predict(uid=1, iid=0)
prediction.est

3.9400827087775174

In [19]:
# Same user/item pair as above, this time from `algo_user_based_basic`.
prediction = algo_user_based_basic.predict(uid=1, iid=0)
prediction.est

4.124882420121779