In [269]:
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings("ignore")

#### a panel of experts sat down with each of these albums and assigned a rating from zero to one for these two features.
<img src="images/album_rated.png">

## Content Filtering: Manually Fitting A Model for One User

In [270]:
df = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'lofi_indie': [0.8, 0.95, 0.05, 0.02, 0.2], 
              'slick_pop': [0.1, 0.01, 0.99, 0.95, 0.9], 
              'an_ratings': [5, 5, 0, 1, 1]})

df

Unnamed: 0,artist,lofi_indie,slick_pop,an_ratings
0,ommazh,0.8,0.1,5
1,melt banana,0.95,0.01,5
2,bts,0.05,0.99,0
3,zhou shen,0.02,0.95,1
4,sanam,0.2,0.9,1


In [271]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(df[["lofi_indie", "slick_pop"]], df["an_ratings"])

In [272]:
model.coef_

array([-2.62715548, -7.19503777])

In [273]:
model.intercept_

7.70644309800463

#### The prediction is the dot product of An's user factors with the item's factors

In [274]:
model.predict(df[["lofi_indie", "slick_pop"]])

array([4.88521494, 5.13869502, 0.45199793, 0.8186141 , 0.70547801])

In [275]:
df["predictions"] = model.predict(df[["lofi_indie", "slick_pop"]])
df

Unnamed: 0,artist,lofi_indie,slick_pop,an_ratings,predictions
0,ommazh,0.8,0.1,5,4.885215
1,melt banana,0.95,0.01,5,5.138695
2,bts,0.05,0.99,0,0.451998
3,zhou shen,0.02,0.95,1,0.818614
4,sanam,0.2,0.9,1,0.705478


## Fitting a Model and Making a Prediction for Unseen Data

In [276]:
dfb = pd.DataFrame({'artist': ["ommazh", "melt banana", "zhou shen", "sanam"], 
              'lofi_indie': [0.8, 0.95, 0.02, 0.2], 
              'slick_pop': [0.1, 0.01, 0.95, 0.9], 
              'bhavana_ratings': [4, 5, 1, 1]})

dfb

Unnamed: 0,artist,lofi_indie,slick_pop,bhavana_ratings
0,ommazh,0.8,0.1,4
1,melt banana,0.95,0.01,5
2,zhou shen,0.02,0.95,1
3,sanam,0.2,0.9,1


In [277]:
modelb = LinearRegression()
modelb.fit(dfb[["lofi_indie", "slick_pop"]], dfb["bhavana_ratings"])
modelb.intercept_

4.045828058832336

In [278]:
modelb.coef_

array([ 0.74456621, -3.39291208])

In [279]:
modelb.predict(df[["lofi_indie", "slick_pop"]])

array([4.30218982, 4.71923684, 0.72407341, 0.83745291, 1.14112043])

In [280]:
dfb["predictions"] = modelb.predict(dfb[["lofi_indie", "slick_pop"]])

In [281]:
dfb

Unnamed: 0,artist,lofi_indie,slick_pop,bhavana_ratings,predictions
0,ommazh,0.8,0.1,4,4.30219
1,melt banana,0.95,0.01,5,4.719237
2,zhou shen,0.02,0.95,1,0.837453
3,sanam,0.2,0.9,1,1.14112


In [282]:
modelb.predict([[0.05, 0.99]])

array([0.72407341])

In [283]:
dfb_alt = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'lofi_indie': [0.8, 0.95, 0.05, 0.02, 0.2], 
              'slick_pop': [0.1, 0.01, 0.99, 0.95, 0.9], 
              'bhavana_ratings': [4, 5, None, 1, 1]})
dfb_alt

Unnamed: 0,artist,lofi_indie,slick_pop,bhavana_ratings
0,ommazh,0.8,0.1,4.0
1,melt banana,0.95,0.01,5.0
2,bts,0.05,0.99,
3,zhou shen,0.02,0.95,1.0
4,sanam,0.2,0.9,1.0


In [284]:
dfb_alt["predictions"] = modelb.predict(dfb_alt[["lofi_indie", "slick_pop"]])

In [285]:
dfb_alt

Unnamed: 0,artist,lofi_indie,slick_pop,bhavana_ratings,predictions
0,ommazh,0.8,0.1,4.0,4.30219
1,melt banana,0.95,0.01,5.0,4.719237
2,bts,0.05,0.99,,0.724073
3,zhou shen,0.02,0.95,1.0,0.837453
4,sanam,0.2,0.9,1.0,1.14112


## Repeating this for Cordelia and Diego

In [286]:
dfc = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'lofi_indie': [0.8, 0.95, 0.05, 0.02, 0.2], 
              'slick_pop': [0.1, 0.01, 0.99, 0.95, 0.9], 
              'cordelia_ratings': [None, 2, 4, 5, 5]})
dfc

Unnamed: 0,artist,lofi_indie,slick_pop,cordelia_ratings
0,ommazh,0.8,0.1,
1,melt banana,0.95,0.01,2.0
2,bts,0.05,0.99,4.0
3,zhou shen,0.02,0.95,5.0
4,sanam,0.2,0.9,5.0


In [287]:
modelc = LinearRegression()
dfc_no_missing = dfc.dropna()
modelc.fit(dfc_no_missing[["lofi_indie", "slick_pop"]], dfc_no_missing["cordelia_ratings"])
dfc["predictions"] = modelc.predict(dfc[["lofi_indie", "slick_pop"]])
dfc

Unnamed: 0,artist,lofi_indie,slick_pop,cordelia_ratings,predictions
0,ommazh,0.8,0.1,,2.23227
1,melt banana,0.95,0.01,2.0,2.054475
2,bts,0.05,0.99,4.0,4.76829
3,zhou shen,0.02,0.95,5.0,4.586739
4,sanam,0.2,0.9,5.0,4.590496


In [288]:
dfd = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'lofi_indie': [0.8, 0.95, 0.05, 0.02, 0.2], 
              'slick_pop': [0.1, 0.01, 0.99, 0.95, 0.9], 
              'diego_ratings': [2, None, 5, None, 4]})
dfd

Unnamed: 0,artist,lofi_indie,slick_pop,diego_ratings
0,ommazh,0.8,0.1,2.0
1,melt banana,0.95,0.01,
2,bts,0.05,0.99,5.0
3,zhou shen,0.02,0.95,
4,sanam,0.2,0.9,4.0


In [289]:
modeld = LinearRegression()
dfd_no_missing = dfd.dropna()
modeld.fit(dfd_no_missing[["lofi_indie", "slick_pop"]], dfd_no_missing["diego_ratings"])
dfd["predictions"] = modeld.predict(dfd[["lofi_indie", "slick_pop"]])
dfd

Unnamed: 0,artist,lofi_indie,slick_pop,diego_ratings,predictions
0,ommazh,0.8,0.1,2.0,2.0
1,melt banana,0.95,0.01,,1.0
2,bts,0.05,0.99,5.0,5.0
3,zhou shen,0.02,0.95,,5.463636
4,sanam,0.2,0.9,4.0,4.0


In [290]:
modeld.coef_

array([-9.39393939, -4.54545455])

## Collaborative Filtering

### Using Known User Factors instead of Known Item Factors

In [291]:
df1 = pd.DataFrame({'user': ["an", "bhavana", "cordelia", "diego"], 
              'lofi_indie': [5, 4, 2, 2], 
              'slick_pop': [1, 0, 5, 4], 
              'ommazh_ratings': [5, 4, None, 2]})
df1

Unnamed: 0,user,lofi_indie,slick_pop,ommazh_ratings
0,an,5,1,5.0
1,bhavana,4,0,4.0
2,cordelia,2,5,
3,diego,2,4,2.0


In [292]:
model1 = LinearRegression()
df1_no_missing = df1.dropna()
model1.fit(df1_no_missing[["lofi_indie", "slick_pop"]], df1_no_missing["ommazh_ratings"])
df1["predictions"] = model1.predict(df1[["lofi_indie", "slick_pop"]])
df1

Unnamed: 0,user,lofi_indie,slick_pop,ommazh_ratings,predictions
0,an,5,1,5.0,5.0
1,bhavana,4,0,4.0,4.0
2,cordelia,2,5,,2.0
3,diego,2,4,2.0,2.0


In [293]:
model1.intercept_

8.881784197001252e-16

In [294]:
model1.coef_

array([ 1.00000000e+00, -8.99383442e-17])

In [295]:
df3 = pd.DataFrame({'user': ["an", "bhavana", "cordelia", "diego"], 
              'lofi_indie': [5, 4, 2, 2], 
              'slick_pop': [1, 0, 5, 4], 
              'bts_ratings': [0, None, 4, 5]})
df3

Unnamed: 0,user,lofi_indie,slick_pop,bts_ratings
0,an,5,1,0.0
1,bhavana,4,0,
2,cordelia,2,5,4.0
3,diego,2,4,5.0


In [296]:
model3 = LinearRegression(fit_intercept = False)
df3_no_missing = df3.dropna()
model3.fit(df3_no_missing[["lofi_indie", "slick_pop"]], df3_no_missing["bts_ratings"])
df3["predictions"] = model3.predict(df3[["lofi_indie", "slick_pop"]])
df3

Unnamed: 0,user,lofi_indie,slick_pop,bts_ratings,predictions
0,an,5,1,0.0,0.10035
1,bhavana,4,0,,-0.765461
2,cordelia,2,5,4.0,4.903151
3,diego,2,4,5.0,3.845974


In [297]:
model3.intercept_

0.0

In [298]:
model3.coef_

array([-0.19136523,  1.0571762 ])

### A Manual Pass of Collaborative Filtering

In [299]:
dfa = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'f1': [0.172, 0.9, -0.1, 1.2, 0.53], 
              'f2': [0.96, 0.91, 0.6, 0.71, 0.21], 
              'an_ratings': [5, 5, 0, 1, 1]})
dfa

Unnamed: 0,artist,f1,f2,an_ratings
0,ommazh,0.172,0.96,5
1,melt banana,0.9,0.91,5
2,bts,-0.1,0.6,0
3,zhou shen,1.2,0.71,1
4,sanam,0.53,0.21,1


In [300]:
model = LinearRegression(fit_intercept = False)
model.fit(dfa[["f1", "f2"]], dfa["an_ratings"])

In [301]:
model.coef_

array([-0.16493458,  3.9810756 ])

In [302]:
dfb = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'f1': [0.172, 0.9, -0.1, 1.2, 0.53], 
              'f2': [0.96, 0.91, 0.6, 0.71, 0.21], 
              'bhavana_ratings': [4, 5, None, 0, 1]}).dropna()

model = LinearRegression(fit_intercept = False)
model.fit(dfb[["f1", "f2"]], dfb["bhavana_ratings"])
model.coef_

array([-1.7254504 ,  5.20482902])

In [303]:
model.predict(dfb[["f1", "f2"]])

array([4.6998584 , 3.18348905, 1.62488813, 0.17852538])

In [304]:
dfc = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'f1': [0.172, 0.9, -0.1, 1.2, 0.53], 
              'f2': [0.96, 0.91, 0.6, 0.71, 0.21], 
              'cordelia_ratings': [None, 2, 4, 5, 5]}).dropna()

model = LinearRegression(fit_intercept = False)
model.fit(dfc[["f1", "f2"]], dfc["cordelia_ratings"])
model.coef_

array([1.56292718, 3.52944221])

In [305]:
dfd = pd.DataFrame({'artist': ["ommazh", "melt banana", "bts", "zhou shen", "sanam"], 
              'f1': [0.172, 0.9, -0.1, 1.2, 0.53], 
              'f2': [0.96, 0.91, 0.6, 0.71, 0.21], 
              'diego_ratings': [2, None, 5, None, 4]}).dropna()

model = LinearRegression(fit_intercept = False)
model.fit(dfd[["f1", "f2"]], dfd["diego_ratings"])
model.coef_

array([3.58993387, 3.75881912])

In [306]:
df1 = pd.DataFrame({'user': ["an", "bhavana", "cordelia", "diego"], 
              'f1': [-0.165, -1.72, 1.56, 3.59], 
              'f2': [3.98, 5.2, 3.53, 3.76], 
              'ommazh_ratings': [5, 4, None, 2]}).dropna()

model = LinearRegression(fit_intercept = False)
model.fit(df1[["f1", "f2"]], df1["ommazh_ratings"])
model.coef_

array([-0.24483934,  0.86243485])

## The SURPRISE Library

In [307]:
ratings_dict = {'userID': ['an', 'bhavana', 'diego', 
                           'an', 'bhavana', 'cordelia', 
                           'an', 'cordelia', 'diego',
                           'an', 'bhavana', 'cordelia',
                           'an', 'bhavana', 'cordelia', 'diego'],
                'itemID': ['ommazh', 'ommazh', 'ommazh',
                           'melt banana', 'melt banana', 'melt banana', 
                           'bts', 'bts', 'bts',
                           'zhou shen', 'zhou shen', 'zhou shen',
                           'sanam', 'sanam', 'sanam', 'sanam'],                
                'rating': [5, 4, 2,
                          5, 5, 2, 
                          0, 4, 5,
                          1, 0, 5,
                          1, 1, 5, 4]}
df = pd.DataFrame(ratings_dict)

In [308]:
df

Unnamed: 0,userID,itemID,rating
0,an,ommazh,5
1,bhavana,ommazh,4
2,diego,ommazh,2
3,an,melt banana,5
4,bhavana,melt banana,5
5,cordelia,melt banana,2
6,an,bts,0
7,cordelia,bts,4
8,diego,bts,5
9,an,zhou shen,1


In [309]:
from surprise import Dataset
from surprise import Reader
reader = Reader(rating_scale=(0, 5))
sf = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

In [310]:
training_data = sf.build_full_trainset()

In [311]:
training_data

<surprise.trainset.Trainset at 0x2b849a05f60>

In [312]:
import surprise
model = surprise.SVD(n_factors = 2, n_epochs = 10000, biased = False)
model.fit(training_data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2b846954f10>

In [313]:
model.predict("an", "ommazh")

Prediction(uid='an', iid='ommazh', r_ui=None, est=4.830197366726953, details={'was_impossible': False})

In [314]:
model.predict("cordelia", "ommazh")

Prediction(uid='cordelia', iid='ommazh', r_ui=None, est=5, details={'was_impossible': False})

In [315]:
test_data = training_data.build_testset()
predictions = model.test(test_data)
predictions

[Prediction(uid='an', iid='ommazh', r_ui=5.0, est=4.830197366726953, details={'was_impossible': False}),
 Prediction(uid='an', iid='melt banana', r_ui=5.0, est=4.99868762893539, details={'was_impossible': False}),
 Prediction(uid='an', iid='bts', r_ui=0.0, est=0, details={'was_impossible': False}),
 Prediction(uid='an', iid='zhou shen', r_ui=1.0, est=0.8579092365024903, details={'was_impossible': False}),
 Prediction(uid='an', iid='sanam', r_ui=1.0, est=1.3829953180940184, details={'was_impossible': False}),
 Prediction(uid='bhavana', iid='ommazh', r_ui=4.0, est=4.094709231548621, details={'was_impossible': False}),
 Prediction(uid='bhavana', iid='melt banana', r_ui=5.0, est=4.90529616373693, details={'was_impossible': False}),
 Prediction(uid='bhavana', iid='zhou shen', r_ui=0.0, est=0.15885878754958527, details={'was_impossible': False}),
 Prediction(uid='bhavana', iid='sanam', r_ui=1.0, est=0.727069571561061, details={'was_impossible': False}),
 Prediction(uid='diego', iid='ommazh',

In [316]:
surprise.accuracy.mse(predictions)

MSE: 0.0235


0.023507012080931828

In [317]:
# user factors
model.pu

array([[-0.94813625,  1.68549368],
       [-0.59107481,  1.71581714],
       [-2.06086311, -1.56129757],
       [-2.68119982,  0.2584906 ]])

In [318]:
# item factors
model.qi

array([[-2.19819629,  1.62920088],
       [-0.49000981,  2.69006738],
       [-1.62289607, -0.97581844],
       [-1.90977884, -0.56530696],
       [-1.81975951, -0.20313612]])

In [319]:
model.pu @ model.qi.T

array([[ 4.83019737,  4.99868763, -0.10600922,  0.85790924,  1.38299532],
       [ 4.09470923,  4.90529616, -0.71507302,  0.15885879,  0.72706957],
       [ 1.98651427, -3.19015253,  4.8681096 ,  4.81840514,  4.06743117],
       [ 6.3149366 ,  2.00917134,  4.09906874,  4.97437213,  4.82663009]])

## Plotting Item Factors and Computing Item Distance

In [320]:
df = pd.DataFrame([[-1.44352162, -2.32437267, 'ommazh'],
       [-2.64083923, -0.70888811, 'melt banana'],
       [ 1.10555829, -1.53744304, 'bts'],
       [ 0.71994672, -1.85701467, 'zhou shen'],
       [ 0.35161794, -1.79698869, 'sanam']],
        columns = ['F1', 'F2', 'artist'])

df

Unnamed: 0,F1,F2,artist
0,-1.443522,-2.324373,ommazh
1,-2.640839,-0.708888,melt banana
2,1.105558,-1.537443,bts
3,0.719947,-1.857015,zhou shen
4,0.351618,-1.796989,sanam


In [321]:
import plotly.express as px
fig = px.scatter(df, x = "F1", y = "F2", text = "artist")
fig.update_traces(textposition='top center')
fig.update_layout(font_size = 16,  margin=dict(l=1, r=1, t=1, b=1),
                  yaxis_range=[-2.5,-0.5], xaxis_range = [-3.0, 1.5])
#fig.write_image("artists.png")
fig.show()

In [322]:
ratings_dict = {'itemID': ['ommazh', 'ommazh', 'ommazh', 'ommazh',
                           'melt banana', 'melt banana', 'melt banana', 'melt banana',
                           'bts', 'bts', 'bts', 'bts',
                           'zhou shen', 'zhou shen', 'zhou shen', 'zhou shen',
                           'sanam', 'sanam', 'sanam', 'sanam'],
                'userID': ['an', 'bhavana', 'cordelia', 'diego', 
                           'an', 'bhavana', 'cordelia', 'diego', 
                           'an', 'bhavana', 'cordelia', 'diego',
                           'an', 'bhavana', 'cordelia', 'diego',
                           'an', 'bhavana', 'cordelia', 'diego'],
                'rating': [5, 4, 2, 2,
                          5, 5, 2, 2,
                          0, 0, 4, 5,
                          1, 0, 5, 5,
                          1, 1, 5, 4]}
df = pd.DataFrame(ratings_dict)

Let's build a two factor model.

In [323]:
from surprise import Reader
reader = Reader(rating_scale=(0, 5))
surprise_data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
surprise_train_data = surprise_data.build_full_trainset()
from surprise import SVD
algo = SVD(n_factors = 2, biased = False, n_epochs = 2000)
algo.fit(surprise_train_data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2b8469b0430>

In [324]:
algo.predict('an', 'bts')

Prediction(uid='an', iid='bts', r_ui=None, est=0.15933928669669695, details={'was_impossible': False})

In [325]:
surprise_train_data._raw2inner_id_items

{'ommazh': 0, 'melt banana': 1, 'bts': 2, 'zhou shen': 3, 'sanam': 4}

In [326]:
surprise_train_data._raw2inner_id_users

{'an': 0, 'bhavana': 1, 'cordelia': 2, 'diego': 3}

In [327]:
algo.bi[2] + algo.bu[0] + np.dot(algo.qi[2], algo.pu[0]) 

0.15933928669669695

In [328]:
algo.bu[0]

0.0

In [329]:
algo.trainset.global_mean

2.9

In [330]:
algo.qi

array([[ 1.71303163,  1.39277783],
       [ 1.94906726,  1.45319604],
       [-0.97872654,  1.72474103],
       [-0.84180954,  1.99056855],
       [-0.49178466,  1.85694124]])

In [331]:
algo.pu

array([[ 1.8195171 ,  1.12489291],
       [ 1.74720931,  0.89829247],
       [-0.61515345,  2.23315062],
       [-0.66651944,  2.22091047]])

Our 2 factor model's predictions

In [332]:
algo.pu @ algo.qi.T

array([[ 4.68361625,  5.18105115,  0.15933929,  0.70748961,  1.19404944],
       [ 4.24414664,  4.71082353, -0.16071825,  0.31729528,  0.80882559],
       [ 2.05650537,  2.04623021,  4.45367351,  4.96308144,  4.44935251],
       [ 1.95146599,  1.92832709,  4.48283568,  4.98195696,  4.45188427]])

The original data (note: fairly simlar to predictions above)

In [333]:
#m

## 4 factors (equivalent to SVD)

In [334]:
reader = Reader(rating_scale=(0, 5))
surprise_data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
surprise_train_data = surprise_data.build_full_trainset()
algo = SVD(n_factors = 4, biased = False, n_epochs = 2000)
algo.fit(surprise_train_data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2b8469b3e50>

Below, we see that with four factors, we're even closer to the original data.

In [335]:
algo.pu @ algo.qi.T

array([[ 4.92551054,  4.97775933,  0.02725963,  0.96920437,  1.0195284 ],
       [ 3.99238964,  4.92611008, -0.01074088,  0.04293881,  0.97909568],
       [ 1.9979191 ,  1.99896532,  4.00916651,  4.96311612,  4.92208447],
       [ 1.99123528,  1.98843581,  4.92242099,  4.96447094,  4.00941011]])

In [336]:
#m

In [337]:
algo.pu

array([[-1.65421821, -0.11378509,  0.06237906,  1.45835489],
       [-1.55345767, -0.16146321,  1.02187732,  0.84058623],
       [-0.74937869,  2.26692646, -0.07371344,  0.26806284],
       [ 0.0950929 ,  2.09002925,  0.35337464,  1.14158039]])

In [338]:
algo.qi.T

array([[-1.64111084e+00, -1.83078635e+00,  5.01575826e-01,
        -1.10840420e-01, -7.43739198e-01],
       [ 1.65084699e-01,  1.54211001e-01,  1.86493541e+00,
         2.05535721e+00,  1.92965166e+00],
       [ 1.87191845e-01,  9.87215290e-01,  4.59759880e-01,
        -3.90641228e-01,  1.32090788e-01],
       [ 1.52079747e+00,  1.30640667e+00,  7.13473753e-01,
         7.15934760e-01,  3.75327092e-04]])

In [339]:
#u

## Partial information only

In [340]:
ratings_dict = {'itemID': ['ommazh', 'ommazh', 'ommazh', 
                           'melt banana', 'melt banana', 'melt banana',
                           'bts', 'bts', 'bts',
                           'zhou shen', 'zhou shen', 'zhou shen',
                           'sanam', 'sanam', 'sanam', 'sanam'],
                'userID': ['an', 'bhavana', 'diego', 
                           'an', 'bhavana', 'cordelia', 
                           'an', 'cordelia', 'diego',
                           'an', 'bhavana', 'cordelia',
                           'an', 'bhavana', 'cordelia', 'diego'],
                'rating': [5, 4, 2, 5, 5, 2, 0, 4, 5, 1, 0, 5, 1, 1, 5, 4]}
df = pd.DataFrame(ratings_dict)

In [341]:
df

Unnamed: 0,itemID,userID,rating
0,ommazh,an,5
1,ommazh,bhavana,4
2,ommazh,diego,2
3,melt banana,an,5
4,melt banana,bhavana,5
5,melt banana,cordelia,2
6,bts,an,0
7,bts,cordelia,4
8,bts,diego,5
9,zhou shen,an,1


In [342]:
from surprise import Reader
reader = Reader(rating_scale=(0, 5))

In [343]:
surprise_data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

In [344]:
surprise_train_data = surprise_data.build_full_trainset()

In [345]:
from surprise import SVD

In [346]:
algo = SVD(n_factors = 2)

In [347]:
algo.fit(surprise_train_data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2b8498bf130>

In [348]:
algo.predict('an', 'bts')

Prediction(uid='an', iid='bts', r_ui=None, est=2.7729893178249365, details={'was_impossible': False})

In [349]:
algo.pu

array([[-0.00843373, -0.13320058],
       [ 0.1898689 ,  0.05935928],
       [ 0.09213678,  0.06144789],
       [ 0.05545923, -0.16803106]])

In [350]:
algo.bu

array([-0.26744707, -0.1877779 ,  0.1582294 ,  0.31777924])

In [351]:
algo.qi

array([[ 0.07189167, -0.04756338],
       [ 0.00936721, -0.05081793],
       [ 0.03607863, -0.02902683],
       [-0.01053925, -0.05944185],
       [-0.15714016, -0.02191264]])

In [352]:
algo.bi

array([ 0.16741385,  0.2455253 , -0.02562573, -0.26945014, -0.09723473])

In [353]:
algo.pu @ algo.qi.T

array([[ 0.00572915,  0.00668998,  0.00356211,  0.00800657,  0.00424405],
       [ 0.01082666, -0.00123797,  0.0051272 , -0.0055295 , -0.03113675],
       [ 0.0037012 , -0.00225959,  0.00154053, -0.00462363, -0.01582487],
       [ 0.01197918,  0.00905849,  0.0068783 ,  0.00940358, -0.00503287]])

In [354]:
algo.pu[0, :]

array([-0.00843373, -0.13320058])

In [355]:
algo.qi[0, :]

array([ 0.07189167, -0.04756338])

In [356]:
np.dot(algo.pu[0, :], algo.qi[0, :]) + algo.bu[0] + algo.bi[0]

-0.0943040657325736

In [357]:
# https://stackoverflow.com/questions/66169281/python-surprise-package-gives-different-predictions-for-predict-method-vs-manual