In [8]:
import sys
import os

# Get the current working directory of the notebook
notebook_dir = os.getcwd()

# Add the src directory to the Python path
src_dir = os.path.abspath(os.path.join(notebook_dir, '..', 'src'))
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Now you can import etl
from pipeline import etl

ModuleNotFoundError: No module named 'src'

Collecting Data:

rating = 1 for every shiur in database. Can make something more advanced later.

In [156]:
db = ETL()
df = db.get_bookmarks_df()
df['rating'] = 1
df.shape


2024-06-28 18:35:44,158 - root - INFO - ETL instance created
2024-06-28 18:35:44,159 - root - INFO - START: Bookmarks Query
2024-06-28 18:35:44,256 - root - INFO - END: Favorites Query


(24677, 11)

Create fastai DataLoaders

In [157]:
from fastai.collab import *

dls = CollabDataLoaders.from_df(df, user_name='user', item_name='shiur', rating_name='rating', bs=64)
dls.show_batch()

Unnamed: 0,user,shiur,rating
0,48586.0,720247,1
1,49170.0,767032,1
2,37795.0,726986,1
3,51057.0,1099721,1
4,79749.0,733521,1
5,76777.0,1065957,1
6,2479.0,1099629,1
7,69741.0,869664,1
8,7725.0,761023,1
9,37769.0,759078,1


Run the model

In [159]:
from fastai.vision.all import *

learn = collab_learner(dls, n_factors=5, y_range=(0, 1), loss_func=BCEWithLogitsLossFlat())
learn.fit_one_cycle(15, 5e-3)

epoch,train_loss,valid_loss,time
0,0.470583,0.470057,00:01
1,0.454623,0.452919,00:01
2,0.422855,0.422133,00:01
3,0.387729,0.392547,00:01
4,0.363661,0.373325,00:01
5,0.348175,0.361069,00:01
6,0.338278,0.353237,00:01
7,0.332768,0.348102,00:01
8,0.328935,0.344708,00:01
9,0.327235,0.342509,00:01


Save the model

In [160]:
learn.model_dir = "saved_models/"
learn.save("user_collab_filtering_v1")

Path('saved_models/user_collab_filtering_v1.pth')

Use model

In [161]:
from typing import Dict, List
from base import BaseModel



class UserCollabFilteringV1(BaseModel):
    def __init__(self):
        model = learn.load("user_collab_filtering_v1")

    def get_recommendations(self, user_id: str = None, *args, **kwargs) -> List[int]:
        top_n = kwargs.get('top_n', 10)
        user_id = int(float(user_id))
        item_ids = dls.classes['shiur'].items[1:] # to avoid the na value
        item_ids = [int(item_id) for item_id in item_ids]
        user_tensor = torch.tensor([user_id] * len(item_ids)).unsqueeze(1)
        item_tensor = torch.tensor(item_ids).unsqueeze(1)
        input_tensor = torch.cat((user_tensor, item_tensor), dim=1)
        
        # Get predictions
        input_df = pd.DataFrame(input_tensor.numpy(), columns=['user', 'shiur'])
        user_item_dl = dls.test_dl(input_df)
        preds, _ = learn.get_preds(dl=user_item_dl)
        
        # Get top N recommendations
        top_indices = torch.argsort(preds, descending=True)[:top_n]
        top_item_ids = [item_ids[idx.item()] for idx in top_indices]
        return top_item_ids

    def get_weighted_recommendations(self, user_id: str = None, *args, **kwargs) -> Dict[int, float]:
        top_n = kwargs.get('top_n', 10)
        user_id = int(float(user_id))
        item_ids = dls.classes['shiur'].items[1:]
        item_ids = [int(item_id) for item_id in item_ids]
        user_tensor = torch.tensor([user_id] * len(item_ids)).unsqueeze(1)
        item_tensor = torch.tensor(item_ids).unsqueeze(1)
        input_tensor = torch.cat((user_tensor, item_tensor), dim=1)

        # Get predictions
        input_df = pd.DataFrame(input_tensor.numpy(), columns=['user', 'shiur'])
        user_item_dl = dls.test_dl(input_df)
        preds, _ = learn.get_preds(dl=user_item_dl)

        # Get top N recommendations with their scores
        top_indices = torch.argsort(preds, descending=True)[:top_n]
        top_item_ids = [item_ids[idx.item()] for idx in top_indices]
        top_scores = preds[top_indices].tolist()

        recommendations = {item_id: score for item_id, score in zip(top_item_ids, top_scores)}
        return recommendations

    def get_best_shiurim(self, shiur_num:int = 10): #based on highest bias
        shiur_bias = learn.model.i_bias.weight.squeeze()
        idxs = shiur_bias.argsort(descending=True)[:shiur_num]
        return [dls.classes['shiur'][i] for i in idxs]
    
    def get_user_bias(self, user_id:str = None):
        user_biases = learn.model.u_bias.weight
        user_idx = learn.dls.classes['user'].o2i[user_id]
        return user_biases[user_idx]
    
    def get_shiur_bias(self, shiur_id:str = None):
        item_biases = learn.model.i_bias.weight
        item_idx = learn.dls.classes['user'].o2i[shiur_id]
        return item_biases[item_idx]

Testing model

In [162]:
model = UserCollabFilteringV1()
print(model.get_shiur_bias('1098888'))
print(model.get_user_bias('0.387775	'))
print(model.get_best_shiurim(10))
print(model.get_recommendations("92378.0", top_n = 10))
print(model.get_weighted_recommendations("35049.0", top_n = 10))

tensor([-0.0084], grad_fn=<SelectBackward0>)
tensor([-0.0045], grad_fn=<SelectBackward0>)
[1098495, 1098754, 1098342, 1098683, 1098964, 1097854, 1099765, 1098108, 1098089, 1097678]


[1098754, 1098495, 1098683, 1098342, 1098964, 1097854, 1098108, 1098089, 1099332, 1097846]


{1098089: 0.7310585975646973, 1097854: 0.7310585975646973, 1098683: 0.7310585975646973, 1098958: 0.7310585975646973, 1099697: 0.7310585975646973, 1097846: 0.7310585975646973, 1099915: 0.7310585975646973, 1098108: 0.7310585975646973, 1099332: 0.7310585975646973, 1097815: 0.7310585975646973}
