In [153]:
import sys
import os

# Add the src directory to the Python path
try:
    current_dir = os.path.dirname(__file__)
except NameError:
    current_dir = os.getcwd()  # Fallback to the current working directory

src_dir = os.path.abspath(os.path.join(current_dir, '..', '..'))
sys.path.append(src_dir)

# Now import the module from utils
from src.pipeline.etl import ETL

Collecting Data:

rating = 1 for every shiur in database. Can make something more advanced later.

In [154]:
db = ETL()
df = db.get_bookmakrs_df()
df['rating'] = 1
df.shape


2024-06-28 18:35:07,785 - root - INFO - ETL instance created


AttributeError: 'ETL' object has no attribute 'get_bookmakrs_df'

Create fastai DataLoaders

In [3]:
from fastai.collab import *

dls = CollabDataLoaders.from_df(df, user_name='user', item_name='shiur', rating_name='rating', bs=64)
dls.show_batch()

Unnamed: 0,user,shiur,rating
0,80580.0,1024228,1
1,79212.0,955116,1
2,221467.0,1079301,1
3,92378.0,1099483,1
4,211439.0,1065734,1
5,35049.0,1012650,1
6,209914.0,869348,1
7,218322.0,889236,1
8,212079.0,878883,1
9,10794.0,1098888,1


Run the model

In [149]:
from fastai.vision.all import *

learn = collab_learner(dls, n_factors=5, y_range=(0, 1), loss_func=BCEWithLogitsLossFlat())
learn.fit_one_cycle(15, 5e-3)

epoch,train_loss,valid_loss,time
0,0.470572,0.469695,00:01
1,0.454898,0.452677,00:01
2,0.423188,0.422151,00:01
3,0.386732,0.392136,00:01
4,0.363057,0.372727,00:01
5,0.348497,0.360532,00:01
6,0.33791,0.352672,00:01
7,0.332761,0.347605,00:01
8,0.329789,0.34429,00:01
9,0.327579,0.342122,00:01


Save the model

In [150]:
learn.model_dir = "saved_models/"
learn.save("user_collab_filtering_v1")

Path('saved_models/user_collab_filtering_v1.pth')

Use model

In [151]:
from typing import Dict, List
from base import BaseModel



class UserCollabFilteringV1(BaseModel):
    def __init__(self):
        model = learn.load("user_collab_filtering_v1")

    def get_recommendations(self, user_id: str = None, *args, **kwargs) -> List[int]:
        top_n = kwargs.get('top_n', 10)
        user_id = int(float(user_id))
        item_ids = dls.classes['shiur'].items[1:] # to avoid the na value
        item_ids = [int(item_id) for item_id in item_ids]
        user_tensor = torch.tensor([user_id] * len(item_ids)).unsqueeze(1)
        item_tensor = torch.tensor(item_ids).unsqueeze(1)
        input_tensor = torch.cat((user_tensor, item_tensor), dim=1)
        
        # Get predictions
        input_df = pd.DataFrame(input_tensor.numpy(), columns=['user', 'shiur'])
        user_item_dl = dls.test_dl(input_df)
        preds, _ = learn.get_preds(dl=user_item_dl)
        
        # Get top N recommendations
        top_indices = torch.argsort(preds, descending=True)[:top_n]
        top_item_ids = [item_ids[idx.item()] for idx in top_indices]
        return top_item_ids

    def get_weighted_recommendations(self, user_id: str = None, *args, **kwargs) -> Dict[int, float]:
        top_n = kwargs.get('top_n', 10)
        user_id = int(float(user_id))
        item_ids = dls.classes['shiur'].items[1:]
        item_ids = [int(item_id) for item_id in item_ids]
        user_tensor = torch.tensor([user_id] * len(item_ids)).unsqueeze(1)
        item_tensor = torch.tensor(item_ids).unsqueeze(1)
        input_tensor = torch.cat((user_tensor, item_tensor), dim=1)

        # Get predictions
        input_df = pd.DataFrame(input_tensor.numpy(), columns=['user', 'shiur'])
        user_item_dl = dls.test_dl(input_df)
        preds, _ = learn.get_preds(dl=user_item_dl)

        # Get top N recommendations with their scores
        top_indices = torch.argsort(preds, descending=True)[:top_n]
        top_item_ids = [item_ids[idx.item()] for idx in top_indices]
        top_scores = preds[top_indices].tolist()

        recommendations = {item_id: score for item_id, score in zip(top_item_ids, top_scores)}
        return recommendations

    def get_best_shiurim(self, shiur_num:int = 10): #based on highest bias
        shiur_bias = learn.model.i_bias.weight.squeeze()
        idxs = shiur_bias.argsort(descending=True)[:shiur_num]
        return [dls.classes['shiur'][i] for i in idxs]
    
    def get_user_bias(self, user_id:str = None):
        user_biases = learn.model.u_bias.weight
        user_idx = learn.dls.classes['user'].o2i[user_id]
        return user_biases[user_idx]
    
    def get_shiur_bias(self, shiur_id:str = None):
        item_biases = learn.model.i_bias.weight
        item_idx = learn.dls.classes['user'].o2i[shiur_id]
        return item_biases[item_idx]

Testing model

In [152]:
model = UserCollabFilteringV1()
print(model.get_shiur_bias('1098888'))
print(model.get_user_bias('0.387775	'))
print(model.get_best_shiurim(10))
print(model.get_recommendations("92378.0", top_n = 10))
print(model.get_weighted_recommendations("35049.0", top_n = 10))

tensor([-0.0008], grad_fn=<SelectBackward0>)
tensor([3.5387e-05], grad_fn=<SelectBackward0>)
[1098754, 1098495, 1098683, 1098679, 1099765, 1098089, 1098964, 1098020, 1097854, 1098108]


[1098754, 1098495, 1098683, 1098964, 1097854, 1098089, 1098108, 1098020, 1098832, 1098342]


{1097854: 0.7310585975646973, 1098495: 0.7310585975646973, 1098964: 0.7310585975646973, 1098089: 0.7310585975646973, 1098754: 0.7310585975646973, 1099332: 0.7310585975646973, 1098832: 0.7310585975646973, 1098020: 0.7310585975646973, 1098108: 0.7310585975646973, 1098958: 0.7310585975646973}
