In [1]:
"""
Code for Recommender System (AutoRec), using pytorch 2.3 and python 3.12 and lightning 2.5
"""
import torch
from torch import nn
import lightning as L
import torch.nn.functional as F
from torchmetrics import MeanSquaredError

import os
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split

from pytorch_lightning.loggers import TensorBoardLogger

In [2]:
def read_data_ml100k(data_dir:str="../../Data/ml-100k") -> pd.DataFrame:
    names = ['user_id', 'item_id', 'rating', 'timestamp']
    data = pd.read_csv(os.path.join(data_dir, 'u.data'), sep='\t', names=names, engine='python')
    num_users = data.user_id.unique().shape[0]
    num_items = data.item_id.unique().shape[0]
    return data, num_users, num_items

class MFData(Dataset):
    def __init__(self, data_dir:str="../../Data/ml-100k", normalize_rating:bool=False):
        self.data_dir = data_dir
        self.normalize_rating = normalize_rating
        self.df, self.num_users, self.num_items = read_data_ml100k(data_dir)
        self.user_id = self.df.user_id.values - 1
        self.item_id = self.df.item_id.values - 1
        self.rating = self.df.rating.values.astype(np.float32)
        
    def split(self, train_ratio=0.8):
        train_len = int(train_ratio * len(self))
        test_len = len(self) - train_len
        return random_split(self, [train_len, test_len])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx:int):
        return self.user_id[idx], self.item_id[idx], self.rating[idx]
        
class AutoRecData(MFData):
    def __init__(self, data_dir:str="../../Data/ml-100k", user_based=False, normalize_rating:bool=False):
        super().__init__(data_dir, normalize_rating)
        self.user_based = user_based
        self.rating_matrix = np.zeros(
            (self.num_items, self.num_users), dtype=np.float32)
        self.rating_matrix[self.item_id, self.user_id] = self.rating
        if normalize_rating:
            self.rating_matrix /= 5.0

    def __len__(self):
        if self.user_based:
            return self.num_users
        else:
            return self.num_items

    def __getitem__(self, idx:int):
        if self.user_based:
            return self.rating_matrix[:, idx]
        else:
            return self.rating_matrix[idx]

class LitAutoRecData(L.LightningDataModule):
    def __init__(
        self, 
        dataset:Dataset, 
        train_ratio:float=0.8, 
        batch_size:int=32, 
        num_workers:int=4
    ):
        self.dataset = dataset
        self.train_ratio = train_ratio
        self.dataloader_kwargs = {
            "batch_size": batch_size,
            "num_workers": num_workers,
            "persistent_workers": True if num_workers > 0 else False
        }
        self._log_hyperparams = True
        self.allow_zero_length_dataloader_with_multiple_devices = False

    def setup(self, stage:str):
        self.num_users = getattr(self.dataset, "num_users", None)
        self.num_items = getattr(self.dataset, "num_items", None)
        self.train_split, self.test_split = self.dataset.split(
            self.train_ratio)

    def train_dataloader(self):
        return DataLoader(self.train_split, **self.dataloader_kwargs, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.test_split, **self.dataloader_kwargs, shuffle=False)

    def test_dataloader(self):
        return DataLoader(self.test_split, **self.dataloader_kwargs, shuffle=False)

In [6]:
class AutoRec(nn.Module):
    def __init__(
        self, 
        num_hidden:int, 
        num_users:int, 
        num_items:int,
        user_based:bool=True,
        dropout:float=0.05
    ):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.LazyLinear(num_hidden, bias=True), 
            nn.Sigmoid(),
            nn.Dropout(dropout)
        )
        
        if user_based:
            self.decoder = nn.LazyLinear(num_items, bias=True)
        else:
            self.decoder = nn.LazyLinear(num_users, bias=True)

    def forward(self, input: torch.Tensor):
        hidden = self.encoder(input)
        pred = self.decoder(hidden)
        return pred

class LitAutoRec(L.LightningModule):
    def __init__(self, model:nn.Module, lr:float=0.01, **kwargs):
        super().__init__()
        self.save_hyperparameters()
        self.model = model(**kwargs)
        self.lr = lr
        self.rmse = MeanSquaredError()
        self.training_step_outputs = []
        self.validation_step_outputs = []

    def get_loss(self, pred_ratings:torch.Tensor, batch:torch.Tensor):
        mask = (batch > 0).to(torch.float32) # only consider observed ratings
        print(pred_ratings.shape, batch.shape)
        pred_ratings = pred_ratings * mask
        return F.mse_loss(pred_ratings, batch)

    def forward(self, batch:torch.Tensor):
        return self.model(batch)
        
    def training_step(self, batch:torch.Tensor, batch_idx:int):
        outputs = self(batch)
        loss = self.get_loss(outputs, batch)
        self.training_step_outputs.append(loss)
        return loss
        
    def validation_step(self, batch:torch.Tensor, batch_idx:int):
        outputs = self(batch)
        loss = self.get_loss(outputs, batch)
        self.validation_step_outputs.append(loss)
        self.update_metric(outputs, batch)
        return loss

    def update_metric(self, outputs:torch.Tensor, batch:torch.Tensor):
        mask = batch > 0
        self.rmse.update(outputs[mask], batch[mask])
        
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), self.lr, weight_decay=1e-5)

    def on_train_epoch_end(self):
        epoch_average = torch.stack(self.training_step_outputs).mean()
        self.logger.experiment.add_scalar(
            "train/loss", epoch_average, self.current_epoch)
        self.training_step_outputs.clear()

    def on_validation_epoch_end(self):
        epoch_average = torch.stack(self.validation_step_outputs).mean()
        self.logger.experiment.add_scalar(
            "val/loss", epoch_average, self.current_epoch)
        self.logger.experiment.add_scalar(
            "val/mse", self.rmse.compute(), self.current_epoch)
        self.rmse.reset()
        self.validation_step_outputs.clear()

In [7]:
def auto_rec():
    embedding_dims, max_epochs, batch_size = 30, 40, 512
    user_based = False
    data = LitAutoRecData(AutoRecData(user_based=user_based), batch_size=batch_size, num_workers=0)
    data.setup("fit")
    model = LitAutoRec(
        AutoRec, 
        num_hidden=embedding_dims, 
        num_users=data.num_users, 
        num_items=data.num_items,
        user_based=user_based
    )
    logger = TensorBoardLogger("log", name=f"AutoRec_{embedding_dims}")
    trainer = L.Trainer(max_epochs=max_epochs, accelerator="auto", logger=logger)
    trainer.fit(model, data)

In [8]:
auto_rec()

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params | Mode 
---------------------------------------------------
0 | model | AutoRec          | 0      | train
1 | rmse  | MeanSquaredError | 0      | train
---------------------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                                            …

torch.Size([337, 1682]) torch.Size([337, 943])


C:\ProgramData\miniconda3\envs\ai\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


RuntimeError: The size of tensor a (1682) must match the size of tensor b (943) at non-singleton dimension 1

In [7]:
%load_ext tensorboard
%tensorboard --logdir log/AutoRec_30

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
