# A Transformer-based recommendation system


In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
import pandas as pd
from src.dataset import RatingDataset
from src import utils
from torch.utils.data import DataLoader
from torch import nn
import math

## (1) Load Dataset

In [4]:
# Read the train and test parquet artifacts into DataFrames.
df_train = pd.read_parquet("./artifacts/train_data.parquet")
# NOTE: df_test is loaded here but not referenced in the cells shown in this section.
df_test = pd.read_parquet("./artifacts/test_data.parquet")

In [5]:
# Wrap the training DataFrame in the project's RatingDataset (imported from src.dataset).
train_dataset = RatingDataset(data=df_train) 

In [6]:
# Batches of 12 samples, reshuffled on every pass over the dataset.
loader = DataLoader(train_dataset,batch_size=12,shuffle=True)

In [7]:
# Grab a single batch so its structure can be inspected while prototyping.
# next(iter(...)) states the intent directly and raises StopIteration when the
# loader yields nothing, instead of silently leaving `inputs` undefined.
inputs = next(iter(loader))


## (2) Model Config

In [8]:
# inputs

In [9]:
# Load the id-mapping artifacts saved under ./artifacts. Every path is read
# with utils.open_object, in the order listed, and bound to the matching name.
# NOTE(review): the .pkl suffix suggests pickled objects - only load trusted files.
(
    age_group_id_map_dict,
    movie_id_map_dict,
    occupation_id_map_dict,
    sex_id_map_dict,
    user_id_map_dict,
) = [
    utils.open_object(artifact_path)
    for artifact_path in (
        "./artifacts/age_group_id_map_dict.pkl",
        "./artifacts/movie_id_map_dict.pkl",
        "./artifacts/occupation_id_map_dict.pkl",
        "./artifacts/sex_id_map_dict.pkl",
        "./artifacts/user_id_map_dict.pkl",
    )
]
# genres_map_dict = utils.open_object("./artifacts/genres_map_dict.pkl")

In [10]:
# Load the object stored in rating_min_max_scaler.pkl (presumably the min-max
# scaler fitted on ratings - confirm). Not used in the cells shown in this section.
rating_min_max_scaler = utils.open_object("./artifacts/rating_min_max_scaler.pkl")

In [11]:
# Number of entries in each id-mapping dict; used as the embedding table sizes.
# sex_id_map_dict is loaded above but no count is derived for it here.
num_user, num_movie, num_occupation, num_age_group = map(
    len,
    (
        user_id_map_dict,
        movie_id_map_dict,
        occupation_id_map_dict,
        age_group_id_map_dict,
    ),
)
# num_genre = len(genres_map_dict)

In [12]:
# NOTE: EMED_DIM is defined but not referenced in the cells shown in this section.
EMED_DIM = 64

# Per-feature embedding settings. The embedding width is the integer part of
# the square root of the feature's vocabulary size; num_embed is that size.
feature_embeding_config = {
    feature_name: {"embed_dim": int(math.sqrt(vocab_size)), "num_embed": vocab_size}
    for feature_name, vocab_size in (
        ("user", num_user),
        ("movie", num_movie),
        ("occupation", num_occupation),
        ("age_group", num_age_group),
    )
}

In [13]:
# Echo the per-feature embedding settings so the notebook displays them.
feature_embeding_config

{'user': {'embed_dim': 77, 'num_embed': 6041},
 'movie': {'embed_dim': 62, 'num_embed': 3884},
 'occupation': {'embed_dim': 4, 'num_embed': 22},
 'age_group': {'embed_dim': 2, 'num_embed': 8}}

In [14]:
# Top-level settings dictionary; at this point it only carries the embedding settings.
config_dict = {'feature_embeding_config': feature_embeding_config}

In [15]:
class Config:
    """Attribute-style view over a settings dictionary.

    Every key of ``dictionary`` becomes an instance attribute bound to the
    corresponding value (the value object itself, not a copy).
    """

    def __init__(self, dictionary):
        # Each (key, value) pair is forwarded straight to setattr.
        for entry in dictionary.items():
            setattr(self, *entry)

In [16]:
# Expose the entries of config_dict as attributes (e.g. config.feature_embeding_config).
config = Config(dictionary=config_dict)

In [17]:
# Echo the attribute to confirm the dictionary entry is reachable through Config.
config.feature_embeding_config

{'user': {'embed_dim': 77, 'num_embed': 6041},
 'movie': {'embed_dim': 62, 'num_embed': 3884},
 'occupation': {'embed_dim': 4, 'num_embed': 22},
 'age_group': {'embed_dim': 2, 'num_embed': 8}}

## Modeling

### (1) Create Embedding Layer

In [20]:
# Prototyping stand-in: a bare class named `self`, so that code written as
# `self.<attr> = ...` can be executed cell by cell in this notebook.
class self:
    """Empty attribute holder used in place of a real instance in the cells below."""

In [21]:
# Stored as a class attribute on the stand-in `self` so later cells can read self.config.
self.config = config

In [22]:
# Build one nn.Embedding per entry of feature_embeding_config, keyed by the
# feature name. Each table's weight is re-initialised with Xavier-uniform
# immediately after the table is created.
embedding_layers = []
for name, embed_config in self.config.feature_embeding_config.items():
    embed_dim, num_embed = embed_config['embed_dim'], embed_config['num_embed']
    embeding_layer = nn.Embedding(num_embed, embed_dim)
    nn.init.xavier_uniform_(embeding_layer.weight)
    embedding_layers += [[name, embeding_layer]]

# ModuleDict accepts an iterable of (name, module) pairs and registers each module.
self.embedding_layers = nn.ModuleDict(embedding_layers)

In [23]:
# Echo the ModuleDict so the notebook displays the registered embedding tables.
self.embedding_layers

ModuleDict(
  (user): Embedding(6041, 77)
  (movie): Embedding(3884, 62)
  (occupation): Embedding(22, 4)
  (age_group): Embedding(8, 2)
)