In [1]:
!pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 -U
!pip3 install fbgemm_gpu --index-url https://download.pytorch.org/whl/nightly/cu121
!pip3 install torchmetrics==1.0.3
!pip3 install torchrec --index-url https://download.pytorch.org/whl/nightly/cu121

Looking in indexes: https://download.pytorch.org/whl/nightly/cu121
Collecting torch
  Downloading https://download.pytorch.org/whl/nightly/cu121/torch-2.4.0.dev20240420%2Bcu121-cp310-cp310-linux_x86_64.whl (795.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m795.4/795.4 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cu

In [None]:
import os
import copy
import torch
import torchrec
import torch.distributed as dist
import pandas as pd
from tqdm import tqdm
from sklearn import preprocessing

In [1]:
import os
import torch
import torchrec
import torch.distributed as dist
from tqdm import tqdm

In [None]:
# Load Dataset
df = pd.read_csv('/content/ratings.csv')
# ID Processing
df['movieId'] = 'mov'+df['movieId'].astype(str)
df['userId'] = 'user'+df['userId'].astype(str)
# Label for Classification
df['rating_class'] = df['rating'].apply(lambda x: 1.0 if x > 2.5 else 0.0)
# Selecting Relevant Columns
df = df[['userId','movieId','rating_class']]
df.drop_duplicates(inplace=True)

In [13]:
from sklearn import preprocessing
movie_encoder = preprocessing.LabelEncoder()
user_encoder = preprocessing.LabelEncoder()

In [14]:
movie_encoder.fit(df['movieId'].tolist())
user_encoder.fit(df['userId'].tolist())
len(movie_encoder.classes_), len(user_encoder.classes_)

(9724, 610)

In [64]:
# Separating Positive and Negative Samples
df_neg = df[df['rating_class']==0.0]
df_pos = df[df['rating_class']==1.0]

neg_data = dict()
pos_data = dict()

# Collecting Negative Samples:
for idx, row in tqdm(df_neg.iterrows()):
  if user_encoder.transform([row['userId']])[0] not in neg_data:
    neg_data[user_encoder.transform([row['userId']])[0]] = list(movie_encoder.transform([row['movieId']]))
  else:
    neg_data[user_encoder.transform([row['userId']])[0]].extend(list(movie_encoder.transform([row['movieId']])))

# Collecting Positive Samples:
for idx, row in tqdm(df_neg.iterrows()):
  if user_encoder.transform([row['userId']])[0] not in pos_data:
    pos_data[user_encoder.transform([row['userId']])[0]] = list(movie_encoder.transform([row['movieId']]))
  else:
    pos_data[user_encoder.transform([row['userId']])[0]].extend(list(movie_encoder.transform([row['movieId']])))


19073it [03:22, 94.25it/s] 
19073it [03:21, 94.49it/s] 


In [66]:
list(neg_data.items())[:3]

[(0, [769, 2688, 2765, 2817, 3087, 3899]),
 (111, [9345, 543]),
 (222,
  [3789,
   6142,
   7321,
   7664,
   8036,
   9328,
   364,
   479,
   860,
   894,
   980,
   2482,
   2540,
   2550,
   2566,
   2856,
   3946,
   4762,
   5932,
   7114,
   8071])]

In [None]:
# Setting environment variables
os.environ["RANK"] = "0"
os.environ["WORLD_SIZE"] = "1"
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "29500"

# you will need a V100 or A100 to run tutorial as as! Colab gives users access to V100 and A100 GPUs
# If you do not have access to those GPUs use “gloo” backend and run on CPU
dist.init_process_group(backend="nccl")

In [83]:
class RecommendationModel(torch.nn.Module):
    def __init__(self, ) -> None:
        super(RecommendationModel, self).__init__()
        self.embedding_collection = torchrec.EmbeddingBagCollection(
                                    device="meta",
                                    tables=[
                                        torchrec.EmbeddingBagConfig(
                                            name="userid_table",
                                            embedding_dim=64,
                                            num_embeddings=len(user_encoder.classes_),
                                            feature_names=["userid"],
                                            pooling=torchrec.PoolingType.SUM
                                        ),
                                        torchrec.EmbeddingBagConfig(
                                            name="movies_seen_table",
                                            embedding_dim=64,
                                            num_embeddings=len(movie_encoder.classes_),
                                            feature_names=["movies_seen"],
                                            pooling=torchrec.PoolingType.SUM
                                        )
                                    ]
                                )
        self.model = torchrec.distributed.DistributedModelParallel(self.embedding_collection, device=torch.device("cuda"))
        print(self.model.plan)
        self.linear_layer = torch.nn.Linear(64, 1)


    def forward(self, inputs):
        userids = []
        movies_seen = []
        for row in inputs:
          userids.append(row[0])
          movies_seen.append(row[1])

        values = copy.deepcopy(userids)
        lengths = [1]*len(values)
        for movie_list in movies_seen:
          values.extend(movie_list)
          lengths.append(len(movie_list))

        kjt = torchrec.KeyedJaggedTensor(
                  keys = ["userid","movies_seen"],
                  values = torch.tensor(values).cuda(),
                  lengths = torch.tensor(lengths, dtype=torch.int64).cuda(),
              )

        pooled_embeddings = self.model(kjt).to_dict()
        out = pooled_embeddings['userid'] * pooled_embeddings['movies_seen']
        out = out.to('cpu')
        out = torch.nn.functional.sigmoid(self.linear_layer(out))
        return out


In [84]:
rec_model = RecommendationModel()



module: 

      param       | sharding type | compute kernel | ranks
----------------- | ------------- | -------------- | -----
userid_table      | table_wise    | fused          | [0]  
movies_seen_table | table_wise    | fused          | [0]  

      param       | shard offsets | shard sizes |   placement  
----------------- | ------------- | ----------- | -------------
userid_table      | [0, 0]        | [610, 64]   | rank:0/cuda:0
movies_seen_table | [0, 0]        | [9724, 64]  | rank:0/cuda:0


In [69]:
sample = list(neg_data.items())[:3]

In [74]:
out = rec_model(sample)
print(out)

tensor([[0.4706],
        [0.4705],
        [0.4706]], grad_fn=<SigmoidBackward0>)


In [16]:
from torchrec.optim.apply_optimizer_in_backward import apply_optimizer_in_backward
from torchrec.optim.keyed import KeyedOptimizerWrapper
from torchrec.optim.optimizers import in_backward_optimizer_filter

apply_optimizer_in_backward(
    optimizer_class=torch.optim.SGD,
    params=rec_model.embedding_collection.parameters(),
    optimizer_kwargs={"lr": 0.02},
)

In [80]:
criterion = torch.nn.NLLLoss()
torchrec_optim = KeyedOptimizerWrapper(
    {k: v for k, v in dict(in_backward_optimizer_filter(
        rec_model.named_parameters())).items()},
    lambda params: torch.optim.SGD(params, lr=0.01, momentum=0.0),
)

In [81]:
criterion(out, torch.Tensor([0,0,0]).long())

tensor(-0.4706, grad_fn=<NllLossBackward0>)

In [82]:
# torchrec model forward
torchrec_optim.zero_grad()
output = rec_model(sample)
label = torch.Tensor([0,0,0]).long()
torchrec_loss = criterion(output, label)
torchrec_loss.backward()
torchrec_optim.step()


In [79]:
torchrec_loss

tensor(-0.4706, grad_fn=<NllLossBackward0>)