In [76]:
# %pip -q install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
%pip -q install -r ../requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [77]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.nn.utils.rnn import pack_sequence
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
import pandas as pd
from torch.utils.data import DataLoader, Dataset

In [78]:
torch.manual_seed(1234)
torch.cuda.manual_seed(1234)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [79]:
batch_size = 1
industry_size = 32
product_name_size = 20
product_category_size = 26

In [80]:
class LSTMComponent(nn.Module):
    def __init__(self, hidden_dim, output_dim):
        super(LSTMComponent, self).__init__()
        self.lstm = nn.LSTM(input_size=hidden_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input):
        packed_input = pack_sequence(input, enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm(packed_input)
        return self.fc(hidden.squeeze(0))

class RecommenderModel(nn.Module):
    def __init__(self, industry_size, product_name_size, product_category_size, hidden_dim):
        super(RecommenderModel, self).__init__()

        self.product_name_size = product_name_size

        self.industry_embedding = nn.Embedding(num_embeddings=industry_size, embedding_dim=32)
        self.product_name_embedding = nn.Embedding(num_embeddings=product_name_size, embedding_dim=hidden_dim)
        self.product_category_embedding = nn.Embedding(num_embeddings=product_category_size, embedding_dim=hidden_dim)
        self.product_is_implemented_embedding = nn.Embedding(num_embeddings=2, embedding_dim=hidden_dim)

        self.lstm = LSTMComponent(hidden_dim=hidden_dim * 3, output_dim=32)

        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, product_name_size)

        self.dropout = nn.Dropout(p=0.2)

    def expand_product_name_size(self, new_size):
        new_embedding = nn.Embedding(new_size, self.product_name_embedding.embedding_dim)
        new_fc = nn.Linear(32, new_size)
        
        new_embedding.weight.data[:self.product_name_embedding.num_embeddings] = self.product_name_embedding.weight.data
        nn.init.normal_(new_embedding.weight.data[self.product_name_embedding.num_embeddings:])

        with torch.no_grad():
            new_fc.weight[:self.fc2.out_features] = self.fc2.weight
            new_fc.bias[:self.fc2.out_features] = self.fc2.bias
            nn.init.normal_(new_fc.weight[self.fc2.out_features:])
            nn.init.normal_(new_fc.bias[self.fc2.out_features:])
        
        self.product_name_embedding = new_embedding
        self.fc2 = new_fc

        self.product_name_size = new_size

    def forward(self, industry, product_name, product_category, product_is_implemented, new_size):
        if new_size > self.product_name_size:
            self.expand_product_name_size(new_size)
            
        industry_embedded = self.industry_embedding(industry)
        product_name_embedded = self.product_name_embedding(product_name)
        product_category_embedded = self.product_category_embedding(product_category)
        product_is_implemented_embedded = self.product_is_implemented_embedding(product_is_implemented)

        product_embedded = torch.cat([product_name_embedded, product_category_embedded, product_is_implemented_embedded], dim=-1)

        product = self.lstm(product_embedded)

        combined = torch.cat([industry_embedded, product], dim=1)

        x = F.relu(self.fc1(combined))
        x = self.dropout(x)
        output = self.fc2(x)

        output = output[:, :product_name_size]

        return output

In [81]:
model = RecommenderModel(
    industry_size,
    product_name_size,
    product_category_size,
    hidden_dim=256
).to(device)

In [82]:
batch_size = 2
industry_1 = torch.randint(0, industry_size, (batch_size,), device=device)
product_1 = torch.randint(0, product_name_size, (batch_size, 2), device=device)
product_2 = torch.randint(0, product_category_size, (batch_size, 2), device=device)
product_3 = torch.randint(0, 2, (batch_size, 2), device=device)

In [83]:
result = model(industry_1, product_1, product_2, product_3, product_name_size)
result

tensor([[-0.3190, -0.0411,  0.0490,  0.0752, -0.3768, -0.0037,  0.0284, -0.2892,
          0.1096, -0.0144,  0.1461, -0.1642,  0.1814, -0.2203, -0.3188, -0.2520,
         -0.5617,  0.0086, -0.1564,  0.0943],
        [-0.1503, -0.0540, -0.0074, -0.0605, -0.0053,  0.0902,  0.0708, -0.3153,
         -0.1508,  0.2092, -0.1877,  0.0808, -0.3323, -0.2493,  0.0038, -0.2754,
         -0.5922, -0.1885,  0.1346,  0.3654]], grad_fn=<SliceBackward0>)

In [84]:
result = model(industry_1, product_1, product_2, product_3, product_name_size+1)
result

tensor([[-0.2855, -0.2265,  0.1086,  0.2448, -0.3218, -0.0655,  0.0636, -0.1277,
          0.0655,  0.0850,  0.2330, -0.0627,  0.0424, -0.0693, -0.3595, -0.2767,
         -0.5317,  0.0483, -0.1427,  0.1119],
        [-0.2494, -0.1548,  0.0594, -0.0022, -0.1253, -0.0537,  0.1882, -0.3427,
         -0.1543,  0.0796, -0.1224,  0.0980, -0.1784, -0.3356, -0.0967, -0.3535,
         -0.6304, -0.0629, -0.0027,  0.2383]], grad_fn=<SliceBackward0>)

In [85]:
data = pd.read_csv('dataset.csv')

industry = data["Industry"].tolist()
product_name = data["ProductName"].tolist()
product_category = data["ProductCategory"].tolist()
product_is_implemented = data["ProductIsImplemented"].tolist()

# class RecommenderDataset(Dataset):
#     def __init__(self, input, output)