In [None]:
# Install the notebook's dependencies; %pip installs into the running kernel's environment.
%pip install --upgrade pip
# %pip -q install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 #comment if not using gpu
%pip -q install -r ../requirements.txt

In [565]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.nn.utils.rnn import pack_sequence, pad_sequence
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import json
import matplotlib.pyplot as plt
from pathlib import Path


In [566]:
# File name the trained weights are written to at the end of the notebook.
MODEL_NAME = "model.pth"

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Seed the CPU and CUDA random number generators with the same fixed value.
torch.manual_seed(1234)
torch.cuda.manual_seed(1234)

In [567]:
# Placeholder sizes; within this notebook they are referenced only by the
# commented-out smoke-test cells below.
industry_size = 32
product_name_size, product_category_size = 20, 26
batch_size = 1

In [568]:
class LSTMComponent(nn.Module):
    """Single-layer LSTM encoder followed by a linear projection.

    The LSTM's input and hidden widths are both ``hidden_dim``; the final
    hidden state is projected to ``output_dim`` features.
    """

    def __init__(self, hidden_dim, output_dim):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, input):
        """Encode a batch of sequences and project the last hidden state.

        ``input`` is handed straight to ``pack_sequence`` (unsorted), so it is
        iterated as a collection of per-sample sequences.
        """
        packed = pack_sequence(input, enforce_sorted=False)
        # Only the final hidden state is needed; per-step outputs and the
        # cell state are discarded.
        _, (final_hidden, _) = self.lstm(packed)
        # Drop the leading num_layers axis (size 1 for this single-layer LSTM).
        last_state = final_hidden.squeeze(0)
        return self.fc(last_state)

class RecommenderModel(nn.Module):
    """Scores every known product name from an industry id and a product history.

    Each product in the history is described by three ids (name, category,
    is-implemented flag). Their embeddings are concatenated per step, encoded
    by ``LSTMComponent``, joined with the industry embedding and passed through
    two linear layers that emit one logit per product name.

    Every embedding table has one extra row used as the padding index, which
    equals the vocabulary size (2 for the is-implemented flag).
    """

    def __init__(self, industry_size, product_name_size, product_category_size, hidden_dim, padding_value=-1):
        super(RecommenderModel, self).__init__()

        self.product_name_size = product_name_size
        # Kept so existing callers/attribute readers keep working; the padding
        # index actually used is always "vocabulary size" (see the embeddings).
        self.padding_value = padding_value

        self.industry_embedding = nn.Embedding(num_embeddings=industry_size+1, embedding_dim=hidden_dim, padding_idx=industry_size)
        self.product_name_embedding = nn.Embedding(num_embeddings=product_name_size+1, embedding_dim=hidden_dim, padding_idx=product_name_size)
        self.product_category_embedding = nn.Embedding(num_embeddings=product_category_size+1, embedding_dim=hidden_dim, padding_idx=product_category_size)
        self.product_is_implemented_embedding = nn.Embedding(num_embeddings=3, embedding_dim=hidden_dim, padding_idx=2)

        # The LSTM consumes the three concatenated product embeddings.
        self.lstm = LSTMComponent(hidden_dim=hidden_dim * 3, output_dim=hidden_dim)

        # Industry embedding + encoded product history -> hidden layer -> logits.
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.fc2 = nn.Linear(128, product_name_size)

        self.dropout = nn.Dropout(p=0.2)

    def expand_product_name_size(self, new_size):
        """Grow the product-name vocabulary to ``new_size`` entries.

        The product-name embedding and the output layer are replaced by larger
        copies: rows for existing products are preserved, rows for new products
        are freshly initialised. The padding index moves to ``new_size``, so
        inputs must be padded with ``new_size`` afterwards.

        Does nothing when ``new_size`` does not exceed the current size.

        NOTE: the replacement layers are new parameters; an optimizer created
        before the expansion does not track them and must be rebuilt.
        """
        old_size = self.product_name_size
        if new_size <= old_size:
            return

        old_embedding = self.product_name_embedding
        old_fc = self.fc2

        # One extra row for padding, mirroring the layout built in __init__.
        # Rows for new products keep nn.Embedding's default normal init and the
        # padding row is zeroed by nn.Embedding itself.
        new_embedding = nn.Embedding(
            new_size + 1,
            old_embedding.embedding_dim,
            padding_idx=new_size,
        ).to(device=old_embedding.weight.device, dtype=old_embedding.weight.dtype)

        # The input width must match the existing output layer (fed by fc1).
        new_fc = nn.Linear(old_fc.in_features, new_size).to(
            device=old_fc.weight.device, dtype=old_fc.weight.dtype
        )

        with torch.no_grad():
            # Copy only real product rows; the old padding row is not a product.
            new_embedding.weight[:old_size] = old_embedding.weight[:old_size]

            new_fc.weight[:old_size] = old_fc.weight
            new_fc.bias[:old_size] = old_fc.bias
            nn.init.normal_(new_fc.weight[old_size:])
            nn.init.normal_(new_fc.bias[old_size:])

        self.product_name_embedding = new_embedding
        self.fc2 = new_fc

        self.product_name_size = new_size

    def forward(self, industry, product_name, product_category, product_is_implemented, new_size):
        """Return product-name logits of shape ``(batch, product_name_size)``.

        Args:
            industry: integer ids, shape ``(batch,)`` or ``(batch, 1)``.
            product_name, product_category, product_is_implemented: integer
                ids of shape ``(batch, seq)``, right-padded with the matching
                padding index.
            new_size: current number of product names; when it exceeds the
                model's vocabulary the model is expanded first.
        """
        if new_size > self.product_name_size:
            self.expand_product_name_size(new_size)

        industry_embedded = self.industry_embedding(industry)
        product_name_embedded = self.product_name_embedding(product_name)
        product_category_embedded = self.product_category_embedding(product_category)
        product_is_implemented_embedded = self.product_is_implemented_embedding(product_is_implemented)

        product_embedded = torch.cat([product_name_embedded, product_category_embedded, product_is_implemented_embedded], dim=-1)

        # Strip trailing padding so the LSTM's final state reflects only real
        # products. Assumes right-padding; an all-padding row keeps one step
        # because packed sequences cannot be empty.
        pad_index = self.product_name_embedding.padding_idx
        lengths = (product_name != pad_index).sum(dim=1).clamp(min=1).tolist()
        sequences = [steps[:length] for steps, length in zip(product_embedded, lengths)]

        product = self.lstm(sequences)

        # Flatten to (batch, hidden) without a bare squeeze(), which would also
        # drop the batch axis for a single-sample batch.
        industry_flat = industry_embedded.reshape(-1, industry_embedded.shape[-1])
        combined = torch.cat([industry_flat, product], dim=1)

        x = F.relu(self.fc1(combined))
        x = self.dropout(x)
        output = self.fc2(x)

        output = output[:, :self.product_name_size]

        return output

In [569]:
# batch_size = 2
# industry_1 = torch.randint(0, industry_size, (batch_size,), device=device)
# product_1 = torch.randint(0, product_name_size, (batch_size, 2), device=device)
# product_2 = torch.randint(0, product_category_size, (batch_size, 2), device=device)
# product_3 = torch.randint(0, 2, (batch_size, 2), device=device)

In [570]:
# result = model(industry_1, product_1, product_2, product_3, product_name_size)
# result

In [571]:
# result = model(industry_1, product_1, product_2, product_3, product_name_size+1)
# result

In [None]:
# Read the vocabulary lists used to turn names into integer ids.
with open("additional_data.json", "r") as file:
    info = json.load(file)

# Each map sends a name to its position in the corresponding list; the *_len
# values are the number of distinct names in each map.
product_name_map = {name: index for index, name in enumerate(info["Product_Names"])}
product_category_map = {name: index for index, name in enumerate(info["Product_Categories"])}
industry_map = {name: index for index, name in enumerate(info["Industries"])}

product_name_len = len(product_name_map)
product_category_len = len(product_category_map)
industry_len = len(industry_map)

print(product_name_len, product_category_len, industry_len)

In [573]:
# Load the training records stored under the top-level "Data" key.
with open("dataset.json", "r") as file:
    data = json.loads(file.read())["Data"]

In [574]:
class RecommenderDataset(Dataset):
    """Tensorised view of the raw records for the recommender model.

    Each record provides ``Input.Industry``, a list ``Input.Products`` (each
    with ``ProductName``, ``ProductCategory`` and ``IsImplemented``) and a list
    ``Output`` of product names. Names are converted to ids with the
    module-level ``industry_map``, ``product_name_map`` and
    ``product_category_map``.

    All sequences are right-padded to the longest one in ``data``: product
    names and outputs with ``product_name_len``, categories with
    ``product_category_len`` and the is-implemented flag with 2. Every tensor
    is moved to ``device`` once, at construction time.
    """

    def __init__(self, data, product_name_len, product_category_len, device="cpu"):
        def padded(rows, padding_value):
            # Build every row as int64 explicitly: an empty list would
            # otherwise become a float32 tensor, which is not a valid index.
            tensors = [torch.tensor(row, dtype=torch.long) for row in rows]
            return pad_sequence(tensors, batch_first=True, padding_value=padding_value).to(device)

        products = [x["Input"]["Products"] for x in data]

        # Shape (num_records, 1): one industry id per record.
        self.industries = torch.tensor(
            [[industry_map[x["Input"]["Industry"]]] for x in data], dtype=torch.long
        ).to(device)

        self.product_names = padded(
            [[product_name_map[y["ProductName"]] for y in items] for items in products],
            product_name_len,
        )
        self.product_categories = padded(
            [[product_category_map[y["ProductCategory"]] for y in items] for items in products],
            product_category_len,
        )
        self.product_is_implemented = padded(
            [[y["IsImplemented"] for y in items] for items in products],
            2,
        )
        self.output = padded(
            [[product_name_map[y] for y in x["Output"]] for x in data],
            product_name_len,
        )

    def __len__(self):
        """Number of records."""
        return self.product_names.shape[0]
    
    def __getitem__(self, index):
        """Return (industry, product names, categories, is-implemented flags, output ids)."""
        return self.industries[index], self.product_names[index], self.product_categories[index], self.product_is_implemented[index], self.output[index]

In [575]:
# Training hyper-parameters.
BATCH_SIZE = 8
LEARNING_RATE = 1e-4

# Index of the first test record: the leading 80% of `data` is used for
# training and the remainder for testing (no shuffling before the split).
TRAIN_TEST_SPLIT = int(0.8 * len(data))

train_dataset = RecommenderDataset(
    data=data[:TRAIN_TEST_SPLIT],
    product_name_len=product_name_len,
    product_category_len=product_category_len,
    device=device,
)
test_dataset = RecommenderDataset(
    data=data[TRAIN_TEST_SPLIT:],
    product_name_len=product_name_len,
    product_category_len=product_category_len,
    device=device,
)

In [576]:
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [577]:
# Build the model from the vocabulary sizes read from additional_data.json
# and move it to the selected device.
model = RecommenderModel(
    industry_size=industry_len,
    product_name_size=product_name_len,
    product_category_size=product_category_len,
    hidden_dim=256,
)
model = model.to(device)

# Multi-label objective: one independent sigmoid/BCE term per product name.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [578]:
# Number of passes over the training set, and per-epoch loss histories
# filled in by the training loop.
epochs = 10000
train_loss_values, test_loss_values = [], []

In [None]:
def _multi_hot_targets(labels, num_classes):
    """Turn padded label ids into a float multi-hot matrix.

    Args:
        labels: int64 tensor of shape (batch, max_labels); entries greater
            than or equal to ``num_classes`` are treated as padding.
        num_classes: number of real classes.

    Returns:
        Float tensor of shape (batch, num_classes) on the same device as
        ``labels``, with 1.0 at each sample's own label positions.
    """
    # One spare column collects padding ids and is dropped before returning.
    targets = torch.zeros((labels.shape[0], num_classes + 1), device=labels.device)
    targets.scatter_(1, labels.clamp(max=num_classes), 1.0)
    return targets[:, :num_classes]


for epoch in tqdm(range(epochs)):
    avg_train_loss, avg_test_loss = 0, 0

    model.train()

    for X1, X2, X3, X4, y in train_dataloader:
        optimizer.zero_grad()

        output = model(X1, X2, X3, X4, product_name_len)

        # Per-sample targets, created on the same device as the batch.
        target = _multi_hot_targets(y, product_name_len)

        loss = criterion(output, target)

        avg_train_loss += loss.item()

        loss.backward()
        optimizer.step()

    model.eval()

    with torch.inference_mode():
        for X1, X2, X3, X4, y in test_dataloader:
            output = model(X1, X2, X3, X4, product_name_len)

            target = _multi_hot_targets(y, product_name_len)

            loss = criterion(output, target)

            avg_test_loss += loss.item()

    # Average the summed batch losses over the number of batches.
    avg_train_loss /= len(train_dataloader)
    avg_test_loss /= len(test_dataloader)

    print(f"Epoch: {epoch} | Train Loss: {avg_train_loss} | Test Loss: {avg_test_loss}")
    
    train_loss_values.append(avg_train_loss)
    test_loss_values.append(avg_test_loss)

In [481]:
# Persist only the learned parameters (the state dict), not the module object.
torch.save(model.state_dict(), MODEL_NAME)

In [None]:
# Plot the per-epoch loss histories. The x-axis is derived from the number of
# recorded values rather than `epochs`, so the plot still works when training
# was interrupted early or the training cell was run more than once.
plt.figure(figsize=(12, 6))
plt.title("Loss")
plt.plot(range(len(train_loss_values)), train_loss_values, label="Train")
plt.plot(range(len(test_loss_values)), test_loss_values, label="Test")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend();