# ⚙️ Set Up

In [1]:
# Import Python built-in libraries
import os
import copy
import pickle
import os
import random
import time

In [2]:
# Import pip libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm, trange

# Import torch packages
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data

In [3]:
# Import PyG packages
import torch_geometric as pyg
import torch_geometric.data as pyg_data
from torch_geometric.typing import Adj, OptTensor

In [4]:
class GraphDataset(pyg_data.InMemoryDataset):
    """In-memory PyG dataset that turns item sessions into small session graphs.

    Each session becomes one ``Data`` object:

    * ``x``          -- one embedding row per *unique* item of the session, in
                        order of first appearance (``pd.factorize`` order).
    * ``edge_index`` -- one directed edge per consecutive pair of items.
    * ``y``          -- the embedding of the session's next item, shape (1, dim).

    Args:
        root: Directory under which PyG keeps its ``raw``/``processed`` folders.
        file_name: Stem used for the raw (``.txt``) and processed (``.pt``)
            file names.
        transform: Optional transform forwarded to ``InMemoryDataset``.
        pre_transform: Optional pre-transform forwarded to ``InMemoryDataset``.
    """

    def __init__(self, root, file_name, transform=None, pre_transform=None):
        # Must be set before the parent constructor runs, because the
        # file-name properties below read it.
        self.file_name = file_name
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        # NOTE(review): process() reads fixed pickle paths and never opens this
        # file -- confirm whether the '.txt' name is still meaningful.
        return [f'{self.file_name}.txt']

    @property
    def processed_file_names(self):
        return [f'{self.file_name}.pt']

    def download(self):
        """Nothing to download; the raw pickles are expected to exist locally."""
        pass

    def process(self):
        """Build one graph per session and save the collated dataset to disk."""
        # SECURITY: pickle.load can execute arbitrary code while unpickling.
        # Only point these paths at files you produced yourself.
        with open("../SR-GNN/raw/united-sessions.pkl", 'rb') as f:
            sessions = pickle.load(f)

        with open("../SR-GNN/raw/united-next_items.pkl", 'rb') as f:
            predictions = pickle.load(f)

        with open("../SR-GNN/raw/united_composed_embedding.pkl", 'rb') as f:
            embedding_dict = pickle.load(f)

        data_list = []

        for idx, session in enumerate(sessions):
            target_item = predictions[idx]

            # codes[i] is the node id (index into `uniques`) of the i-th click.
            codes, uniques = pd.factorize(session)

            # Consecutive clicks form directed edges. Stacking into a single
            # ndarray first avoids the slow list-of-arrays tensor constructor.
            edge_index = torch.as_tensor(
                np.stack([codes[:-1], codes[1:]]), dtype=torch.long)

            # Target embedding as a (1, dim) float row; its width also defines
            # the node-feature width instead of a hard-coded constant.
            y = torch.tensor(
                embedding_dict[target_item], dtype=torch.float).reshape(1, -1)

            x = torch.zeros((len(uniques), y.shape[1]))
            for row, item in enumerate(uniques):
                x[row] = torch.tensor(embedding_dict[item])

            data_list.append(pyg_data.Data(x=x, edge_index=edge_index, y=y))

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [5]:
class GatedSessionGraphConv(pyg.nn.conv.MessagePassing):
    """Message-passing layer that aggregates neighbour features and applies a GRU update.

    Args:
        out_channels: Size of the node features; used as both the input and
            hidden size of the GRU cell, so incoming ``x`` must already have
            this width.
        aggr: Aggregation scheme handed to ``MessagePassing`` (default 'add').
        **kwargs: Extra keyword arguments forwarded to ``MessagePassing``.
    """

    def __init__(self, out_channels, aggr: str = 'add', **kwargs):
        super().__init__(aggr=aggr, **kwargs)

        self.out_channels = out_channels

        self.gru = torch.nn.GRUCell(out_channels, out_channels, bias=False)

    def forward(self, x, edge_index):
        """Return updated node features for node features ``x`` and edges ``edge_index``."""
        # Aggregated neighbour messages are the GRU input; the current node
        # state is the GRU hidden state.
        m = self.propagate(edge_index, x=x, size=None)
        return self.gru(m, x)

    def message(self, x_j):
        """Send each neighbour's features along the edge unchanged."""
        return x_j

    def message_and_aggregate(self, adj_t, x):
        """Fused message + aggregation step for a sparse adjacency ``adj_t``."""
        # The original called a bare `matmul` that is never imported in this
        # notebook (NameError). `spmm` is the sparse-dense product exposed by
        # the already-imported torch_geometric package.
        # NOTE(review): `torch_geometric.utils.spmm` needs a reasonably recent
        # PyG release -- confirm against the installed version.
        return pyg.utils.spmm(adj_t, x, reduce=self.aggr)

In [6]:
class SRGNN(nn.Module):
    """Session-based recommender head on top of ``GatedSessionGraphConv``.

    The model consumes pre-computed item embeddings as node features (the
    learned embedding lookup is disabled) and returns one vector per session.

    Args:
        hidden_size: Width of the node features and of the session output.
        n_items: Number of items; stored but not used by the current layers.
    """

    def __init__(self, hidden_size, n_items):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_items = n_items

        self.gated = GatedSessionGraphConv(self.hidden_size)

        self.q = nn.Linear(self.hidden_size, 1)
        self.W_1 = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.W_2 = nn.Linear(self.hidden_size, self.hidden_size)
        self.W_3 = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)

    def reset_parameters(self):
        """Re-initialise every parameter uniformly in ``[-1/sqrt(h), 1/sqrt(h)]``."""
        # Imported locally because the notebook's import cells never bring in
        # `math`; without this the method raised NameError.
        import math

        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            nn.init.uniform_(weight, -stdv, stdv)

    def forward(self, data):
        """Compute one session representation per graph in the batch.

        Args:
            data: Batched graph object exposing ``x`` (node features),
                ``edge_index`` and ``batch`` (graph id of every node).

        Returns:
            Tensor of shape (num_sessions, hidden_size).
        """
        x, edge_index, batch_map = data.x, data.edge_index, data.batch

        # (1)-(5) gated graph update of the node features
        v_i = self.gated(x, edge_index)

        # Divide nodes by session. For the detailed explanation of what is
        # happening below, please refer to the Medium blog post.
        counts = torch.bincount(batch_map)
        sections = counts.tolist()  # plain ints, as torch.split expects

        # Last node row of every session, plus a copy of it repeated once per
        # node of that session so it lines up with v_i.
        # NOTE(review): node rows follow the dataset's node ordering, so the
        # "last" row may not be the last clicked item when items repeat --
        # confirm this is intended.
        v_n = torch.stack([nodes[-1] for nodes in torch.split(v_i, sections)])
        v_n_repeat = torch.repeat_interleave(v_n, counts, dim=0)

        q1 = self.W_1(v_n_repeat)
        q2 = self.W_2(v_i)

        # (6) attention weight per node, then weighted sum per session
        alpha = self.q(torch.sigmoid(q1 + q2))
        s_g = torch.stack(
            [weighted.sum(dim=0) for weighted in torch.split(alpha * v_i, sections)])

        # (7) combine local (last node) and global (attention) representations
        s_l = v_n
        s_h = self.W_3(torch.cat([s_l, s_g], dim=-1))

        return s_h

In [7]:
# Hyperparameters for the training run.
# Code taken from 2021 Fall CS224W Colab assignments.
class objectview(object):
    """Thin wrapper exposing the keys of a dict as attributes (``view.key``)."""

    def __init__(self, d):
        # The instance shares the dict it is given; no copy is made.
        self.__dict__ = d


args = objectview(dict(
    batch_size=100,
    hidden_dim=100,
    epochs=100,
    l2_penalty=0.00001,   # passed to Adam as its weight_decay in train()
    weight_decay=0.1,     # passed to StepLR as gamma in train()
    step=30,              # StepLR step_size in train()
    lr=0.001,
    num_items=466868,
))

In [8]:
def train(args):
    """Train an SRGNN model on the 'UNITED' session-graph dataset.

    The loss for a batch is the summed negative cosine similarity between the
    predicted session vectors and the target item embeddings.

    Args:
        args: Object with attributes ``batch_size``, ``hidden_dim``,
            ``num_items``, ``lr``, ``l2_penalty`` (Adam weight decay),
            ``step`` (StepLR step size), ``weight_decay`` (StepLR gamma) and
            ``epochs``.

    Returns:
        Tuple ``(losses, best_model)`` where ``losses`` holds the mean
        per-session loss of every epoch and ``best_model`` is a deep copy of
        the model after the final epoch (no model selection is performed).
    """
    # Fall back to the CPU instead of crashing when no GPU is visible.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Prepare data pipeline
    train_dataset = GraphDataset('./', 'UNITED')
    train_loader = pyg_data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=False,
                                       drop_last=True)
    # Build model
    model = SRGNN(args.hidden_dim, args.num_items).to(device)

    # Get training components
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.l2_penalty)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=args.step,
                                          gamma=args.weight_decay)

    # Train
    losses = []

    for epoch in range(args.epochs):
        model.train()
        print(epoch + 1)  # 1-based epoch counter shown above each progress bar

        loss_sum = 0.0
        graphs_seen = 0
        for batch in tqdm(train_loader):
            batch = batch.to(device)
            optimizer.zero_grad()

            pred = model(batch)
            label = batch.y
            loss = torch.sum(-F.cosine_similarity(pred, label, dim=1))
            loss.backward()
            optimizer.step()

            # `loss` is already a sum over the batch, so it is accumulated
            # as-is; multiplying by num_graphs again inflated the reported loss.
            loss_sum += loss.item()
            graphs_seen += batch.num_graphs

        # Checkpoint once per qualifying epoch rather than after every batch.
        if epoch % 10 == 0:
            torch.save(model.state_dict(), f'{epoch}united-model')

        # Average over the sessions actually processed (drop_last may skip a
        # few); max() avoids dividing by zero on an empty loader.
        losses.append(loss_sum / max(graphs_seen, 1))

        scheduler.step()

    best_model = copy.deepcopy(model)

    return losses, best_model

In [9]:
losses, best_model = train(args)


# Save the best model
torch.save(best_model.state_dict(), 'united-model')

print("Minimum loss: {0}".format(min(losses)))

# Delete these two files from the processed folder if they exist. Checking
# first keeps a missing file from aborting the cell before the loss curve
# is drawn.
for stale_path in ('processed/pre_filter.pt', 'processed/pre_transform.pt'):
    if os.path.exists(stale_path):
        os.remove(stale_path)

# plt.title(dataset.name)
plt.plot(losses, label="united training loss" + " - ")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()



1


100%|██████████| 3335/3335 [02:54<00:00, 19.12it/s]


2


100%|██████████| 3335/3335 [01:53<00:00, 29.42it/s]


3


100%|██████████| 3335/3335 [01:48<00:00, 30.75it/s]


4


100%|██████████| 3335/3335 [01:48<00:00, 30.81it/s]


5


100%|██████████| 3335/3335 [02:00<00:00, 27.60it/s]


6


100%|██████████| 3335/3335 [01:56<00:00, 28.53it/s]


7


100%|██████████| 3335/3335 [01:54<00:00, 29.18it/s]


8


100%|██████████| 3335/3335 [01:55<00:00, 28.89it/s]


9


100%|██████████| 3335/3335 [01:56<00:00, 28.74it/s]


10


100%|██████████| 3335/3335 [01:55<00:00, 28.98it/s]


11


100%|██████████| 3335/3335 [02:15<00:00, 24.59it/s]


12


100%|██████████| 3335/3335 [01:57<00:00, 28.50it/s]


13


100%|██████████| 3335/3335 [01:58<00:00, 28.10it/s]


14


100%|██████████| 3335/3335 [01:54<00:00, 29.02it/s]


15


100%|██████████| 3335/3335 [01:55<00:00, 28.87it/s]


16


100%|██████████| 3335/3335 [01:59<00:00, 28.02it/s]


17


100%|██████████| 3335/3335 [01:52<00:00, 29.65it/s]


18


100%|██████████| 3335/3335 [01:55<00:00, 28.87it/s]


19


100%|██████████| 3335/3335 [01:51<00:00, 29.82it/s]


20


100%|██████████| 3335/3335 [01:53<00:00, 29.36it/s]


21


100%|██████████| 3335/3335 [02:11<00:00, 25.44it/s]


22


 60%|██████    | 2002/3335 [01:02<00:33, 39.60it/s]

In [31]:
# Sanity check: can PyTorch see a CUDA-capable GPU from this kernel?
torch.cuda.is_available()

False

In [None]:
# Train, save and plot a model for the second entry of LOCALES.
# NOTE(review): LOCALES and WINDOW_SIZES are not defined anywhere in the
# visible notebook, and train() as defined above accepts only `args`, so this
# cell cannot run as written -- it looks like a leftover from a per-locale
# version of the notebook. Confirm and either restore those definitions or
# remove the cell.
LOCALE = LOCALES[1]
WINDOW_SIZE = WINDOW_SIZES[1]
losses, best_model = train(args, LOCALE)

# Save the best model
torch.save(best_model.state_dict(), f'{LOCALE}model')

print("Minimum loss: {0}".format(min(losses)))

# Raises FileNotFoundError if either file is missing.
os.remove('processed/pre_filter.pt')
os.remove('processed/pre_transform.pt')

# plt.title(dataset.name)
plt.plot(losses, label="FR training loss" + " - ")
plt.legend()
plt.show()

In [None]:
# Train, save and plot a model for the third entry of LOCALES.
# NOTE(review): LOCALES and WINDOW_SIZES are not defined anywhere in the
# visible notebook, and train() as defined above accepts only `args`, so this
# cell cannot run as written -- it looks like a leftover from a per-locale
# version of the notebook. Confirm and either restore those definitions or
# remove the cell.
LOCALE = LOCALES[2]
WINDOW_SIZE = WINDOW_SIZES[2]
losses, best_model = train(args, LOCALE)

# Save the best model
torch.save(best_model.state_dict(), f'{LOCALE}model')

print("Minimum loss: {0}".format(min(losses)))

# Raises FileNotFoundError if either file is missing.
os.remove('processed/pre_filter.pt')
os.remove('processed/pre_transform.pt')

# plt.title(dataset.name)
plt.plot(losses, label="IT training loss" + " - ")
plt.legend()
plt.show()