In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only) so the data files
# referenced below under /content/drive/MyDrive can be opened.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Input files on the mounted Drive. Judging by the file names these are two
# item rankings ("org" vs "var") of the same Beauty dataset -- TODO confirm.
filepath='/content/drive/MyDrive/Colab Notebooks/data/Beauty_item_org_rank.txt'
filepath2="/content/drive/MyDrive/Colab Notebooks/data/Beauty_item_var_rank.txt"

# **Training with NDCG Early Stopping**

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import numpy as np


# Use the GPU when one is available. `device` is a module-level global that
# collate_fn, evaluate and train_model read directly.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): cuDNN benchmark mode is documented as helping when input shapes
# are fixed; batches here are padded to varying lengths -- confirm it is a win.
torch.backends.cudnn.benchmark = True
print(f"Using device: {device}")


def load_sequences(file_path, max_users=None):
    """Read per-user item sequences from a whitespace-separated text file.

    Every token on a line is parsed as an integer. The first integer is
    dropped (it is treated as the user ID) and the remaining ones form the
    item sequence. Sequences with fewer than two items are discarded.

    Args:
        file_path: Path of the text file to read.
        max_users: When truthy, only the first ``max_users`` lines of the file
            are considered (lines that end up discarded still count).
            ``None`` or ``0`` reads the whole file.

    Returns:
        A list of item-ID lists, in file order.

    Raises:
        ValueError: If a token cannot be parsed as an integer.
    """
    kept = []
    with open(file_path, 'r') as handle:
        for line_no, raw_line in enumerate(handle):
            if max_users and line_no >= max_users:
                break
            # Convert every token (user ID included) before slicing it away.
            numbers = [int(token) for token in raw_line.split()]
            items = numbers[1:]
            if len(items) < 2:
                continue
            kept.append(items)
    return kept

def split_train_test(sequences):
    """Split sequences leave-one-out: the last item of each is held out.

    Args:
        sequences: Iterable of non-empty item-ID lists.

    Returns:
        ``(train_seqs, test_data)`` where ``train_seqs[i]`` is sequence ``i``
        without its last item and ``test_data[i]`` is the pair
        ``(that same prefix, held-out last item)``.
    """
    # Two independent slices per sequence, so the train and test prefixes
    # never share a list object.
    train_seqs = [seq[:-1] for seq in sequences]
    test_data = [(seq[:-1], seq[-1]) for seq in sequences]
    return train_seqs, test_data

class SequenceDataset(Dataset):
    """Map-style dataset of (input sequence, target item) training pairs."""

    def __init__(self, inputs, targets):
        """Convert the raw pairs to tensors once, up front.

        Args:
            inputs: Sequences of item IDs; lengths may differ, so they are
                kept as a list of 1-D long tensors rather than one matrix.
            targets: One target item ID per input sequence.
        """
        self.inputs = []
        for item_ids in inputs:
            self.inputs.append(torch.tensor(item_ids, dtype=torch.long))
        self.targets = torch.tensor(targets)

    def __len__(self):
        """Number of training pairs."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``(sequence tensor, target)`` pair stored at ``idx``."""
        sequence = self.inputs[idx]
        label = self.targets[idx]
        return sequence, label

def collate_fn(batch):
    """Collate (sequence, target) pairs into one padded batch on ``device``.

    Args:
        batch: List of ``(1-D sequence tensor, target)`` pairs as produced by
            SequenceDataset.

    Returns:
        ``(padded_inputs, targets)``, both moved to the module-level
        ``device``. ``padded_inputs`` is batch-first.
    """
    seqs, labels = zip(*batch)
    # pad_sequence appends its default padding value (0) so that every row
    # matches the longest sequence in the batch.
    # NOTE(review): the padded positions are fed to the GRU unpacked, so for
    # shorter rows the final hidden state is taken after the padding steps --
    # confirm this is intended (evaluation runs unpadded, batch size 1).
    padded = pad_sequence(seqs, batch_first=True)
    label_tensor = torch.tensor(labels)
    return padded.to(device), label_tensor.to(device)


class GRU4Rec(nn.Module):
    """Next-item recommender: item embedding -> single-layer GRU -> linear scorer."""

    def __init__(self, num_items, embedding_dim=128, hidden_dim=128):
        """Build the three layers.

        Args:
            num_items: Size of the item vocabulary; used both as the number of
                embedding rows and as the number of output scores.
            embedding_dim: Width of each item embedding.
            hidden_dim: Width of the GRU hidden state.
        """
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=num_items)

    def forward(self, x):
        """Score every item given a batch of item-ID sequences.

        Args:
            x: Long tensor of item IDs, batch-first (batch, seq_len).

        Returns:
            Tensor of shape (batch, num_items) with one score per item.
        """
        # Only the final hidden state is used; the per-step outputs are ignored.
        final_hidden = self.gru(self.embedding(x))[1]
        # final_hidden is (num_layers=1, batch, hidden); drop the layer axis.
        return self.fc(final_hidden.squeeze(0))


def hit_ndcg_k(predictions, target, k):
    """Compute Hit@k and NDCG@k for a single test case with one relevant item.

    Args:
        predictions: 1-D tensor of scores, one per item ID.
        target: 0-d tensor holding the ID of the held-out item.
        k: Cutoff; clamped to the number of scored items.

    Returns:
        ``(hit, ndcg)``: ``hit`` is 1 if the target is among the ``k``
        highest-scored items, else 0. ``ndcg`` is ``1 / log2(rank + 2)`` for
        the target's 0-based rank in that list, or 0 when it is absent.
    """
    cutoff = min(k, len(predictions))
    top_items = predictions.argsort(descending=True)[:cutoff]
    if not (target in top_items):
        return 0, 0
    # With exactly one relevant item the ideal DCG is 1, so the discounted
    # gain at the target's position is already the NDCG.
    position = top_items.tolist().index(target.item())
    return 1, 1 / np.log2(position + 2)

def evaluate(model, test_data, k_list=[10]):
    """Print Hit@k / NDCG@k over the test cases and return the monitored NDCG.

    Each test case is scored on its own (batch size 1, no padding): the model
    produces one score per item, and the rank of the held-out item among the
    top-k scores decides the hit and the NDCG contribution.

    Args:
        model: Module mapping a (1, seq_len) long tensor of item IDs to
            per-item scores. It is switched to eval mode and left there.
        test_data: Iterable of ``(input_sequence, target_item)`` pairs.
        k_list: Cutoffs to report. The argument is never mutated.

    Returns:
        Mean NDCG@10 when 10 is one of the cutoffs (the early-stopping
        metric); otherwise the mean NDCG at the first cutoff in ``k_list``.
        Previously a ``k_list`` without 10 raised ``KeyError`` after all the
        work had been done.

    Raises:
        ValueError: If ``k_list`` is empty.
    """
    cutoffs = list(k_list)
    if not cutoffs:
        raise ValueError("k_list must contain at least one cutoff")

    model.eval()
    hits = {k: [] for k in cutoffs}
    ndcgs = {k: [] for k in cutoffs}

    # A single no_grad context for the whole pass instead of one per sample.
    with torch.no_grad():
        for input_seq, target in test_data:
            input_seq = torch.tensor(input_seq, dtype=torch.long, device=device).unsqueeze(0)
            target = torch.tensor(target, dtype=torch.long, device=device)
            prediction = model(input_seq).squeeze(0)

            for k in cutoffs:
                hit, ndcg = hit_ndcg_k(prediction, target, k)
                hits[k].append(hit)
                ndcgs[k].append(ndcg)

    for k in cutoffs:
        print(f"Hit@{k}: {np.mean(hits[k]):.4f}, NDCG@{k}: {np.mean(ndcgs[k]):.4f}")

    # NDCG@10 stays the monitored metric whenever it was computed; fall back
    # to the first requested cutoff instead of failing on a missing key.
    monitor_k = 10 if 10 in ndcgs else cutoffs[0]
    return np.mean(ndcgs[monitor_k])

# ----------------------- Training with NDCG Early Stop -----------------------
def train_model(filepath, embedding_dim=128, batch_size=128, epochs=100, max_users=None, patience=5):
    """Train a GRU4Rec next-item model with early stopping on NDCG@10.

    The last item of every sequence is held out for evaluation. Each remaining
    sequence is expanded into (prefix, next item) training pairs. Training
    uses Adam (lr=0.001) with cross-entropy over all items. After every epoch
    the model is evaluated, and training stops once NDCG@10 has failed to
    improve for ``patience`` consecutive epochs. The weights of the best epoch
    are restored before the final evaluation, so the reported final metrics
    are those of the best model rather than of the last (already degraded)
    epoch.

    Args:
        filepath: Text file understood by ``load_sequences``.
        embedding_dim: Embedding width; the GRU hidden size is set to the
            same value.
        batch_size: Training mini-batch size.
        epochs: Upper bound on the number of training epochs.
        max_users: Forwarded to ``load_sequences`` to limit the lines read.
        patience: Non-improving epochs tolerated before stopping.

    Raises:
        ValueError: If the file yields no usable sequences, or no training
            pairs can be built (every sequence has exactly two items).
    """
    print(f"\nLoading file: {filepath}")
    sequences = load_sequences(filepath, max_users)
    if not sequences:
        raise ValueError(f"No sequences with at least 2 items found in {filepath}")
    train_seqs, test_data = split_train_test(sequences)
    # Item IDs index the embedding table directly, so it needs max_id + 1 rows.
    num_items = max(max(seq) for seq in sequences) + 1

    # Prepare training input-target pairs: every proper prefix predicts the
    # item that follows it.
    inputs = []
    targets = []
    for seq in train_seqs:
        for i in range(1, len(seq)):
            inputs.append(seq[:i])
            targets.append(seq[i])
    if not inputs:
        raise ValueError("No training pairs: every sequence needs at least 3 items to contribute one")

    dataset = SequenceDataset(inputs, targets)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

    model = GRU4Rec(num_items, embedding_dim, embedding_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    best_ndcg = 0
    best_epoch = 0
    best_state = None
    patience_counter = 0

    for epoch in range(epochs):
        model.train()  # evaluate() leaves the model in eval mode
        total_loss = 0
        for batch_inputs, batch_targets in loader:
            logits = model(batch_inputs)
            loss = criterion(logits, batch_targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1}: Training Loss = {total_loss / len(loader):.4f}")

        # Evaluate on test set
        # NOTE(review): the same held-out items drive early stopping and the
        # final report, so the reported metrics are optimistically biased; a
        # separate validation item per user would remove the leak.
        current_ndcg = evaluate(model, test_data, k_list=[10, 20])

        if current_ndcg > best_ndcg:
            best_ndcg = current_ndcg
            best_epoch = epoch + 1
            # state_dict tensors share storage with the live parameters, so
            # clone them to get a real snapshot.
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1
            print(f"NDCG did not improve. Early stopping counter: {patience_counter}/{patience}")
            if patience_counter >= patience:
                print("\nEarly stopping triggered based on NDCG!")
                break

    # Roll back to the best epoch; otherwise the final numbers come from a
    # model trained up to `patience` epochs past its peak.
    if best_state is not None:
        model.load_state_dict(best_state)
        print(f"\nRestored weights from epoch {best_epoch} (best NDCG@10 = {best_ndcg:.4f})")

    print("\nFinal Evaluation:")
    evaluate(model, test_data, k_list=[10, 20])


Using device: cuda


In [None]:
# Train on the first data file. epochs=350 is only an upper bound: train_model
# stops early once NDCG@10 stops improving (default patience=5).
train_model(filepath, embedding_dim=128, batch_size=128, epochs=350, max_users=None)


Loading file: /content/drive/MyDrive/Colab Notebooks/data/Beauty_item_org_rank.txt
Epoch 1: Training Loss = 8.8119
Hit@10: 0.0228, NDCG@10: 0.0114
Hit@20: 0.0358, NDCG@20: 0.0147
Epoch 2: Training Loss = 8.2093
Hit@10: 0.0316, NDCG@10: 0.0156
Hit@20: 0.0512, NDCG@20: 0.0205
Epoch 3: Training Loss = 7.8274
Hit@10: 0.0361, NDCG@10: 0.0184
Hit@20: 0.0579, NDCG@20: 0.0239
Epoch 4: Training Loss = 7.4783
Hit@10: 0.0423, NDCG@10: 0.0217
Hit@20: 0.0643, NDCG@20: 0.0272
Epoch 5: Training Loss = 7.1185
Hit@10: 0.0443, NDCG@10: 0.0227
Hit@20: 0.0677, NDCG@20: 0.0286
Epoch 6: Training Loss = 6.7549
Hit@10: 0.0438, NDCG@10: 0.0232
Hit@20: 0.0672, NDCG@20: 0.0291
Epoch 7: Training Loss = 6.3968
Hit@10: 0.0450, NDCG@10: 0.0237
Hit@20: 0.0668, NDCG@20: 0.0292
Epoch 8: Training Loss = 6.0586
Hit@10: 0.0427, NDCG@10: 0.0229
Hit@20: 0.0645, NDCG@20: 0.0283
NDCG did not improve. Early stopping counter: 1/5
Epoch 9: Training Loss = 5.7464
Hit@10: 0.0404, NDCG@10: 0.0219
Hit@20: 0.0622, NDCG@20: 0.0274
ND