#Data Loading

In [1]:
import os
import pandas as pd
from collections import Counter, defaultdict
from tqdm import tqdm as tqdm
import numpy as np


#Train Data

In [None]:
!unzip -q MINDsmall_train.zip -d train

In [2]:
train_news_path = os.path.abspath('train/news.tsv')
Train_News_data=pd.read_table(train_news_path,
              header=None,
              names=[
                  'id', 'category', 'subcategory', 'title', 'abstract', 'url',
                  'title_entities', 'abstract_entities'
              ])

print(Train_News_data.shape)

(51282, 8)


In [3]:
train_behaviors_path = os.path.abspath('train/behaviors.tsv')
Train_Behaviors_data=pd.read_table(train_behaviors_path,
              header=None,
              names=[
                  'impression_id', 'user_id', 'time', 'history', 'impressions'
              ])
print(Train_Behaviors_data.shape)

(156965, 5)


#Validation Data

In [None]:
!unzip -q MINDsmall_dev.zip -d dev

In [4]:
val_news_path = os.path.abspath('dev/news.tsv')
Val_News_data=pd.read_table(val_news_path,
              header=None,
              names=[
                  'id', 'category', 'subcategory', 'title', 'abstract', 'url',
                  'title_entities', 'abstract_entities'
              ])
print(Val_News_data.shape)

(42416, 8)


In [5]:
val_behaviors_path = os.path.abspath('dev/behaviors.tsv')
Val_Behaviors_data=pd.read_table(val_behaviors_path,
              header=None,
              names=[
                  'impression_id', 'user_id', 'time', 'history', 'impressions'
              ])
print(Val_Behaviors_data.shape)

(73152, 5)


#Subsampling

In [6]:
def subsample_train_val_same_users_split_by_source(
    Train_News_data, Train_Behaviors_data,
    Val_News_data, Val_Behaviors_data,
    n_users=10000, train_ratio=2.1, seed=42
):
    rng = np.random.default_rng(seed)

    train_users = set(Train_Behaviors_data['user_id'].dropna())
    val_users = set(Val_Behaviors_data['user_id'].dropna())
    common_users = np.array(list(train_users & val_users))
    common_user_ratio = len(train_users) / len(common_users)

    common_user_count = min(int(n_users / common_user_ratio), len(common_users))
    noncommon_user_count = n_users - common_user_count

    sampled_common_users = rng.choice(common_users, size=common_user_count, replace=False)

    train_unique_users = np.setdiff1d(list(train_users), common_users)
    val_unique_users = np.setdiff1d(list(val_users), common_users)

    sampled_train_unique = rng.choice(train_unique_users, size=noncommon_user_count, replace=False)
    sampled_val_unique = rng.choice(val_unique_users, size=noncommon_user_count, replace=False)

    user_train_logs = pd.concat([
        Train_Behaviors_data[Train_Behaviors_data['user_id'].isin(sampled_common_users)],
        Train_Behaviors_data[Train_Behaviors_data['user_id'].isin(sampled_train_unique)]
    ])

    user_val_logs = pd.concat([
        Val_Behaviors_data[Val_Behaviors_data['user_id'].isin(sampled_common_users)],
        Val_Behaviors_data[Val_Behaviors_data['user_id'].isin(sampled_val_unique)]
    ])

    target_val_size = int(len(user_train_logs) / train_ratio)
    if target_val_size < len(user_val_logs):
        user_val_logs = user_val_logs.sample(n=target_val_size, random_state=seed)

    def get_referenced_news(news_df, behaviors_df):
        news_ids = set()
        for _, row in behaviors_df.iterrows():
            history = str(row['history']) if not pd.isna(row['history']) else ''
            news_ids.update(history.split())
            impressions = str(row['impressions']) if not pd.isna(row['impressions']) else ''
            news_ids.update(x.split('-')[0] for x in impressions.split())
        return news_df[news_df['id'].astype(str).isin(news_ids)].copy()

    train_news = get_referenced_news(Train_News_data, user_train_logs)
    val_news = get_referenced_news(Val_News_data, user_val_logs)

    return train_news, user_train_logs, val_news, user_val_logs




In [7]:
# Subsample training data
Train_News_data, Train_Behaviors_data, Val_News_data, Val_Behaviors_data = subsample_train_val_same_users_split_by_source(Train_News_data, Train_Behaviors_data, Val_News_data, Val_Behaviors_data, n_users=5000,  train_ratio=2.1)

# Subsample validation data


print(f"Train News Data Shape: {Train_News_data.shape}")
print(f"Train Behaviors Data Shape: {Train_Behaviors_data.shape}")
print(f"Valid News Data Shape: {Val_News_data.shape}")
print(f"Valid Behaviors Data Shape: {Val_Behaviors_data.shape}")



Train News Data Shape: (23560, 8)
Train Behaviors Data Shape: (15698, 5)
Valid News Data Shape: (18612, 8)
Valid Behaviors Data Shape: (7378, 5)


#Data Preprocessing

In [None]:
!wget https://nlp.stanford.edu/data/glove.840B.300d.zip
!unzip glove.840B.300d.zip

--2025-05-05 11:15:43--  https://nlp.stanford.edu/data/glove.840B.300d.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.840B.300d.zip [following]
--2025-05-05 11:15:43--  https://downloads.cs.stanford.edu/nlp/data/glove.840B.300d.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2176768927 (2.0G) [application/zip]
Saving to: ‘glove.840B.300d.zip’


2025-05-05 11:22:33 (5.07 MB/s) - ‘glove.840B.300d.zip’ saved [2176768927/2176768927]

Archive:  glove.840B.300d.zip
  inflating: glove.840B.300d.txt     


In [8]:
def load_glove_embeddings(glove_path, vocab=None, embed_dim=300):
    glove = {}
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            values = line.strip().split()
            if len(values) != embed_dim + 1:
                continue
            word = values[0]

            if vocab is not None and word not in vocab:
                continue

            try:
                vector = torch.tensor([float(v) for v in values[1:]], dtype=torch.float32)
                glove[word] = vector
            except ValueError:
                continue

    return glove

In [9]:
import json

def extract_entities(entity_str):
    try:
        entities = json.loads(entity_str)
        return " ".join(e.get("Entity", "") for e in entities)
    except:
        return ""

def build_vocab(df, glove=None, min_freq=1):
    counter = Counter()

    titles = df['title'].fillna('').tolist()
    abstracts = df['abstract'].fillna('').tolist()

    title_entities = df['title_entities'].fillna('').apply(extract_entities).tolist()
    abstract_entities = df['abstract_entities'].fillna('').apply(extract_entities).tolist()

    texts = titles + abstracts + title_entities + abstract_entities

    for text in texts:
        tokens = text.lower().split()
        counter.update(tokens)

    vocab = {'<PAD>': 0, '<UNK>': 1}
    oov_words = 0
    in_glove_words = 0

    for word, freq in counter.items():
        if freq >= min_freq:
            if glove is None or word in glove:
                vocab[word] = len(vocab)
                in_glove_words += 1
            else:
                vocab[word] = len(vocab)
                oov_words += 1

    return vocab


In [10]:
def tokenize(text, vocab, max_len=20):
    if not isinstance(text, str):
        text = ""
    tokens = text.lower().split()
    token_ids = [vocab.get(w, vocab['<UNK>']) for w in tokens[:max_len]]
    token_ids += [vocab['<PAD>']] * (max_len - len(token_ids))
    return token_ids

In [11]:
def create_embedding_matrix(vocab, glove, embed_dim=300):
    matrix = torch.randn(len(vocab), embed_dim) * 0.01
    for word, idx in vocab.items():
        if word in glove:
            matrix[idx] = glove[word]
    return matrix

In [12]:
def build_news_dict(df, vocab, max_title_len=20, max_abstract_len=50):
    news_dict = {}
    newsid2cat = {}
    category_news_dict = defaultdict(list)

    unique_categories = df['category'].dropna().unique()
    cat2idx = {cat: idx for idx, cat in enumerate(unique_categories)}

    for _, row in df.iterrows():
        nid = row['id']

        title = row.get('title', '')
        abstract = row.get('abstract', '')
        title_entities = extract_entities(row.get('title_entities', ''))
        abstract_entities = extract_entities(row.get('abstract_entities', ''))

        title_indices = tokenize(title, vocab, max_len=max_title_len)
        abstract_indices = tokenize(abstract, vocab, max_len=max_abstract_len)
        title_entity_indices = tokenize(title_entities, vocab, max_len=max_title_len)
        abstract_entity_indices = tokenize(abstract_entities, vocab, max_len=max_abstract_len)

        news_dict[nid] = {
            'title_indices': title_indices,
            'abstract_indices': abstract_indices,
            'title_entity_indices': title_entity_indices,
            'abstract_entity_indices': abstract_entity_indices,
        }

        category = row.get('category', None)
        if pd.notnull(category):
            newsid2cat[nid] = category
            category_news_dict[category].append(nid)

    return news_dict, newsid2cat, category_news_dict, cat2idx

In [13]:
def build_news_freq_dict(behaviors_df):
    news_freq_counter = Counter()
    for history in behaviors_df['history'].dropna():
        for nid in history.split():
            news_freq_counter[nid] += 1
    return dict(news_freq_counter)

#Utility Functions

In [14]:
def clicked_candidate_news_preparation(behaviors, news):
    history_field = behaviors['history']
    impressions_field = behaviors['impressions']

    history_ids = history_field.split() if isinstance(history_field, str) and history_field.strip() else []
    impression_pairs = impressions_field.split() if isinstance(impressions_field, str) and impressions_field.strip() else []

    candidate_ids = [impr.split('-')[0] for impr in impression_pairs]
    candidate_labels = [int(impr.split('-')[1]) for impr in impression_pairs]

    clicked_news = [nid for nid in history_ids if nid in set(news['id'])]
    candidate_news = [nid for nid in candidate_ids if nid in set(news['id'])]

    return clicked_news, candidate_news, candidate_labels

In [15]:
def prepare_news_tensor(news_ids, news_dict, device):
    tensor = torch.tensor(
        [news_dict[nid]['title_indices'] for nid in news_ids],
        dtype=torch.long
    )
    return tensor.to(device)

#Model Implementation

##Import

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F

##ChannelWiseLayer

In [17]:
class ChannelWiseDynamicConv(nn.Module):
    def __init__(self, category_size, category_emb_dim=300, word_dim=300, kernel_size=3):
        super(ChannelWiseDynamicConv, self).__init__()
        self.category_embedding = nn.Embedding(category_size, category_emb_dim)
        self.kernel_generator = nn.Sequential(
            nn.Linear(category_emb_dim, word_dim * kernel_size),
            nn.ReLU()
        )
        self.kernel_size = kernel_size
        self.word_dim = word_dim



    def forward(self, word_vecs):
        B, L, D = word_vecs.shape
        k = self.kernel_size
        device = word_vecs.device


        C_size = self.category_embedding.num_embeddings
        C = self.category_embedding(torch.arange(C_size, device=device))

        Kc = self.kernel_generator(C)
        Kc = Kc.view(C_size, D, k)


        W = torch.matmul(word_vecs, C.T)
        W = F.softmax(W, dim=-1)

        Ki = torch.matmul(W, Kc.view(C_size, -1))
        Ki = Ki.view(B, L, D, k)

        mi = word_vecs.transpose(1, 2)
        mi = F.pad(mi, (k // 2, k // 2), mode='replicate')

        outputs = torch.zeros(B, L, D, device=device)
        for i in range(L):
            x_slice = mi[:, :, i:i + k]
            Ki_sample = Ki[:, i, :, :]
            outputs[:, i, :] = torch.sum(x_slice * Ki_sample, dim=-1)

        return outputs


##News Encoder

In [18]:
class NewsEncoder(nn.Module):
    def __init__(self, embedding_matrix, category_size, word_dim=300, num_heads=10, kernel_size=3, dropout=0.2):
        super(NewsEncoder, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        self.multihead_attention = nn.MultiheadAttention(word_dim, num_heads=num_heads, batch_first=True)
        self.feed_forward = nn.Sequential(
            nn.Linear(word_dim, word_dim),
            nn.ReLU(),
            nn.Linear(word_dim, word_dim)
        )
        self.dynamic_conv = ChannelWiseDynamicConv(
            category_size=category_size,
            word_dim=word_dim,
            kernel_size=kernel_size
        )
        self.query_vector = nn.Parameter(torch.randn(word_dim))
        self.norm1 = nn.LayerNorm(word_dim)
        self.norm2 = nn.LayerNorm(word_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, word_indices):
        emb = self.embedding(word_indices)
        hi, _ = self.multihead_attention(emb, emb, emb)
        hi = self.dropout(hi)
        hi_norm = self.norm1(emb + hi)

        ffn_out = self.feed_forward(hi_norm)
        ffn_out = self.dropout(ffn_out)
        ffn_out_norm = self.norm2(hi_norm + ffn_out)

        dyn_out = self.dynamic_conv(ffn_out_norm)
        final_word_repr = ffn_out_norm + dyn_out

        attn_scores = torch.matmul(final_word_repr, self.query_vector)
        attn_weights = F.softmax(attn_scores, dim=1).unsqueeze(-1)
        news_vec = torch.sum(final_word_repr * attn_weights, dim=1)

        return news_vec

##User Encoder

In [19]:
class UserEncoder(nn.Module):
    def __init__(self, news_dim=300):
        super(UserEncoder, self).__init__()
        self.V = nn.Linear(news_dim, news_dim)
        self.query_vector = nn.Parameter(torch.randn(news_dim))

    def forward(self, clicked_news_vecs):
        if clicked_news_vecs.dim() == 2:
            clicked_news_vecs = clicked_news_vecs.unsqueeze(0)

        atten = torch.tanh(self.V(clicked_news_vecs.clone()))
        ai = torch.matmul(atten, self.query_vector.clone())
        ai = F.softmax(ai, dim=1).unsqueeze(-1)
        user_vec = torch.sum(ai * clicked_news_vecs, dim=1)

        return user_vec.squeeze(0) if user_vec.size(0) == 1 else user_vec

##Frequency-Aware Module

In [20]:
import math
import random

In [21]:
def dropoutORreplace(clicked_ids, news_freq_dict, category_news_dict, newsid2cat, r_min=0.1, r_max=0.4, method='dropout'):
    all_freqs = [math.log(news_freq_dict.get(nid, 1)) for user in clicked_ids for nid in user]
    max_freq = max(all_freqs) if all_freqs else 1
    if max_freq == 0:
        max_freq = 1e-6

    modified_batch = []

    for user_clicks in clicked_ids:
        modified = []
        for nid in user_clicks:
            freq = math.log(news_freq_dict.get(nid, 1))
            r_i = (freq / max_freq) * (r_max - r_min) + r_min

            if method == 'dropout':
                if random.random() < r_i:
                    continue
                modified.append(nid)

            elif method == 'replace':
                if random.random() < r_i:
                    cat = newsid2cat.get(nid, None)
                    pool = category_news_dict.get(cat, [])
                    if pool:
                        replacement = random.choice(pool)
                        modified.append(replacement)
                    else:
                        modified.append(nid)
                else:
                    modified.append(nid)
        modified_batch.append(modified)

    return modified_batch

#Score Calculator

In [22]:
def compute_click_scores(user_vec, candidate_vecs):
    click_scores = torch.matmul(candidate_vecs, user_vec)
    return click_scores

#Loss Function

In [23]:
def contrastive_user_loss(v_orig, v_aug, temperature=0.1):
    if v_orig.dim() == 1:
        v_orig = v_orig.unsqueeze(0)
    if v_aug.dim() == 1:
        v_aug = v_aug.unsqueeze(0)

    v_orig = F.normalize(v_orig, dim=1)
    v_aug = F.normalize(v_aug, dim=1)

    pos_sim = torch.exp(torch.sum(v_orig * v_aug, dim=1) / temperature)
    denom = pos_sim + torch.exp(torch.sum(v_orig * v_orig, dim=1) / temperature)

    loss = -torch.log(pos_sim / denom)
    return loss.mean()

#Evaluation Metrics

In [24]:
from sklearn.metrics import roc_auc_score

In [25]:
def compute_mrr(y_true_sorted):
    for i, label in enumerate(y_true_sorted):
        if label == 1:
            return 1 / (i + 1)
    return 0

In [26]:
def ndcg(y_true_sorted, k):
    k = min(k, len(y_true_sorted))
    if k == 0:
        return 0.0
    gains = (2 ** y_true_sorted[:k] - 1)
    discounts = torch.log2(torch.arange(2, k + 2, device=y_true_sorted.device))
    dcg = torch.sum(gains / discounts)

    sorted_relevances, _ = torch.sort(y_true_sorted, descending=True)
    ideal_gains = (2 ** sorted_relevances[:k] - 1)
    ideal_dcg = torch.sum(ideal_gains / discounts)

    return (dcg / ideal_dcg).item() if ideal_dcg > 0 else 0.0


In [27]:
def eval_metrics(y_true, y_score):
    aucs, mrrs, ndcg5s, ndcg10s = [], [], [], []

    for labels, scores in zip(y_true, y_score):
        sorted_idx = torch.argsort(scores, descending=True)
        sorted_labels = labels[sorted_idx]

        auc = roc_auc_score(labels.numpy(), scores.numpy()) if len(set(labels.numpy())) > 1 else 0.0
        mrr = compute_mrr(sorted_labels)
        ndcg5 = ndcg(sorted_labels, 5)
        ndcg10 = ndcg(sorted_labels, 10)

        aucs.append(auc)
        mrrs.append(mrr)
        ndcg5s.append(ndcg5)
        ndcg10s.append(ndcg10)

    auc = sum(aucs) / len(aucs)
    mrr = sum(mrrs) / len(mrrs)
    ndcg5 = sum(ndcg5s) / len(ndcg5s)
    ndcg10 = sum(ndcg10s) / len(ndcg10s)

    return auc, mrr, ndcg5, ndcg10

#Model Object Initiation

In [28]:
glove = load_glove_embeddings('glove.840B.300d.txt')

2196017it [01:44, 20998.87it/s]


##For training

In [29]:
vocab = build_vocab(Train_News_data, glove)
embedding_matrix = create_embedding_matrix(vocab, glove)
news_dict, newsid2cat, category_news_dict, cat2idx = build_news_dict(Train_News_data, vocab)
category_size = len(cat2idx)
news_freq = build_news_freq_dict(Train_Behaviors_data)

##For testing

In [30]:
news_dict_val, newsid2cat_val, category_news_dict_val, cat2idx_val = build_news_dict(Val_News_data, vocab)
category_size_val = len(cat2idx_val)
news_freq_val = build_news_freq_dict(Val_Behaviors_data)

In [31]:
news_encoder = NewsEncoder(embedding_matrix, category_size)
user_encoder = UserEncoder()

  self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))


In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [33]:
news_encoder.to(device)
user_encoder.to(device)

UserEncoder(
  (V): Linear(in_features=300, out_features=300, bias=True)
)

#Code Component Testing

In [34]:
behavior_row = Train_Behaviors_data.iloc[1]
clicked_news, candidate_news, candidate_labels = clicked_candidate_news_preparation(behavior_row, Train_News_data)

# print(clicked_news)

modified_clicked_news = dropoutORreplace([clicked_news], news_freq, category_news_dict, newsid2cat, r_min=0.1, r_max=0.4, method='dropout')[0]

# print(modified_clicked_news)

clicked_tensor = prepare_news_tensor(clicked_news, news_dict, device)
candidate_tensor = prepare_news_tensor(candidate_news, news_dict, device)
modified_clicked_tensor = prepare_news_tensor(modified_clicked_news, news_dict, device)

# print(clicked_tensor)
# print(modified_clicked_tensor)

final_clicked_vecs = news_encoder(clicked_tensor)
final_cand_vecs = news_encoder(candidate_tensor)
final_mod_clicked_vecs = news_encoder(modified_clicked_tensor)

# print(final_clicked_vecs)
# print(final_mod_clicked_vecs)


user_vec = user_encoder(final_clicked_vecs.unsqueeze(0))
user_vec_aug = user_encoder(final_mod_clicked_vecs.unsqueeze(0))

contrastive_loss = contrastive_user_loss(
        user_vec, user_vec_aug, temperature=0.1
        )

print(contrastive_loss)



tensor(9.7967, grad_fn=<MeanBackward0>)


#Train-test

In [35]:
def train_one_epoch(behaviors_df, news_df, model_components, optimizer, news_dict, news_freq, category_news_dict, newsid2cat, device):
    news_encoder = model_components['news_encoder']
    user_encoder = model_components['user_encoder']

    news_encoder.train()
    user_encoder.train()


    device = next(news_encoder.parameters()).device
    total_loss = 0
    contrastive_loss_total = 0
    count = 0

    user_vecs = []
    user_vecs_aug = []

    for _, behavior_row in tqdm(behaviors_df.iterrows(), total=len(behaviors_df)):
        clicked_news, candidate_news, candidate_labels = clicked_candidate_news_preparation(behavior_row, news_df)
        clicked_news = clicked_news[:50]

        if len(candidate_labels) == 0 or len(clicked_news) == 0 or len(candidate_news) == 0:
            continue

        modified_clicked_news = dropoutORreplace([clicked_news], news_freq, category_news_dict, newsid2cat, r_min=0.1, r_max=0.4, method='dropout')[0]

        if len(modified_clicked_news) == 0:
            continue


        clicked_tensor = prepare_news_tensor(clicked_news, news_dict, device)
        mod_clicked_tensor = prepare_news_tensor(modified_clicked_news, news_dict, device)
        candidate_tensor = prepare_news_tensor(candidate_news, news_dict, device)
        candidate_labels = torch.tensor(candidate_labels, dtype=torch.float).to(device)


        clicked_vecs = news_encoder(clicked_tensor)
        mod_clicked_vecs = news_encoder(mod_clicked_tensor)
        candidate_vecs = news_encoder(candidate_tensor)



        user_vec = user_encoder(clicked_vecs.unsqueeze(0))
        user_vec_aug = user_encoder(mod_clicked_vecs.unsqueeze(0))


        scores = compute_click_scores(user_vec, candidate_vecs)
        click_loss = F.binary_cross_entropy_with_logits(scores, candidate_labels)

        contrastive_loss = contrastive_user_loss(
        user_vec, user_vec_aug, temperature=0.1
        )

        loss = click_loss + 0.1 * contrastive_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += click_loss
        contrastive_loss_total += contrastive_loss
        count += 1

    avg_click_loss = total_loss / count if count > 0 else 0
    avg_contrastive_loss = contrastive_loss_total / count if count > 0 else 0
    return avg_click_loss, avg_contrastive_loss



In [36]:
def test_one_epoch(behaviors_df, news_df, model_components, news_dict, news_freq, category_news_dict, newsid2cat, device):
    news_encoder = model_components['news_encoder']
    user_encoder = model_components['user_encoder']

    news_encoder.eval()
    user_encoder.eval()

    device = next(news_encoder.parameters()).device
    all_scores, all_labels = [], []


    with torch.no_grad():
      for _, behavior_row in tqdm(behaviors_df.iterrows(), total=len(behaviors_df)):
          clicked_news, candidate_news, candidate_labels = clicked_candidate_news_preparation(behavior_row, news_df)
          clicked_news = clicked_news[:50]

          if len(candidate_labels) == 0 or len(clicked_news) == 0 or len(candidate_news) == 0:
              continue

          modified_clicked_news = dropoutORreplace([clicked_news], news_freq, category_news_dict, newsid2cat, r_min=0.1, r_max=0.4, method='dropout')[0]

          if len(modified_clicked_news) == 0:
              continue


          clicked_tensor = prepare_news_tensor(clicked_news, news_dict, device)
          mod_clicked_tensor = prepare_news_tensor(modified_clicked_news, news_dict, device)
          candidate_tensor = prepare_news_tensor(candidate_news, news_dict, device)
          candidate_labels = torch.tensor(candidate_labels, dtype=torch.float).to(device)


          clicked_vecs = news_encoder(clicked_tensor)
          mod_clicked_vecs = news_encoder(mod_clicked_tensor)
          candidate_vecs = news_encoder(candidate_tensor)



          user_vec = user_encoder(clicked_vecs.unsqueeze(0))
          user_vec_aug = user_encoder(mod_clicked_vecs.unsqueeze(0))


          click_scores = compute_click_scores(user_vec, candidate_vecs)


          all_scores.append(click_scores.detach().cpu())
          all_labels.append(torch.tensor(candidate_labels, dtype=torch.float32))

      return eval_metrics(all_labels, all_scores)



In [37]:
model_components = {
    'news_encoder': news_encoder,
    'user_encoder': user_encoder
}


In [38]:
optimizer = torch.optim.Adam(
    list(news_encoder.parameters()) +
    list(user_encoder.parameters()), lr=1e-4
)

In [39]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7f1d1da661a0>

In [40]:
train_loss, contrastive_loss_total = train_one_epoch(
         Train_Behaviors_data, Train_News_data, model_components, optimizer, news_dict, news_freq, category_news_dict, newsid2cat, device
    )

auc, mrr, ndcg5, ndcg10 = test_one_epoch(
    Val_Behaviors_data, Val_News_data, model_components, news_dict_val, news_freq_val, category_news_dict_val, newsid2cat_val, device
)

print(f" Train Loss: {train_loss:.4f} | Contrastive Loss: {contrastive_loss_total}"
      f"AUC: {auc:.4f}, MRR: {mrr:.4f}, nDCG@5: {ndcg5:.4f}, nDCG@10: {ndcg10:.4f}")

 11%|█▏        | 1789/15569 [15:33<1:59:52,  1.92it/s] 


KeyboardInterrupt: 

#Final Evaluation

In [40]:
for epoch in range(3):
    print(f"Epoch {epoch}")


    train_loss, contrastive_loss_total = train_one_epoch(
         Train_Behaviors_data, Train_News_data, model_components, optimizer, news_dict, news_freq, category_news_dict, newsid2cat, device
    )

    auc, mrr, ndcg5, ndcg10 = test_one_epoch(
        Val_Behaviors_data, Val_News_data, model_components, news_dict_val, news_freq_val, category_news_dict_val, newsid2cat_val, device
    )

    print(f"Epoch {epoch} | Train Loss: {train_loss:.4f} | "
          f"AUC: {auc:.4f}, MRR: {mrr:.4f}, nDCG@5: {ndcg5:.4f}, nDCG@10: {ndcg10:.4f}")

print("-------------------------------------------Final Eval-------------------------------------------")
print(f"AUC: {auc * 100:.2f}, MRR: {mrr * 100:.2f}, nDCG@5: {ndcg5 * 100:.2f}, nDCG@10: {ndcg10 * 100:.2f}")


Epoch 0


100%|██████████| 15698/15698 [2:17:10<00:00,  1.91it/s]   
  all_labels.append(torch.tensor(candidate_labels, dtype=torch.float32))
100%|██████████| 7378/7378 [13:43<00:00,  8.96it/s]


Epoch 0 | Train Loss: 14.4284 | AUC: 0.5382, MRR: 0.2761, nDCG@5: 0.2535, nDCG@10: 0.3134
Epoch 1


100%|██████████| 15698/15698 [2:17:35<00:00,  1.90it/s]  
  all_labels.append(torch.tensor(candidate_labels, dtype=torch.float32))
100%|██████████| 7378/7378 [14:23<00:00,  8.54it/s]


Epoch 1 | Train Loss: 8.2332 | AUC: 0.5112, MRR: 0.2573, nDCG@5: 0.2322, nDCG@10: 0.2964
Epoch 2


100%|██████████| 15698/15698 [2:23:05<00:00,  1.83it/s]   
  all_labels.append(torch.tensor(candidate_labels, dtype=torch.float32))
100%|██████████| 7378/7378 [14:41<00:00,  8.37it/s]


Epoch 2 | Train Loss: 6.2969 | AUC: 0.5326, MRR: 0.2697, nDCG@5: 0.2468, nDCG@10: 0.3070
-------------------------------------------Final Eval-------------------------------------------
AUC: 53.26, MRR: 26.97, nDCG@5: 24.68, nDCG@10: 30.70


In [41]:
module_name = "MCCM"

df = pd.DataFrame([{
    'Module': module_name,
    'AUC': f"{auc * 100:.2f}",
    'MRR': f"{mrr * 100:.2f}",
    'nDCG@5': f"{ndcg5 * 100:.2f}",
    'nDCG@10': f"{ndcg10 * 100:.2f}",
}])

filename = f"{module_name}_results.csv"

df.to_csv(filename, index=False)