In [1]:
from datasets import load_dataset
from nltk.tokenize import wordpunct_tokenize
# Load the 'ag_news' dataset; later cells read its 'train' and 'test' splits.
dataset = load_dataset('ag_news')

In [2]:
import tensorflow as tf

# Report how many GPU devices TensorFlow can see.
print("GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

# NOTE(review): tensorflow.python.* looks like an internal module path —
# confirm whether a public tf.config call can replace it.
from tensorflow.python.client import device_lib

# List all devices
print(device_lib.list_local_devices())

GPUs Available:  0
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 12456032966886774649
xla_global_id: -1
]


In [3]:
# Lower-case and tokenize every article (train split first, then test split)
# and collect the labels in the same order.
text = [
    wordpunct_tokenize(row.lower())
    for row in dataset['train']['text']+dataset['test']['text']
]
label = list(dataset['train']['label']+dataset['test']['label'])

In [4]:
# Vocabulary: id 0 is reserved for padding; every unseen token receives the
# next free integer id, in order of first appearance.
word_dict = {'PADDING': 0}
for tokens in text:
    for tok in tokens:
        word_dict.setdefault(tok, len(word_dict))

In [5]:
MAX_SENT_LENGTH = 256

# Turn each token list into ids, cut it to MAX_SENT_LENGTH and right-pad with 0.
news_words = []
for tokens in text:
    ids = [word_dict[tok] for tok in tokens][:MAX_SENT_LENGTH]
    padding = [0] * (MAX_SENT_LENGTH - len(ids))
    news_words.append(ids + padding)


In [6]:
import numpy as np
# Convert the padded id lists and the labels to int32 arrays.
news_words=np.array(news_words,dtype='int32') 
label=np.array(label,dtype='int32') 

In [7]:
# Positions 0..119999 form the training split, the remainder the test split.
index=np.arange(len(label))
train_index=index[:120000]
# Shuffles the training positions in place. No random seed is set in the
# visible cells, so the order is presumably different on every run.
np.random.shuffle(train_index)
test_index=index[120000:]

In [8]:
import os
# NOTE(review): TensorFlow was already imported and asked for its devices in an
# earlier cell, so this setting probably has no effect within the same kernel.
# The GPU index "2" is also machine-specific — confirm both.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

from keras.utils import to_categorical
from keras.layers import *
from keras.models import Model, load_model
from tensorflow.keras import backend as K
from sklearn.metrics import *
from keras.optimizers import *
import keras.backend as K
import keras

In [9]:
import numpy as np
# NOTE(review): repeats the int32 conversion already done in an earlier cell.
news_words=np.array(news_words,dtype='int32') 
label=np.array(label,dtype='int32')

In [10]:
import random
# NOTE(review): rebuilds the split from scratch, which replaces the shuffled
# train_index produced by the earlier cell; `random` is not used in this cell.
index=np.arange(len(label))
train_index=index[:120000]
test_index=index[120000:]

In [11]:

class Fastformer(Layer):
    """Multi-head additive-attention layer.

    The layer is called on a list of tensors: either ``[Q_seq, K_seq]`` or
    ``[Q_seq, K_seq, Q_mask, K_mask]``. The reshapes in ``call`` use the
    sequence lengths recorded in ``build`` (``input_shape[0][1]`` and
    ``input_shape[1][1]``), so both lengths must be statically known.

    Args:
        nb_head: number of attention heads.
        size_per_head: width of each head; the output width is
            ``nb_head * size_per_head``.

    Returns (from ``call``): a tensor of shape
    ``(batch, query_length, nb_head * size_per_head)``.
    """

    def __init__(self, nb_head, size_per_head, **kwargs):
        self.nb_head = nb_head
        self.size_per_head = size_per_head
        self.output_dim = nb_head*size_per_head
        self.now_input_shape=None
        super(Fastformer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection weights; ``input_shape`` is a list of shapes."""
        self.now_input_shape=input_shape
        # Input -> query / key projections.
        self.WQ = self.add_weight(name='WQ',
                                  shape=(input_shape[0][-1], self.output_dim),
                                  initializer='glorot_uniform',
                                  trainable=True)
        self.WK = self.add_weight(name='WK',
                                  shape=(input_shape[1][-1], self.output_dim),
                                  initializer='glorot_uniform',
                                  trainable=True)
        # One attention logit per head for queries (Wq) and keys (Wk).
        self.Wq = self.add_weight(name='Wq',
                                  shape=(self.output_dim,self.nb_head),
                                  initializer='glorot_uniform',
                                  trainable=True)
        self.Wk = self.add_weight(name='Wk',
                                  shape=(self.output_dim,self.nb_head),
                                  initializer='glorot_uniform',
                                  trainable=True)
        # Output transform applied before the residual connection.
        self.WP = self.add_weight(name='WP',
                                  shape=(self.output_dim,self.output_dim),
                                  initializer='glorot_uniform',
                                  trainable=True)

        super(Fastformer, self).build(input_shape)

    def _split_heads(self, seq, length):
        """(batch, length, output_dim) -> (batch, nb_head, length, size_per_head)."""
        seq = tf.reshape(seq, (-1, length, self.nb_head, self.size_per_head))
        return tf.transpose(seq, perm=(0, 2, 1, 3))

    def _merge_heads(self, seq, length):
        """(batch, nb_head, length, size_per_head) -> (batch, length, output_dim)."""
        seq = tf.transpose(seq, perm=(0, 2, 1, 3))
        return tf.reshape(seq, (-1, length, self.output_dim))

    def call(self, x):
        """Apply the layer.

        Args:
            x: ``[Q_seq, K_seq]`` or ``[Q_seq, K_seq, Q_mask, K_mask]``. Masks
               are multiplied into the logits as ``(1 - mask) * 1e8``, so they
               must be numeric with 1 for positions to keep.

        Raises:
            ValueError: if ``x`` does not hold exactly 2 or 4 tensors.
        """
        if len(x) == 2:
            Q_seq, K_seq = x
            Q_mask = K_mask = None
        elif len(x) == 4:
            # Separate masks are kept so query and key lengths may differ.
            Q_seq, K_seq, Q_mask, K_mask = x
        else:
            raise ValueError(
                'Fastformer expects [Q, K] or [Q, K, Q_mask, K_mask], got %d inputs' % len(x))

        q_len = self.now_input_shape[0][1]
        k_len = self.now_input_shape[1][1]
        scale = self.size_per_head ** 0.5

        # Project the queries and score every position once per head.
        Q_seq = tf.linalg.matmul(Q_seq, self.WQ)
        Q_seq = tf.reshape(Q_seq, (-1, q_len, self.output_dim))
        Q_att = tf.transpose(tf.linalg.matmul(Q_seq, self.Wq), perm=(0, 2, 1)) / scale
        if Q_mask is not None:
            Q_att = Q_att - (1 - tf.expand_dims(Q_mask, axis=1)) * 1e8
        Q_att = tf.nn.softmax(Q_att)  # (batch, heads, q_len)

        Q_heads = self._split_heads(Q_seq, q_len)
        K_heads = self._split_heads(tf.linalg.matmul(K_seq, self.WK), k_len)

        # Attention-weighted sum of the queries: one vector per head.
        # Broadcasting replaces the explicit tf.repeat of the weights.
        global_q = tf.reduce_sum(tf.expand_dims(Q_att, axis=3) * Q_heads, axis=2)

        # Mix the pooled query into every key position.
        QK_interaction = K_heads * tf.expand_dims(global_q, axis=2)

        # The head axis has to be moved next to the feature axis before the
        # heads are flattened (as is done for the final output); a plain
        # reshape would interleave heads and positions. The key length is used
        # because this tensor is indexed by key position.
        K_att = tf.linalg.matmul(self._merge_heads(QK_interaction, k_len), self.Wk)
        K_att = tf.transpose(K_att, perm=(0, 2, 1)) / scale
        if K_mask is not None:
            K_att = K_att - (1 - tf.expand_dims(K_mask, axis=1)) * 1e8
        K_att = tf.nn.softmax(K_att)  # (batch, heads, k_len)

        # Attention-weighted sum of the mixed keys: one vector per head.
        global_k = tf.reduce_sum(tf.expand_dims(K_att, axis=3) * QK_interaction, axis=2)

        # The projected queries double as values (Q = V).
        QKQ_interaction = tf.expand_dims(global_k, axis=2) * Q_heads
        QKQ_interaction = tf.linalg.matmul(self._merge_heads(QKQ_interaction, q_len), self.WP)

        # Residual connection with the projected queries. Adding in the merged
        # layout equals splitting into heads, adding, and merging back.
        return QKQ_interaction + Q_seq

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.output_dim)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer."""
        config = super(Fastformer, self).get_config()
        config.update({'nb_head': self.nb_head,
                       'size_per_head': self.size_per_head})
        return config

import os
import requests
from tempfile import TemporaryDirectory
from recommenders.models.newsrec.newsrec_utils import get_mind_data_set

def download_file(url, dest_path, timeout=60):
    """Download a file from a URL and save it locally.

    Args:
        url: address to fetch.
        dest_path: file to write; missing parent directories are created.
        timeout: seconds passed to ``requests.get`` so a stalled server
            cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    partial_path = dest_path + '.part'
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()  # Check for request errors
        dest_dir = os.path.dirname(dest_path)
        if dest_dir:  # os.makedirs('') raises for bare file names
            os.makedirs(dest_dir, exist_ok=True)
        try:
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # Filter out keep-alive new chunks
                        file.write(chunk)
            # Only a complete download becomes visible at dest_path, so an
            # "already present" check never sees a truncated file.
            os.replace(partial_path, dest_path)
        except BaseException:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

# Initialize temporary directory and paths
# NOTE(review): tmpdir is created but data_path points at the fixed "dataset"
# folder rather than tmpdir.name.
tmpdir = TemporaryDirectory()
data_path = "dataset"#tmpdir.name
print(data_path)
# The training archive is extracted into the working directory below, so
# news.tsv is addressed relative to it.
train_news_file = os.path.join('news.tsv')
train_behaviors_file = os.path.join(data_path, 'train', 'behaviors.tsv')
valid_news_file = os.path.join(data_path, 'valid', 'news.tsv')
valid_behaviors_file = os.path.join(data_path, 'valid', 'behaviors.tsv')
wordEmb_file = os.path.join(data_path, "utils", "embedding.npy")
userDict_file = os.path.join(data_path, "utils", "uid2index.pkl")
wordDict_file = os.path.join(data_path, "utils", "word_dict.pkl")
yaml_file = os.path.join(data_path, "utils", 'nrms.yaml')

# Get MIND dataset URLs
#mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)
mind_url = "https://recodatasets.z20.web.core.windows.net/newsrec/"
mind_dev_dataset = "MINDlarge_dev.zip"
mind_train_dataset = "MINDlarge_train.zip"
train_news_dataset = os.path.join(data_path, 'train', mind_train_dataset)
valid_news_dataset = os.path.join(data_path, 'valid', mind_dev_dataset)

# Download train dataset if not already present. The check looks at the
# download destination (not the bare archive name in the working directory),
# otherwise the archive would be fetched again on every run. URLs are built by
# string concatenation because os.path.join is meant for file-system paths.
if not os.path.exists(train_news_dataset):
    download_file(mind_url + mind_train_dataset, train_news_dataset)

# Download validation dataset if not already present
if not os.path.exists(valid_news_dataset):
    download_file(mind_url + mind_dev_dataset, valid_news_dataset)

import zipfile
# Later cells read behaviors.tsv / news.tsv from the working directory.
with zipfile.ZipFile(train_news_dataset, 'r') as zip_ref:
    zip_ref.extractall(os.getcwd())

dataset


In [12]:

# Download utility files if not already present
#if not os.path.exists(yaml_file):
#    for util_file in mind_utils:
#        download_file(os.path.join('https://recodatasets.z20.web.core.windows.net/newsrec/', util_file),
#                      os.path.join(data_path, 'utils', util_file))

import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer

# Load data
# Both files are read from the current working directory.
# NOTE(review): four column names are supplied for behaviors.tsv; if the file
# has an additional leading column pandas would presumably use it as the
# index — confirm against the file layout.
behaviors_df = pd.read_csv('behaviors.tsv', sep='\t', header=None, names=['UserID', 'Time', 'ClickedNews', 'Impressions'])
news_df = pd.read_csv('news.tsv', sep='\t', header=None, names=['NewsID', 'Category', 'SubCategory', 'Title', 'Abstract', 'URL', 'Entities', 'Keywords'])
print("data loaded")
print("tokenizer start")
# Tokenize text data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Titles are encoded with special tokens and truncated to 32 ids; no padding
# is requested, so the resulting lists can differ in length.
# NOTE(review): unlike 'Abstract', 'Title' gets no NaN handling before encoding.
news_df['TitleTokens'] = news_df['Title'].apply(lambda x: tokenizer.encode(x, add_special_tokens=True, truncation=True, max_length=32))
print("tokenizer end")
# Replace NaN values in the 'Abstract' column with an empty string
news_df['Abstract'] = news_df['Abstract'].fillna('')
# Tokenize the 'Abstract' column

print("tokenizer start")
# Abstracts are truncated to 128 ids, again without padding.
news_df['AbstractTokens'] = news_df['Abstract'].apply(lambda x: tokenizer.encode(x, add_special_tokens=True, truncation=True, max_length=128))

print("tokenizer end")
print("replaced nan")
# Encode categories and subcategories
category_encoder = LabelEncoder()
sub_category_encoder = LabelEncoder()

news_df['CategoryID'] = category_encoder.fit_transform(news_df['Category'])
news_df['SubCategoryID'] = sub_category_encoder.fit_transform(news_df['SubCategory'])
print("news df fit transformed")
# Map news ID to the processed news information
# Each value is the whole row of news_df for that NewsID.
news_dict = {row['NewsID']: row for _, row in news_df.iterrows()}

# Replace NaN values in 'ClickedNews' and 'Impressions' columns with empty strings
behaviors_df['ClickedNews'] = behaviors_df['ClickedNews'].fillna('')
behaviors_df['Impressions'] = behaviors_df['Impressions'].fillna('')
print("behaviours fillna")
# Process user behaviors
def process_behaviors(row):
    """Split one behaviour row into clicked news IDs and impression pairs.

    Args:
        row: mapping with string fields 'ClickedNews' (whitespace-separated
            news IDs) and 'Impressions' (whitespace-separated ``ID-label``
            items).

    Returns:
        ``(clicked_news, impressions)`` where ``clicked_news`` is a list of
        IDs and ``impressions`` is a list of ``[news_id, label]`` lists. Empty
        fields yield empty lists.
    """
    # str.split() without an argument returns [] for an empty string, whereas
    # ''.split(' ') returns [''] and would create a phantom empty news ID.
    clicked_news = row['ClickedNews'].split()
    # rsplit with maxsplit=1 always separates the trailing label from the ID.
    impressions = [imp.rsplit('-', 1) for imp in row['Impressions'].split()]
    return clicked_news, impressions
print("process behaviours start")
# Run process_behaviors on every row and unzip the returned
# (clicked_news, impressions) tuples into two new columns.
behaviors_df['ClickedNewsList'], behaviors_df['ImpressionsList'] = zip(*behaviors_df.apply(process_behaviors, axis=1))
print("process behaviours end")
from torch.utils.data import Dataset, DataLoader
class NewsDataset(Dataset):
    """Dataset yielding (clicked titles, candidate titles, labels) per behaviour row.

    Args:
        behaviors: DataFrame with 'ClickedNewsList' (list of news IDs) and
            'ImpressionsList' (list of ``[news_id, label]`` pairs) columns.
        news_dict: mapping from news ID to a record exposing 'TitleTokens'.
        max_title_length: every title is truncated / zero-padded to this many
            token ids so that titles of different lengths stack into one
            tensor. The default matches the ``max_length=32`` used when the
            titles were tokenized.
    """

    def __init__(self, behaviors, news_dict, max_title_length=32):
        self.behaviors = behaviors
        self.news_dict = news_dict
        self.max_title_length = max_title_length

    def __len__(self):
        return len(self.behaviors)

    def _encode_titles(self, news_ids):
        """Return a (len(news_ids), max_title_length) int64 tensor of title ids."""
        # Imported here because the notebook only imports Dataset/DataLoader
        # from torch.utils.data, never the torch package itself.
        import torch

        width = self.max_title_length
        rows = []
        for nid in news_ids:
            tokens = list(self.news_dict[nid]['TitleTokens'])[:width]
            rows.append(tokens + [0] * (width - len(tokens)))
        if not rows:
            # Keep a well-defined 2-D shape for rows without any news.
            return torch.zeros((0, width), dtype=torch.long)
        return torch.tensor(rows, dtype=torch.long)

    def __getitem__(self, idx):
        import torch

        row = self.behaviors.iloc[idx]  # looked up once instead of three times
        impressions = row['ImpressionsList']
        clicked_news = self._encode_titles(row['ClickedNewsList'])
        candidate_news = self._encode_titles([nid for nid, _ in impressions])
        labels = torch.tensor([int(label) for _, label in impressions], dtype=torch.long)
        return clicked_news, candidate_news, labels

# Next step, training models


data loaded
tokenizer start




tokenizer end
tokenizer start
tokenizer end
replaced nan
news df fit transformed
behaviours fillna
process behaviours start
process behaviours end


In [14]:
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score, f1_score
import tensorflow as tf
from keras.layers import Input, Embedding, Dropout, LayerNormalization, add, Flatten, Dense, Activation, Dot, Lambda
from keras.models import Model
from keras.optimizers import Adam
import keras.backend as K
from datetime import datetime
from tensorflow.keras.preprocessing.sequence import pad_sequences

@tf.function
def predict_user_model(model, clicked_news):
    """Call ``model`` on ``clicked_news`` with ``training=False`` and return the result."""
    # training=False is passed explicitly so the call runs in inference mode.
    outputs = model(clicked_news, training=False)
    return outputs

def build_model():
    """Build and compile the two-layer Fastformer text classifier.

    Reads the module-level ``MAX_SENT_LENGTH`` (input length) and
    ``word_dict_size`` (embedding rows). The network embeds the token ids,
    applies two Fastformer blocks (each followed by dropout, a residual add
    and layer normalisation), pools the sequence with a learned softmax
    weighting and ends in a 4-way softmax.

    Returns:
        The compiled Keras ``Model``.
    """
    tokens_in = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    # 1.0 where the token id is non-zero, 0.0 where it is zero.
    pad_mask = Lambda(lambda x: tf.cast(tf.cast(x, tf.bool), tf.float32),
                      output_shape=lambda s: s)(tokens_in)

    embedded = Embedding(word_dict_size, 256, trainable=True)(tokens_in)
    embedded = Dropout(0.2)(embedded)

    # First Fastformer block (16 heads of width 16) with residual + layer norm.
    attended_1 = Fastformer(16, 16)([embedded, embedded, pad_mask, pad_mask])
    attended_1 = Dropout(0.2)(attended_1)
    block_1 = LayerNormalization()(add([embedded, attended_1]))

    # Second Fastformer block, same layout.
    attended_2 = Fastformer(16, 16)([block_1, block_1, pad_mask, pad_mask])
    attended_2 = Dropout(0.2)(attended_2)
    block_2 = LayerNormalization()(add([block_1, attended_2]))

    # Attention pooling: one score per position, softmax over positions,
    # then a weighted sum of the position vectors.
    position_scores = Dense(1)(block_2)
    position_weights = Activation('softmax')(Flatten()(position_scores))
    pooled = Dot((1, 1))([block_2, position_weights])
    class_probs = Dense(4, activation='softmax')(pooled)

    net = Model([tokens_in], [class_probs])
    net.compile(loss=['categorical_crossentropy'],
                optimizer=Adam(learning_rate=0.001),
                metrics=['acc'])
    return net

# Parameters for the model
MAX_SENT_LENGTH = 256  # adjust based on your dataset
# NOTE(review): word_dict is the vocabulary built from the 'ag_news' tokens in
# an earlier cell, while the sequences prepared later in this cell come from
# the BERT tokenizer — confirm the embedding is large enough for those ids.
word_dict_size = len(word_dict)  # Vocabulary size from tokenizer or word_dict
batch_size = 64
epochs = 1
print(f"number of users:{len(behaviors_df['UserID'].unique())}")
# Directory that receives the saved models; created if missing.
save_dir = "user_models"
os.makedirs(save_dir, exist_ok=True)

def generate_input_target_pairs(clicked_news_list, window_size=3):
    """Slide a window over a click history to form (inputs, next item) pairs.

    Args:
        clicked_news_list: ordered sequence of clicked article IDs.
        window_size: number of consecutive articles in each input.

    Returns:
        ``(input_sequences, targets)``: ``input_sequences[i]`` is the slice
        starting at position ``i`` and ``targets[i]`` is the article right
        after it. Both are empty when the history has at most
        ``window_size`` items.
    """
    starts = range(len(clicked_news_list) - window_size)
    input_sequences = [clicked_news_list[s:s + window_size] for s in starts]
    targets = [clicked_news_list[s + window_size] for s in starts]
    return input_sequences, targets

# Build sliding-window samples from the click history of every behaviour row
# and gather them into two flat, aligned lists.
clicked_news_global, targets_global = [], []

for history in behaviors_df['ClickedNewsList']:
    windows, next_clicks = generate_input_target_pairs(history)
    clicked_news_global += windows
    targets_global += next_clicks
print(f"len(clicked_news_global)={len(clicked_news_global)}")
print(f"len(targets_global)={len(targets_global)}")
# Tokenize the input sequences and pad them
# FIXME(review): each element handed to pad_sequences is a list of token lists
# (one per article in the window), not a flat list of ids, while build_model
# expects inputs of shape (MAX_SENT_LENGTH,) — the windows presumably need to
# be flattened first; confirm the intended layout.
clicked_news_padded = pad_sequences([[news_dict[nid]['TitleTokens'] for nid in seq] for seq in clicked_news_global],
                                    maxlen=MAX_SENT_LENGTH, padding='post', truncating='post')

# Convert the target articles into embeddings (you may tokenize them similarly)
# NOTE(review): despite the name, no padding is applied here; the array is
# built directly from the per-article token lists.
target_news_padded = np.array([news_dict[nid]['TitleTokens'] for nid in targets_global])
print(f"len(clicked_news_padded)={len(clicked_news_padded)}")
print(f"len(targets_global)={len(targets_global)}")

# Train/test split (80/20)
# NOTE(review): train_index / test_index are also the names used for the
# 'ag_news' split that the later training cell indexes news_words with.
split_idx = int(0.8 * len(clicked_news_padded))
train_index = np.arange(split_idx)
test_index = np.arange(split_idx, len(clicked_news_padded))

# Convert target articles to one-hot encoding if necessary (for classification)
# If predicting classes, use `to_categorical` to one-hot encode the target labels
# FIXME(review): the values being one-hot encoded are title token ids, not
# class labels in 0..3 — confirm what the 4-class target is meant to be.
target_labels_onehot = to_categorical(target_news_padded, num_classes=4)

print(f"len(target_labels_onehot)={len(target_labels_onehot)}")
print(f"clicked_news_padded={clicked_news_padded}")
print(f"target_labels_onehot={target_labels_onehot}")
# Now fit the model
# The model is built here: nothing earlier in this cell creates `model`, so
# the cell would otherwise depend on a model left over from another cell.
model = build_model()
model.fit(clicked_news_padded[train_index],
          target_labels_onehot[train_index],
          shuffle=True,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1)
model_save_path = os.path.join(save_dir, "global_fastformer_model.h5")
model.save(model_save_path)

print(f"Evaluating model: {model_save_path}")
# NOTE(review): this runs the whole test split through the model in a single
# call; switch to model.predict(..., batch_size=128) if memory is a problem.
y_pred = predict_user_model(model, clicked_news_padded[test_index])
y_pred = np.argmax(y_pred, axis=1)
# The integer class labels are recovered from the one-hot targets; no `labels`
# variable exists at notebook level.
y_true = np.argmax(target_labels_onehot[test_index], axis=1)

# Calculate metrics
acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='macro')

print(f" model: {model_save_path} - Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")

number of users:711222
len(clicked_news_global)=67183379
len(targets_global)=67183379


NameError: name 'pad_sequences' is not defined

In [None]:
# Build, train and evaluate the Fastformer classifier on the 'ag_news' arrays
# (news_words / label) prepared in the first cells.
# NOTE(review): the layer stack below repeats what build_model() constructs.
keras.backend.clear_session() 
import tensorflow as tf
text_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
#qmask=Lambda(lambda x:  K.cast(K.cast(x,'bool'),'float32'))(text_input)
#qmask = Lambda(lambda x: K.cast(K.cast(x, 'bool'), 'float32'), 
#               output_shape=lambda s: s)(text_input)
# 1.0 where the token id is non-zero, 0.0 where it is zero.
qmask = Lambda(lambda x: tf.cast(tf.cast(x, tf.bool), tf.float32), 
               output_shape=lambda s: s)(text_input)
word_emb = Embedding(len(word_dict),256, trainable=True)(text_input)

#pos_emb = Embedding(MAX_SENT_LENGTH, 256, trainable=True)(Lambda(lambda x:K.zeros_like(x,dtype='int32')+K.arange(x.shape[1]))(text_input))
#word_emb  =add([word_emb ,pos_emb])
#We find that position embedding is not important on this dataset and we removed it for simplicity. If needed, please uncomment the two lines above

word_emb=Dropout(0.2)(word_emb)

# First Fastformer block, followed by dropout, residual add and layer norm.
hidden_word_emb = Fastformer(16,16)([word_emb,word_emb,qmask,qmask])
hidden_word_emb = Dropout(0.2)(hidden_word_emb)
hidden_word_emb = LayerNormalization()(add([word_emb,hidden_word_emb])) 
#if there is no layer norm in old version, please import an external layernorm class from a higher version.

# Second Fastformer block, same layout.
hidden_word_emb_layer2 = Fastformer(16,16)([hidden_word_emb,hidden_word_emb,qmask,qmask])
hidden_word_emb_layer2 = Dropout(0.2)(hidden_word_emb_layer2)
hidden_word_emb_layer2 = LayerNormalization()(add([hidden_word_emb,hidden_word_emb_layer2]))

#without FFNN for simplicity

# Attention pooling: one score per position, softmax over positions, then a
# weighted sum of the position vectors.
word_att = Flatten()(Dense(1)(hidden_word_emb_layer2))
word_att = Activation('softmax')(word_att)
text_emb = Dot((1, 1))([hidden_word_emb_layer2 , word_att])
classifier = Dense(4, activation='softmax')(text_emb)
                                      
model = Model([text_input], [classifier])
#model.compile(loss=['categorical_crossentropy'],optimizer=Adam(lr=0.001), metrics=['acc'])
model.compile(loss=['categorical_crossentropy'], 
              optimizer=Adam(learning_rate=0.001),
              metrics=['acc'])

# NOTE(review): train_index / test_index are reassigned by the behaviour-data
# cell above; make sure they still hold the 'ag_news' split when this runs.
for i in range(1):
    model.fit(news_words[train_index],to_categorical(label)[train_index],shuffle=True,batch_size=64, epochs=1,verbose=1)


    y_pred = model.predict([news_words[test_index] ], batch_size=128, verbose=1)
    y_pred = np.argmax(y_pred, axis=1)
    y_true = label[test_index]
    acc = accuracy_score(y_true, y_pred)
    # Despite the name, `report` holds the macro-averaged F1 score.
    report = f1_score(y_true, y_pred, average='macro')  
    print(acc)
    print(report)


In [None]:
# NOTE(review): scratch comparison. `labels` is not assigned at notebook level
# in any visible cell, so the second line presumably relies on leftover kernel
# state — confirm or remove.
print(to_categorical(label, num_classes=4)[11])
print(to_categorical(labels, num_classes=4)[11])

In [None]:
# NOTE(review): scratch inspection cell. `labels` and `clicked_news` are not
# assigned at notebook level in any visible cell, so the first four prints
# presumably rely on leftover kernel state — confirm or remove.
print(len(labels))
print(len(clicked_news))
print(labels)
print(clicked_news)
print(clicked_news_padded)
# Inspect the behaviour rows of one hard-coded user.
user_id = "U532401"
user_behaviors = behaviors_df[behaviors_df['UserID'] == user_id]
print(user_behaviors['ClickedNewsList'].explode())
# Extract impressions for this user, which includes both the articles shown and the labels
impressions = user_behaviors['ImpressionsList'].explode()

# Extract the news articles and the corresponding labels from the impressions
# The loop variable `label` below reuses the name of the notebook-level
# 'ag_news' label array used by other cells.
clicked_news2 = [news_dict[nid]['TitleTokens'] for nid, label in impressions]
labels2 = [int(label) for nid, label in impressions]
print(impressions)
print(clicked_news2)
print(labels2)
print(len(clicked_news2))
print(len(labels2))

In [None]:
def make_model_for_each_user(evaluate=False):
    """Train and save one classifier per user found in ``behaviors_df``.

    For every user the shown articles (impressions) become the samples: the
    title tokens are the input and the impression label is the target. The
    first 80% of a user's samples are used for training; each model is saved
    to ``save_dir`` as ``user_<id>_model.h5``.

    Args:
        evaluate: when True, also score each model on the remaining 20% and
            print accuracy and macro F1. Off by default.
    """
    import numpy as np
    from tensorflow.keras.preprocessing.sequence import pad_sequences

    # Assume MAX_SENT_LENGTH is defined as 256
    MAX_SENT_LENGTH = 256

    # groupby visits every user once; filtering the whole frame per user is
    # quadratic in the number of rows. sort=False keeps first-appearance order.
    for user_id, user_behaviors in behaviors_df.groupby('UserID', sort=False):
        # Flatten this user's impressions into [news_id, label] pairs and
        # ignore malformed entries (e.g. from rows with no impressions).
        impressions = [pair
                       for shown in user_behaviors['ImpressionsList']
                       for pair in shown
                       if len(pair) == 2]

        # Train/test split (you can adjust this as needed)
        split_idx = int(0.8 * len(impressions))
        if split_idx == 0:
            # Nothing to train on; fitting on an empty array would fail.
            print(f"Skipping user {user_id}: not enough impressions to train on")
            continue

        # Pad sequences to ensure they have the same length (MAX_SENT_LENGTH)
        clicked_news_padded = pad_sequences(
            [news_dict[nid]['TitleTokens'] for nid, _ in impressions],
            maxlen=MAX_SENT_LENGTH, padding='post', truncating='post')
        labels = np.array([int(label) for _, label in impressions])
        train_index = np.arange(split_idx)
        test_index = np.arange(split_idx, len(clicked_news_padded))

        # Clear previous model to start fresh for each user
        keras.backend.clear_session()
        model = build_model()

        # Train the model using the user-specific data
        print(f"Training model for user {user_id}. Click news:{len(clicked_news_padded)}")
        model.fit(clicked_news_padded[train_index],
                  to_categorical(labels, num_classes=4)[train_index],
                  shuffle=True,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1)
        model_save_path = os.path.join(save_dir, f"user_{user_id}_model.h5")
        model.save(model_save_path)
        print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} Model for user {user_id} saved to {model_save_path}")

        # Predict and evaluate on the test data
        if evaluate and len(test_index) > 0:
            print(f"Evaluating model for user {user_id}...")
            y_pred = predict_user_model(model, clicked_news_padded[test_index])
            y_pred = np.argmax(y_pred, axis=1)
            y_true = labels[test_index]

            # Calculate metrics
            acc = accuracy_score(y_true, y_pred)
            f1 = f1_score(y_true, y_pred, average='macro')

            print(f"User {user_id} - Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")