In [None]:
import random
import torch
import torch.nn as nn
import torch.multiprocessing as mp
mp.set_start_method('spawn', force=True)
# import data as data
# import model_evaluation as evaluation
import torch.optim as optim
import torch._dynamo
import numpy as np
import heapq
from torch.optim.lr_scheduler import ReduceLROnPlateau

torch._dynamo.config.suppress_errors = True

random.seed(1000)

def load_data(filename, threshold=4, train_ratio=0.7, test_ratio=0.15):
    """
    Load a `user::movie::rating::timestamp` ratings file, binarise the ratings
    and split every user's interactions into train / validation / test lists.

    Rows whose rating is exactly 3 are dropped. Every remaining rating becomes
    label 1 when `rating >= threshold` and label 0 otherwise. Blank lines are
    skipped. The per-user shuffle uses the module-level `random` generator, so
    the split is reproducible only if that generator has been seeded.

    Args:
        filename (str): Path to the ratings file.
        threshold (int): Smallest rating that is mapped to label 1.
        train_ratio (float): Fraction of each user's interactions used for training.
        test_ratio (float): Fraction of each user's interactions placed in the
            slice that follows the training slice, i.e. the validation split.
            The test split receives whatever is left after both slices.

    Returns:
        train_dict, val_dict, test_dict, movie_num, user_num

        The three dicts map user_id -> list of (movie_id, label). `movie_num`
        and `user_num` are the largest movie / user id among the retained rows
        (-1 when no row was retained).
    """
    user_ratings = {}  # user_id -> [(movie_id, label), ...]
    movie_num = -1
    user_num = -1

    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line:
                # A blank or trailing empty line would otherwise crash int("").
                continue

            user_id, movie_id, rating, _ = map(int, line.split("::"))

            # Rating 3 is treated as ambiguous and ignored entirely; because the
            # row is skipped here it also does not contribute to the id maxima.
            if rating == 3:
                continue

            label = 1 if rating >= threshold else 0
            user_ratings.setdefault(user_id, []).append((movie_id, label))

            movie_num = max(movie_num, movie_id)
            user_num = max(user_num, user_id)

    train_dict, val_dict, test_dict = {}, {}, {}

    # The split is done inside each user's interaction list, not across users.
    # TODO(original note): consider a split by user to study cold start.
    for user_id, interactions in user_ratings.items():
        random.shuffle(interactions)

        total_interactions = len(interactions)
        train_end = int(train_ratio * total_interactions)
        val_end = int((train_ratio + test_ratio) * total_interactions)

        train_dict[user_id] = interactions[:train_end]
        val_dict[user_id] = interactions[train_end:val_end]
        test_dict[user_id] = interactions[val_end:]

    return train_dict, val_dict, test_dict, movie_num, user_num

In [None]:
def load_data_rate(filename, threshold=4, train_ratio=0.7, test_ratio=0.15):
    """
    Load a `user::movie::rating::timestamp` ratings file, binarise the ratings
    and split every user's interactions into train / validation / test lists.

    Unlike `load_data`, no rating is filtered out: every row is kept and
    labelled 1 when `rating >= threshold`, otherwise 0 (so with the default
    threshold a rating of 3 becomes a negative). Blank lines are skipped. The
    per-user shuffle uses the module-level `random` generator.

    Args:
        filename (str): Path to the ratings file.
        threshold (int): Smallest rating that is mapped to label 1.
        train_ratio (float): Fraction of each user's interactions used for training.
        test_ratio (float): Fraction of each user's interactions placed in the
            slice that follows the training slice, i.e. the validation split.
            The test split receives whatever is left after both slices.

    Returns:
        train_dict, val_dict, test_dict, movie_num, user_num

        The three dicts map user_id -> list of (movie_id, label). `movie_num`
        and `user_num` are the largest movie / user id seen (-1 for an empty file).
    """
    user_ratings = {}  # user_id -> [(movie_id, label), ...]
    movie_num = -1
    user_num = -1

    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line:
                # A blank or trailing empty line would otherwise crash int("").
                continue

            user_id, movie_id, rating, _ = map(int, line.split("::"))

            # Every rating is kept; anything below the threshold is a negative.
            label = 1 if rating >= threshold else 0
            user_ratings.setdefault(user_id, []).append((movie_id, label))

            movie_num = max(movie_num, movie_id)
            user_num = max(user_num, user_id)

    train_dict, val_dict, test_dict = {}, {}, {}

    # The split is done inside each user's interaction list, not across users.
    # TODO(original note): consider a split by user to study cold start.
    for user_id, interactions in user_ratings.items():
        random.shuffle(interactions)

        total_interactions = len(interactions)
        train_end = int(train_ratio * total_interactions)
        val_end = int((train_ratio + test_ratio) * total_interactions)

        train_dict[user_id] = interactions[:train_end]
        val_dict[user_id] = interactions[train_end:val_end]
        test_dict[user_id] = interactions[val_end:]

    return train_dict, val_dict, test_dict, movie_num, user_num

In [None]:
def load_data_meanStd(filename, train_ratio=0.7, test_ratio=0.15):
    """
    Load a `user::movie::rating::timestamp` ratings file and label each rating
    relative to the rating statistics of the user who gave it.

    For every user the mean (mu_u) and population standard deviation (sigma_u)
    of their ratings are computed, then each rating is labelled:

        rating >= mu_u             -> 1 (positive)
        rating <  mu_u - sigma_u   -> 0 (negative)
        otherwise                  -> dropped

    The labelled interactions of each user are shuffled with the module-level
    `random` generator and split into train / validation / test lists. Blank
    lines in the file are skipped.

    Args:
        filename (str): Path to the ratings file.
        train_ratio (float): Fraction of each user's labelled interactions used
            for training.
        test_ratio (float): Fraction of each user's labelled interactions placed
            in the slice that follows the training slice, i.e. the validation
            split. The test split receives whatever is left after both slices.

    Returns:
        train_dict, val_dict, test_dict, movie_num, user_num

        The three dicts map user_id -> list of (movie_id, label). `movie_num`
        and `user_num` are the largest movie / user id seen in the file,
        including rows that are later dropped (-1 for an empty file).
    """
    user_ratings = {}  # user_id -> [(movie_id, raw rating), ...]
    movie_num = -1
    user_num = -1

    # Step 1: read the raw ratings.
    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line:
                # A blank or trailing empty line would otherwise crash int("").
                continue

            user_id, movie_id, rating, _ = map(int, line.split("::"))
            user_ratings.setdefault(user_id, []).append((movie_id, rating))

            movie_num = max(movie_num, movie_id)
            user_num = max(user_num, user_id)

    train_dict, val_dict, test_dict = {}, {}, {}

    for user_id, interactions in user_ratings.items():
        # Step 2: per-user statistics (np.std defaults to the population form).
        ratings = np.array([r for _, r in interactions])
        mu_u = np.mean(ratings)
        sigma_u = np.std(ratings)

        # Step 3: binarise against the user-specific thresholds; ratings in
        # [mu_u - sigma_u, mu_u) are neither positive nor negative and are dropped.
        labeled_interactions = []
        for movie_id, rating in interactions:
            if rating >= mu_u:
                labeled_interactions.append((movie_id, 1))
            elif rating < mu_u - sigma_u:
                labeled_interactions.append((movie_id, 0))

        # Step 4: shuffle and split.
        random.shuffle(labeled_interactions)
        total_interactions = len(labeled_interactions)
        train_end = int(train_ratio * total_interactions)
        val_end = int((train_ratio + test_ratio) * total_interactions)

        train_dict[user_id] = labeled_interactions[:train_end]
        val_dict[user_id] = labeled_interactions[train_end:val_end]
        test_dict[user_id] = labeled_interactions[val_end:]

    return train_dict, val_dict, test_dict, movie_num, user_num

In [None]:
def get_input_data_nointeract(train_dict, non_interacted_movies):
    """
    Flatten per-user interactions into three parallel lists and append every
    non-interacted movie of each user as an extra label-0 sample.

    Args:
        train_dict (dict): user_id -> list of (movie_id, label).
        non_interacted_movies (dict): user_id -> list of movie ids the user has
            no interaction with; users missing from it get no extra samples.

    Returns:
        (user_input, movie_input, labels): equally long lists. For each user the
        labelled interactions come first (with their own labels, which may be 0
        or 1), followed by all of that user's non-interacted movies with label 0.
    """
    users, movies, targets = [], [], []

    for uid, rated in train_dict.items():
        unseen = list(non_interacted_movies.get(uid, []))

        # Labelled interactions first, in their stored order.
        movies.extend(mid for mid, _ in rated)
        targets.extend(lbl for _, lbl in rated)

        # Then the full set of unseen movies, all as negatives.
        movies.extend(unseen)
        targets.extend([0] * len(unseen))

        users.extend([uid] * (len(rated) + len(unseen)))

    return users, movies, targets

In [None]:
######### rate>=4 negative sample? interaction #######
def get_input_data(train_dict, non_interacted_movies, negative_num):
    """
    Flatten per-user interactions into three parallel lists and add randomly
    sampled non-interacted movies as label-0 samples.

    Args:
        train_dict (dict): user_id -> list of (movie_id, label).
        non_interacted_movies (dict): user_id -> list of movie ids the user has
            no interaction with.
        negative_num (int): Number of negatives drawn per user. Draws use
            `random.choice`, i.e. with replacement, so duplicates are possible.
            Users with no candidates get no sampled negatives.

    Returns:
        (user_input, movie_input, labels): equally long lists. For each user the
        labelled interactions come first, followed by the sampled negatives.
    """
    users, movies, targets = [], [], []

    for uid, rated in train_dict.items():
        # Labelled interactions keep their own label (0 or 1).
        for mid, lbl in rated:
            users.append(uid)
            movies.append(mid)
            targets.append(lbl)

        candidates = non_interacted_movies.get(uid, [])
        if not candidates:
            continue

        sampled = [random.choice(candidates) for _ in range(negative_num)]
        users.extend([uid] * len(sampled))
        movies.extend(sampled)
        targets.extend([0] * len(sampled))

    return users, movies, targets

In [None]:
def get_non_interacted_movies(train_dict, val_dict, test_dict, movie_num):
    """
    For every user in `train_dict`, list the movies the user never interacted
    with in any of the three splits.

    Args:
        train_dict (dict): user_id -> list of (movie_id, label); its keys decide
            which users appear in the result.
        val_dict (dict): user_id -> list of (movie_id, label).
        test_dict (dict): user_id -> list of (movie_id, label).
        movie_num (int): Largest movie id; candidate ids are 1..movie_num inclusive.

    Returns:
        dict: user_id -> list of movie ids in 1..movie_num that are absent from
        the user's train, validation and test interactions.
    """
    # Loop-invariant: build the full candidate set once instead of once per user.
    all_movies = set(range(1, movie_num + 1))
    non_interacted_movies = {}

    for u, train_items in train_dict.items():
        interacted_movies = {movie_id for movie_id, _ in train_items}
        # Validation/test items are excluded too, so held-out interactions are
        # never handed out as "negatives".
        interacted_movies.update(movie_id for movie_id, _ in val_dict.get(u, []))
        interacted_movies.update(movie_id for movie_id, _ in test_dict.get(u, []))

        non_interacted_movies[u] = list(all_movies - interacted_movies)

    return non_interacted_movies

In [None]:
class NeuMF(nn.Module):
    """
    Neural Matrix Factorization: a GMF branch (element-wise product of user and
    item embeddings) and an MLP branch (concatenated user and item embeddings
    fed through Linear+ReLU layers), fused by one linear output unit followed
    by a sigmoid.

    Args:
        num_users (int): Size of the user embedding tables.
        num_items (int): Size of the item embedding tables.
        mf_dim (int): Embedding width of the GMF branch.
        layers (list[int]): MLP sizes. `layers[0]` is the width of the
            concatenated user+item MLP embedding (each side gets half, so it
            must be even); `layers[1:]` are the outputs of the Linear layers.
        reg_mf (float): Coefficient applied to the GMF embedding norms in
            `get_regularization_loss`.
        reg_layers (list[float] | None): Coefficients for the MLP Linear layers;
            must have the same length as `layers`. Defaults to all zeros.

    Raises:
        ValueError: if `layers[0]` is odd, because the two half-width MLP
            embeddings could not be concatenated back to `layers[0]` features.
    """

    def __init__(self, num_users, num_items, mf_dim=10, layers=[10], reg_mf=0.0, reg_layers=None):
        super(NeuMF, self).__init__()

        if reg_layers is None:
            reg_layers = [0] * len(layers)

        assert len(layers) == len(reg_layers)

        if layers[0] % 2 != 0:
            # Fail early: with an odd width the concatenation is one feature
            # short and the first Linear / output layer would reject it later.
            raise ValueError(f"layers[0] must be even, got {layers[0]}")

        # GMF embeddings
        self.user_embedding_gmf = nn.Embedding(num_users, mf_dim)
        self.item_embedding_gmf = nn.Embedding(num_items, mf_dim)

        # MLP embeddings: each side contributes half of the MLP input width.
        self.user_embedding_mlp = nn.Embedding(num_users, layers[0] // 2)
        self.item_embedding_mlp = nn.Embedding(num_items, layers[0] // 2)

        # Small-variance normal initialisation for all embedding tables.
        nn.init.normal_(self.user_embedding_gmf.weight, std=0.01)
        nn.init.normal_(self.item_embedding_gmf.weight, std=0.01)
        nn.init.normal_(self.user_embedding_mlp.weight, std=0.01)
        nn.init.normal_(self.item_embedding_mlp.weight, std=0.01)

        # MLP tower: fc1/relu1, fc2/relu2, ... (empty when len(layers) == 1).
        self.mlp_layers = nn.Sequential()
        input_dim = layers[0]
        for i in range(1, len(layers)):
            self.mlp_layers.add_module(f"fc{i}", nn.Linear(input_dim, layers[i]))
            self.mlp_layers.add_module(f"relu{i}", nn.ReLU())
            input_dim = layers[i]

        # Output layer over the concatenated GMF (mf_dim) and MLP (layers[-1]) features.
        self.fc_output = nn.Linear(mf_dim + layers[-1], 1)

        # Regularization coefficients consumed by get_regularization_loss.
        self.reg_mf = reg_mf
        self.reg_layers = reg_layers

    def forward(self, user_indices, item_indices):
        """
        Score user/item pairs.

        Args:
            user_indices (LongTensor): user ids.
            item_indices (LongTensor): item ids, aligned with `user_indices`.

        Returns:
            Tensor: sigmoid-activated scores in (0, 1) with a trailing dimension
            of size 1. These are probabilities, not logits.
        """
        # GMF branch: element-wise product of the two embeddings.
        user_latent_gmf = self.user_embedding_gmf(user_indices)
        item_latent_gmf = self.item_embedding_gmf(item_indices)
        gmf_out = torch.mul(user_latent_gmf, item_latent_gmf)

        # MLP branch: concatenate the embeddings and run the tower.
        user_latent_mlp = self.user_embedding_mlp(user_indices)
        item_latent_mlp = self.item_embedding_mlp(item_indices)
        mlp_input = torch.cat((user_latent_mlp, item_latent_mlp), dim=-1)
        mlp_out = self.mlp_layers(mlp_input)

        # Fuse both branches and squash to a probability.
        combined = torch.cat((gmf_out, mlp_out), dim=-1)
        prediction = torch.sigmoid(self.fc_output(combined))

        return prediction

    def get_regularization_loss(self):
        """
        Weighted sum of L2 norms used as an explicit regularization term.

        The GMF user and item embedding norms are weighted by `reg_mf`. The k-th
        Linear layer of the MLP tower (counting from 0) is weighted by
        `reg_layers[k]`; since there is one fewer Linear layer than entries in
        `reg_layers`, the last coefficient is unused. The MLP embeddings and the
        output layer are not included.

        Returns:
            Tensor: scalar regularization loss.
        """
        reg_loss = 0
        reg_loss = reg_loss + self.reg_mf * (
            torch.norm(self.user_embedding_gmf.weight, p=2)
            + torch.norm(self.item_embedding_gmf.weight, p=2)
        )

        # Pair coefficients with Linear layers only. Indexing reg_layers by the
        # position inside the Sequential would also count the interleaved ReLU
        # modules, picking the wrong coefficient or running past the list.
        linear_layers = [module for module in self.mlp_layers if isinstance(module, nn.Linear)]
        for coeff, layer in zip(self.reg_layers, linear_layers):
            reg_loss = reg_loss + coeff * torch.norm(layer.weight, p=2)

        return reg_loss

In [None]:
import heapq
from sklearn.metrics import precision_score, recall_score, f1_score

In [None]:
def model_evaluation(model, val_dict, device, K=10):
    """
    Rank each user's candidate movies by model score and report top-K metrics.

    Every (user, movie) pair in `val_dict` is scored in a single forward pass.
    For each user that has at least one label-1 movie, the K highest-scoring
    movies are taken and

        precision = hits / K
        recall    = hits / number of that user's label-1 movies

    are computed, where a hit is a top-K movie with label 1. Users without any
    label-1 movie are skipped.

    Args:
        model (nn.Module): called as `model(user_ids, movie_ids)`; expected to
            return one score per pair with a trailing dimension of size 1.
        val_dict (dict): user_id -> list of (movie_id, label).
        device (torch.device): device the model and inputs are moved to.
        K (int): Cut-off of the ranking.

    Returns:
        (avg_precision, avg_recall, f1): precision and recall averaged over the
        evaluated users, and the harmonic mean of those two averages. All three
        are 0 when no user could be evaluated.

    Note:
        The model is left in eval mode; callers that continue training must
        switch it back with `model.train()`.
    """
    model.to(device)
    model.eval()

    # Flatten to parallel lists; labels are looked up from val_dict later, so
    # no separate label list is materialised here.
    users, movies = [], []
    for u, interactions in val_dict.items():
        for movie_id, _ in interactions:
            users.append(u)
            movies.append(movie_id)

    user_tensor = torch.tensor(users, dtype=torch.long, device=device)
    movie_tensor = torch.tensor(movies, dtype=torch.long, device=device)

    with torch.no_grad():
        scores = model(user_tensor, movie_tensor).squeeze(-1).cpu().numpy()

    # user -> {movie -> score}; a movie listed twice keeps its last score.
    scores_by_user = {}
    for u, m, score in zip(users, movies, scores):
        scores_by_user.setdefault(u, {})[m] = score

    precision_list = []
    recall_list = []

    for u, interactions in val_dict.items():
        pos_movies = {m for m, label in interactions if label == 1}
        if not pos_movies or u not in scores_by_user:
            continue
        pred_scores = scores_by_user[u]

        # Same result as sorting by score descending and slicing [:K], without
        # sorting the whole candidate list.
        top_k_items = heapq.nlargest(K, pred_scores, key=pred_scores.__getitem__)

        relevant_in_top_k = sum(1 for movie_id in top_k_items if movie_id in pos_movies)

        # Precision always divides by K, even if the user has fewer than K candidates.
        precision_list.append(relevant_in_top_k / K)
        recall_list.append(relevant_in_top_k / len(pos_movies))

    avg_precision = np.mean(precision_list) if precision_list else 0
    avg_recall = np.mean(recall_list) if recall_list else 0

    # F1 of the averaged precision and recall (not the average of per-user F1).
    if avg_precision + avg_recall > 0:
        f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
    else:
        f1 = 0

    return avg_precision, avg_recall, f1

In [None]:
# def model_evaluation(model, val_dict, device, K=10):
#     model.to(device)
#     model.eval()  
#     user_input = []
#     movie_input = []
#     labels = []
    
#     for u, interactions in val_dict.items():
#         for movie_id, label in interactions:
#             user_input.append(u)
#             movie_input.append(movie_id)
#             labels.append(label)
    
#     user_input = torch.tensor(user_input, dtype=torch.long, device=device)
#     movie_input = torch.tensor(movie_input, dtype=torch.long, device=device)

#     with torch.no_grad():
#         predictions = model(user_input, movie_input).squeeze(-1).cpu().numpy()  

#     predictions_dict = {}
#     for u, m, score in zip(user_input.cpu().tolist(), movie_input.cpu().tolist(), predictions):
#         if u not in predictions_dict:
#             predictions_dict[u] = {}
#         predictions_dict[u][m] = score

#     recall_list = []
#     ndcg_list = []

#     for u, interactions in val_dict.items():
#         pos_movies = {m for m, label in interactions if label == 1}
#         if not pos_movies:
#             continue

#         if u not in predictions_dict:
#             continue
#         pred_scores = predictions_dict[u]

#         top_k_items = np.array(sorted(pred_scores.keys(), key=lambda x: pred_scores[x], reverse=True))[:K]

#         recall = len(pos_movies.intersection(top_k_items)) / len(pos_movies)
#         recall_list.append(recall)

#         ndcg = calculate_ndcg(pos_movies, top_k_items, K)
#         ndcg_list.append(ndcg)

#     avg_recall = np.mean(recall_list) if recall_list else 0
#     avg_ndcg = np.mean(ndcg_list) if ndcg_list else 0

#     return avg_recall, avg_ndcg


# def calculate_ndcg(pos_movies, top_k_items, K):
#     """
#     Calculate NDCG for the top-K recommended items.

#     Args:
#     - pos_movies: A set of relevant (ground truth) items for the user.
#     - top_k_items: A list of the top-K recommended items.
#     - K: The number of top items considered for evaluation.

#     Returns:
#     - NDCG score.
#     """
#     K = min(K, len(top_k_items))  # Adjust K to avoid overestimation

#     # Compute DCG
#     dcg = sum(1 / np.log2(i + 2) for i, item in enumerate(top_k_items[:K]) if item in pos_movies)

#     # Compute IDCG (Ideal DCG)
#     ideal_hits = min(K, len(pos_movies))  # Can't be more than positive items
#     idcg = sum(1 / np.log2(i + 2) for i in range(ideal_hits))

#     return dcg / idcg if idcg > 0 else 0

In [None]:
from collections import defaultdict

In [None]:
# Experiment 1: NeuMF trained on labels from load_data (rating-3 rows dropped,
# rating >= 4 -> 1, otherwise 0).

# Per-epoch histories (some are only filled by evaluation code that is
# currently disabled).
train_losses = []
val_losses = []
train_recalls = []
train_ndcgs = []
recalls = []
ndcgs = []
f1s = []

# Early-stopping state.
patience = 10
counter = 0
best_val_loss = float('inf')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision is only used when actually running on CUDA.
use_amp = device.type == "cuda"

train_dict, val_dict, test_dict, movie_num, user_num = load_data('/kaggle/input/mmmmmm/ratings.dat')
negative_num = 10
non_interacted_movies = get_non_interacted_movies(train_dict, val_dict, test_dict, movie_num)

user_input, movie_input, labels = get_input_data(train_dict, non_interacted_movies, negative_num)

batch_size = 256
num_epochs = 30
# Ids are used directly as embedding indices, hence the +1 on both table sizes.
model = NeuMF(user_num+1, movie_num+1, 10, [10, 16]).to(device)
model = torch.compile(model)
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=3, verbose=True)

# NeuMF.forward already ends in a sigmoid, so the loss must take probabilities;
# BCEWithLogitsLoss would apply a second sigmoid on top of them.
criterion = nn.BCELoss()
scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

# Keep the dataset tensors on the CPU: the loaders use worker processes, and
# every batch is moved to the device inside the loops below.
user_input = torch.tensor(user_input, dtype=torch.long)
movie_input = torch.tensor(movie_input, dtype=torch.long)
labels = torch.tensor(labels, dtype=torch.float32)

dataset = torch.utils.data.TensorDataset(user_input, movie_input, labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=16)

# Validation pairs: each user's held-out interactions plus every
# non-interacted movie as a label-0 candidate.
val_user_input, val_movie_input, val_labels = get_input_data_nointeract(val_dict, non_interacted_movies)

# Rebuild val_dict from the expanded pairs so model_evaluation ranks the
# held-out items against all non-interacted movies.
val_dict = defaultdict(list)

for user_id, movie_id, label in zip(val_user_input, val_movie_input, val_labels):
    val_dict[user_id].append((movie_id, label))

val_dict = dict(val_dict)

val_user_input = torch.tensor(val_user_input, dtype=torch.long)
val_movie_input = torch.tensor(val_movie_input, dtype=torch.long)
val_labels = torch.tensor(val_labels, dtype=torch.float32)

val_dataset = torch.utils.data.TensorDataset(val_user_input, val_movie_input, val_labels)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=16)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch_users, batch_items, batch_labels in dataloader:
        batch_users = batch_users.to(device)
        batch_items = batch_items.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=use_amp):
            predictions = model(batch_users, batch_items)
        # BCELoss is not allowed inside an autocast region, so the loss is
        # computed outside it on float32 probabilities.
        loss = criterion(predictions.float(), batch_labels.view(-1, 1))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    print(f"Epoch {epoch + 1}, Training Loss: {total_loss / len(dataloader)}")

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_users, batch_items, batch_labels in val_dataloader:
            batch_users = batch_users.to(device)
            batch_items = batch_items.to(device)
            batch_labels = batch_labels.to(device)
            predictions = model(batch_users, batch_items)
            loss = criterion(predictions, batch_labels.view(-1, 1))
            val_loss += loss.item()
        val_loss_avg = val_loss / len(val_dataloader)
        scheduler.step(val_loss_avg)
        print(f"Epoch {epoch + 1}, Validation Loss: {val_loss_avg}")
    train_losses.append(total_loss / len(dataloader))
    val_losses.append(val_loss_avg)

    # Top-10 ranking quality on the expanded validation set.
    _, _, f1 = model_evaluation(model, val_dict, device, K=10)
    f1s.append(f1)

    # Early stopping on validation loss; the best weights are checkpointed.
    if val_loss_avg < best_val_loss:
        best_val_loss = val_loss_avg
        counter = 0
        torch.save(model.state_dict(), "./best_model.pth")
    else:
        counter += 1
        print(f"Early Stopping Counter: {counter}/{patience}")
        if counter >= patience:
            print("Early stopping triggered! Stopping training.")
            break

In [None]:
# Experiment 2: NeuMF trained on labels from load_data_meanStd (per-user
# mean / standard-deviation thresholds).

# Per-epoch histories (some are only filled by evaluation code that is
# currently disabled).
train_losses_meanStd = []
val_losses_meanStd = []
train_recalls_meanStd = []
train_ndcgs_meanStd = []
recalls_meanStd = []
f1s_meanStd = []
ndcgs_meanStd = []

# Early-stopping state.
patience = 10
counter = 0
best_val_loss = float('inf')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision is only used when actually running on CUDA.
use_amp = device.type == "cuda"

train_dict, val_dict, test_dict, movie_num, user_num = load_data_meanStd('/kaggle/input/mmmmmm/ratings.dat')
negative_num = 10
non_interacted_movies = get_non_interacted_movies(train_dict, val_dict, test_dict, movie_num)

user_input, movie_input, labels = get_input_data(train_dict, non_interacted_movies, negative_num)

batch_size = 256
num_epochs = 30
# Ids are used directly as embedding indices, hence the +1 on both table sizes.
model_meanStd = NeuMF(user_num+1, movie_num+1,10, [10, 16]).to(device)
model_meanStd = torch.compile(model_meanStd)
optimizer = optim.Adam(model_meanStd.parameters(), lr=0.0001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=3, verbose=True)

# NeuMF.forward already ends in a sigmoid, so the loss must take probabilities;
# BCEWithLogitsLoss would apply a second sigmoid on top of them.
criterion = nn.BCELoss()
scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

# Keep the dataset tensors on the CPU: the loaders use worker processes, and
# every batch is moved to the device inside the loops below.
user_input = torch.tensor(user_input, dtype=torch.long)
movie_input = torch.tensor(movie_input, dtype=torch.long)
labels = torch.tensor(labels, dtype=torch.float32)

dataset = torch.utils.data.TensorDataset(user_input, movie_input, labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8)

# Validation pairs: each user's held-out interactions plus every
# non-interacted movie as a label-0 candidate.
val_user_input, val_movie_input, val_labels = get_input_data_nointeract(val_dict, non_interacted_movies)

# Rebuild val_dict from the expanded pairs so model_evaluation ranks the
# held-out items against all non-interacted movies.
val_dict = defaultdict(list)

for user_id, movie_id, label in zip(val_user_input, val_movie_input, val_labels):
    val_dict[user_id].append((movie_id, label))

val_dict = dict(val_dict)

val_user_input = torch.tensor(val_user_input, dtype=torch.long)
val_movie_input = torch.tensor(val_movie_input, dtype=torch.long)
val_labels = torch.tensor(val_labels, dtype=torch.float32)

val_dataset = torch.utils.data.TensorDataset(val_user_input, val_movie_input, val_labels)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=8)

for epoch in range(num_epochs):
    model_meanStd.train()
    total_loss = 0

    for batch_users, batch_items, batch_labels in dataloader:
        batch_users = batch_users.to(device)
        batch_items = batch_items.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=use_amp):
            predictions = model_meanStd(batch_users, batch_items)
        # BCELoss is not allowed inside an autocast region, so the loss is
        # computed outside it on float32 probabilities.
        loss = criterion(predictions.float(), batch_labels.view(-1, 1))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    print(f"Epoch {epoch + 1}, Training Loss: {total_loss / len(dataloader)}")

    model_meanStd.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_users, batch_items, batch_labels in val_dataloader:
            batch_users = batch_users.to(device)
            batch_items = batch_items.to(device)
            batch_labels = batch_labels.to(device)
            predictions = model_meanStd(batch_users, batch_items)
            loss = criterion(predictions, batch_labels.view(-1, 1))
            val_loss += loss.item()
        val_loss_avg = val_loss / len(val_dataloader)
        scheduler.step(val_loss_avg)
        print(f"Epoch {epoch + 1}, Validation Loss: {val_loss_avg}")
    train_losses_meanStd.append(total_loss / len(dataloader))
    val_losses_meanStd.append(val_loss_avg)

    # Evaluate the network trained in this cell (model_meanStd), not the
    # `model` left over from the first experiment.
    _, _, f1 = model_evaluation(model_meanStd, val_dict, device, K=10)
    f1s_meanStd.append(f1)

    # Early stopping on validation loss; the best weights are checkpointed.
    if val_loss_avg < best_val_loss:
        best_val_loss = val_loss_avg
        counter = 0
        torch.save(model_meanStd.state_dict(), "./best_model_meanStd.pth")
    else:
        counter += 1
        print(f"Early Stopping Counter: {counter}/{patience}")
        if counter >= patience:
            print("Early stopping triggered! Stopping training.")
            break

4

In [None]:
# Experiment 3: NeuMF trained on labels from load_data_rate with the default
# threshold (rating >= 4 -> 1, everything else, including 3, -> 0).

# Per-epoch histories (some are only filled by evaluation code that is
# currently disabled).
train_losses_rate = []
val_losses_rate = []
f1s_rate = []
recalls_rate = []
ndcgs_rate = []
train_recalls_rate = []
train_ndcgs_rate = []

# Early-stopping state.
patience = 10
counter = 0
best_val_loss = float('inf')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision is only used when actually running on CUDA.
use_amp = device.type == "cuda"

train_dict, val_dict, test_dict, movie_num, user_num = load_data_rate('/kaggle/input/mmmmmm/ratings.dat')
negative_num = 10
non_interacted_movies = get_non_interacted_movies(train_dict, val_dict, test_dict, movie_num)

user_input, movie_input, labels = get_input_data(train_dict, non_interacted_movies, negative_num)

batch_size = 256
num_epochs = 30
# Ids are used directly as embedding indices, hence the +1 on both table sizes.
model_4 = NeuMF(user_num+1, movie_num+1,10, [10, 16]).to(device)
model_4 = torch.compile(model_4)
optimizer = optim.Adam(model_4.parameters(), lr=0.0001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=3, verbose=True)

# NeuMF.forward already ends in a sigmoid, so the loss must take probabilities;
# BCEWithLogitsLoss would apply a second sigmoid on top of them.
criterion = nn.BCELoss()
scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

# Keep the dataset tensors on the CPU: the loaders use worker processes, and
# every batch is moved to the device inside the loops below.
user_input = torch.tensor(user_input, dtype=torch.long)
movie_input = torch.tensor(movie_input, dtype=torch.long)
labels = torch.tensor(labels, dtype=torch.float32)

dataset = torch.utils.data.TensorDataset(user_input, movie_input, labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8)

# Validation pairs: each user's held-out interactions plus every
# non-interacted movie as a label-0 candidate.
val_user_input, val_movie_input, val_labels = get_input_data_nointeract(val_dict, non_interacted_movies)

# Rebuild val_dict from the expanded pairs so model_evaluation ranks the
# held-out items against all non-interacted movies.
val_dict = defaultdict(list)

for user_id, movie_id, label in zip(val_user_input, val_movie_input, val_labels):
    val_dict[user_id].append((movie_id, label))

val_dict = dict(val_dict)

val_user_input = torch.tensor(val_user_input, dtype=torch.long)
val_movie_input = torch.tensor(val_movie_input, dtype=torch.long)
val_labels = torch.tensor(val_labels, dtype=torch.float32)

val_dataset = torch.utils.data.TensorDataset(val_user_input, val_movie_input, val_labels)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=8)

for epoch in range(num_epochs):
    model_4.train()
    total_loss = 0

    for batch_users, batch_items, batch_labels in dataloader:
        batch_users = batch_users.to(device)
        batch_items = batch_items.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=use_amp):
            predictions = model_4(batch_users, batch_items)
        # BCELoss is not allowed inside an autocast region, so the loss is
        # computed outside it on float32 probabilities.
        loss = criterion(predictions.float(), batch_labels.view(-1, 1))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    print(f"Epoch {epoch + 1}, Training Loss: {total_loss / len(dataloader)}")

    model_4.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_users, batch_items, batch_labels in val_dataloader:
            batch_users = batch_users.to(device)
            batch_items = batch_items.to(device)
            batch_labels = batch_labels.to(device)
            predictions = model_4(batch_users, batch_items)
            loss = criterion(predictions, batch_labels.view(-1, 1))
            val_loss += loss.item()
        val_loss_avg = val_loss / len(val_dataloader)
        scheduler.step(val_loss_avg)
        print(f"Epoch {epoch + 1}, Validation Loss: {val_loss_avg}")
    train_losses_rate.append(total_loss / len(dataloader))
    val_losses_rate.append(val_loss_avg)

    # Evaluate the network trained in this cell (model_4), not the `model`
    # left over from the first experiment.
    _, _, f1 = model_evaluation(model_4, val_dict, device, K=10)
    f1s_rate.append(f1)

    # Early stopping on validation loss; the best weights are checkpointed.
    if val_loss_avg < best_val_loss:
        best_val_loss = val_loss_avg
        counter = 0
        torch.save(model_4.state_dict(), "./best_model_rate.pth")
    else:
        counter += 1
        print(f"Early Stopping Counter: {counter}/{patience}")
        if counter >= patience:
            print("Early stopping triggered! Stopping training.")
            break

In [None]:
# Experiment 4: NeuMF trained on labels from load_data_rate with threshold=3
# (rating >= 3 -> 1, ratings 1 and 2 -> 0).

# Per-epoch histories (some are only filled by evaluation code that is
# currently disabled).
train_losses_rate3 = []
val_losses_rate3 = []
recalls_rate3 = []
ndcgs_rate3 = []
f1s_rate3 = []
train_recalls_rate3 = []
train_ndcgs_rate3 = []

# Early-stopping state.
patience = 10
counter = 0
best_val_loss = float('inf')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision is only used when actually running on CUDA.
use_amp = device.type == "cuda"

train_dict, val_dict, test_dict, movie_num, user_num = load_data_rate('/kaggle/input/mmmmmm/ratings.dat', threshold=3)
negative_num = 10
non_interacted_movies = get_non_interacted_movies(train_dict, val_dict, test_dict, movie_num)

user_input, movie_input, labels = get_input_data(train_dict, non_interacted_movies, negative_num)

batch_size = 256
num_epochs = 30
# Ids are used directly as embedding indices, hence the +1 on both table sizes.
model_3 = NeuMF(user_num+1, movie_num+1,10, [10, 16]).to(device)
model_3 = torch.compile(model_3)
optimizer = optim.Adam(model_3.parameters(), lr=0.0001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=3, verbose=True)

# NeuMF.forward already ends in a sigmoid, so the loss must take probabilities;
# BCEWithLogitsLoss would apply a second sigmoid on top of them.
criterion = nn.BCELoss()
scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

# These loaders use no worker processes, so the tensors can live on the device.
user_input = torch.tensor(user_input, dtype=torch.long).to(device)
movie_input = torch.tensor(movie_input, dtype=torch.long).to(device)
labels = torch.tensor(labels, dtype=torch.float32).to(device)

dataset = torch.utils.data.TensorDataset(user_input, movie_input, labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Validation pairs: each user's held-out interactions plus every
# non-interacted movie as a label-0 candidate.
val_user_input, val_movie_input, val_labels = get_input_data_nointeract(val_dict, non_interacted_movies)

# Rebuild val_dict from the expanded pairs so model_evaluation ranks the
# held-out items against all non-interacted movies.
val_dict = defaultdict(list)

for user_id, movie_id, label in zip(val_user_input, val_movie_input, val_labels):
    val_dict[user_id].append((movie_id, label))

val_dict = dict(val_dict)

val_user_input = torch.tensor(val_user_input, dtype=torch.long).to(device)
val_movie_input = torch.tensor(val_movie_input, dtype=torch.long).to(device)
val_labels = torch.tensor(val_labels, dtype=torch.float32).to(device)

val_dataset = torch.utils.data.TensorDataset(val_user_input, val_movie_input, val_labels)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    model_3.train()
    total_loss = 0

    for batch_users, batch_items, batch_labels in dataloader:
        batch_users = batch_users.to(device)
        batch_items = batch_items.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=use_amp):
            predictions = model_3(batch_users, batch_items)
        # BCELoss is not allowed inside an autocast region, so the loss is
        # computed outside it on float32 probabilities.
        loss = criterion(predictions.float(), batch_labels.view(-1, 1))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    print(f"Epoch {epoch + 1}, Training Loss: {total_loss / len(dataloader)}")

    model_3.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_users, batch_items, batch_labels in val_dataloader:
            batch_users = batch_users.to(device)
            batch_items = batch_items.to(device)
            batch_labels = batch_labels.to(device)
            predictions = model_3(batch_users, batch_items)
            loss = criterion(predictions, batch_labels.view(-1, 1))
            val_loss += loss.item()
        val_loss_avg = val_loss / len(val_dataloader)
        scheduler.step(val_loss_avg)
        print(f"Epoch {epoch + 1}, Validation Loss: {val_loss_avg}")
    train_losses_rate3.append(total_loss / len(dataloader))
    val_losses_rate3.append(val_loss_avg)

    # Evaluate the network trained in this cell (model_3), not the `model`
    # left over from the first experiment.
    _, _, f1 = model_evaluation(model_3, val_dict, device, K=10)
    f1s_rate3.append(f1)

    # Early stopping on validation loss; the best weights are checkpointed.
    if val_loss_avg < best_val_loss:
        best_val_loss = val_loss_avg
        counter = 0
        torch.save(model_3.state_dict(), "./best_model_rate3.pth")
    else:
        counter += 1
        print(f"Early Stopping Counter: {counter}/{patience}")
        if counter >= patience:
            print("Early stopping triggered! Stopping training.")
            break

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def _pad_with_nan(values, length):
    """Right-pad ``values`` with NaN so that it has ``length`` entries.

    Args:
        values: Sequence (list or array) of per-epoch numbers.
        length: Target number of entries.

    Returns:
        ``values`` unchanged when it already has at least ``length`` entries,
        otherwise a NumPy array of ``length`` entries whose tail is NaN.
        matplotlib does not draw NaN points, so a shorter history simply
        ends early on the shared epoch axis.
    """
    shortfall = length - len(values)
    if shortfall <= 0:
        return values
    return np.concatenate([values, [np.nan] * shortfall])


epochs = np.arange(1, num_epochs + 1)

# Histories can be shorter than num_epochs (e.g. when training stopped early);
# pad them so they can be plotted against the full epoch axis.
train_losses = _pad_with_nan(train_losses, len(epochs))
val_losses = _pad_with_nan(val_losses, len(epochs))

# One (tag, training curve, validation curve) entry per experiment to draw.
# Exactly one panel is created per entry, so enabling another experiment is a
# one-line change and no empty axes are left in the figure.
loss_curves = [
    ('Filter 3', train_losses, val_losses),
    # ('Threshold ≥ 4', train_losses_rate, val_losses_rate),
    # ('Standardized Mean', train_losses_meanStd, val_losses_meanStd),
    # ('Threshold ≥ 3', train_losses_rate3, val_losses_rate3),
]

# 3.75 in per panel keeps the original 5x15 in size when all four are enabled.
# squeeze=False always yields a 2-D axes array, even for a single panel.
fig, axes = plt.subplots(
    len(loss_curves), 1,
    figsize=(5, 3.75 * len(loss_curves)),
    sharex=True,
    squeeze=False,
)

for ax, (tag, train_curve, val_curve) in zip(axes[:, 0], loss_curves):
    ax.plot(epochs, _pad_with_nan(train_curve, len(epochs)),
            label=f'Training Loss ({tag})', linestyle='-', color='#1f77b4', linewidth=2)
    ax.plot(epochs, _pad_with_nan(val_curve, len(epochs)),
            label=f'Validation Loss ({tag})', linestyle='-', color='#ff7f0e', linewidth=2)
    ax.set_ylabel('Loss', fontsize=12, fontweight='bold')
    ax.legend(loc='upper right', fontsize=8, frameon=True)
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.set_title(f'Training & Validation Loss ({tag})', fontsize=14, fontweight='bold')

# The x axis is shared, so only the bottom panel needs the label.
axes[-1, 0].set_xlabel('Epoch', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

epochs = np.arange(1, num_epochs + 1)

# The F1 history may hold fewer entries than there are epochs; fill the tail
# with NaN so it lines up with `epochs` (NaN points are not drawn).
if len(f1s) < len(epochs):
    f1s = np.concatenate([f1s, [np.nan] * (len(epochs) - len(f1s))])
# Other experiments can be padded the same way before plotting:
#   f1s_rate, f1s_meanStd, f1s_rate3

# Shared text styling for both axis labels.
axis_label_style = dict(fontsize=12, fontweight='bold')

# Single square figure holding the per-epoch F1 curve.
plt.figure(figsize=(5, 5))
plt.plot(
    epochs,
    f1s,
    label='F1 (Filter 3)',
    marker='o',
    linestyle='--',
    color='red',
    linewidth=2,
)
# plt.plot(epochs, f1s_rate, label='F1 (≥4)', marker='s', linestyle='-', color='blue', linewidth=2)
# plt.plot(epochs, f1s_meanStd, label='F1 (StdMean)', marker='^', linestyle='-.', color='green', linewidth=2)
# plt.plot(epochs, f1s_rate3, label='F1 (≥3)', marker='d', linestyle=':', color='purple', linewidth=2)

plt.title('F1 Comparison', fontsize=14, fontweight='bold')
plt.xlabel('Epoch', **axis_label_style)
plt.ylabel('F1', **axis_label_style)
plt.grid(True, linestyle='--', alpha=0.5)
plt.legend(loc='center right', fontsize=11, frameon=True)

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# epochs = np.arange(1, num_epochs + 1)

# # Ensure all arrays have the same length as epochs
# recalls = np.concatenate([recalls, [np.nan] * (len(epochs) - len(recalls))]) if len(recalls) < len(epochs) else recalls
# recalls_rate = np.concatenate([recalls_rate, [np.nan] * (len(epochs) - len(recalls_rate))]) if len(recalls_rate) < len(epochs) else recalls_rate
# recalls_meanStd = np.concatenate([recalls_meanStd, [np.nan] * (len(epochs) - len(recalls_meanStd))]) if len(recalls_meanStd) < len(epochs) else recalls_meanStd
# recalls_rate3 = np.concatenate([recalls_rate3, [np.nan] * (len(epochs) - len(recalls_rate3))]) if len(recalls_rate3) < len(epochs) else recalls_rate3

# # Create a single plot
# plt.figure(figsize=(5, 5))

# plt.plot(epochs, recalls, label='Recall (Filter 3)', marker='o', linestyle='--', color='red', linewidth=2)
# plt.plot(epochs, recalls_rate, label='Recall (≥4)', marker='s', linestyle='-', color='blue', linewidth=2)
# plt.plot(epochs, recalls_meanStd, label='Recall (StdMean)', marker='^', linestyle='-.', color='green', linewidth=2)
# plt.plot(epochs, recalls_rate3, label='Recall (≥3)', marker='d', linestyle=':', color='purple', linewidth=2)

# plt.xlabel('Epoch', fontsize=12, fontweight='bold')
# plt.ylabel('Recall', fontsize=12, fontweight='bold')
# plt.legend(loc='center right', fontsize=11, frameon=True)
# plt.grid(True, linestyle='--', alpha=0.5)
# plt.title('Recall Comparison Across Different Rate Setting', fontsize=14, fontweight='bold')

# plt.tight_layout()
# plt.show()

In [None]:
torch.cuda.empty_cache()

# Build the test inputs from the held-out interactions and the non-interacted
# movies; the helper returns three parallel sequences (users, movies, labels).
test_user_input, test_movie_input, test_labels = get_input_data_nointeract(test_dict, non_interacted_movies)

# Regroup the flat triples into {user_id: [(movie_id, label), ...]}.
# NOTE(review): this rebinds `test_dict`, so re-running the cell feeds the
# already regrouped dict back into get_input_data_nointeract. Restart and run
# all (or re-create test_dict first) before re-executing this cell.
test_dict = defaultdict(list)
for user_id, movie_id, label in zip(test_user_input, test_movie_input, test_labels):
    test_dict[user_id].append((movie_id, label))
test_dict = dict(test_dict)

_, _, f1 = model_evaluation(model, test_dict, device, K=10)
# _, _, f1_meanStd = model_evaluation(model_meanStd, test_dict, device, K=10)
# _, _, f1_4 = model_evaluation(model_4, test_dict, device, K=10)
# _, _, f1_3 = model_evaluation(model_3, test_dict, device, K=10)

# Keep each label next to its score so the two can never drift apart. With
# four labels but one score, Axes.bar broadcasts the single height and draws
# four identical bars, which reads as four models sharing the same F1.
results = [
    (' Filter3', f1),
    # ('MeanStd', f1_meanStd),
    # ('rate ≥4', f1_4),
    # ('rate ≥3', f1_3),
]
models = [label for label, _ in results]
f1_scores = [score for _, score in results]
bar_colors = ['blue', 'orange', 'green', 'red'][:len(models)]

x = np.arange(len(models))  # x-axis positions
width = 0.4  # Bar width

# Single panel: only F1 is plotted, so no empty second axes is created.
fig, ax1 = plt.subplots(figsize=(6, 5))

# Plot F1 comparison
ax1.bar(x, f1_scores, width, color=bar_colors)
ax1.set_ylabel('F1', fontsize=12, fontweight='bold')
ax1.set_xticks(x)
ax1.set_xticklabels(models, fontsize=11)
ax1.set_title('F1 Comparison', fontsize=14, fontweight='bold')
ax1.grid(axis='y', linestyle='--', alpha=0.6)

# # Plot NDCG comparison (needs a second axes, e.g.
# # fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)), and `ndcg_scores`)
# ax2.bar(x, ndcg_scores, width, color=bar_colors)
# ax2.set_ylabel('NDCG@10', fontsize=12, fontweight='bold')
# ax2.set_xticks(x)
# ax2.set_xticklabels(models, fontsize=11)
# ax2.set_title('NDCG Comparison', fontsize=14, fontweight='bold')
# ax2.grid(axis='y', linestyle='--', alpha=0.6)

plt.tight_layout()
plt.show()

In [None]:
# print(recall_scores)
# print(ndcg_scores)