In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
from typing import Tuple, List, Literal
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import lightning.pytorch as pl
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.trainer.states import TrainerFn
from torch.utils.data import TensorDataset, DataLoader

# Single seed shared by every random number generator used in this notebook.
SEED = 42

# Seed each library separately; the calls are independent of one another.
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Read the train and test rating matrices from disk as float32 arrays.
# NOTE(review): unrated entries are presumably stored as NaN (the next cell
# replaces NaN with 0) - confirm against the data files.
print("Loading data...")
ratings, test_ratings = (
    np.load(path).astype(np.float32)
    for path in ("../ratings_train.npy", "../ratings_test.npy")
)

print(f"matrix shape: {ratings.shape}")
print(f"Test matrix shape: {test_ratings.shape}")

Loading data...
matrix shape: (610, 4980)
Test matrix shape: (610, 4980)


In [None]:
# Turn NaN entries into 0 so that "0" marks an unobserved rating from here on;
# both matrices are kept as float32.
matrix, test_matrix = (
    np.nan_to_num(raw, nan=0.0).astype(np.float32)
    for raw in (ratings, test_ratings)
)

In [None]:
class DMFDataModule(pl.LightningDataModule):
    """
    DataModule for Deep Matrix Factorization.

    Holds the (users x items) training rating matrix, in which 0 marks an
    unobserved entry, together with the train/validation split of its observed
    positions for one cross-validation fold. It builds:

    * feature matrices (``train_features``, ``val_features``, ``test_features``,
      ``predict_features``) that the LightningModule reads in its own ``setup``;
    * ``TensorDataset`` objects of ``(user_index, item_index, rating)`` triples.

    Validation entries are zeroed in the train/val feature matrices so the
    encoders never see the ratings they are evaluated on.

    Args:
        matrix: Training rating matrix of shape (num_users, num_items).
        test_matrix: Test rating matrix; its entries > 0 form the test set.
        train_positions: Array of (user, item) index pairs used for training.
        val_positions: Array of (user, item) index pairs held out for validation.
        batch_size: Upper bound on the training batch size.
        negatives_per_positive: Number of zero-labelled samples drawn per
            observed training rating (0 disables negative sampling).
        device: Stored as ``device_type``; not used by this class itself.
    """
    def __init__(
        self,
        matrix: np.ndarray,
        test_matrix: np.ndarray,
        train_positions: np.ndarray,
        val_positions: np.ndarray,
        batch_size: int,
        negatives_per_positive: int = 4,
        device: str = "cpu"
    ):
        super().__init__()
        self.matrix = matrix
        self.num_users, self.num_items = matrix.shape
        self.train_positions = train_positions
        self.val_positions = val_positions
        self.test_matrix = test_matrix
        self.batch_size = batch_size
        self.negatives_per_positive = negatives_per_positive
        self.device_type = device

    def _masked_matrix(self, positions: np.ndarray) -> np.ndarray:
        """Return a copy of the training matrix with the given (user, item) positions set to 0."""
        masked = self.matrix.copy()
        masked[positions.T[0], positions.T[1]] = 0.0
        return masked

    def setup(self, stage: TrainerFn):
        """
        Build the feature matrices and datasets needed for ``stage``.

        ``stage`` is compared with ``==`` rather than ``is`` so that both the
        ``TrainerFn`` members passed by the Trainer and the equivalent plain
        strings ("fit", "validate", "test", "predict") are recognised.
        ``None`` prepares both the fit and the test data.
        """
        if stage is None or stage == TrainerFn.FITTING:
            self.train_features = self._masked_matrix(self.val_positions)  # Mask validation entries
            self.val_features = self.train_features  # Same array: validation ratings must stay hidden
            self.train_dataset = self.__prepare_dataset("train")
            self.val_dataset = self.__prepare_dataset("val")

        # Standalone validate stage
        if stage == TrainerFn.VALIDATING:
            # Recreate val_features only if an earlier fit has not already set them
            if not hasattr(self, 'val_features'):
                self.val_features = self._masked_matrix(self.val_positions)
            self.val_dataset = self.__prepare_dataset("val")

        # Test stage: use the full training matrix as features (no masking)
        if stage is None or stage == TrainerFn.TESTING:
            self.test_features = self.matrix
            self.test_dataset = self.__prepare_dataset("test")

        # Predict stage: same features as test; the test pairs are reused as the dataset
        if stage == TrainerFn.PREDICTING:
            self.predict_features = self.matrix
            self.predict_dataset = self.__prepare_dataset("test")

    def __prepare_dataset(self, dataset: Literal["train", "val", "test", "predict"] = "train"):
        """
        Build a ``TensorDataset`` of ``(user_index, item_index, rating)`` triples.

        Args:
            dataset: Which split to build.
                "train"   - observed entries minus the validation positions,
                            plus sampled negatives (rating 0) if enabled;
                "val"     - observed entries minus the training positions;
                "test"    - entries > 0 of ``test_matrix``;
                "predict" - a single dummy entry so the dataset is not empty.

        Returns:
            TensorDataset with long user indices, long item indices and float32
            ratings. Positive samples come first in row-major order, followed
            by any negative samples.

        Raises:
            ValueError: If ``dataset`` is not one of the names above.
        """
        if dataset == "train":
            filtered_matrix = self._masked_matrix(self.val_positions)
            negatives_per_positive = self.negatives_per_positive
        elif dataset == "val":
            filtered_matrix = self._masked_matrix(self.train_positions)
            negatives_per_positive = 0
        elif dataset == "test":
            filtered_matrix = self.test_matrix
            negatives_per_positive = 0
        elif dataset == "predict":
            filtered_matrix = np.zeros_like(self.matrix)
            filtered_matrix[0, 0] = 1.0  # Dummy entry to avoid empty dataset
            negatives_per_positive = 0
        else:
            raise ValueError(f"Unknown dataset type: {dataset}")

        # np.nonzero walks the matrix in row-major order, i.e. user by user with
        # ascending item index inside each user.
        user_idx, item_idx = np.nonzero(filtered_matrix > 0)
        targets = filtered_matrix[user_idx, item_idx].astype(np.float32)

        if negatives_per_positive > 0:
            # Candidates are items the user has not rated anywhere in the FULL
            # training matrix. Sampling from the masked matrix instead would
            # let held-out validation ratings be drawn as label-0 "negatives",
            # injecting wrong labels for exactly the entries being evaluated.
            # NOTE(review): test-set entries can still be drawn as negatives;
            # excluding them would require looking at the test matrix.
            candidates_per_user = [
                np.where(self.matrix[u] == 0)[0] for u in range(self.num_users)
            ]
            neg_users, neg_items = [], []
            for u in user_idx:
                candidates = candidates_per_user[u]
                if len(candidates) == 0:
                    continue
                sampled = np.random.choice(
                    candidates,
                    size=negatives_per_positive,
                    # Only sample with replacement when there are too few candidates
                    replace=len(candidates) < negatives_per_positive
                )
                neg_users.extend([u] * negatives_per_positive)
                neg_items.extend(sampled.tolist())

            if neg_users:
                user_idx = np.concatenate([user_idx, np.asarray(neg_users, dtype=user_idx.dtype)])
                item_idx = np.concatenate([item_idx, np.asarray(neg_items, dtype=item_idx.dtype)])
                targets = np.concatenate([targets, np.zeros(len(neg_users), dtype=np.float32)])

        return TensorDataset(
            torch.as_tensor(user_idx, dtype=torch.long),
            torch.as_tensor(item_idx, dtype=torch.long),
            torch.as_tensor(targets, dtype=torch.float32)
        )

    def train_dataloader(self):
        """Return the shuffled training dataloader (batch size capped at the dataset size)."""
        batch_size = min(self.batch_size, max(1, len(self.train_dataset)))
        return DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the validation dataloader; the whole split is served as one batch."""
        batch_size = max(1, len(self.val_dataset))
        return DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False)

    def test_dataloader(self):
        """Return the test dataloader; the whole split is served as one batch."""
        batch_size = max(1, len(self.test_dataset))
        return DataLoader(self.test_dataset, batch_size=batch_size, shuffle=False)

    def predict_dataloader(self):
        """Return the predict dataloader; the whole dataset is served as one batch."""
        batch_size = max(1, len(self.predict_dataset))
        return DataLoader(self.predict_dataset, batch_size=batch_size, shuffle=False)


class DMFModel(nn.Module):
    """
    Deep Matrix Factorization network.

    Two independent MLP towers map a user's rating row and an item's rating
    column into a shared latent space; the score of a (user, item) pair is the
    cosine similarity of the two latent vectors. The number of layers in each
    tower equals ``len(hidden_dims)``.
    """
    def __init__(self, num_users: int, num_items: int, hidden_dims: Tuple[int, ...] = (128, 32)):
        super().__init__()
        # A user is described by one value per item, an item by one value per user.
        self.user_encoder = self._make_encoder(num_items, hidden_dims)
        self.item_encoder = self._make_encoder(num_users, hidden_dims)

    @staticmethod
    def _make_encoder(in_features: int, hidden_dims: Tuple[int, ...]) -> nn.Sequential:
        """Stack ``Linear -> LeakyReLU`` pairs whose widths follow ``hidden_dims``."""
        widths = (in_features, *hidden_dims)
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.LeakyReLU()]
        return nn.Sequential(*stack)

    def forward(self, user_rows: torch.Tensor, item_cols: torch.Tensor):
        """
        Score a batch of (user, item) pairs.

        Args:
            user_rows: User feature vectors (batch_size, num_items)
            item_cols: Item feature vectors (batch_size, num_users)
        Returns:
            Cosine similarity of the paired embeddings (batch_size,)
        """
        return F.cosine_similarity(
            self.user_encoder(user_rows),
            self.item_encoder(item_cols),
            dim=1,
            eps=1e-8,
        )


class DMFModule(pl.LightningModule):
    """
    Lightning Module for Deep Matrix Factorization.

    Wraps ``DMFModel`` and maps its cosine similarity in [-1, 1] to a rating in
    [0, scale] via ``(cos + 1) / 2 * scale``. Training minimises binary
    cross-entropy between the normalised prediction and ``target / scale``;
    validation and test report RMSE in the original rating units.

    Args:
        scale: Factor between normalised scores in [0, 1] and ratings. Targets
            are divided by it before the BCE loss, so it should be at least the
            largest rating (callers are expected to pass the maximum rating).
        num_users: Number of rows of the rating matrix.
        num_items: Number of columns of the rating matrix.
        hidden_dims: Layer widths of both encoder towers.
        learning_rate: Adam learning rate.
        min_prob: Normalised predictions are clamped to
            [min_prob, 1 - min_prob] before the BCE loss to keep the log finite.
    """
    def __init__(
        self,
        scale: float,
        num_users: int,
        num_items: int,
        hidden_dims: Tuple[int, ...] = (128, 32),
        learning_rate: float = 1e-3,
        min_prob: float = 1e-6
    ):
        super().__init__()
        # Model
        self.model = DMFModel(num_users, num_items, hidden_dims)

        # Hyperparameters
        self.learning_rate = learning_rate
        self.min_prob = min_prob
        self.scale = scale

    def _load_features(self, name: str) -> torch.Tensor:
        """Copy the datamodule's feature matrix ``name`` to this module's device as float32."""
        return torch.tensor(
            getattr(self.trainer.datamodule, name),
            device=self.device,
            dtype=torch.float32
        )

    def setup(self, stage: str = None):
        """
        Called at the beginning of fit, validate, test, or predict.
        Caches the datamodule's feature matrices as tensors on the device.

        Args:
            stage: 'fit', 'validate', 'test', or 'predict' (plain string or the
                matching ``TrainerFn`` member; compared with ``==`` so both
                forms are recognised). ``None`` loads fit and test features.
        """
        if not (self.trainer and self.trainer.datamodule):
            return

        if stage is None or stage == TrainerFn.FITTING:
            self.train_features = self._load_features("train_features")
            self.val_features = self._load_features("val_features")

        if stage == TrainerFn.VALIDATING:
            # For standalone validation, load the features only if a previous
            # fit on this module has not already cached them.
            if getattr(self, "val_features", None) is None:
                self.val_features = self._load_features("val_features")

        if stage is None or stage == TrainerFn.TESTING:
            self.test_features = self._load_features("test_features")

        if stage == TrainerFn.PREDICTING:
            self.predict_features = self._load_features("predict_features")

    def forward(self, user_indices: torch.Tensor, item_indices: torch.Tensor, raw_user_features: torch.Tensor, raw_item_features: torch.Tensor):
        """
        Score a batch of (user, item) pairs.

        Before encoding, the rating of the pair being predicted is zeroed in
        both the user's row and the item's column so the network cannot read
        the target from its own input.

        Args:
            user_indices: User index of each pair (batch_size,)
            item_indices: Item index of each pair (batch_size,)
            raw_user_features: Rating rows of those users (batch_size, num_items)
            raw_item_features: Rating columns of those items (batch_size, num_users)
        Returns:
            Cosine similarity per pair (batch_size,)
        """
        batch_size = user_indices.size(0)

        user_mask = torch.ones_like(raw_user_features)
        user_mask[torch.arange(batch_size, device=raw_user_features.device), item_indices] = 0
        user_features = raw_user_features * user_mask

        item_mask = torch.ones_like(raw_item_features)
        item_mask[torch.arange(batch_size, device=raw_item_features.device), user_indices] = 0
        item_features = raw_item_features * item_mask

        return self.model(user_features, item_features)

    @staticmethod
    def _unit_rows(latent: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
        """Scale every row by ``(||row||^2 + eps) ** -0.5`` (unit length up to eps)."""
        return latent * (latent.pow(2).sum(dim=1, keepdim=True) + eps).pow(-0.5)

    def _normalized_embeddings(self, features: torch.Tensor):
        """
        Encode every user (rows of ``features``) and every item (columns) and
        normalise the embeddings, so a dot product between a user row and an
        item row is their cosine similarity.

        Row-wise scaling replaces multiplication by dense diagonal matrices:
        same mathematics without materialising (users x users) and
        (items x items) matrices.
        """
        user_unit = self._unit_rows(self.model.user_encoder(features))
        item_unit = self._unit_rows(self.model.item_encoder(features.T))
        return user_unit, item_unit

    def _to_rating(self, cosine_similarity: torch.Tensor) -> torch.Tensor:
        """Map cosine similarity from [-1, 1] to [0, 1], then to [0, scale]."""
        return (cosine_similarity + 1) / 2 * self.scale

    def _pair_rmse(self, features: torch.Tensor, user_idx: torch.Tensor, item_idx: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """RMSE (in rating units) of the predictions for the given (user, item) pairs."""
        user_unit, item_unit = self._normalized_embeddings(features)
        # Only the requested pairs are scored; the full users x items matrix is not needed.
        cosine_sim_batch = (user_unit[user_idx] * item_unit[item_idx]).sum(dim=1)
        prediction = self._to_rating(cosine_sim_batch)
        return torch.sqrt(((prediction - target) ** 2).mean())

    def training_step(self, batch, batch_idx):
        """Training step: BCE loss on normalised ratings, with RMSE logged for monitoring."""
        user_idx, item_idx, target = batch
        normalized_target = target / self.scale
        raw_user_features = self.train_features[user_idx]
        raw_item_features = self.train_features.T[item_idx]

        # Get prediction in [-1, 1] range
        cosine_similarity = self(user_idx, item_idx, raw_user_features, raw_item_features)

        # Scale from [-1, 1] to [0, 1]
        normalized_prediction = (cosine_similarity + 1) / 2

        # Clamp from [0, 1] to [p, 1 - p] so log() in the BCE stays finite
        normalized_prediction = torch.clamp(normalized_prediction, min=self.min_prob, max=1 - self.min_prob)

        # Compute BCE loss (prediction is in [0, 1]; target is too as long as target <= scale)
        loss = F.binary_cross_entropy(normalized_prediction, normalized_target)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=False)

        with torch.no_grad():
            # Scale predictions back to rating units for monitoring
            prediction = normalized_prediction * self.scale
            rmse = torch.sqrt(((prediction - target) ** 2).mean())
            self.log("train_rmse", rmse, on_step=True, on_epoch=True, prog_bar=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Validation step: RMSE of the batch's pairs, encoded from ``val_features``."""
        user_idx, item_idx, target = batch
        rmse = self._pair_rmse(self.val_features, user_idx, item_idx, target)
        self.log("val_rmse", rmse, on_step=False, on_epoch=True, prog_bar=True)
        return rmse

    def test_step(self, batch, batch_idx):
        """Test step: RMSE of the batch's pairs, encoded from ``test_features``."""
        user_idx, item_idx, target = batch
        rmse = self._pair_rmse(self.test_features, user_idx, item_idx, target)
        self.log("test_rmse", rmse, on_step=False, on_epoch=True, prog_bar=True)
        return rmse

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """
        Predict step - returns the full predicted rating matrix.

        The contents of ``batch`` are not used: predictions are computed for
        every (user, item) combination from ``predict_features``.

        Args:
            batch: Ignored.
            batch_idx: Batch index
            dataloader_idx: Dataloader index (if multiple dataloaders)

        Returns:
            Dictionary with key "predictions_matrix" holding a NumPy array of
            shape (num_users, num_items) with values in [0, scale].
        """
        user_unit, item_unit = self._normalized_embeddings(self.predict_features)

        # Cosine similarity of every user with every item, in [-1, 1]
        cosine_similarity = user_unit @ item_unit.T
        prediction = self._to_rating(cosine_similarity)

        # detach() keeps the conversion valid even when called with autograd enabled
        return {"predictions_matrix": prediction.detach().cpu().numpy()}

    def configure_optimizers(self):
        """Configure optimizer."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

In [None]:
# ----------------------------------------------------------------------------
# Architecture experiments (results recorded from earlier runs of this notebook).
# Every run reported "Number of predictions: 31598" for the ensemble.
# NOTE(review): the original note said "Using 5 folds for quicker
# experimentation; increase to 10 for final results" - presumably the sweep
# rows below used 5 folds and the final row 10 folds; confirm.
#
#   hidden_dims                  | mean val RMSE   | mean test RMSE  | ensemble test RMSE
#   -----------------------------+-----------------+-----------------+-------------------
#   (64,)                        | 0.8903 ± 0.0104 | 0.9003 ± 0.0018 | 0.8844   shallow, 1 layer
#   (128, 64)                    | 0.8877 ± 0.0108 | 0.8963 ± 0.0024 | 0.8799   medium, 2 layers
#   (256, 128, 64)               | 0.8852 ± 0.0112 | 0.8896 ± 0.0022 | 0.8762   deep, 3 layers
#   (512, 256, 128, 64)          | 0.8828 ± 0.0101 | 0.8893 ± 0.0024 | 0.8777   very deep, 4 layers
#   (1024, 512, 256, 128, 64)    | 0.8859 ± 0.0137 | 0.8896 ± 0.0007 | 0.8786   very x 2 deep, 5 layers
#   (512, 256, 128, 64, 32)      | 0.8860 ± 0.0099 | 0.8915 ± 0.0027 | 0.8763   very x 2 deep, medium latent, 5 layers
#   (256, 128, 64, 32)           | 0.8862 ± 0.0099 | 0.8924 ± 0.0025 | 0.8780   very deep, medium latent, 4 layers
#   (512, 256, 128, 4)           | 0.8918 ± 0.0131 | 0.8964 ± 0.0024 | 0.8823   very deep, small latent, 4 layers
#   -----------------------------+-----------------+-----------------+-------------------
#   final: (256, 128, 64)        | 0.8784 ± 0.0150 | 0.8853 ± 0.0027 | 0.8727   deep, 3 layers
# ----------------------------------------------------------------------------
CONFIG = dict(
    num_folds=10,
    num_epochs=100,
    batch_size=2**14,
    # Even 1 negative per positive made the results much worse: the sampled
    # "negatives" can be held-out val/test ratings, so wrong (zero) labels for
    # them end up in the training data.
    negatives_per_positive=0,
    learning_rate=1e-3,
    min_prob=1e-6,
    # Final choice after the experiments listed above: deep, 3 layers.
    hidden_dims=(256, 128, 64),
    patience=10,  # Early stopping patience
)

# One line per setting, in insertion order.
print(
    "Configuration:",
    *(f"  {key}: {value}" for key, value in CONFIG.items()),
    sep="\n",
)


print(f"\nStarting {CONFIG['num_folds']}-fold cross-validation...")

# Initialize K-Fold
kf = KFold(n_splits=CONFIG['num_folds'], shuffle=True, random_state=SEED)

# Fold-invariant quantities, computed once instead of once per fold.
# (user, item) index pairs of every observed training rating, in row-major order.
train_val_positions = np.argwhere(matrix > 0)
# Largest rating; used to normalise targets into [0, 1] for the BCE loss.
scale = float(np.max(matrix))

# Storage for results
fold_modules = []
fold_val_rmse = []
fold_test_rmse = []

# Train each fold. The split is taken over the position array itself, so the
# fold indices always match the rows they are used to select.
for fold_idx, (train_idx, val_idx) in enumerate(kf.split(train_val_positions), 1):
    print(f"\n{'='*70}")
    print(f"Training Fold {fold_idx}/{CONFIG['num_folds']}")
    print(f"{'='*70}")

    train_positions = train_val_positions[train_idx]
    val_positions = train_val_positions[val_idx]

    # Create DataModule for this fold
    datamodule = DMFDataModule(
        matrix=matrix,
        test_matrix=test_matrix,
        train_positions=train_positions,
        val_positions=val_positions,
        batch_size=CONFIG['batch_size'],
        negatives_per_positive=CONFIG['negatives_per_positive'],
        device=device
    )

    # Create Lightning Module
    num_users, num_items = datamodule.num_users, datamodule.num_items
    module = DMFModule(
        scale=scale,
        num_users=num_users,
        num_items=num_items,
        hidden_dims=CONFIG['hidden_dims'],
        learning_rate=CONFIG['learning_rate'],
        min_prob=CONFIG['min_prob']
    )

    # Setup callbacks: keep the single best checkpoint and stop once val_rmse stalls
    checkpoint_callback = ModelCheckpoint(
        monitor="val_rmse",
        mode="min",
        save_top_k=1,
        filename=f'fold{fold_idx}-{{epoch}}-{{val_rmse:.4f}}'
    )

    early_stopping_callback = EarlyStopping(
        monitor="val_rmse",
        mode="min",
        patience=CONFIG['patience'],
        verbose=True
    )

    # Create Trainer
    trainer = pl.Trainer(
        max_epochs=CONFIG['num_epochs'],
        callbacks=[checkpoint_callback, early_stopping_callback],
        logger=True,
        enable_progress_bar=True,
        accelerator="gpu" if device == "cuda" else "cpu",
        devices=1,
        # An epoch has only a couple of batches at this batch size; with the
        # default interval of 50 steps no per-step training logs are written.
        log_every_n_steps=1,
    )

    # Train the model
    trainer.fit(module, datamodule=datamodule)

    # Load best model (constructor arguments are passed explicitly because the
    # module does not store them in the checkpoint)
    best_module = DMFModule.load_from_checkpoint(
        checkpoint_callback.best_model_path,
        scale=scale,
        num_users=num_users,
        num_items=num_items,
        hidden_dims=CONFIG['hidden_dims'],
        learning_rate=CONFIG['learning_rate'],
        min_prob=CONFIG['min_prob']
    )

    # Move to device and evaluation mode
    best_module.eval().to(device)

    # Store both module and datamodule for later use
    fold_modules.append((best_module, datamodule))

    # Evaluate on validation set using Lightning's validate method
    val_results = trainer.validate(best_module, datamodule=datamodule, verbose=False)
    val_rmse = val_results[0]['val_rmse']
    fold_val_rmse.append(val_rmse)
    print(f"Fold {fold_idx} Validation RMSE: {val_rmse:.4f}")

    # Evaluate on test set using Lightning's test method
    test_results = trainer.test(best_module, datamodule=datamodule, verbose=False)
    test_rmse = test_results[0]['test_rmse']

    fold_test_rmse.append(test_rmse)
    print(f"Fold {fold_idx} Test RMSE: {test_rmse:.4f}")

print(f"\n{'='*70}")
print("Cross-Validation Results")
print(f"{'='*70}")
print(f"Mean Validation RMSE: {np.mean(fold_val_rmse):.4f} ± {np.std(fold_val_rmse):.4f}")
print(f"Mean Test RMSE: {np.mean(fold_test_rmse):.4f} ± {np.std(fold_test_rmse):.4f}")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Configuration:
  num_folds: 10
  num_epochs: 100
  batch_size: 16384
  negatives_per_positive: 0
  learning_rate: 0.001
  min_prob: 1e-06
  hidden_dims: (256, 128, 64)
  patience: 10

Starting 10-fold cross-validation...

Training Fold 1/10


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


                                                                           

/home/ali/VSCodeProjects/iasd/data-science-lab/assignment1-2025-wecare/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  3.00it/s, v_num=199, train_rmse_step=1.000, val_rmse=0.970, train_rmse_epoch=1.020]

Metric val_rmse improved. New best score: 0.970


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  3.14it/s, v_num=199, train_rmse_step=0.943, val_rmse=0.917, train_rmse_epoch=0.954]

Metric val_rmse improved by 0.053 >= min_delta = 0.0. New best score: 0.917


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.57it/s, v_num=199, train_rmse_step=0.914, val_rmse=0.906, train_rmse_epoch=0.909]

Metric val_rmse improved by 0.011 >= min_delta = 0.0. New best score: 0.906


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.67it/s, v_num=199, train_rmse_step=0.889, val_rmse=0.888, train_rmse_epoch=0.892]

Metric val_rmse improved by 0.018 >= min_delta = 0.0. New best score: 0.888


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  5.70it/s, v_num=199, train_rmse_step=0.863, val_rmse=0.880, train_rmse_epoch=0.873]

Metric val_rmse improved by 0.008 >= min_delta = 0.0. New best score: 0.880


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  3.61it/s, v_num=199, train_rmse_step=0.856, val_rmse=0.874, train_rmse_epoch=0.858]

Metric val_rmse improved by 0.006 >= min_delta = 0.0. New best score: 0.874


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.60it/s, v_num=199, train_rmse_step=0.838, val_rmse=0.873, train_rmse_epoch=0.846]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.873


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.68it/s, v_num=199, train_rmse_step=0.838, val_rmse=0.870, train_rmse_epoch=0.835]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.870


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  5.73it/s, v_num=199, train_rmse_step=0.821, val_rmse=0.869, train_rmse_epoch=0.825]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.869


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  3.63it/s, v_num=199, train_rmse_step=0.798, val_rmse=0.869, train_rmse_epoch=0.805]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.869


Epoch 20: 100%|██████████| 2/2 [00:00<00:00,  4.99it/s, v_num=199, train_rmse_step=0.704, val_rmse=0.879, train_rmse_epoch=0.708]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.869. Signaling Trainer to stop.


Epoch 20: 100%|██████████| 2/2 [00:00<00:00,  4.92it/s, v_num=199, train_rmse_step=0.704, val_rmse=0.879, train_rmse_epoch=0.708]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 35.27it/s] 
Fold 1 Validation RMSE: 0.8686


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 34.73it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode



Fold 1 Test RMSE: 0.8865

Training Fold 2/10
Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.94it/s, v_num=200, train_rmse_step=0.975, val_rmse=0.997, train_rmse_epoch=1.020]

Metric val_rmse improved. New best score: 0.997


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  3.71it/s, v_num=200, train_rmse_step=0.924, val_rmse=0.950, train_rmse_epoch=0.949]

Metric val_rmse improved by 0.047 >= min_delta = 0.0. New best score: 0.950


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.56it/s, v_num=200, train_rmse_step=0.905, val_rmse=0.924, train_rmse_epoch=0.907]

Metric val_rmse improved by 0.026 >= min_delta = 0.0. New best score: 0.924


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  5.66it/s, v_num=200, train_rmse_step=0.879, val_rmse=0.914, train_rmse_epoch=0.882]

Metric val_rmse improved by 0.010 >= min_delta = 0.0. New best score: 0.914


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  3.10it/s, v_num=200, train_rmse_step=0.865, val_rmse=0.910, train_rmse_epoch=0.867]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.910


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  3.53it/s, v_num=200, train_rmse_step=0.857, val_rmse=0.905, train_rmse_epoch=0.855]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.905


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.41it/s, v_num=200, train_rmse_step=0.844, val_rmse=0.901, train_rmse_epoch=0.843]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.901


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.42it/s, v_num=200, train_rmse_step=0.836, val_rmse=0.899, train_rmse_epoch=0.835]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.899


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  3.45it/s, v_num=200, train_rmse_step=0.813, val_rmse=0.898, train_rmse_epoch=0.817]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.898


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  3.47it/s, v_num=200, train_rmse_step=0.803, val_rmse=0.897, train_rmse_epoch=0.806]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.897


Epoch 20: 100%|██████████| 2/2 [00:00<00:00,  5.05it/s, v_num=200, train_rmse_step=0.710, val_rmse=0.907, train_rmse_epoch=0.708]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.897. Signaling Trainer to stop.


Epoch 20: 100%|██████████| 2/2 [00:00<00:00,  4.98it/s, v_num=200, train_rmse_step=0.710, val_rmse=0.907, train_rmse_epoch=0.708]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 32.21it/s] 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 2 Validation RMSE: 0.8972
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 34.92it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 2 Test RMSE: 0.8811

Training Fold 3/10



  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  3.07it/s, v_num=201, train_rmse_step=0.981, val_rmse=0.947, train_rmse_epoch=1.050]

Metric val_rmse improved. New best score: 0.947


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  3.54it/s, v_num=201, train_rmse_step=0.930, val_rmse=0.911, train_rmse_epoch=0.943]

Metric val_rmse improved by 0.036 >= min_delta = 0.0. New best score: 0.911


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.65it/s, v_num=201, train_rmse_step=0.890, val_rmse=0.898, train_rmse_epoch=0.904]

Metric val_rmse improved by 0.013 >= min_delta = 0.0. New best score: 0.898


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.59it/s, v_num=201, train_rmse_step=0.876, val_rmse=0.882, train_rmse_epoch=0.887]

Metric val_rmse improved by 0.016 >= min_delta = 0.0. New best score: 0.882


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  5.55it/s, v_num=201, train_rmse_step=0.871, val_rmse=0.876, train_rmse_epoch=0.868]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.876


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  3.73it/s, v_num=201, train_rmse_step=0.852, val_rmse=0.874, train_rmse_epoch=0.855]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.874


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.65it/s, v_num=201, train_rmse_step=0.842, val_rmse=0.870, train_rmse_epoch=0.845]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.870


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  5.34it/s, v_num=201, train_rmse_step=0.822, val_rmse=0.870, train_rmse_epoch=0.822]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.870


Epoch 18: 100%|██████████| 2/2 [00:00<00:00,  3.63it/s, v_num=201, train_rmse_step=0.713, val_rmse=0.887, train_rmse_epoch=0.714]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.870. Signaling Trainer to stop.


Epoch 18: 100%|██████████| 2/2 [00:00<00:00,  3.59it/s, v_num=201, train_rmse_step=0.713, val_rmse=0.887, train_rmse_epoch=0.714]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 38.14it/s] 
Fold 3 Validation RMSE: 0.8697


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 32.09it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 3 Test RMSE: 0.8910

Training Fold 4/10



  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.55it/s, v_num=202, train_rmse_step=1.030, val_rmse=0.960, train_rmse_epoch=1.050]

Metric val_rmse improved. New best score: 0.960


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  4.17it/s, v_num=202, train_rmse_step=0.953, val_rmse=0.909, train_rmse_epoch=0.971]

Metric val_rmse improved by 0.051 >= min_delta = 0.0. New best score: 0.909


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.67it/s, v_num=202, train_rmse_step=0.939, val_rmse=0.897, train_rmse_epoch=0.929]

Metric val_rmse improved by 0.012 >= min_delta = 0.0. New best score: 0.897


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.90it/s, v_num=202, train_rmse_step=0.896, val_rmse=0.882, train_rmse_epoch=0.909]

Metric val_rmse improved by 0.016 >= min_delta = 0.0. New best score: 0.882


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  3.89it/s, v_num=202, train_rmse_step=0.894, val_rmse=0.878, train_rmse_epoch=0.889]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.878


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  5.52it/s, v_num=202, train_rmse_step=0.874, val_rmse=0.869, train_rmse_epoch=0.878]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.869


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.46it/s, v_num=202, train_rmse_step=0.866, val_rmse=0.866, train_rmse_epoch=0.867]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.866


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.25it/s, v_num=202, train_rmse_step=0.838, val_rmse=0.861, train_rmse_epoch=0.857]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.861


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  3.16it/s, v_num=202, train_rmse_step=0.846, val_rmse=0.860, train_rmse_epoch=0.847]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.860


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  4.86it/s, v_num=202, train_rmse_step=0.828, val_rmse=0.858, train_rmse_epoch=0.839]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.858


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  3.15it/s, v_num=202, train_rmse_step=0.817, val_rmse=0.857, train_rmse_epoch=0.831]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.857


Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  3.46it/s, v_num=202, train_rmse_step=0.826, val_rmse=0.855, train_rmse_epoch=0.823]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.855


Epoch 12: 100%|██████████| 2/2 [00:00<00:00,  3.64it/s, v_num=202, train_rmse_step=0.810, val_rmse=0.854, train_rmse_epoch=0.814]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.854


Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  5.32it/s, v_num=202, train_rmse_step=0.810, val_rmse=0.854, train_rmse_epoch=0.805]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.854


Epoch 23: 100%|██████████| 2/2 [00:00<00:00,  3.39it/s, v_num=202, train_rmse_step=0.722, val_rmse=0.873, train_rmse_epoch=0.711]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.854. Signaling Trainer to stop.


Epoch 23: 100%|██████████| 2/2 [00:00<00:00,  3.35it/s, v_num=202, train_rmse_step=0.722, val_rmse=0.873, train_rmse_epoch=0.711]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 34.59it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 4 Validation RMSE: 0.8538
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 34.89it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Fold 4 Test RMSE: 0.8829

Training Fold 5/10


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.74it/s, v_num=203, train_rmse_step=1.120, val_rmse=1.000, train_rmse_epoch=1.110]

Metric val_rmse improved. New best score: 1.000


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  3.69it/s, v_num=203, train_rmse_step=0.984, val_rmse=0.979, train_rmse_epoch=0.986]

Metric val_rmse improved by 0.021 >= min_delta = 0.0. New best score: 0.979


Epoch 2: 100%|██████████| 2/2 [00:03<00:00,  0.61it/s, v_num=203, train_rmse_step=0.930, val_rmse=0.948, train_rmse_epoch=0.951]

Metric val_rmse improved by 0.031 >= min_delta = 0.0. New best score: 0.948


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.78it/s, v_num=203, train_rmse_step=0.928, val_rmse=0.928, train_rmse_epoch=0.928]

Metric val_rmse improved by 0.020 >= min_delta = 0.0. New best score: 0.928


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  3.74it/s, v_num=203, train_rmse_step=0.901, val_rmse=0.911, train_rmse_epoch=0.904]

Metric val_rmse improved by 0.017 >= min_delta = 0.0. New best score: 0.911


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  3.53it/s, v_num=203, train_rmse_step=0.876, val_rmse=0.902, train_rmse_epoch=0.888]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.902


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  4.78it/s, v_num=203, train_rmse_step=0.877, val_rmse=0.896, train_rmse_epoch=0.875]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.896


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.20it/s, v_num=203, train_rmse_step=0.864, val_rmse=0.887, train_rmse_epoch=0.866]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.887


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  3.17it/s, v_num=203, train_rmse_step=0.844, val_rmse=0.883, train_rmse_epoch=0.853]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.883


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  3.28it/s, v_num=203, train_rmse_step=0.842, val_rmse=0.880, train_rmse_epoch=0.845]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.880


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  4.91it/s, v_num=203, train_rmse_step=0.840, val_rmse=0.879, train_rmse_epoch=0.837]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.879


Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  3.45it/s, v_num=203, train_rmse_step=0.832, val_rmse=0.877, train_rmse_epoch=0.829]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.877


Epoch 12: 100%|██████████| 2/2 [00:00<00:00,  3.48it/s, v_num=203, train_rmse_step=0.825, val_rmse=0.876, train_rmse_epoch=0.821]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.876


Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  3.44it/s, v_num=203, train_rmse_step=0.809, val_rmse=0.875, train_rmse_epoch=0.813]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.875


Epoch 14: 100%|██████████| 2/2 [00:00<00:00,  5.37it/s, v_num=203, train_rmse_step=0.810, val_rmse=0.875, train_rmse_epoch=0.806]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.875


Epoch 15: 100%|██████████| 2/2 [00:00<00:00,  3.50it/s, v_num=203, train_rmse_step=0.797, val_rmse=0.875, train_rmse_epoch=0.798]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.875


Epoch 18: 100%|██████████| 2/2 [00:00<00:00,  5.08it/s, v_num=203, train_rmse_step=0.763, val_rmse=0.875, train_rmse_epoch=0.775]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.875


Epoch 28: 100%|██████████| 2/2 [00:00<00:00,  3.41it/s, v_num=203, train_rmse_step=0.685, val_rmse=0.883, train_rmse_epoch=0.688]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.875. Signaling Trainer to stop.


Epoch 28: 100%|██████████| 2/2 [00:00<00:00,  3.38it/s, v_num=203, train_rmse_step=0.685, val_rmse=0.883, train_rmse_epoch=0.688]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 33.37it/s] 
Fold 5 Validation RMSE: 0.8748


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 31.75it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Fold 5 Test RMSE: 0.8856

Training Fold 6/10


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.35it/s, v_num=204, train_rmse_step=0.987, val_rmse=0.993, train_rmse_epoch=1.030]

Metric val_rmse improved. New best score: 0.993


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  5.65it/s, v_num=204, train_rmse_step=0.930, val_rmse=0.919, train_rmse_epoch=0.963]

Metric val_rmse improved by 0.074 >= min_delta = 0.0. New best score: 0.919


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.64it/s, v_num=204, train_rmse_step=0.921, val_rmse=0.901, train_rmse_epoch=0.919]

Metric val_rmse improved by 0.018 >= min_delta = 0.0. New best score: 0.901


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.88it/s, v_num=204, train_rmse_step=0.885, val_rmse=0.899, train_rmse_epoch=0.893]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.899


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  3.82it/s, v_num=204, train_rmse_step=0.876, val_rmse=0.889, train_rmse_epoch=0.881]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.889


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  5.50it/s, v_num=204, train_rmse_step=0.871, val_rmse=0.888, train_rmse_epoch=0.867]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.888


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.44it/s, v_num=204, train_rmse_step=0.858, val_rmse=0.884, train_rmse_epoch=0.859]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.884


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.34it/s, v_num=204, train_rmse_step=0.852, val_rmse=0.882, train_rmse_epoch=0.847]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.882


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  3.37it/s, v_num=204, train_rmse_step=0.839, val_rmse=0.880, train_rmse_epoch=0.838]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.880


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  4.79it/s, v_num=204, train_rmse_step=0.816, val_rmse=0.878, train_rmse_epoch=0.828]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.878


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  3.45it/s, v_num=204, train_rmse_step=0.821, val_rmse=0.878, train_rmse_epoch=0.821]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.878


Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  3.20it/s, v_num=204, train_rmse_step=0.808, val_rmse=0.878, train_rmse_epoch=0.812]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.878


Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  5.36it/s, v_num=204, train_rmse_step=0.775, val_rmse=0.877, train_rmse_epoch=0.794]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.877


Epoch 15: 100%|██████████| 2/2 [00:00<00:00,  3.55it/s, v_num=204, train_rmse_step=0.778, val_rmse=0.877, train_rmse_epoch=0.777]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.877


Epoch 25: 100%|██████████| 2/2 [00:00<00:00,  3.43it/s, v_num=204, train_rmse_step=0.669, val_rmse=0.889, train_rmse_epoch=0.678]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.877. Signaling Trainer to stop.


Epoch 25: 100%|██████████| 2/2 [00:00<00:00,  3.39it/s, v_num=204, train_rmse_step=0.669, val_rmse=0.889, train_rmse_epoch=0.678]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 32.97it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 6 Validation RMSE: 0.8775
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 29.38it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Fold 6 Test RMSE: 0.8875

Training Fold 7/10


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.36it/s, v_num=205, train_rmse_step=1.060, val_rmse=0.981, train_rmse_epoch=1.070]

Metric val_rmse improved. New best score: 0.981


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  5.48it/s, v_num=205, train_rmse_step=0.973, val_rmse=0.961, train_rmse_epoch=0.964]

Metric val_rmse improved by 0.021 >= min_delta = 0.0. New best score: 0.961


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.84it/s, v_num=205, train_rmse_step=0.913, val_rmse=0.944, train_rmse_epoch=0.928]

Metric val_rmse improved by 0.017 >= min_delta = 0.0. New best score: 0.944


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.73it/s, v_num=205, train_rmse_step=0.914, val_rmse=0.934, train_rmse_epoch=0.910]

Metric val_rmse improved by 0.010 >= min_delta = 0.0. New best score: 0.934


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  3.59it/s, v_num=205, train_rmse_step=0.887, val_rmse=0.925, train_rmse_epoch=0.890]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.925


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  5.30it/s, v_num=205, train_rmse_step=0.871, val_rmse=0.921, train_rmse_epoch=0.876]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.921


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.47it/s, v_num=205, train_rmse_step=0.860, val_rmse=0.915, train_rmse_epoch=0.863]

Metric val_rmse improved by 0.006 >= min_delta = 0.0. New best score: 0.915


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.68it/s, v_num=205, train_rmse_step=0.843, val_rmse=0.912, train_rmse_epoch=0.853]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.912


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  3.60it/s, v_num=205, train_rmse_step=0.851, val_rmse=0.907, train_rmse_epoch=0.845]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.907


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  5.50it/s, v_num=205, train_rmse_step=0.820, val_rmse=0.904, train_rmse_epoch=0.834]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.904


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  3.50it/s, v_num=205, train_rmse_step=0.828, val_rmse=0.902, train_rmse_epoch=0.826]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.902


Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  3.45it/s, v_num=205, train_rmse_step=0.822, val_rmse=0.900, train_rmse_epoch=0.817]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.900


Epoch 12: 100%|██████████| 2/2 [00:00<00:00,  3.47it/s, v_num=205, train_rmse_step=0.805, val_rmse=0.899, train_rmse_epoch=0.810]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.899


Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s, v_num=205, train_rmse_step=0.798, val_rmse=0.898, train_rmse_epoch=0.801]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.898


Epoch 14: 100%|██████████| 2/2 [00:00<00:00,  3.42it/s, v_num=205, train_rmse_step=0.793, val_rmse=0.897, train_rmse_epoch=0.793]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.897


Epoch 24: 100%|██████████| 2/2 [00:00<00:00,  3.41it/s, v_num=205, train_rmse_step=0.701, val_rmse=0.906, train_rmse_epoch=0.708]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.897. Signaling Trainer to stop.


Epoch 24: 100%|██████████| 2/2 [00:00<00:00,  3.38it/s, v_num=205, train_rmse_step=0.701, val_rmse=0.906, train_rmse_epoch=0.708]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 33.13it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 7 Validation RMSE: 0.8974
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 32.88it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Fold 7 Test RMSE: 0.8824

Training Fold 8/10


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.76it/s, v_num=206, train_rmse_step=1.170, val_rmse=0.982, train_rmse_epoch=1.120]

Metric val_rmse improved. New best score: 0.982


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.94it/s, v_num=206, train_rmse_step=0.962, val_rmse=0.946, train_rmse_epoch=0.983]

Metric val_rmse improved by 0.036 >= min_delta = 0.0. New best score: 0.946


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.67it/s, v_num=206, train_rmse_step=0.938, val_rmse=0.943, train_rmse_epoch=0.938]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.943


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  3.54it/s, v_num=206, train_rmse_step=0.925, val_rmse=0.928, train_rmse_epoch=0.930]

Metric val_rmse improved by 0.015 >= min_delta = 0.0. New best score: 0.928


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  5.23it/s, v_num=206, train_rmse_step=0.898, val_rmse=0.909, train_rmse_epoch=0.908]

Metric val_rmse improved by 0.018 >= min_delta = 0.0. New best score: 0.909


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.44it/s, v_num=206, train_rmse_step=0.891, val_rmse=0.903, train_rmse_epoch=0.891]

Metric val_rmse improved by 0.006 >= min_delta = 0.0. New best score: 0.903


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.28it/s, v_num=206, train_rmse_step=0.880, val_rmse=0.895, train_rmse_epoch=0.882]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.895


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  3.30it/s, v_num=206, train_rmse_step=0.862, val_rmse=0.889, train_rmse_epoch=0.870]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.889


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  4.81it/s, v_num=206, train_rmse_step=0.865, val_rmse=0.886, train_rmse_epoch=0.862]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.886


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  3.40it/s, v_num=206, train_rmse_step=0.853, val_rmse=0.880, train_rmse_epoch=0.854]

Metric val_rmse improved by 0.006 >= min_delta = 0.0. New best score: 0.880


Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  3.35it/s, v_num=206, train_rmse_step=0.848, val_rmse=0.878, train_rmse_epoch=0.845]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.878


Epoch 12: 100%|██████████| 2/2 [00:00<00:00,  3.44it/s, v_num=206, train_rmse_step=0.840, val_rmse=0.875, train_rmse_epoch=0.839]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.875


Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  5.17it/s, v_num=206, train_rmse_step=0.838, val_rmse=0.873, train_rmse_epoch=0.832]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.873


Epoch 14: 100%|██████████| 2/2 [00:00<00:00,  3.50it/s, v_num=206, train_rmse_step=0.815, val_rmse=0.872, train_rmse_epoch=0.825]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.872


Epoch 15: 100%|██████████| 2/2 [00:00<00:00,  3.53it/s, v_num=206, train_rmse_step=0.815, val_rmse=0.870, train_rmse_epoch=0.819]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.870


Epoch 16: 100%|██████████| 2/2 [00:00<00:00,  3.01it/s, v_num=206, train_rmse_step=0.813, val_rmse=0.868, train_rmse_epoch=0.811]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.868


Epoch 17: 100%|██████████| 2/2 [00:00<00:00,  4.49it/s, v_num=206, train_rmse_step=0.805, val_rmse=0.867, train_rmse_epoch=0.805]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.867


Epoch 18: 100%|██████████| 2/2 [00:00<00:00,  3.30it/s, v_num=206, train_rmse_step=0.799, val_rmse=0.867, train_rmse_epoch=0.798]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.867


Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  3.40it/s, v_num=206, train_rmse_step=0.799, val_rmse=0.867, train_rmse_epoch=0.792]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.867


Epoch 20: 100%|██████████| 2/2 [00:00<00:00,  3.43it/s, v_num=206, train_rmse_step=0.793, val_rmse=0.867, train_rmse_epoch=0.785]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.867


Epoch 30: 100%|██████████| 2/2 [00:00<00:00,  3.30it/s, v_num=206, train_rmse_step=0.704, val_rmse=0.874, train_rmse_epoch=0.707]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.867. Signaling Trainer to stop.


Epoch 30: 100%|██████████| 2/2 [00:00<00:00,  3.27it/s, v_num=206, train_rmse_step=0.704, val_rmse=0.874, train_rmse_epoch=0.707]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 37.51it/s] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 8 Validation RMSE: 0.8665
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 34.79it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 8 Test RMSE: 0.8840

Training Fold 9/10



  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.94it/s, v_num=207, train_rmse_step=1.100, val_rmse=1.020, train_rmse_epoch=1.130]

Metric val_rmse improved. New best score: 1.016


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  2.98it/s, v_num=207, train_rmse_step=0.985, val_rmse=0.983, train_rmse_epoch=0.979]

Metric val_rmse improved by 0.034 >= min_delta = 0.0. New best score: 0.983


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.38it/s, v_num=207, train_rmse_step=0.929, val_rmse=0.970, train_rmse_epoch=0.940]

Metric val_rmse improved by 0.013 >= min_delta = 0.0. New best score: 0.970


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.94it/s, v_num=207, train_rmse_step=0.925, val_rmse=0.951, train_rmse_epoch=0.925]

Metric val_rmse improved by 0.019 >= min_delta = 0.0. New best score: 0.951


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  4.00it/s, v_num=207, train_rmse_step=0.896, val_rmse=0.940, train_rmse_epoch=0.902]

Metric val_rmse improved by 0.010 >= min_delta = 0.0. New best score: 0.940


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  3.89it/s, v_num=207, train_rmse_step=0.894, val_rmse=0.932, train_rmse_epoch=0.890]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.932


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  5.55it/s, v_num=207, train_rmse_step=0.870, val_rmse=0.926, train_rmse_epoch=0.876]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.926


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  3.43it/s, v_num=207, train_rmse_step=0.869, val_rmse=0.920, train_rmse_epoch=0.868]

Metric val_rmse improved by 0.006 >= min_delta = 0.0. New best score: 0.920


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  3.37it/s, v_num=207, train_rmse_step=0.854, val_rmse=0.913, train_rmse_epoch=0.858]

Metric val_rmse improved by 0.007 >= min_delta = 0.0. New best score: 0.913


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  3.28it/s, v_num=207, train_rmse_step=0.850, val_rmse=0.910, train_rmse_epoch=0.849]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.910


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  4.63it/s, v_num=207, train_rmse_step=0.845, val_rmse=0.907, train_rmse_epoch=0.842]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.907


Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  3.19it/s, v_num=207, train_rmse_step=0.830, val_rmse=0.907, train_rmse_epoch=0.835]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.907


Epoch 12: 100%|██████████| 2/2 [00:00<00:00,  3.17it/s, v_num=207, train_rmse_step=0.818, val_rmse=0.905, train_rmse_epoch=0.828]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.905


Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  3.33it/s, v_num=207, train_rmse_step=0.819, val_rmse=0.905, train_rmse_epoch=0.820]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.905


Epoch 14: 100%|██████████| 2/2 [00:00<00:00,  4.66it/s, v_num=207, train_rmse_step=0.813, val_rmse=0.904, train_rmse_epoch=0.813]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.904


Epoch 15: 100%|██████████| 2/2 [00:00<00:00,  3.30it/s, v_num=207, train_rmse_step=0.808, val_rmse=0.904, train_rmse_epoch=0.807]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.904


Epoch 16: 100%|██████████| 2/2 [00:00<00:00,  3.35it/s, v_num=207, train_rmse_step=0.799, val_rmse=0.903, train_rmse_epoch=0.800]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.903


Epoch 17: 100%|██████████| 2/2 [00:00<00:00,  3.08it/s, v_num=207, train_rmse_step=0.790, val_rmse=0.903, train_rmse_epoch=0.792]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.903


Epoch 22: 100%|██████████| 2/2 [00:00<00:00,  4.74it/s, v_num=207, train_rmse_step=0.752, val_rmse=0.903, train_rmse_epoch=0.756]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.903


Epoch 32: 100%|██████████| 2/2 [00:00<00:00,  3.29it/s, v_num=207, train_rmse_step=0.677, val_rmse=0.912, train_rmse_epoch=0.673]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.903. Signaling Trainer to stop.


Epoch 32: 100%|██████████| 2/2 [00:00<00:00,  3.26it/s, v_num=207, train_rmse_step=0.677, val_rmse=0.912, train_rmse_epoch=0.673]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 37.55it/s] 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 9 Validation RMSE: 0.9027
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 29.48it/s] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Fold 9 Test RMSE: 0.8863

Training Fold 10/10


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | DMFModel | 1.5 M  | train
-------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.055     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.46it/s, v_num=208, train_rmse_step=1.040, val_rmse=0.981, train_rmse_epoch=1.070]

Metric val_rmse improved. New best score: 0.981


Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  5.80it/s, v_num=208, train_rmse_step=0.959, val_rmse=0.930, train_rmse_epoch=0.970]

Metric val_rmse improved by 0.050 >= min_delta = 0.0. New best score: 0.930


Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  3.81it/s, v_num=208, train_rmse_step=0.910, val_rmse=0.922, train_rmse_epoch=0.919]

Metric val_rmse improved by 0.008 >= min_delta = 0.0. New best score: 0.922


Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  3.30it/s, v_num=208, train_rmse_step=0.892, val_rmse=0.905, train_rmse_epoch=0.904]

Metric val_rmse improved by 0.017 >= min_delta = 0.0. New best score: 0.905


Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  2.78it/s, v_num=208, train_rmse_step=0.886, val_rmse=0.901, train_rmse_epoch=0.882]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.901


Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  4.51it/s, v_num=208, train_rmse_step=0.866, val_rmse=0.893, train_rmse_epoch=0.871]

Metric val_rmse improved by 0.008 >= min_delta = 0.0. New best score: 0.893


Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  3.05it/s, v_num=208, train_rmse_step=0.855, val_rmse=0.890, train_rmse_epoch=0.858]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.890


Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  2.84it/s, v_num=208, train_rmse_step=0.851, val_rmse=0.886, train_rmse_epoch=0.850]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.886


Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  3.14it/s, v_num=208, train_rmse_step=0.829, val_rmse=0.883, train_rmse_epoch=0.839]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.883


Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  4.33it/s, v_num=208, train_rmse_step=0.829, val_rmse=0.882, train_rmse_epoch=0.830]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.882


Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  3.14it/s, v_num=208, train_rmse_step=0.816, val_rmse=0.879, train_rmse_epoch=0.822]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.879


Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  2.96it/s, v_num=208, train_rmse_step=0.811, val_rmse=0.878, train_rmse_epoch=0.814]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.878


Epoch 12: 100%|██████████| 2/2 [00:03<00:00,  0.58it/s, v_num=208, train_rmse_step=0.814, val_rmse=0.877, train_rmse_epoch=0.806]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.877


Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  5.93it/s, v_num=208, train_rmse_step=0.787, val_rmse=0.876, train_rmse_epoch=0.797]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.876


Epoch 14: 100%|██████████| 2/2 [00:00<00:00,  3.58it/s, v_num=208, train_rmse_step=0.779, val_rmse=0.875, train_rmse_epoch=0.788]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.875


Epoch 24: 100%|██████████| 2/2 [00:00<00:00,  3.42it/s, v_num=208, train_rmse_step=0.706, val_rmse=0.882, train_rmse_epoch=0.700]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.875. Signaling Trainer to stop.


Epoch 24: 100%|██████████| 2/2 [00:00<00:00,  3.38it/s, v_num=208, train_rmse_step=0.706, val_rmse=0.882, train_rmse_epoch=0.700]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 36.59it/s] 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 10 Validation RMSE: 0.8755
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 38.76it/s] 
Fold 10 Test RMSE: 0.8854

Cross-Validation Results
Mean Validation RMSE: 0.8784 ± 0.0150
Mean Test RMSE: 0.8853 ± 0.0027


In [None]:
# Ensemble evaluation: run every fold's trained model through
# `trainer.predict()`, average the resulting prediction matrices element-wise,
# and score the average on the observed (non-zero) entries of `test_matrix`.
print(f"\n{'='*70}")
print("Generating Ensemble Predictions using trainer.predict()")
print(f"{'='*70}")

# The Trainer configuration does not depend on the fold, so build it once.
# Re-creating it inside the loop only repeats the accelerator set-up and its
# "GPU available / TPU available ..." log banner for every fold.
predict_trainer = pl.Trainer(
    accelerator="gpu" if device == "cuda" else "cpu",
    devices=1,
    logger=False,
    enable_progress_bar=False
)

all_fold_predictions = []

for fold_idx, (module, datamodule) in enumerate(fold_modules, 1):
    print(f"\nFold {fold_idx}: Generating predictions...")

    predictions = predict_trainer.predict(module, datamodule=datamodule)

    # Only the first predict batch is read; it is expected to carry the whole
    # prediction matrix under the 'predictions_matrix' key.
    pred_matrix = predictions[0]['predictions_matrix']

    # Fail early with a readable message instead of mis-indexing later on.
    if tuple(pred_matrix.shape) != tuple(test_matrix.shape):
        raise ValueError(
            f"Fold {fold_idx}: prediction matrix shape {tuple(pred_matrix.shape)} "
            f"does not match test matrix shape {tuple(test_matrix.shape)}"
        )

    all_fold_predictions.append(pred_matrix)

    print(f"Fold {fold_idx}: Generated predictions matrix of shape {pred_matrix.shape}")

# Averaging an empty list would silently yield NaN (plus a RuntimeWarning)
# and then crash on indexing below; surface the real problem instead.
if not all_fold_predictions:
    raise RuntimeError(
        "No fold predictions were generated: `fold_modules` is empty. "
        "Run the cross-validation training cell first."
    )

# Compute ensemble average
print(f"\n{'='*70}")
print("Computing Ensemble Predictions")
print(f"{'='*70}")

# Element-wise mean over the fold axis.
ensemble_prediction_matrix = np.mean(all_fold_predictions, axis=0)

# Compute RMSE only for the test positive positions
# (entries equal to 0 were NaN in the raw data, i.e. unobserved ratings).
test_pos_idx = np.argwhere(test_matrix > 0)
test_rows, test_cols = test_pos_idx[:, 0], test_pos_idx[:, 1]
test_targets = test_matrix[test_rows, test_cols]
ensemble_predictions = ensemble_prediction_matrix[test_rows, test_cols]
ensemble_rmse = np.sqrt(mean_squared_error(test_targets, ensemble_predictions))

print(f"Ensemble Test RMSE: {ensemble_rmse:.4f}")
print(f"Number of predictions: {len(ensemble_predictions)}")
print(f"{'='*70}")

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Generating Ensemble Predictions using trainer.predict()

Fold 1: Generating predictions...


/home/ali/VSCodeProjects/iasd/data-science-lab/assignment1-2025-wecare/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 1: Generated predictions matrix of shape (610, 4980)

Fold 2: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 2: Generated predictions matrix of shape (610, 4980)

Fold 3: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 3: Generated predictions matrix of shape (610, 4980)

Fold 4: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 4: Generated predictions matrix of shape (610, 4980)

Fold 5: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 5: Generated predictions matrix of shape (610, 4980)

Fold 6: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 6: Generated predictions matrix of shape (610, 4980)

Fold 7: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 7: Generated predictions matrix of shape (610, 4980)

Fold 8: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 8: Generated predictions matrix of shape (610, 4980)

Fold 9: Generating predictions...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Fold 9: Generated predictions matrix of shape (610, 4980)

Fold 10: Generating predictions...
Fold 10: Generated predictions matrix of shape (610, 4980)

Computing Ensemble Predictions
Ensemble Test RMSE: 0.8727
Number of predictions: 31598
