In [6]:
#!/usr/bin/env python
# coding: utf-8

import os
import json
import pickle
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from sklearn.preprocessing import normalize

# For SVD on rating matrix
from scipy.sparse import coo_matrix
from sklearn.decomposition import TruncatedSVD

# Fix for CUDA multiprocessing error:
import torch.multiprocessing as mp
try:
    mp.set_start_method('spawn', force=True)
except RuntimeError:
    pass

os.environ["TOKENIZERS_PARALLELISM"] = "false"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---------------------------------------------------------------------
# Configuration & Hyperparameters
# ---------------------------------------------------------------------
DATA_PATH_INTERACTIONS = "./All_Beauty.jsonl"
DATA_PATH_ITEMS = "./meta_All_Beauty.jsonl"
PRETRAINED_TEXT_MODEL = "distilbert-base-uncased"

# The transformer model will fuse features into this hidden dimension.
HIDDEN_DIM = 512  
BATCH_SIZE = 2048
LR = 1e-4
EPOCHS = 15             # Increased epochs
NUM_FOLDS = 5

SVD_COMPONENTS = 2000      # Number of SVD components

LAMBDA_CLS = 1.5         # Classification loss weight
# Cache directories
os.makedirs("advanced_cache", exist_ok=True)
DATA_CACHE_PATH = "advanced_cache/data.pkl"
EMBED_CACHE_PATH = "advanced_cache/embeddings.pkl"
FEATURE_CACHE_PATH = "advanced_cache/features.pkl"

# ---------------------------------------------------------------------
# Data Loading and Caching with JSON Error Handling
# ---------------------------------------------------------------------
print("Loading cached data mappings...")
if os.path.exists(DATA_CACHE_PATH):
    with open(DATA_CACHE_PATH, 'rb') as f:
        data_cache = pickle.load(f)
    user2id = data_cache["user2id"]
    item2id = data_cache["item2id"]
    interactions_list = data_cache["interactions_list"]
    item_metadata = data_cache["item_metadata"]
else:
    user2id = {}
    item2id = {}
    interactions_list = []
    print("Loading Interactions...")
    i = 0
    with open(DATA_PATH_INTERACTIONS, 'r') as f:
        for line in tqdm(f, desc="Reading interactions", unit="line"):
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Skipping line {i} due to JSONDecodeError: {e}")
                i += 1
                continue
            user_id = record.get("user_id")
            item_id = record.get("parent_asin")
            rating = record.get("rating", None)
            review_text = record.get("text", "")
            review_title = record.get("title", "")
            verified = record.get("verified_purchase", False)
            timestamp = record.get("timestamp", None)
            helpful_votes = record.get("helpful_votes", 0)
            if user_id not in user2id:
                user2id[user_id] = len(user2id)
            if item_id not in item2id:
                item2id[item_id] = len(item2id)
            interactions_list.append({
                "user_id": user2id[user_id],
                "item_id": item2id[item_id],
                "rating": float(rating) if rating is not None else 0.0,
                "review_text": review_text,
                "review_title": review_title,
                "verified": verified,
                "timestamp": timestamp,
                "helpful_votes": helpful_votes
            })
            i += 1
    print("Total Users:", len(user2id))
    print("Total Items:", len(item2id))
    print("Total Interactions:", len(interactions_list))
    print("Loading Item Metadata...")
    item_metadata = [None] * len(item2id)
    with open(DATA_PATH_ITEMS, 'r', encoding='utf-8') as f:
        for j, line in enumerate(tqdm(f, desc="Reading item metadata", unit="line")):
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Skipping metadata line {j} due to JSONDecodeError: {e}")
                continue
            parent_asin = record.get("parent_asin")
            if parent_asin not in item2id:
                continue
            idx = item2id[parent_asin]
            title = record.get("title", "")
            avg_rating = record.get("average_rating", 0.0)
            rating_num = record.get("rating_number", 0.0)
            price = record.get("price", 0.0)
            item_metadata[idx] = {
                "title": title,
                "avg_rating": avg_rating,
                "rating_num": rating_num,
                "price": 0 if price is None else price
            }
    data_cache = {
        "user2id": user2id,
        "item2id": item2id,
        "interactions_list": interactions_list,
        "item_metadata": item_metadata
    }
    with open(DATA_CACHE_PATH, 'wb') as f:
        pickle.dump(data_cache, f)
    print("Data cached.")

# ---------------------------------------------------------------------
# Text Embeddings Using Pretrained Transformer (BERT)
# ---------------------------------------------------------------------
if os.path.exists(EMBED_CACHE_PATH):
    with open(EMBED_CACHE_PATH, 'rb') as f:
        embed_cache = pickle.load(f)
    user_text_embeds = embed_cache["user_text_embeds"]
    item_text_embeds = embed_cache["item_text_embeds"]
else:
    tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_TEXT_MODEL)
    text_model = AutoModel.from_pretrained(PRETRAINED_TEXT_MODEL).to(device)
    text_model.eval()
    @torch.no_grad()
    def encode_texts(texts, max_length=64):
        if len(texts) == 0:
            return torch.zeros(text_model.config.hidden_size)
        inputs = tokenizer(texts, padding=True, truncation=True, max_length=max_length, return_tensors='pt').to(device)
        outputs = text_model(**inputs)
        cls_embeds = outputs.last_hidden_state[:, 0, :]
        return cls_embeds.mean(dim=0).cpu()
    print("Encoding Item Titles...")
    item_text_embeds = torch.zeros((len(item2id), text_model.config.hidden_size))
    for i in tqdm(range(len(item2id)), desc="Item title embeddings"):
        meta = item_metadata[i]
        if meta is None:
            emb = torch.zeros(text_model.config.hidden_size)
        else:
            emb = encode_texts([meta["title"]])
        item_text_embeds[i] = emb
    print("Aggregating User Review Embeddings...")
    user_reviews_map = {uid: [] for uid in range(len(user2id))}
    for inter in interactions_list:
        uid = inter["user_id"]
        user_reviews_map[uid].append(inter["review_text"])
    user_text_embeds = torch.zeros((len(user2id), text_model.config.hidden_size))
    for uid in tqdm(user_reviews_map.keys(), desc="User review embeddings"):
        revs = user_reviews_map[uid]
        sampled_revs = revs[:5]
        if sampled_revs:
            emb = encode_texts(sampled_revs)
        else:
            emb = torch.zeros(text_model.config.hidden_size)
        user_text_embeds[uid] = emb
    embed_cache = {
        "user_text_embeds": user_text_embeds,
        "item_text_embeds": item_text_embeds
    }
    with open(EMBED_CACHE_PATH, 'wb') as f:
        pickle.dump(embed_cache, f)
    print("Embeddings cached.")

# ---------------------------------------------------------------------
# Feature Engineering: Multi-Modal Item Features & User Features
# ---------------------------------------------------------------------
if os.path.exists(FEATURE_CACHE_PATH):
    with open(FEATURE_CACHE_PATH, 'rb') as f:
        feature_cache = pickle.load(f)
    user_features = feature_cache["user_features"]      # shape: (num_users, text_dim)
    item_text_features = feature_cache["item_text_features"]  # shape: (num_items, text_dim)
    item_numeric_features = feature_cache["item_numeric_features"]  # shape: (num_items, 3)
    item_svd_features = feature_cache["item_svd_features"]  # shape: (num_items, SVD_COMPONENTS)
else:
    for i, meta in enumerate(item_metadata):
        if meta is None:
            item_metadata[i] = {"avg_rating": 0.0, "rating_num": 0.0, "price": 0.0, "title":""}
    item_avg_ratings = torch.tensor([m["avg_rating"] for m in item_metadata], dtype=torch.float32)
    item_rating_counts = torch.tensor([m["rating_num"] for m in item_metadata], dtype=torch.float32)
    item_prices = torch.tensor([m["price"] for m in item_metadata], dtype=torch.float32)
    item_avg_ratings = (item_avg_ratings - item_avg_ratings.mean()) / (item_avg_ratings.std() + 1e-9)
    item_rating_counts = (item_rating_counts - item_rating_counts.mean()) / (item_rating_counts.std() + 1e-9)
    item_prices = (item_prices - item_prices.mean()) / (item_prices.std() + 1e-9)
    item_numeric_features = torch.cat([
        item_avg_ratings.unsqueeze(-1),
        item_rating_counts.unsqueeze(-1),
        item_prices.unsqueeze(-1)
    ], dim=-1)
    print("Computing SVD latent factors for items...")
    num_items = len(item2id)
    num_users = len(user2id)
    rows, cols, data_vals = [], [], []
    for inter in interactions_list:
        rows.append(inter["item_id"])
        cols.append(inter["user_id"])
        data_vals.append(inter["rating"])
    rating_matrix = coo_matrix((data_vals, (rows, cols)), shape=(num_items, num_users))
    svd = TruncatedSVD(n_components=SVD_COMPONENTS, random_state=42)
    svd_features = svd.fit_transform(rating_matrix)
    item_svd_features = torch.tensor(svd_features, dtype=torch.float32)
    print("SVD latent factors added. Dimension:", item_svd_features.size(1))
    item_text_features = item_text_embeds
    user_features = user_text_embeds
    feature_cache = {
        "user_features": user_features,
        "item_text_features": item_text_features,
        "item_numeric_features": item_numeric_features,
        "item_svd_features": item_svd_features
    }
    with open(FEATURE_CACHE_PATH, 'wb') as f:
        pickle.dump(feature_cache, f)
    print("Features cached.")

# ---------------------------------------------------------------------
# Create Fused Item Features by Concatenating Modalities
# ---------------------------------------------------------------------
# For each item, we concatenate text features, numeric features, and SVD features.
# The resulting item feature dimension is: text_dim + 3 + SVD_COMPONENTS.
text_dim = item_text_features.shape[1]
item_features = torch.cat([item_text_features, item_numeric_features, item_svd_features], dim=-1)

# ---------------------------------------------------------------------
# Define a PyTorch Dataset for Interactions
# ---------------------------------------------------------------------
class InteractionDataset(Dataset):
    def __init__(self, interactions, user_features, item_features):
        self.interactions = interactions
        self.user_features = user_features  # shape: (num_users, text_dim)
        self.item_features = item_features  # shape: (num_items, text_dim + 3 + SVD_COMPONENTS)
        
    def __len__(self):
        return len(self.interactions)
    
    def __getitem__(self, idx):
        inter = self.interactions[idx]
        user_id = inter["user_id"]
        item_id = inter["item_id"]
        rating = inter["rating"]
        user_feat = self.user_features[user_id]
        item_feat = self.item_features[item_id]
        # For classification, the target class is rating-1 (assuming ratings in {1,2,3,4,5})
        target_cls = int(rating) - 1 if rating >= 1 else 0
        return user_feat, item_feat, rating, target_cls

# ---------------------------------------------------------------------
# Transformer Fusion Recommender Model
# ---------------------------------------------------------------------
class TransformerFusionRecommender(nn.Module):
    def __init__(self, user_dim, item_dim, hidden_dim, num_layers=2, num_heads=4, num_classes=5):
        super().__init__()
        # Project input features to hidden_dim
        self.user_proj = nn.Linear(user_dim, hidden_dim)
        self.item_proj = nn.Linear(item_dim, hidden_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads, batch_first=True)
        self.user_transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.item_transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        # Cross-attention: user queries attend to item representation
        self.cross_attn = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=num_heads, batch_first=True)
        self.fusion_linear = nn.Linear(2 * hidden_dim, hidden_dim)
        self.regressor = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )
        self.classifier = nn.Linear(hidden_dim, num_classes)
        
    def forward(self, user_feat, item_feat):
        # user_feat: (batch, user_dim), item_feat: (batch, item_dim)
        user_h = self.user_proj(user_feat)   # (batch, hidden_dim)
        item_h = self.item_proj(item_feat)   # (batch, hidden_dim)
        # Add dummy sequence dimension
        user_h = user_h.unsqueeze(1)  # (batch, 1, hidden_dim)
        item_h = item_h.unsqueeze(1)  # (batch, 1, hidden_dim)
        user_enc = self.user_transformer(user_h)  # (batch, 1, hidden_dim)
        item_enc = self.item_transformer(item_h)  # (batch, 1, hidden_dim)
        # Cross-attention: use user_enc as query, item_enc as key/value
        cross_out, _ = self.cross_attn(query=user_enc, key=item_enc, value=item_enc)
        # Squeeze sequence dimension
        user_enc = user_enc.squeeze(1)
        cross_out = cross_out.squeeze(1)
        fused = self.fusion_linear(torch.cat([user_enc, cross_out], dim=-1))
        reg_out = self.regressor(fused).squeeze(-1)
        cls_logits = self.classifier(fused)
        return reg_out, cls_logits

# ---------------------------------------------------------------------
# Training and Evaluation Functions for the New Model
# ---------------------------------------------------------------------
def train_epoch_transformer(model, optimizer, dataloader, criterion_reg, criterion_cls):
    model.train()
    total_loss = 0.0
    for user_feat, item_feat, target_reg, target_cls in tqdm(dataloader, desc="Training epoch", unit="batch"):
        user_feat = user_feat.to(device)
        item_feat = item_feat.to(device)
        target_reg = target_reg.to(device)
        target_cls = target_cls.to(device)
        optimizer.zero_grad()
        reg_pred, cls_logits = model(user_feat, item_feat)
        loss_reg = criterion_reg(reg_pred, target_reg)
        loss_cls = criterion_cls(cls_logits, target_cls)
        loss = loss_reg + LAMBDA_CLS * loss_cls
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * user_feat.size(0)
    return total_loss / len(dataloader.dataset)

@torch.no_grad()
def evaluate_transformer(model, dataloader, criterion_reg, criterion_cls):
    model.eval()
    total_loss = 0.0
    preds = []
    targets = []
    for user_feat, item_feat, target_reg, target_cls in tqdm(dataloader, desc="Evaluating", unit="batch"):
        user_feat = user_feat.to(device)
        item_feat = item_feat.to(device)
        target_reg = target_reg.to(device)
        reg_pred, cls_logits = model(user_feat, item_feat)
        loss_reg = criterion_reg(reg_pred, target_reg)
        total_loss += loss_reg.item() * user_feat.size(0)
        preds.append(reg_pred.detach().cpu().numpy())
        targets.append(target_reg.detach().cpu().numpy())
    preds = np.concatenate(preds)
    targets = np.concatenate(targets)
    print(preds)
    print(targets)
    mae = mean_absolute_error(targets, preds)
    rmse = root_mean_squared_error(targets, preds)
    return mae, rmse, total_loss / len(dataloader.dataset)

# ---------------------------------------------------------------------
# Main Training Loop with Cross-Validation for the Transformer Model
# ---------------------------------------------------------------------
# Prepare complete item features and user features (precomputed)
user_feats = user_features  # shape: (num_users, text_dim)
# For items, use our concatenated features
item_feats = item_features  # shape: (num_items, text_dim + 3 + SVD_COMPONENTS)

# Create a list of interaction indices for cross-validation
all_indices = np.arange(len(interactions_list))
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
all_mae = []
all_rmse = []

criterion_reg = nn.SmoothL1Loss()  # Huber loss
criterion_cls = nn.CrossEntropyLoss()

for fold, (train_idx, test_idx) in enumerate(kf.split(all_indices)):
    print(f"Fold {fold+1}/{NUM_FOLDS}")
    train_inter = [interactions_list[i] for i in train_idx]
    test_inter = [interactions_list[i] for i in test_idx]
    
    train_dataset = InteractionDataset(train_inter, user_feats, item_feats)
    test_dataset = InteractionDataset(test_inter, user_feats, item_feats)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    # Define dimensions for the transformer model:
    user_dim = user_feats.shape[1]          # e.g. text_dim from BERT
    item_dim = item_feats.shape[1]          # text_dim + 3 + SVD_COMPONENTS
    model = TransformerFusionRecommender(user_dim, item_dim, hidden_dim=HIDDEN_DIM,
                                          num_layers=2, num_heads=4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=LR)
    
    best_val_mae = float('inf')
    early_stop_patience = 5
    no_improve_epochs = 0
    
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")
        train_loss = train_epoch_transformer(model, optimizer, train_loader, criterion_reg, criterion_cls)
        val_mae, val_rmse, val_loss = evaluate_transformer(model, test_loader, criterion_reg, criterion_cls)
        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val MAE:{val_mae:.4f} | Val RMSE: {val_rmse:.4f}")
        if val_mae < best_val_mae:
            best_val_mae = val_mae
            no_improve_epochs = 0
        else:
            no_improve_epochs += 1
            if no_improve_epochs >= early_stop_patience:
                print("Early stopping triggered.")
                break
    fold_mae, fold_rmse, _ = evaluate_transformer(model, test_loader, criterion_reg, criterion_cls)
    print(f"Fold {fold+1} Final: MAE: {fold_mae:.4f}, RMSE: {fold_rmse:.4f}")
    all_mae.append(fold_mae)
    all_rmse.append(fold_rmse)

print("Cross-Validation Results:")
print("MAE per fold:", all_mae)
print("RMSE per fold:", all_rmse)
print("Mean MAE:", np.nanmean(all_mae), "Mean RMSE:", np.nanmean(all_rmse))


  from .autonotebook import tqdm as notebook_tqdm


Loading cached data mappings...
Loading Interactions...


Reading interactions: 701528line [00:02, 344268.15line/s]


Total Users: 631986
Total Items: 112565
Total Interactions: 701528
Loading Item Metadata...


Reading item metadata: 112590line [00:00, 167269.42line/s]


Data cached.
Encoding Item Titles...


Item title embeddings: 100%|██████████| 112565/112565 [26:37<00:00, 70.45it/s]


Aggregating User Review Embeddings...


User review embeddings: 100%|██████████| 631986/631986 [2:26:42<00:00, 71.79it/s]  


Embeddings cached.
Computing SVD latent factors for items...
SVD latent factors added. Dimension: 2000
Features cached.
Fold 1/5
Epoch 1/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.04batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.57batch/s]


[4.955543  3.7649174 4.893451  ... 1.2620151 5.137888  4.945681 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 1: Train Loss: 1.7153 | Val Loss: 0.3093 | Val MAE:0.5541 | Val RMSE: 0.9297
Epoch 2/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.05batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.46batch/s]


[4.816468  3.5885997 4.64817   ... 1.7349715 4.9763184 4.8589597]
[5. 3. 5. ... 1. 5. 5.]
Epoch 2: Train Loss: 1.4503 | Val Loss: 0.3008 | Val MAE:0.5706 | Val RMSE: 0.9018
Epoch 3/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.05batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.06batch/s]


[4.999124  4.286536  4.9519043 ... 1.9447325 5.0959206 4.8945594]
[5. 3. 5. ... 1. 5. 5.]
Epoch 3: Train Loss: 1.4158 | Val Loss: 0.3441 | Val MAE:0.5837 | Val RMSE: 1.0131
Epoch 4/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.06batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.84batch/s]


[4.911625  2.5485356 4.7092085 ... 1.2520556 4.9519386 4.7782736]
[5. 3. 5. ... 1. 5. 5.]
Epoch 4: Train Loss: 1.3894 | Val Loss: 0.2887 | Val MAE:0.5395 | Val RMSE: 0.8915
Epoch 5/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.03batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.81batch/s]


[4.930895  2.546204  4.6474004 ... 1.394331  5.0337043 4.851374 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 5: Train Loss: 1.3657 | Val Loss: 0.2780 | Val MAE:0.5285 | Val RMSE: 0.8668
Epoch 6/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.04batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.69batch/s]


[4.9707837 3.831585  4.7468524 ... 1.5616516 5.0746193 4.9240613]
[5. 3. 5. ... 1. 5. 5.]
Epoch 6: Train Loss: 1.3477 | Val Loss: 0.2785 | Val MAE:0.5218 | Val RMSE: 0.8761
Epoch 7/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.06batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.77batch/s]


[4.9082847 2.8354352 4.702272  ... 1.2775056 4.9698467 4.931593 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 7: Train Loss: 1.3352 | Val Loss: 0.2753 | Val MAE:0.5115 | Val RMSE: 0.8731
Epoch 8/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.00batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.40batch/s]


[4.9387875 3.8229792 4.730012  ... 1.3804493 5.052318  5.026682 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 8: Train Loss: 1.3183 | Val Loss: 0.2671 | Val MAE:0.5158 | Val RMSE: 0.8507
Epoch 9/15


Training epoch: 100%|██████████| 275/275 [00:56<00:00,  4.85batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.09batch/s]


[4.9833245 3.8936172 4.785772  ... 1.5529944 5.052036  4.9711294]
[5. 3. 5. ... 1. 5. 5.]
Epoch 9: Train Loss: 1.3048 | Val Loss: 0.2872 | Val MAE:0.5112 | Val RMSE: 0.9086
Epoch 10/15


Training epoch: 100%|██████████| 275/275 [00:55<00:00,  4.96batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.59batch/s]


[4.8349085 3.9707623 4.640236  ... 1.1872514 4.915089  4.787477 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 10: Train Loss: 1.2920 | Val Loss: 0.2595 | Val MAE:0.5032 | Val RMSE: 0.8559
Epoch 11/15


Training epoch: 100%|██████████| 275/275 [00:55<00:00,  4.99batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.22batch/s]


[4.9681396 4.135741  4.5304623 ... 1.1758865 5.056633  4.941602 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 11: Train Loss: 1.2787 | Val Loss: 0.2704 | Val MAE:0.5040 | Val RMSE: 0.8675
Epoch 12/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.01batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.36batch/s]


[5.066119  3.074925  4.461817  ... 1.2649072 5.104216  5.063979 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 12: Train Loss: 1.2636 | Val Loss: 0.2679 | Val MAE:0.5084 | Val RMSE: 0.8613
Epoch 13/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.02batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.61batch/s]


[4.8425794 2.6166532 4.3860617 ... 1.2821496 4.97205   4.8727374]
[5. 3. 5. ... 1. 5. 5.]
Epoch 13: Train Loss: 1.2525 | Val Loss: 0.2748 | Val MAE:0.5291 | Val RMSE: 0.8692
Epoch 14/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.02batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.68batch/s]


[4.9787908 3.2819047 4.462974  ... 1.239338  5.069059  4.889463 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 14: Train Loss: 1.2382 | Val Loss: 0.2605 | Val MAE:0.5071 | Val RMSE: 0.8378
Epoch 15/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.02batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.46batch/s]


[5.0229635  2.7474217  4.5078616  ... 0.93238974 5.0712485  4.9784136 ]
[5. 3. 5. ... 1. 5. 5.]
Epoch 15: Train Loss: 1.2223 | Val Loss: 0.2971 | Val MAE:0.5253 | Val RMSE: 0.9289
Early stopping triggered.


Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.47batch/s]


[5.0229635  2.7474217  4.5078616  ... 0.93238974 5.0712485  4.9784136 ]
[5. 3. 5. ... 1. 5. 5.]
Fold 1 Final: MAE: 0.5253, RMSE: 0.9289
Fold 2/5
Epoch 1/15


Training epoch: 100%|██████████| 275/275 [00:54<00:00,  5.01batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.76batch/s]


[3.844799  4.9145412 4.767411  ... 4.918138  1.2729757 3.8364959]
[5. 5. 5. ... 5. 1. 4.]
Epoch 1: Train Loss: 1.7080 | Val Loss: 0.3262 | Val MAE:0.6148 | Val RMSE: 0.9332
Epoch 2/15


Training epoch: 100%|██████████| 275/275 [00:55<00:00,  4.95batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 21.12batch/s]


[4.2154784 5.10436   4.9409213 ... 5.111803  1.16456   4.2900257]
[5. 5. 5. ... 5. 1. 4.]
Epoch 2: Train Loss: 1.4545 | Val Loss: 0.2994 | Val MAE:0.5373 | Val RMSE: 0.9227
Epoch 3/15


Training epoch: 100%|██████████| 275/275 [00:56<00:00,  4.89batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.26batch/s]


[3.7165382 4.9361725 4.680546  ... 4.960857  1.2692426 3.763457 ]
[5. 5. 5. ... 5. 1. 4.]
Epoch 3: Train Loss: 1.4154 | Val Loss: 0.2848 | Val MAE:0.5433 | Val RMSE: 0.8729
Epoch 4/15


Training epoch: 100%|██████████| 275/275 [00:56<00:00,  4.86batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.72batch/s]


[4.096775  5.016957  4.85748   ... 5.082095  1.3199866 4.2662296]
[5. 5. 5. ... 5. 1. 4.]
Epoch 4: Train Loss: 1.3877 | Val Loss: 0.2812 | Val MAE:0.5155 | Val RMSE: 0.8885
Epoch 5/15


Training epoch: 100%|██████████| 275/275 [00:56<00:00,  4.88batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.27batch/s]


[3.6428125 4.907384  4.691933  ... 4.936069  1.0946246 3.8731773]
[5. 5. 5. ... 5. 1. 4.]
Epoch 5: Train Loss: 1.3669 | Val Loss: 0.3033 | Val MAE:0.5616 | Val RMSE: 0.9124
Epoch 6/15


Training epoch: 100%|██████████| 275/275 [00:56<00:00,  4.84batch/s]
Evaluating: 100%|██████████| 69/69 [00:03<00:00, 20.49batch/s]


[4.155861  5.1246815 4.9393806 ... 5.1910467 1.0759174 4.3416276]
[5. 5. 5. ... 5. 1. 4.]
Epoch 6: Train Loss: 1.3503 | Val Loss: 0.2886 | Val MAE:0.5232 | Val RMSE: 0.9073
Epoch 7/15


Training epoch:  80%|████████  | 220/275 [00:46<00:11,  4.65batch/s]

: 

In [None]:
Cross-Validation Results:
MAE per fold: [0.49213757361534505, 0.5203920227670602, 0.5501907572041689, 0.5028691199894604, 0.5111886623057732]
RMSE per fold: [0.8844305602048712, 0.9108551409362662, 0.9460393990431739, 0.8631044222211626, 0.8369910783816911]
Mean MAE: 0.5153556271763615 Mean RMSE: 0.888284120157433