In [1]:
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")

PyTorch version: 2.1.2
CUDA available: True
CUDA version: 12.1


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cdae/pytorch/default/1/cdae.py
/kaggle/input/models/cdae.py
/kaggle/input/cdae-model/user_book_matrix.npz
/kaggle/input/cdae-model/avg_embeddings_matrix.npz
/kaggle/input/cdae-model/emotion_matrix.npz


In [3]:
#loading libraries
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from scipy.sparse import load_npz

In [4]:
def load_data():
    user_book_matrix = load_npz('/kaggle/input/cdae-model/user_book_matrix.npz')
    emotion_matrix = load_npz('/kaggle/input/cdae-model/emotion_matrix.npz')
    book_embeddings = load_npz('/kaggle/input/cdae-model/avg_embeddings_matrix.npz')

    return user_book_matrix, emotion_matrix, book_embeddings

In [5]:
import torch
from torch.utils.data import Dataset
import numpy as np

class CDAEDataset(Dataset):
    def __init__(self, interactions, emotions, review_embeddings, noise_factor=0.2):
        self.users, self.items = interactions.nonzero()
        self.interactions = interactions.toarray()
        self.emotions = emotions.toarray()
        self.review_embeddings = review_embeddings.toarray()
        self.noise_factor = noise_factor

    def __len__(self):
        return len(self.users)
    
    def __getitem__(self, idx):
        user, item = self.users[idx], self.items[idx]
        user_vector = self.interactions[user].flatten()
        noisy_user_vector = self.add_noise(user_vector)
        emotion_vector = torch.tensor(self.emotions[item], dtype=torch.float32)
        review_embedding = torch.tensor(self.review_embeddings[item], dtype=torch.float32)
        
        return (torch.FloatTensor(noisy_user_vector), 
                torch.FloatTensor(user_vector),
                emotion_vector, 
                review_embedding,
                torch.LongTensor([user]))

    def add_noise(self, vector):
        noise = np.random.normal(loc=0, scale=self.noise_factor, size=vector.shape)
        noisy_vector = vector + noise
        return np.clip(noisy_vector, 0, 1)



In [6]:
class CDAE(nn.Module):
    def __init__(self, num_users, num_items, num_emotions, review_embedding_dim, 
                 embedding_dim=64, hidden_dims=[256, 128, 64], dropout=0.2):
        super(CDAE, self).__init__()
        
        # Embedding layers
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.emotion_embedding = nn.Embedding(num_emotions, embedding_dim)
        
        # Encoder layers
        self.encoder = nn.ModuleList()
        input_dim = num_items + embedding_dim * 3 + review_embedding_dim  # input + user + item + emotion + review
        encoder_dims = [input_dim] + hidden_dims
        for i in range(len(encoder_dims) - 1):
            self.encoder.append(nn.Linear(encoder_dims[i], encoder_dims[i+1]))
            self.encoder.append(nn.ReLU())
            self.encoder.append(nn.BatchNorm1d(encoder_dims[i+1]))
            self.encoder.append(nn.Dropout(dropout))
        
        # Decoder layers
        self.decoder = nn.ModuleList()
        decoder_dims = hidden_dims[::-1] + [num_items]
        for i in range(len(decoder_dims) - 1):
            self.decoder.append(nn.Linear(decoder_dims[i], decoder_dims[i+1]))
            if i < len(decoder_dims) - 2:  # No activation on the final layer
                self.decoder.append(nn.ReLU())
                self.decoder.append(nn.BatchNorm1d(decoder_dims[i+1]))
                self.decoder.append(nn.Dropout(dropout))
        
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(dropout)

    def forward(self, user_vector, user_indices, item_indices, emotion_weights, review_embeddings):
        user_emb = self.user_embedding(user_indices)
        item_emb = self.item_embedding(item_indices)
        
        # Handle emotion embeddings
        emotion_emb = torch.matmul(emotion_weights, self.emotion_embedding.weight)

        # Concatenate input
        x = torch.cat([user_vector, user_emb, item_emb, emotion_emb, review_embeddings], dim=1)

        # Apply dropout
        x = self.dropout(x)

        # Encode
        for layer in self.encoder:
            x = layer(x)
        
        encoded = x  # Save the encoded representation
        
        # Decode
        for layer in self.decoder:
            x = layer(x)
        
        # Apply sigmoid to get probabilities
        reconstructed = self.sigmoid(x)
        
        return reconstructed, encoded

    def get_recommendation(self, user_indices, item_indices, emotion_indices, review_embeddings):
        with torch.no_grad():
            # Use a zero vector as input (we're not denoising here, just reconstructing)
            user_vector = torch.zeros(user_indices.size(0), self.item_embedding.num_embeddings).to(user_indices.device)
            
            reconstructed, _ = self.forward(user_vector, user_indices, item_indices, emotion_indices, review_embeddings)
            
            return reconstructed

In [7]:
def get_data_info(user_item_interactions, emotion_labels, review_embeddings):
    print(f"User-Item Interactions shape: {user_item_interactions.shape}")
    print(f"Emotion Labels shape: {emotion_labels.shape}")
    print(f"Review Embeddings shape: {review_embeddings.shape}")
    # Get the number of unique users and items
    num_users = user_item_interactions.shape[0]
    num_items = user_item_interactions.shape[1]
    
    # Get the number of unique emotions
    num_emotions = emotion_labels.shape[1]
    
    # Get the dimension of review embeddings
    review_embedding_dim = review_embeddings.shape[1]
    
    return num_users, num_items, num_emotions, review_embedding_dim


In [8]:
def custom_collate_fn(batch):
    noisy_user_vectors, user_vectors, emotions, review_embeddings, user_indices = zip(*batch)
    
    noisy_user_vectors = torch.stack(noisy_user_vectors)
    user_vectors = torch.stack(user_vectors)
    emotions = torch.stack(emotions)
    review_embeddings = torch.stack(review_embeddings)
    user_indices = torch.cat(user_indices)
    
    return noisy_user_vectors, user_vectors, emotions, review_embeddings, user_indices



In [9]:
import torch
from torch.utils.data import DataLoader, random_split
import torch.optim as optim

embedding_dim = 32
review_embedding_dim = 100
learning_rate = 0.001
batch_size = 32
num_epochs = 10

# Load and preprocess data
user_item_interactions, emotion_labels, review_embeddings = load_data()
num_users, num_items, num_emotions, review_embedding_dim = get_data_info(user_item_interactions, emotion_labels, review_embeddings)

# Create dataset
dataset = CDAEDataset(user_item_interactions, emotion_labels, review_embeddings, noise_factor=0.2)

# Split dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=custom_collate_fn)

# Initialize the model
model = CDAE(num_users, num_items, num_emotions, review_embedding_dim, 
             embedding_dim=embedding_dim, hidden_dims=[256, 128, 64])

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

User-Item Interactions shape: (575887, 74298)
Emotion Labels shape: (74298, 6)
Review Embeddings shape: (74298, 100)


In [10]:
def train(model, dataloader, criterion, optimizer):
    model.train()
    running_loss = 0.0

    for noisy_user_vectors, user_vectors, emotion_weights, review_embeddings, user_indices in train_loader:
        noisy_user_vectors = noisy_user_vectors.to(device)
        user_vectors = user_vectors.to(device)
        emotion_weights = emotion_weights.to(device)
        review_embeddings = review_embeddings.to(device)
        user_indices = user_indices.to(device)
    
        # Generate item indices (assuming they're the same as user indices in this case)
        item_indices = user_indices.clone()

        optimizer.zero_grad()
        try:
            prediction, _ = model(noisy_user_vectors, user_indices, item_indices, emotion_weights, review_embeddings)
        except RuntimeError as e:
            print(f"Error: {e}")
            print(f"Max user index: {user_indices.max().item()}")
            print(f"Max item index: {item_indices.max().item()}")
            print(f"Emotion weights shape: {emotion_weights.shape}")
            continue  # Skip this batch
    
        loss = criterion(prediction, user_vectors)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    average_loss = running_loss / len(dataloader)
    return average_loss



In [11]:
def validate(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0

    with torch.no_grad():
        for noisy_user_vectors, user_vectors, emotions, review_embeddings, user_indices in dataloader:
            noisy_user_vectors = noisy_user_vectors.to(device)
            user_vectors = user_vectors.to(device)
            emotions = emotions.to(device)
            review_embeddings = review_embeddings.to(device)
            user_indices = user_indices.to(device)

            outputs, _ = model(noisy_user_vectors, user_indices, user_indices, user_indices, review_embeddings)
            loss = criterion(outputs, user_vectors)
            running_loss += loss.item()

    average_loss = running_loss / len(dataloader)
    return average_loss


In [12]:
class EarlyStopping:
    def __init__(self, patience=5, verbose=False, delta=0, path='cdae_checkpoint.pth'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves model when validation loss decreases."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving CDAE model...')
        torch.save({
            'model_state_dict': model.state_dict(),
            'val_loss': val_loss,
        }, self.path)
        self.val_loss_min = val_loss

    def load_checkpoint(self, model):
        """Loads the best saved model."""
        checkpoint = torch.load(self.path)
        model.load_state_dict(checkpoint['model_state_dict'])
        return checkpoint['val_loss']


In [13]:
import torch
import torch.nn as nn
import torch.optim as optim

# Initialize model
model = CDAE(num_users, num_items, num_emotions, review_embedding_dim, 
             embedding_dim=32, hidden_dims=[256, 128, 64], dropout=0.2).to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [14]:
import torch

# Initialize EarlyStopping object
early_stopping = EarlyStopping(patience=5, verbose=True, path='/kaggle/working/cdaemodel.pth')

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for noisy_user_vectors, user_vectors, emotions, review_embeddings, user_indices in train_loader:
        noisy_user_vectors = noisy_user_vectors.to(device)
        user_vectors = user_vectors.to(device)
        emotions = emotions.to(device)
        review_embeddings = review_embeddings.to(device)
        user_indices = user_indices.to(device)

        optimizer.zero_grad()
        try:
            prediction, _ = model(noisy_user_vectors, user_indices, user_indices, emotions, review_embeddings)
        except RuntimeError as e:
            print(f"Error: {e}")
            print(f"User indices: {user_indices}")
            print(f"Item indices: {user_indices}")
            print(f"Emotion indices: {emotions}")
            continue  # Skip this batch
        
        loss = criterion(prediction, user_vectors)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    # Compute average training loss
    train_loss = running_loss / len(train_loader)

    # Validation step
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for noisy_user_vectors, user_vectors, emotions, review_embeddings, user_indices in test_loader:
            noisy_user_vectors = noisy_user_vectors.to(device)
            user_vectors = user_vectors.to(device)
            emotions = emotions.to(device)
            review_embeddings = review_embeddings.to(device)
            user_indices = user_indices.to(device)

            prediction, _ = model(noisy_user_vectors, user_indices, user_indices, emotions, review_embeddings)
            loss = criterion(prediction, user_vectors)
            val_loss += loss.item()

        val_loss /= len(test_loader)

    # Print training and validation loss
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Update early stopping object
    early_stopping(val_loss, model)

    # Check if early stopping criterion is met
    if early_stopping.early_stop:
        print("Early stopping")
        break


# Load the best model checkpoint
model.load_state_dict(torch.load('/kaggle/working/cdaemodel.pth'))

Error: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`


RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
print(f"Number of users: {num_users}")
print(f"Number of items: {num_items}")
print(f"Number of emotions: {num_emotions}")
print(f"User embedding size: {model.user_embedding.num_embeddings}")
print(f"Item embedding size: {model.item_embedding.num_embeddings}")
print(f"Emotion embedding size: {model.emotion_embedding.num_embeddings}")

In [None]:
print(f"Max user index: {user_indices.max().item()}")
print(f"Max item index: {item_indices.max().item()}")
print(f"Max emotion index: {emotions.max().item()}")