## Import Libraries

In [54]:
import matplotlib.pyplot as plt
import seaborn as sns
import math
import pandas as pd
from collections import Counter
import numpy as np
import missingno
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import torch
import torch.nn as nn
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
# from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from sklearn.model_selection import GridSearchCV
# from recommenders.models.deeprec.deeprec_utils import prepare_hparams
import dgl

In [1]:
!pip install dgl

Collecting dgl
  Using cached dgl-1.1.1-cp39-cp39-manylinux1_x86_64.whl (6.3 MB)
Installing collected packages: dgl
Successfully installed dgl-1.1.1


In [2]:
!pip install missingno

Collecting missingno
  Using cached missingno-0.5.2-py3-none-any.whl (8.7 kB)
Installing collected packages: missingno
Successfully installed missingno-0.5.2


## Read the Dataset

In [36]:
dataa = pd.read_csv('ctr_data_1M_modified.csv',index_col=0)

### Take a sample from the dataset. A million record

In [37]:
data = dataa.sample(n=1000000, random_state=11)

In [38]:
data

Unnamed: 0,user_id,item_id,click,follow,like,share,video_category,watching_times,gender,age,hist_1,hist_2,hist_3,hist_4,hist_5,hist_6,hist_7,hist_8,hist_9,hist_10
21508185,153098,1369942,0,0,0,0,0,1,1,2,1886,21660,290301,3944,71249,62141,62903,2117,36473,141
68059050,511148,1534,0,0,0,0,1,0,1,3,15717,855,3138,152,46097,145574,2986,89964,2106,1425
74617845,566038,1380004,0,0,0,0,0,1,2,6,3537,14580,24059,1932,48184,1442,301,1356871,1353656,1381411
86042747,662993,20406,0,0,0,0,0,0,2,2,217,237931,7336,5302,1099,20807,64,23576,1305,84673
4049227,29918,47146,1,0,0,0,0,3,2,2,25230,1075,21289,62442,9440,59868,12935,31318,35093,10825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2440518,18106,32023,0,0,0,0,0,1,2,2,38132,83331,15654,83332,41739,80588,9917,5015,11324,4081
112889086,930728,1429780,1,0,0,0,1,1,1,4,1357685,1356855,1356738,1412813,1452179,1381563,1356802,1574881,1361787,1483510
109094921,889508,1358622,0,0,0,0,0,0,1,4,1384358,1357301,1354712,1352836,1354063,1356635,27915,1377335,1358332,1369234
8179276,59938,1368961,0,0,0,0,1,0,1,2,976,921,49355,943,340,154411,1778,58902,5271,14976


In [39]:
print(f"ctr_data_1M dataset has {data.shape[0]} samples and {data.shape[1]} features.")

ctr_data_1M dataset has 1000000 samples and 20 features.


# Modeling ESMM

ESMM Model :

In [41]:
'''
Reference:
    [1]Xiao Ma et al. Entire space multi-task model: An effective approach for estimating post-click conversion rate. In The 41st International
    ACM SIGIR Conference on Research & Development in Information Retrieval, pages 1137–1140, 2018.
Reference:
    https://github.com/busesese/MultiTaskModel
    https://github.com/yuangh-x/2022-NIPS-Tenrec/blob/master/model/mtl/esmm.py
'''

class ESMM(nn.Module):
    def __init__(self, user_feature_dict, item_feature_dict, emb_dim=128, hidden_dim=[128, 64], dropouts=[0.5, 0.5],
                 output_size=1, num_task=2):
        """
        esmm model input parameters
        :param user_feature_dict: user feature dict include: {feature_name: (feature_unique_num, feature_index)}
        :param item_feature_dict: item feature dict include: {feature_name: (feature_unique_num, feature_index)}
        :param emb_dim: int, embedding size
        :param hidden_dim: list of ctr and ctcvr dnn hidden sizes
        :param dropouts: list of ctr and ctcvr dnn drop out probability
        :param output_size: int out put size
        :param num_task: int default 2 multitask numbers
        """
        super(ESMM, self).__init__()
        
        # check input parameters
        if user_feature_dict is None or item_feature_dict is None:
            raise Exception("input parameter user_feature_dict and item_feature_dict must be not None")
        if isinstance(user_feature_dict, dict) is False or isinstance(item_feature_dict, dict) is False:
            raise Exception("input parameter user_feature_dict and item_feature_dict must be dict")
        
        self.user_feature_dict = user_feature_dict
        self.item_feature_dict = item_feature_dict
        self.num_task = num_task
        
        # embedding初始化
        user_cate_feature_nums, item_cate_feature_nums = 0, 0
        for user_cate, num in self.user_feature_dict.items():
            if num[0] > 1:
                user_cate_feature_nums += 1
                setattr(self, user_cate, nn.Embedding(num[0], emb_dim))
        for item_cate, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_cate_feature_nums += 1
                setattr(self, item_cate, nn.Embedding(num[0], emb_dim))
                
        # user embedding + item embedding
        hidden_size = emb_dim * (user_cate_feature_nums + item_cate_feature_nums) + \
                      (len(user_feature_dict) - user_cate_feature_nums) + (len(item_feature_dict) - item_cate_feature_nums)
        
        # esmm 独立任务的DNN结构
        for i in range(self.num_task):
            setattr(self, 'task_{}_dnn'.format(i + 1), nn.ModuleList())
            hid_dim = [hidden_size] + hidden_dim
            for j in range(len(hid_dim) - 1):
                getattr(self, 'task_{}_dnn'.format(i + 1)).add_module('ctr_hidden_{}'.format(j),
                                                                      nn.Linear(hid_dim[j], hid_dim[j + 1]))
                getattr(self, 'task_{}_dnn'.format(i + 1)).add_module('ctr_batchnorm_{}'.format(j),
                                                                      nn.BatchNorm1d(hid_dim[j + 1]))
                getattr(self, 'task_{}_dnn'.format(i + 1)).add_module('ctr_dropout_{}'.format(j),
                                                                      nn.Dropout(dropouts[j]))
            getattr(self, 'task_{}_dnn'.format(i + 1)).add_module('task_last_layer',
                                                                  nn.Linear(hid_dim[-1], output_size))

    def forward(self, x):
        assert x.size()[1] == len(self.item_feature_dict) + len(self.user_feature_dict)
        # embedding
        user_embed_list, item_embed_list = list(), list()
        for user_feature, num in self.user_feature_dict.items():
            if num[0] > 1:
                user_embed_list.append(getattr(self, user_feature)(x[:, num[1]].long()))
            else:
                user_embed_list.append(x[:, num[1]].unsqueeze(1))
        for item_feature, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_embed_list.append(getattr(self, item_feature)(x[:, num[1]].long()))
            else:
                item_embed_list.append(x[:, num[1]].unsqueeze(1))
            
        # embedding 融合
        user_embed = torch.cat(user_embed_list, axis=1)
        item_embed = torch.cat(item_embed_list, axis=1)
        
        # hidden layer
        hidden = torch.cat([user_embed, item_embed], axis=1).float()

        # task tower
        task_outputs = list()
        for i in range(self.num_task):
            x = hidden
            for mod in getattr(self, 'task_{}_dnn'.format(i + 1)):
                x = mod(x)
            task_outputs.append(x)

        return task_outputs

In [42]:
data.head(2)

Unnamed: 0,user_id,item_id,click,follow,like,share,video_category,watching_times,gender,age,hist_1,hist_2,hist_3,hist_4,hist_5,hist_6,hist_7,hist_8,hist_9,hist_10
21508185,153098,1369942,0,0,0,0,0,1,1,2,1886,21660,290301,3944,71249,62141,62903,2117,36473,141
68059050,511148,1534,0,0,0,0,1,0,1,3,15717,855,3138,152,46097,145574,2986,89964,2106,1425


In [43]:
print(data['user_id'].max())
print(data['gender'].max())
print(data['age'].max())
print(data['item_id'].max())
print(data['video_category'].max())
print(data['watching_times'].max())

1000014
2
7
3862454
1
124


### Preparing the data for training an ESMM (Entire Space Multi-Task Model) using PyTorch. 

It processes the dataset, converts target labels to binary values, specifies categorical columns, splits the data into training and testing sets, and creates custom PyTorch Dataset objects to hold the features and labels. The resulting datasets, along with dictionaries for user and item features, are returned for training the ESMM model.

In [44]:
from torch.utils.data import Dataset, DataLoader
def data_preparation():
    
    # First group of tasks according to the paper
    label_columns = ['click', 'like']
    
    # Categorical columns in your dataset
    categorical_columns = ['user_id', 'item_id', 'video_category', 'gender', 'age', 'watching_times']
    
    # Process the labels
    for col in label_columns:
        data[col] = data[col].apply(lambda x: 1 if x == 1 else 0)
        
    user_feature_dict = {'user_id': (1000054, 0), 'gender': (5, 4), 'age': (9, 5)}
    item_feature_dict = {'item_id': (3862554, 1), 'video_category': (5, 2), 'watching_times': (170, 3)}
    
    # Split your data into train and test sets
    train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
    
    # Prepare train_dataset and test_dataset
    train_dataset = TrainDataSet(train_data.iloc[:, :-2].values, (train_data['click'].values, train_data['like'].values))
    test_dataset = TrainDataSet(test_data.iloc[:, :-2].values, (test_data['click'].values, test_data['like'].values))

    return train_dataset, test_dataset, user_feature_dict, item_feature_dict

In [45]:
from torch.utils.data import Dataset

class TrainDataSet(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels_click = labels[0]
        self.labels_like = labels[1]
        
    def __getitem__(self, index):
        feature = self.features[index]
        label_click = self.labels_click[index]
        label_like = self.labels_like[index]
        return feature, label_click, label_like
        
    def __len__(self):
        return len(self.features)

### Training function
PyTorch training function for the ESMM (Entire Space Multi-Task Model). It trains the model using the specified number of epochs, given training and validation data loaders. The model is optimized using the provided loss function and optimizer. The training progress, including loss and AUC values, is displayed for each epoch. The function also implements early stopping based on the validation loss to prevent overfitting. The trained model is saved to the specified model_path.

In [46]:
import torch
from tqdm import tqdm
from sklearn.metrics import roc_auc_score

def train_model(esmm_model, train_dataloader, val_dataloader, num_epochs, loss_function, optimizer, model_path, early_stop):
    """
    Train the ESMM model.
    
    Args:
        esmm_model (ESMM): The ESMM model instance.
        train_dataloader (DataLoader): DataLoader for the training data.
        val_dataloader (DataLoader): DataLoader for the validation data.
        num_epochs (int): Number of epochs to train the model.
        loss_function: Loss function for training.
        optimizer: Optimizer for model parameters.
        model_path (str): Path to save the trained model.
        early_stop (int): Number of epochs to wait for validation loss improvement before early stopping.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    esmm_model.to(device)
    
    best_val_loss = float('inf')
    patience = 0
    
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_predictions_click = []
        train_labels_click = []
        train_predictions_like = []
        train_labels_like = []

        # Training
        esmm_model.train()
        for batch_input, batch_label_click, batch_label_like in tqdm(train_dataloader, desc=f"Epoch {epoch+1}"):
            batch_input = batch_input.to(device)
            batch_label_click = batch_label_click.to(device)
            batch_label_like = batch_label_like.to(device)

            optimizer.zero_grad()

            outputs = esmm_model(batch_input)
            loss1 = loss_function(outputs[0], batch_label_click.unsqueeze(1).float())
            loss2 = loss_function(outputs[1], batch_label_like.unsqueeze(1).float())
            loss = loss1 + loss2

            loss.backward()
            optimizer.step()

            train_loss += loss.item() * batch_input.size(0)

            train_predictions_click.extend(outputs[0].squeeze().detach().cpu().numpy())
            train_labels_click.extend(batch_label_click.squeeze().cpu().numpy())
            train_predictions_like.extend(outputs[1].squeeze().detach().cpu().numpy())
            train_labels_like.extend(batch_label_like.squeeze().cpu().numpy())
        
        train_loss /= len(train_dataloader.dataset)
        train_auc = roc_auc_score(train_labels, train_predictions)
        
        print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f} - Train AUC: {train_auc:.4f}")
        
        # Validation
        val_loss = 0.0
        val_predictions = []
        val_labels = []
        
        esmm_model.eval()
        with torch.no_grad():
            for batch_input, batch_label1, batch_label2 in tqdm(val_dataloader, desc=f"Epoch {epoch+1} - Validation"):
                batch_input = batch_input.to(device)
                batch_label1 = batch_label1.to(device)
                batch_label2 = batch_label2.to(device)
                
                outputs = esmm_model(batch_input)
                loss1 = loss_function(outputs[0], batch_label1.unsqueeze(1).float())
                loss2 = loss_function(outputs[1], batch_label2.unsqueeze(1).float())
                loss = loss1 + loss2
                
                val_loss += loss.item() * batch_input.size(0)
                
                val_predictions.extend(outputs[0].squeeze().detach().cpu().numpy())
                val_labels.extend(batch_label1.squeeze().cpu().numpy())
        
        val_loss /= len(val_dataloader.dataset)
        val_auc = roc_auc_score(val_labels, val_predictions)
        
        print(f"Epoch {epoch+1} - Validation Loss: {val_loss:.4f} - Validation AUC: {val_auc:.4f}")
        
        # Check for early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience = 0
            torch.save(esmm_model.state_dict(), model_path)
        else:
            patience += 1
            if patience >= early_stop:
                print(f"No improvement in validation loss for {early_stop} epochs. Early stopping.")
                break

### Train and evaluate the model

In [47]:
train_dataset, test_dataset, user_feature_dict, item_feature_dict = data_preparation()

# Define the batch size for training
batch_size = 32

# Create DataLoader objects for train and test datasets
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the ESMM model
esmm = ESMM(user_feature_dict, item_feature_dict, emb_dim=64)

# Define the loss function
loss_fn = nn.BCEWithLogitsLoss()

# Define the optimizer
optimizer = torch.optim.Adam(esmm.parameters(), lr=0.01)

# Define the number of training epochs
num_epochs = 20
# Define the early stopping parameter
early_stop = 3

# Train the model
train_model(esmm, train_dataloader, test_dataloader, num_epochs, loss_fn, optimizer, 'model_esmm.pt', early_stop)

Epoch 1:   0%|          | 0/25000 [00:00<?, ?it/s]


AssertionError: 

In [49]:
user_feature_dict = {'user_id': (1000054, 0), 'gender': (5, 4), 'age': (9, 5)}
item_feature_dict = {'item_id': (3862554, 1), 'video_category': (5, 2), 'watching_times': (170, 3)}

esmm_model = ESMM(user_feature_dict, item_feature_dict)

In [50]:
selected_columns = ['user_id', 'item_id', 'video_category', 'watching_times', 'gender', 'age']

x = torch.tensor(data[selected_columns].values)

In [51]:
outputs = esmm_model(x)

In [33]:
# Print the outputs
print("Model Outputs:")
for i, output in enumerate(outputs[:5]):
    print("Task {}: {}".format(i+1, output[:5]))

Model Outputs:
Task 1: tensor([[-0.9761],
        [-0.0262],
        [-0.2407],
        [ 0.3445],
        [-0.4559]], grad_fn=<SliceBackward0>)
Task 2: tensor([[ 0.1649],
        [-0.1276],
        [-0.3530],
        [-1.6469],
        [ 0.0116]], grad_fn=<SliceBackward0>)


In [52]:
# Assuming 'outputs' is the list of tensors containing the predictions
click_predictions = outputs[0].detach().numpy().flatten()
like_predictions = outputs[1].detach().numpy().flatten()

# Define the threshold values
click_threshold = 0.5  # Set your click threshold here
like_threshold = 0.5  # Set your like threshold here

# Convert predictions to zeros and ones
click_predictions_binary = (click_predictions >= click_threshold).astype(int)
like_predictions_binary = (like_predictions >= like_threshold).astype(int)

print("Click Predictions (Binary):\n", click_predictions_binary)
print("Like Predictions (Binary):\n", like_predictions_binary)

Click Predictions (Binary):
 [0 1 0 ... 0 0 1]
Like Predictions (Binary):
 [0 0 1 ... 0 1 0]


In [53]:
from sklearn.metrics import accuracy_score

# Assuming 'click_labels' and 'like_labels' are the actual labels
click_labels = data['click'].values
like_labels = data['like'].values

# Calculate accuracy for click task
click_accuracy = accuracy_score(click_labels, click_predictions_binary)

# Calculate accuracy for like task
like_accuracy = accuracy_score(like_labels, like_predictions_binary)

print("Click Accuracy:", click_accuracy)
print("Like Accuracy:", like_accuracy)

Click Accuracy: 0.604858
Like Accuracy: 0.718606


The accuracy achieved for the click and like tasks, which are approximately 0.6049 and 0.7186, respectively. These values represent the proportion of correct predictions for each task, with a value of 1.0 indicating perfect accuracy and a value of 0.0 indicating no accuracy.

In [55]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

# Calculate other evaluation metrics for click task
click_precision = precision_score(click_labels, click_predictions_binary)
click_recall = recall_score(click_labels, click_predictions_binary)
click_f1_score = f1_score(click_labels, click_predictions_binary)
click_roc_auc = roc_auc_score(click_labels, click_predictions_binary)
click_pr_auc = average_precision_score(click_labels, click_predictions_binary)

# Calculate other evaluation metrics for like task
like_precision = precision_score(like_labels, like_predictions_binary)
like_recall = recall_score(like_labels, like_predictions_binary)
like_f1_score = f1_score(like_labels, like_predictions_binary)
like_roc_auc = roc_auc_score(like_labels, like_predictions_binary)
like_pr_auc = average_precision_score(like_labels, like_predictions_binary)

print("Click Precision:", click_precision)
print("Click Recall:", click_recall)
print("Click F1-score:", click_f1_score)
print("Click ROC-AUC:", click_roc_auc)
print("Click PR-AUC:", click_pr_auc)

print("Like Precision:", like_precision)
print("Like Recall:", like_recall)
print("Like F1-score:", like_f1_score)
print("Like ROC-AUC:", like_roc_auc)
print("Like PR-AUC:", like_pr_auc)


Click Precision: 0.24998343207814758
Click Recall: 0.3088872329449144
Click F1-score: 0.2763311637174787
Click ROC-AUC: 0.5046967384870596
Click PR-AUC: 0.24601269061669195
Like Precision: 0.02054385752939152
Like Recall: 0.2878469194069666
Like F1-score: 0.03835060283784892
Like ROC-AUC: 0.5075083193730217
Like PR-AUC: 0.019795486102570967


Based on the evaluation metrics, the model's performance for both the click and like tasks:

Click Task:
Precision: 0.250
Recall: 0.309
F1-score: 0.276
ROC-AUC: 0.505
PR-AUC: 0.246
The click task shows modest performance with relatively low precision, recall, and F1-score. The ROC-AUC value being close to 0.5 suggests that the model's ability to distinguish between positive and negative samples for the click task is not significantly better than random chance. The PR-AUC score, which takes into account the imbalanced nature of the dataset, also indicates a limited ability to identify positive samples.

Like Task:
Precision: 0.021
Recall: 0.288
F1-score: 0.038
ROC-AUC: 0.508
PR-AUC: 0.020
The like task also demonstrates poor performance with low precision, recall, and F1-score. The ROC-AUC value close to 0.5 indicates that the model's ability to discriminate between positive and negative samples for the like task is not much better than random guessing. The PR-AUC score, accounting for class imbalance, shows limited success in identifying positive samples for the like task.