<a href="https://colab.research.google.com/github/ChimeraLC/CHIP-8-emulator/blob/main/DANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

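# DANN

Domain-Adversarial Neural Network (DANN) training and evaluation on the Criteo data with delayed-feedback labels.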

## Preparation

In [None]:
# Mount Google Drive at /content/drive (Colab only); the CSV files read below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Preparation.
import csv
import itertools
import pandas as pd
from tqdm import tqdm
import numpy as np
from collections import defaultdict
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import torch.utils.data as data
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score, roc_curve, average_precision_score, precision_recall_curve, accuracy_score
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Directory on the mounted Drive that holds the four Criteo CSV files.
# Kept in one place so the notebook can be pointed at another location with a single edit.
DATA_DIR = '/content/drive/MyDrive/DADF/Criteo'

# Feature (x) and label (y) tables for the train and test splits.
criteo_train_data_x = pd.read_csv(f'{DATA_DIR}/Criteo_train_x.csv')
criteo_train_data_y = pd.read_csv(f'{DATA_DIR}/Criteo_train_y.csv')
criteo_test_data_x = pd.read_csv(f'{DATA_DIR}/Criteo_test_x.csv')
criteo_test_data_y = pd.read_csv(f'{DATA_DIR}/Criteo_test_y.csv')

# Train and test features stacked row-wise. The following cells read this combined
# frame to derive per-column statistics (min/max and unique values).
total_data = pd.concat([criteo_train_data_x, criteo_test_data_x])

In [None]:
# Min-max scale the integer feature columns i1..i8.
# The bounds are taken from total_data (train + test stacked), so both splits share one scale.
# NOTE: this cell rewrites the columns in place while the bounds keep coming from total_data,
# so running it a second time rescales already-scaled values. Run it once per kernel session.
integer_cols = ['i' + str(i) for i in range(1, 9)]
integer_min_max = {col: [total_data[col].min(), total_data[col].max()] for col in integer_cols}

for col in integer_cols:
    min_val, max_val = integer_min_max[col]
    value_range = max_val - min_val
    # A constant column has max == min; dividing by that zero range would fill the
    # feature with NaN/inf. Dividing by 1 instead maps every present value to 0.0.
    if value_range == 0:
        value_range = 1
    # Vectorised arithmetic on the whole column replaces the per-row lambda.
    criteo_train_data_x[col] = (criteo_train_data_x[col] - min_val) / value_range
    criteo_test_data_x[col] = (criteo_test_data_x[col] - min_val) / value_range

In [None]:
# For every categorical column c1..c9, record the distinct values that occur in
# total_data (train + test), in the order pandas' unique() reports them.
cate_dict = defaultdict(list)
for col in ('c' + str(n) for n in range(1, 10)):
    cate_dict[col] = list(total_data[col].unique())

In [None]:
# Integer encoding helpers for the categorical columns.
def create_empty_array():
    """Default factory for the encoding tables: return a new, empty NumPy array."""
    return np.array([])


def get_ohe(col_name, uni_vals):
    """Build the value -> integer code table for one categorical column.

    Each value is assigned its position in ``uni_vals`` (0, 1, 2, ...).
    Despite the function name, the codes are plain integers, not one-hot vectors.

    Args:
        col_name: Name of the column; accepted but not used by the function.
        uni_vals: Iterable of the column's values.

    Returns:
        A ``defaultdict`` mapping each value to its code. Looking up a value that
        was never registered inserts and returns an empty array (the default factory).
    """
    code_table = defaultdict(create_empty_array)
    code_table.update((value, position) for position, value in enumerate(uni_vals))
    return code_table

# Build one encoding table per categorical column (c1..c9) and use it to replace the
# raw values in both the train and the test frame with their integer codes.
all_cate = defaultdict(create_empty_array)
for col in ['c' + str(n) for n in range(1, 10)]:
    val_dict = get_ohe(col, cate_dict[col])
    all_cate[col] = val_dict
    # Element-wise lookup; val_dict is a defaultdict, so a value missing from the table
    # would silently become an empty array rather than raise.
    criteo_train_data_x[col] = criteo_train_data_x[col].map(lambda raw: val_dict[raw])
    criteo_test_data_x[col] = criteo_test_data_x[col].map(lambda raw: val_dict[raw])

In [None]:
# Split the training rows into a source and a target domain by day index, then turn
# every split (source, target, test) into NumPy arrays.
seconds_a_day = 60*60*24

# 0-based day index of every training row.
train_day_index = criteo_train_data_x['timestamp'] // seconds_a_day
# Source domain: day index < 31 (i.e. days 0-30), trained with labels.
# Target domain: day index >= 31; the training loop does not read its labels.
source_train_indices = train_day_index[train_day_index < 31].index
target_train_indices = train_day_index[train_day_index >= 31].index

# Columns that are bookkeeping only and must not be fed to the model.
unused_cols = ['convertTimestamp', 'timestamp']


def _train_split_arrays(row_indices):
    """Return (features, labels, convertTimestamp) arrays for the given training-row labels."""
    rows = criteo_train_data_x.loc[row_indices]
    features = np.array(rows.drop(columns=unused_cols))
    labels = np.array(criteo_train_data_y.loc[row_indices])
    convert_time = np.array(rows['convertTimestamp'])
    return features, labels, convert_time


(criteo_source_train_data_x,
 criteo_source_train_data_y,
 source_train_data_convertTime) = _train_split_arrays(source_train_indices)

# The "cirteo" spelling is kept on purpose: the data-loader cell further down uses this exact name.
(cirteo_target_train_data_x,
 criteo_target_train_data_y,
 target_train_data_convertTime) = _train_split_arrays(target_train_indices)

# Test split. criteo_test_data_x / criteo_test_data_y are rebound from DataFrames to
# arrays here, so re-running this cell without reloading the CSVs fails.
test_data_convertTime = np.array(criteo_test_data_x['convertTimestamp'])
criteo_test_data_x = np.array(criteo_test_data_x.drop(columns=unused_cols))
criteo_test_data_y = np.array(criteo_test_data_y)

In [None]:
# Criteo Dataset.
class Criteo_Dataset(data.Dataset):
    """Dataset over pre-encoded Criteo rows that also derives a delayed-feedback label.

    Each item is the tuple ``(numeric_x, cate_x, true_label, domain_label)`` for one row.

    Args:
        x: 2-D array; the first ``num_numeric`` columns are the numeric features and
            the remaining columns are the integer-coded categorical features.
        label: Array of ground-truth labels, indexed by row along its first axis.
            It is copied, never modified.
        convertTimestamp: 1-D array of conversion timestamps in seconds, one per row.
        cutoff_day: Last 0-based day whose conversions count as observed. Rows that
            converted on a later day get a delayed-feedback label of 0. Defaults to 59.
        num_numeric: Number of leading numeric columns in ``x``. Defaults to 8.

    Attributes are documented inline in ``__init__``.
    """

    # Used to turn a timestamp in seconds into a 0-based day index.
    SECONDS_PER_DAY = 86400

    def __init__(self, x, label, convertTimestamp, cutoff_day=59, num_numeric=8):
        self.numeric_x = x[:, :num_numeric]
        self.cate_x = x[:, num_numeric:]
        # Ground truth, untouched.
        self.true_label = label.copy()
        # Despite the name, this is the delayed-feedback label: the ground truth with
        # every conversion that happened after cutoff_day reset to 0.
        self.domain_label = label.copy()
        converted_after_cutoff = (convertTimestamp // self.SECONDS_PER_DAY) > cutoff_day
        self.domain_label[converted_after_cutoff] = 0

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.true_label)

    def __getitem__(self, idx):
        """Return ``(numeric features, categorical codes, true label, delayed label)`` for row ``idx``."""
        return self.numeric_x[idx], self.cate_x[idx], self.true_label[idx], self.domain_label[idx]

## DANN

In [None]:
# Reverse gradient layer.
class ReverseLayerF(Function):
    """Autograd function that is the identity going forward and flips the gradient going backward."""

    @staticmethod
    def forward(ctx, x, alpha):
        """Return the input unchanged and remember ``alpha`` for the backward pass.

        Args:
            ctx: Autograd context; ``alpha`` is stored on it.
            x: Input tensor.
            alpha: Scalar that scales the gradient during backpropagation.

        Returns:
            A view of ``x`` with the same values and shape.
        """
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Negate the incoming gradient and scale it by the stored ``alpha``.

        Args:
            ctx: Autograd context holding ``alpha`` from the forward pass.
            grad_output: Gradient flowing back into this layer.

        Returns:
            ``(-grad_output * alpha, None)``; the ``None`` means ``alpha`` gets no gradient.
        """
        reversed_grad = -grad_output * ctx.alpha
        return reversed_grad, None

In [None]:
"""
Initialize MLP.
Args.
@param input_size - The size of the input layer.
@param hidden_sizes - The size of the hidden layers.
@param output_size - The size of the output layer.
@param predict - whether to generate predictions with sigmoid
@param cate_size - list of categorical value dimension.
@param embed_size - categorical value embedding dimension.
@param dropout - The dropout value. Default is 0. ( 0 by default )
@param normal - Whether to use batch Normalization (True by default)
"""
class MLP(nn.Module):
    """Multi-layer perceptron with optional embeddings for integer-coded categorical inputs.

    Args:
        input_size: Width of the dense input ``x``.
        hidden_sizes: Sizes of the hidden layers, in order. May be empty.
        output_size: Size of the final linear layer; 0 (default) omits that layer,
            so the network ends with the last hidden block.
        predict: If True, append a sigmoid as the last module.
        cate_size: List with the number of distinct codes per categorical column;
            one embedding table is created per entry.
        embed_size: Embedding dimension shared by all categorical columns.
        dropout: Dropout probability after each hidden block; 0 (default) adds no dropout.
        normal: Whether to add batch normalisation after each hidden linear layer (default True).

    Embeddings are only used when both ``cate_size`` and ``embed_size`` are given.
    """

    def __init__(self, input_size, hidden_sizes, output_size=0, predict=False, cate_size=None, embed_size=None, dropout=0, normal=True):
        super(MLP, self).__init__()
        self.has_embeddings = cate_size is not None and embed_size is not None
        if self.has_embeddings:
            self.embedlayers = nn.ModuleList([nn.Embedding(c, embed_size) for c in cate_size])
            # Dense input plus one embed_size-wide vector per categorical column.
            total_input_size = input_size + len(cate_size) * embed_size
        else:
            total_input_size = input_size

        layers = []
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(total_input_size, hidden_size))
            if normal:
                layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.ReLU())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            # The next layer consumes this layer's output.
            total_input_size = hidden_size

        if output_size:
            # total_input_size is the width of whatever precedes the output layer:
            # the last hidden layer, or the (embedded) input when hidden_sizes is empty.
            # Indexing hidden_sizes[-1] here would raise IndexError for an empty list.
            layers.append(nn.Linear(total_input_size, output_size))

        if predict:
            layers.append(nn.Sigmoid())

        self.mlp = nn.Sequential(*layers)

    def forward(self, x, cate_x=None):
        """Run the network.

        Args:
            x: Dense input of shape ``(batch, input_size)``.
            cate_x: Optional integer tensor of shape ``(batch, len(cate_size))``; column ``i``
                is looked up in embedding table ``i`` and the results are concatenated to ``x``.
                Ignored when the model was built without embeddings.

        Returns:
            The output of the layer stack.
        """
        if self.has_embeddings and cate_x is not None:
            embedded_cate_x = [embed(cate_x[:, i]) for i, embed in enumerate(self.embedlayers)]
            x = torch.cat([x] + embedded_cate_x, dim=1)

        return self.mlp(x)

In [None]:
# Discriminator: an MLP that sees its input through the gradient-reversal layer.
class Discriminator(nn.Module):
    """Domain classifier made of a gradient-reversal layer followed by a sigmoid-terminated MLP."""

    def __init__(self, input_size, hidden_sizes, output_size):
        """Create the underlying MLP (``predict=True`` appends the sigmoid)."""
        super().__init__()
        self.discriminator = MLP(input_size, hidden_sizes, output_size, predict=True)

    def forward(self, x, alpha):
        """Pass ``x`` through the reversal layer (gradient scale ``alpha``), then through the MLP."""
        reversed_x = ReverseLayerF.apply(x, alpha)
        return self.discriminator(reversed_x)

In [None]:
# class DANN(nn.Module):
#     def __init__(self, base_model, input_size, clf_hidden_sizes, feaex_hidden_sizes, dis_hidden_sizes, output_size, cate_size, embed_size, dropout):
#         super(DANN, self).__init__()
#         # Build Base model with MLP or MMoE.
#         if base_model == 'MLP':
#           # Feature Extractor.
#           self.feature_extractor = MLP(input_size, feaex_hidden_sizes, cate_size=cate_size, embed_size=embed_size, dropout=dropout)
#           # Classifier.
#           fea_dim_input = feaex_hidden_sizes[-1]
#           self.classifier = MLP(fea_dim_input, clf_hidden_sizes, output_size=output_size, predict=True, dropout=dropout)
#           # Discriminator.
#           self.discriminator = Discriminator(fea_dim_input, dis_hidden_sizes, output_size=output_size)
#         else:


#     def forward(self, x, cate_x=None, alpha=None):
#         # Feature Extraction.
#         features = self.feature_extractor(x, cate_x)
#         # Classification.
#         class_pred = self.classifier(features)
#         # Discrimination.
#         if alpha is not None:
#             disc_pred = self.discriminator(features, alpha)
#             return class_pred, disc_pred
#         return class_pred


## Evaluation

In [None]:
def evaluate_model(pred, true):
    """Score predicted probabilities against binary labels.

    Args:
        pred: Tensor of predicted probabilities.
        true: Tensor of ground-truth labels, same layout as ``pred``.

    Returns:
        Tuple ``(accuracy, roc_auc, pr_auc)``. Accuracy thresholds ``pred`` at 0.5;
        PR-AUC is scikit-learn's average precision.
    """
    # Move both tensors to host memory once and reuse the arrays for every metric.
    labels = true.detach().cpu().numpy()
    scores = pred.detach().cpu().numpy()
    hard_pred = (pred >= 0.5).long().detach().cpu().numpy()

    accuracy = accuracy_score(labels, hard_pred)
    roc_auc = roc_auc_score(labels, scores)
    pr_auc = average_precision_score(labels, scores)
    return accuracy, roc_auc, pr_auc

## Starting Training

In [None]:
def get_lambda(epoch, max_epoch):
    """Schedule for the gradient-reversal strength alpha.

    Args:
        epoch: Current epoch (0-based in the training loop).
        max_epoch: Total number of epochs.

    Returns:
        ``2 / (1 + exp(-10 * p)) - 1`` with ``p = epoch / max_epoch``; this is 0 at
        ``epoch == 0`` and grows towards 1 as ``p`` increases.
    """
    progress = epoch / max_epoch
    sigmoid_term = 1. + np.exp(-10. * progress)
    return 2. / sigmoid_term - 1.

In [None]:
def _endless_batches(loader):
    """Yield batches from ``loader`` forever, starting a fresh pass each time it is exhausted.

    Unlike ``itertools.cycle``, nothing is cached: every pass re-iterates the loader,
    so a shuffling DataLoader reshuffles and no batch is kept alive after use.
    Stops immediately if the loader yields nothing at all.
    """
    while True:
        produced_any = False
        for batch in loader:
            produced_any = True
            yield batch
        if not produced_any:
            return


def train(models, lrate, num_epoch, train_source_loader, train_target_loader, test_loader, model_directory, domain_loss_weight=0.0001):
    """Adversarially train the three networks and evaluate on the test loader after every epoch.

    Args:
        models: Sequence ``(classifier, discriminator, feature_extractor)``.
        lrate: Learning rate used for all three Adam optimisers.
        num_epoch: Number of epochs.
        train_source_loader: Loader over source-domain rows; one optimisation step is
            taken per source batch, using that batch's delayed-feedback label.
        train_target_loader: Loader over target-domain rows; only their features are
            used. It is restarted whenever it runs out before the source loader does.
        test_loader: Loader used for the per-epoch evaluation against the true label.
        model_directory: Currently unused.
        domain_loss_weight: Weight of the discriminator loss in the total loss
            (default 0.0001, the value that used to be hard-coded).

    Returns:
        Three lists with one entry per epoch: accuracy, ROC-AUC and PR-AUC on the test loader.
    """
    # Extracting models.
    clf, dis, feaex = models
    clf.train()
    dis.train()
    feaex.train()

    # One optimiser per network.
    clf_optimizer = torch.optim.Adam(clf.parameters(), lr=lrate)
    dis_optimizer = torch.optim.Adam(dis.parameters(), lr=lrate)
    feaex_optimizer = torch.optim.Adam(feaex.parameters(), lr=lrate)

    clf_bce = nn.BCELoss()
    dis_bce = nn.BCELoss()

    test_loss_per_epoch = []
    train_average_clf_loss_per_epoch = []
    train_average_dis_loss_per_epoch = []
    metric_accuracy, metric_roc_auc, metric_pr_auc = [], [], []

    for epoch in range(num_epoch):
        # Fresh pass over the target data each epoch; restarted on demand inside the epoch.
        target_iter = _endless_batches(train_target_loader)
        # alpha depends only on the epoch, so compute it once instead of once per batch.
        alpha = get_lambda(epoch, num_epoch)
        epoch_clf_losses, epoch_dis_losses = [], []

        for num_x, cate_x, _, delayed_y in train_source_loader:
            target_num_x, target_cate_x, _, _ = next(target_iter)

            num_x = num_x.to(dtype=torch.float32, device=device)
            cate_x = cate_x.to(dtype=torch.long, device=device)
            delayed_y = delayed_y.to(dtype=torch.float32, device=device)
            target_num_x = target_num_x.to(dtype=torch.float32, device=device)
            target_cate_x = target_cate_x.to(dtype=torch.long, device=device)

            # Domain labels: 1 for source rows, 0 for target rows.
            D_src = torch.ones(num_x.shape[0], 1, device=device)
            D_tgt = torch.zeros(target_num_x.shape[0], 1, device=device)

            # Shared features for both domains.
            src_features = feaex(num_x, cate_x)
            tgt_features = feaex(target_num_x, target_cate_x)

            # Classifier loss on the source batch only, against the delayed-feedback label.
            src_pred = clf(src_features)
            clf_loss = clf_bce(src_pred, delayed_y)
            epoch_clf_losses.append(clf_loss.item())

            # Domain loss on both batches; alpha is the gradient-reversal strength handed to dis.
            src_domain_pred = dis(src_features, alpha)
            tgt_domain_pred = dis(tgt_features, alpha)
            dis_loss = dis_bce(src_domain_pred, D_src) + dis_bce(tgt_domain_pred, D_tgt)
            epoch_dis_losses.append(dis_loss.item())

            ltot = domain_loss_weight * dis_loss + clf_loss

            dis_optimizer.zero_grad()
            clf_optimizer.zero_grad()
            feaex_optimizer.zero_grad()
            ltot.backward()
            clf_optimizer.step()
            dis_optimizer.step()
            feaex_optimizer.step()

        train_average_clf_loss_per_epoch.append(sum(epoch_clf_losses) / len(epoch_clf_losses))
        train_average_dis_loss_per_epoch.append(sum(epoch_dis_losses) / len(epoch_dis_losses))
        print('Epoch: [{}/{}], Average CLF Loss: {:.9f}, Average Discriminantor Loss: {:.9f}'.format(epoch+1, num_epoch, train_average_clf_loss_per_epoch[-1], train_average_dis_loss_per_epoch[-1]))

        # Validation: the discriminator is not involved, so only these two switch to eval mode.
        feaex.eval()
        clf.eval()
        pred_list, label_list = [], []
        with torch.no_grad():
            for test_num_x, test_cate_x, test_y, _ in test_loader:
                test_num_x = test_num_x.to(dtype=torch.float32, device=device)
                test_cate_x = test_cate_x.to(dtype=torch.long, device=device)
                pred_list.append(clf(feaex(test_num_x, test_cate_x)))
                label_list.append(test_y.to(device))
            pred = torch.vstack(pred_list)
            test_y = torch.vstack(label_list)
            test_loss = clf_bce(pred, test_y.to(torch.float32))
        test_loss_per_epoch.append(test_loss.item())

        test_metric_acc, test_metric_roc_auc, test_metric_pr_auc = evaluate_model(pred, test_y)
        metric_accuracy.append(test_metric_acc)
        metric_roc_auc.append(test_metric_roc_auc)
        metric_pr_auc.append(test_metric_pr_auc)
        print('Test Epoch {}: test loss: {:.9f}; Accuracy: {:.9f}; ROC_AUC: {:.9f}; PR_AUC:{:.9f}'.format(epoch+1, test_loss_per_epoch[-1], test_metric_acc, test_metric_roc_auc, test_metric_pr_auc))

        # Back to training mode for the next epoch (and for the caller after the last one).
        feaex.train()
        clf.train()
    return metric_accuracy, metric_roc_auc, metric_pr_auc

In [None]:
# Initialize parameters for the model and the training loop.

# Seed PyTorch's RNG before any model is built or any loader is iterated, so that weight
# initialisation, dropout and shuffling start from the same state on every Restart & Run All.
# (Some GPU kernels can still be non-deterministic.)
seed = 42
torch.manual_seed(seed)

# Model sizes.
embed_size = 64                      # embedding dimension per categorical column
input_size = 8                       # number of numeric feature columns
output_size = 1
clf_hidden_sizes = [32,64]
feaex_hidden_sizes = [64,128]
fea_dim_input = feaex_hidden_sizes[-1]   # feature-extractor output width = classifier/discriminator input width
dis_hidden_sizes = [64,64]
# Number of distinct values per categorical column, in cate_dict's insertion order.
cate_size = [len(unique_vals) for unique_vals in cate_dict.values()]
dropout = 0.3

# Training loop.
num_epoch = 20
batch_size = 512
lrate = 0.001
model_directory = ''

In [None]:
# Build the three networks (classifier, feature extractor, discriminator) on the selected device.
clf = MLP(
    fea_dim_input, clf_hidden_sizes,
    output_size=output_size, predict=True, dropout=dropout,
).to(device)
feaex = MLP(
    input_size, feaex_hidden_sizes,
    cate_size=cate_size, embed_size=embed_size, dropout=dropout,
).to(device)
discriminator = Discriminator(fea_dim_input, dis_hidden_sizes, output_size=output_size).to(device)

# Order matters: train() unpacks this list as (classifier, discriminator, feature extractor).
models = [clf, discriminator, feaex]
model_names = ["Classifier", "Discriminator", "Feature Extractor"]

# Report the parameter count of each network.
for model_name, model in zip(model_names, models):
    n_params = sum(param.numel() for param in model.parameters())
    print(f'Number of parameters of {model_name}: {n_params}')

Number of parameters of Classifier: 6497
Number of parameters of Discriminator: 12737
Number of parameters of Feature Extractor: 9800640


In [None]:
# Wrap the three array splits in datasets first ...
train_source_data = Criteo_Dataset(criteo_source_train_data_x, criteo_source_train_data_y, source_train_data_convertTime)
train_target_data = Criteo_Dataset(cirteo_target_train_data_x, criteo_target_train_data_y, target_train_data_convertTime)
test_data = Criteo_Dataset(criteo_test_data_x, criteo_test_data_y, test_data_convertTime)

# ... then in loaders. Both training loaders shuffle; the test loader keeps row order.
train_source_loader = DataLoader(train_source_data, shuffle=True, batch_size=batch_size)
train_target_loader = DataLoader(train_target_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [None]:
# Run the full training loop; each returned list holds one test-set value per epoch.
metric_accuracy, metric_roc_auc, metric_pr_auc = train(models, lrate, num_epoch, train_source_loader, train_target_loader, test_loader, model_directory)

Epoch: [1/20], Average CLF Loss: 0.386431146, Average Discriminantor Loss: 1.211064775
Test Epoch 1: test loss: 0.409389973; Accuracy: 0.818116067; ROC_AUC: 0.834322387; PR_AUC:0.641663079
Epoch: [2/20], Average CLF Loss: 0.378106685, Average Discriminantor Loss: 1.334623849
Test Epoch 2: test loss: 0.404996723; Accuracy: 0.821445133; ROC_AUC: 0.837936970; PR_AUC:0.647843062
Epoch: [3/20], Average CLF Loss: 0.376077274, Average Discriminantor Loss: 1.371021459
Test Epoch 3: test loss: 0.407429725; Accuracy: 0.820804928; ROC_AUC: 0.836682318; PR_AUC:0.646237635
Epoch: [4/20], Average CLF Loss: 0.374734568, Average Discriminantor Loss: 1.376111258
Test Epoch 4: test loss: 0.409120530; Accuracy: 0.820410423; ROC_AUC: 0.835963088; PR_AUC:0.645177640
Epoch: [5/20], Average CLF Loss: 0.373693655, Average Discriminantor Loss: 1.378671977
Test Epoch 5: test loss: 0.410626590; Accuracy: 0.820514240; ROC_AUC: 0.835388657; PR_AUC:0.644675908
Epoch: [6/20], Average CLF Loss: 0.372788212, Average D