# MMoE demo with census income data

In [1]:
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import VarianceScaling
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import roc_auc_score
from model import MMoE
from typing import *


# Seed the Python, NumPy and TensorFlow random generators with one shared
# value so that repeated runs of the notebook start from the same RNG state.
SEED = 1
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(SEED)

## 1. 数据处理

In [3]:
# Names assigned to the columns of the raw census files, which are read with
# header=None. The order here must follow the column order of the files.
column_names = [
    'age', 'class_worker', 'det_ind_code', 'det_occ_code',
    'education', 'wage_per_hour', 'hs_college', 'marital_stat',
    'major_ind_code', 'major_occ_code', 'race', 'hisp_origin',
    'sex', 'union_member', 'unemp_reason', 'full_or_part_emp',
    'capital_gains', 'capital_losses', 'stock_dividends', 'tax_filer_stat',
    'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ',
    'instance_weight', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg',
    'mig_same', 'mig_prev_sunbelt', 'num_emp', 'fam_under_18',
    'country_father', 'country_mother', 'country_self', 'citizenship',
    'own_or_self', 'vet_question', 'vet_benefits', 'weeks_worked',
    'year', 'income_50k',
]

# Feature columns that get one-hot encoded in the encoding cell.
categorical_columns = [
    'class_worker', 'det_ind_code', 'det_occ_code', 'education',
    'hs_college', 'major_ind_code', 'major_occ_code', 'race',
    'hisp_origin', 'sex', 'union_member', 'unemp_reason',
    'full_or_part_emp', 'tax_filer_stat', 'region_prev_res', 'state_prev_res',
    'det_hh_fam_stat', 'det_hh_summ', 'mig_chg_msa', 'mig_chg_reg',
    'mig_move_reg', 'mig_same', 'mig_prev_sunbelt', 'fam_under_18',
    'country_father', 'country_mother', 'country_self', 'citizenship',
    'vet_question',
]

# The two prediction targets; they are dropped from the feature matrix.
label_columns = ["marital_stat", "income_50k"]

# Read both raw files with the same settings. The second file is kept whole
# here and is split into validation and test halves later.
train_df, other_df = (
    pd.read_csv(path, header=None, names=column_names)
    for path in ("data/census-income.data.gz", "data/census-income.test.gz")
)
print("Train: ", len(train_df))
print("Other: ", len(other_df))

# Stack both frames under a fresh 0..n-1 index so they can be encoded together.
total_df = pd.concat([train_df, other_df], ignore_index=True)
train_df.head()

Train:  199523
Other:  99762


Unnamed: 0,age,class_worker,det_ind_code,det_occ_code,education,wage_per_hour,hs_college,marital_stat,major_ind_code,major_occ_code,...,country_father,country_mother,country_self,citizenship,own_or_self,vet_question,vet_benefits,weeks_worked,year,income_50k
0,73,Not in universe,0,0,High school graduate,0,Not in universe,Widowed,Not in universe or children,Not in universe,...,United-States,United-States,United-States,Native- Born in the United States,0,Not in universe,2,0,95,- 50000.
1,58,Self-employed-not incorporated,4,34,Some college but no degree,0,Not in universe,Divorced,Construction,Precision production craft & repair,...,United-States,United-States,United-States,Native- Born in the United States,0,Not in universe,2,52,94,- 50000.
2,18,Not in universe,0,0,10th grade,0,High school,Never married,Not in universe or children,Not in universe,...,Vietnam,Vietnam,Vietnam,Foreign born- Not a citizen of U S,0,Not in universe,2,0,95,- 50000.
3,9,Not in universe,0,0,Children,0,Not in universe,Never married,Not in universe or children,Not in universe,...,United-States,United-States,United-States,Native- Born in the United States,0,Not in universe,0,0,94,- 50000.
4,10,Not in universe,0,0,Children,0,Not in universe,Never married,Not in universe or children,Not in universe,...,United-States,United-States,United-States,Native- Born in the United States,0,Not in universe,0,0,94,- 50000.


In [32]:
# One-hot encode the categorical feature columns on the combined frame, so the
# train and "other" parts end up with identical dummy columns.
# dtype is pinned explicitly: pandas >= 2.0 would otherwise emit boolean
# indicator columns, which mix badly with the numeric features downstream.
X = pd.get_dummies(total_df.drop(columns=label_columns), columns=categorical_columns, dtype=np.uint8)

# One-hot (one column per class) targets for each task.
income_Y = pd.get_dummies(total_df["income_50k"], dtype=np.uint8)
# marital_stat is multi-class in the raw data; it is collapsed into a binary
# "never married" vs. everything else task.
# NOTE(review): the literal keeps a leading space — presumably the raw values
# are read with one; confirm against the data.
marital_Y = pd.get_dummies((total_df["marital_stat"] == ' Never married').astype(int), dtype=np.uint8)

# Undo the concatenation: the first len(train_df) rows are the training part.
n_train = len(train_df)
train_X, other_X = X.iloc[:n_train], X.iloc[n_train:]
train_income_Y, other_income_Y = income_Y.iloc[:n_train], income_Y.iloc[n_train:]
train_marital_Y, other_marital_Y = marital_Y.iloc[:n_train], marital_Y.iloc[n_train:]

# Split "other" into validation and test halves. The test half is everything
# not sampled for validation; Index.difference gives it in sorted index order
# instead of the arbitrary iteration order of a Python set.
val_indices = other_X.sample(frac=0.5, replace=False, random_state=SEED).index
test_indices = other_X.index.difference(val_indices).tolist()
assert len(val_indices) + len(test_indices) == len(other_X), "validation/test split lost or duplicated rows"

val_X, test_X = other_X.loc[val_indices], other_X.loc[test_indices]
val_income_Y, test_income_Y = other_income_Y.loc[val_indices], other_income_Y.loc[test_indices]
val_marital_Y, test_marital_Y = other_marital_Y.loc[val_indices], other_marital_Y.loc[test_indices]

# Task metadata. task_names fixes the task order: ["income", "marital"].
task_classes = {"income": income_Y.shape[1], "marital": marital_Y.shape[1]}
task_names = ["income", "marital"]

# Label dicts keyed by task name, one per split.
train_labels = {"income": train_income_Y, "marital": train_marital_Y}
val_labels = {"income": val_income_Y, "marital": val_marital_Y}
test_labels = {"income": test_income_Y, "marital": test_marital_Y}

## 2. 模型构造

In [34]:
# Number of columns in the encoded training features; used as the input width of the model.
num_features = len(train_X.columns)
print(num_features)

499


In [35]:
# Input -> MMoE layer. The MMoE output is iterated below as one tensor per task.
# n_tasks comes from task_names so the task list is defined in a single place.
input_layer = Input(shape=(num_features,))
mmoe_layers = MMoE(units=4, n_experts=4, n_tasks=len(task_names))(input_layer)

# Every task gets its own small ReLU tower followed by a softmax head. The head
# is named after the task so outputs, losses and label dicts share the same keys.
output_layers = {}
for task_name, task_layer in zip(task_names, mmoe_layers):
    tower_layer = Dense(units=8, activation="relu", kernel_initializer="VarianceScaling")(task_layer)
    output_layer = Dense(
        units=task_classes[task_name],
        name=task_name,
        activation="softmax",
        kernel_initializer="VarianceScaling",
    )(tower_layer)
    output_layers[task_name] = output_layer

model = Model(inputs=[input_layer], outputs=output_layers)
# The same loss is applied to every task head.
model.compile(
    loss={task_name: 'binary_crossentropy' for task_name in task_names},
    optimizer=Adam(),
    metrics=["accuracy"],
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 499)]        0                                            
__________________________________________________________________________________________________
m_mo_e_1 (MMoE)                 [(None, 4), (None, 4 12000       input_2[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 8)            40          m_mo_e_1[0][0]                   
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 8)            40          m_mo_e_1[0][1]                   
______________________________________________________________________________________________

## 3. 训练

In [40]:
class ROCCallback(Callback):
    """Print the ROC-AUC of every model output on three datasets after each epoch.

    Only ``on_epoch_end`` is overridden. The remaining hooks are inherited from
    the Keras ``Callback`` base class, whose default implementations do nothing.
    """

    def __init__(self, training_data, validation_data, test_data):
        """Store the datasets to evaluate.

        Args:
            training_data: ``(features, labels)`` pair for the training split.
            validation_data: ``(features, labels)`` pair for the validation split.
            test_data: ``(features, labels)`` pair for the test split.

        In each pair, ``labels`` is indexed by model output name.
        """
        super().__init__()
        self.train_X, self.train_Y = training_data
        self.val_X, self.val_Y = validation_data
        self.test_X, self.test_Y = test_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on all three datasets and print one AUC line per model output.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict passed by Keras (unused). Defaults to ``None``
                rather than a shared mutable ``{}``.
        """
        # Predictions are indexed by output name below, so the model is expected
        # to return a mapping keyed by output name.
        train_pred = self.model.predict(self.train_X)
        val_pred = self.model.predict(self.val_X)
        test_pred = self.model.predict(self.test_X)

        # Iterate through each task and output their ROC-AUC across different datasets
        for output_name in self.model.output_names:
            train_auc = roc_auc_score(self.train_Y[output_name], train_pred[output_name])
            val_auc = roc_auc_score(self.val_Y[output_name], val_pred[output_name])
            test_auc = roc_auc_score(self.test_Y[output_name], test_pred[output_name])
            print(f"{output_name}-AUC: Train-{train_auc:.4f}, Eval-{val_auc:.4f}, Test-{test_auc:.4f}")

In [41]:
# Name each (features, labels) pair once. The AUC callback evaluates all three
# splits, while fit() only receives the training and validation data.
train_split = (train_X, train_labels)
val_split = (val_X, val_labels)
test_split = (test_X, test_labels)

auc_callback = ROCCallback(training_data=train_split, validation_data=val_split, test_data=test_split)

model.fit(
    x=train_X,
    y=train_labels,
    validation_data=val_split,
    callbacks=[auc_callback],
    epochs=100,
)

Epoch 1/100
income-AUC: Train-0.5015, Eval-0.5009, Test-0.5016
marital-AUC: Train-0.9890, Eval-0.9890, Test-0.9887
Epoch 2/100
income-AUC: Train-0.6171, Eval-0.6216, Test-0.6259
marital-AUC: Train-0.9904, Eval-0.9904, Test-0.9900
Epoch 3/100
income-AUC: Train-0.8895, Eval-0.8958, Test-0.8912
marital-AUC: Train-0.9904, Eval-0.9906, Test-0.9902
Epoch 4/100
income-AUC: Train-0.8005, Eval-0.8077, Test-0.8080
marital-AUC: Train-0.9913, Eval-0.9913, Test-0.9908
Epoch 5/100
income-AUC: Train-0.8026, Eval-0.8069, Test-0.8070
marital-AUC: Train-0.9917, Eval-0.9915, Test-0.9912
Epoch 6/100
income-AUC: Train-0.8958, Eval-0.8997, Test-0.8983
marital-AUC: Train-0.9923, Eval-0.9922, Test-0.9919
Epoch 7/100
income-AUC: Train-0.8984, Eval-0.9031, Test-0.8999
marital-AUC: Train-0.9924, Eval-0.9923, Test-0.9920
Epoch 8/100
income-AUC: Train-0.8979, Eval-0.9008, Test-0.9000
marital-AUC: Train-0.9906, Eval-0.9907, Test-0.9902
Epoch 9/100
income-AUC: Train-0.9030, Eval-0.9075, Test-0.9051
marital-AUC: Trai

KeyboardInterrupt: 