In [98]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import random
import pandas as pd
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings("ignore")

In [99]:
# Raise pandas' display limits so wide frames and long cell values are shown
# without truncation.
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.options.display.width = 1000
pd.options.display.max_colwidth = 1000

In [100]:
!nvidia-smi

Fri Dec 29 22:09:22 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.191.01   Driver Version: 450.191.01   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:1B:00.0 Off |                    0 |
| N/A   47C    P0    75W / 300W |   1710MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [101]:

# Single source of truth for the RNG seed: every library used below is seeded
# from this one value instead of repeating the literal.
seed = 3
random.seed(seed)
np.random.seed(seed)
# Seed torch as well (weight init, dropout, DataLoader shuffling).
# NOTE: the later `random_split(..., generator=torch.manual_seed(0))` call
# re-seeds torch's global RNG with 0.
torch.manual_seed(seed)
batch_size = 1024

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'



In [102]:
device

'cuda'

In [103]:
!pwd

/root/.jupyter/connection


In [104]:
def to_categorical(y, num_classes=None, dtype='float32'):
    """One-hot encode integer class labels.

    Args:
        y: Array-like of non-negative integer class indices. If it has more
            than one axis and the last axis has length 1 (e.g. shape
            ``(n, 1)``), that trailing axis is dropped before encoding.
        num_classes: Number of classes (width of the one-hot axis). When falsy
            (``None`` or ``0``) it is inferred as ``max(y) + 1``.
        dtype: dtype of the returned array.

    Returns:
        ``numpy.ndarray`` of shape ``y.shape + (num_classes,)`` (after the
        trailing-axis squeeze described above) with a 1 at each label's index
        and 0 elsewhere.

    Raises:
        ValueError: if ``y`` contains a negative label, or if ``y`` is empty
            and ``num_classes`` was not given.
        IndexError: if a label is ``>= num_classes``.
    """
    y = np.array(y, dtype='int')
    input_shape = y.shape
    if len(input_shape) > 1 and input_shape[-1] == 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()  # flatten to 1-D so each label indexes one output row

    # A negative label would silently wrap around via NumPy's negative
    # indexing and mark the wrong class, so reject it explicitly.
    if y.size and y.min() < 0:
        raise ValueError("to_categorical: labels must be non-negative integers")

    if not num_classes:
        if y.size == 0:
            raise ValueError("to_categorical: cannot infer num_classes from empty input")
        num_classes = np.max(y) + 1

    n = y.shape[0]
    categorical = np.zeros((n, num_classes), dtype=dtype)
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes, )
    return np.reshape(categorical, output_shape)



In [105]:

# Default location of the campaign feature/label CSV read by data_preparation().
DATA_CSV_PATH = '/var/tmp/code/recommendation_model/mmoe_model/data/precampaign_duringcampaign_features_acceptance_decision_performance_0502.csv'

# Columns kept from the CSV: features first, the three label columns last.
COLUMN_NAMES = ['country_y', 'yearmonth', 'media_gained_decisiondate', 
            'media_total_decisiondate', 'followers_gained_decisiondate', 'followers_total_decisiondate', 'following_gained_decisiondate', 
            'following_total_decisiondate', 'media_gained_decisiondate_7days', 'media_total_decisiondate_7days', 'followers_gained_decisiondate_7days', 
            'followers_total_decisiondate_7days', 'following_gained_decisiondate_7days', 'following_total_decisiondate_7days', 'media_gained_decisiondate_15days', 
            'media_total_decisiondate_15days', 'followers_gained_decisiondate_15days', 'followers_total_decisiondate_15days', 'following_gained_decisiondate_15days', 
            'following_total_decisiondate_15days', 'media_gained_decisiondate_30days', 'media_total_decisiondate_30days', 'followers_gained_decisiondate_30days', 
            'followers_total_decisiondate_30days', 'following_gained_decisiondate_30days', 'following_total_decisiondate_30days', 'media_gained_decisiondate_60days', 
            'media_total_decisiondate_60days', 'followers_gained_decisiondate_60days', 'followers_total_decisiondate_60days', 'following_gained_decisiondate_60days', 
            'following_total_decisiondate_60days', 'media_gained_decisiondate_90days', 'media_total_decisiondate_90days', 'followers_gained_decisiondate_90days', 
            'followers_total_decisiondate_90days', 'following_gained_decisiondate_90days', 'following_total_decisiondate_90days', 'followers_rate_decisiondate_7days', 
            'following_rate_decisiondate_7days', 'media_rate_decisiondate_7days', 'followers_rate_decisiondate_15days', 'following_rate_decisiondate_15days', 
            'media_rate_decisiondate_15days', 'followers_rate_decisiondate_30days', 'following_rate_decisiondate_30days', 'media_rate_decisiondate_30days', 
            'followers_rate_decisiondate_60days', 'following_rate_decisiondate_60days', 'media_rate_decisiondate_60days', 'followers_rate_decisiondate_90days', 
            'following_rate_decisiondate_90days', 'media_rate_decisiondate_90days',  'avg_total_orders', 'avg_new_customers', 'avg_customer_num', 
            'first_campaign_time', 'new_influencer', 'last_orders_total', 'last_new_customers', 'last_customer_num', 'days_from_last_posting', 
            'days_from_last_sponsored', 'days_from_last_org_branded', 'days_from_last_org_nonbranded', 'days_interval_posting', 'days_interval_sponsored', 
            'days_interval_org_branded', 'days_interval_org_nonbranded', '90days_comment_count', '90days_like_count', 'num_sponsored_posts_90days', 
            'num_organic_branded_90days', 'num_organic_nonbranded_90days', 'num_posts_90days', '60days_comment_count', '60days_like_count', 
            'num_sponsored_posts_60days', 'num_organic_branded_60days', 'num_organic_nonbranded_60days', 'num_posts_60days', '30days_comment_count', 
            '30days_like_count', 'num_sponsored_posts_30days', 'num_organic_branded_30days', 'num_organic_nonbranded_30days', 'num_posts_30days', 
            '15days_comment_count', '15days_like_count', 'num_sponsored_posts_15days', 'num_organic_branded_15days', 'num_organic_nonbranded_15days', 
            'num_posts_15days', '7days_comment_count', '7days_like_count', 'num_sponsored_posts_7days', 'num_organic_branded_7days', 'num_organic_nonbranded_7days', 
            'num_posts_7days', 'accept', 'Revenue', 'reputation_change_v2']

# Target columns, one per task.
LABEL_COLUMNS = ['accept', 'Revenue', 'reputation_change_v2']

# Feature columns that are one-hot encoded rather than used as numbers.
CATEGORICAL_COLUMNS = ['first_campaign_time', 'country_y', 'yearmonth']


def data_preparation(csv_path=DATA_CSV_PATH):
    """Load the campaign CSV and split it into encoded features and labels.

    Args:
        csv_path: Path (or file-like object) handed to ``pandas.read_csv``.
            Defaults to ``DATA_CSV_PATH`` so existing zero-argument calls keep
            working.

    Returns:
        ``(df_transformed, df_labels)`` where ``df_transformed`` holds the
        feature columns of ``COLUMN_NAMES`` with ``CATEGORICAL_COLUMNS``
        one-hot encoded, and ``df_labels`` holds ``LABEL_COLUMNS``.

    Raises:
        KeyError: if the CSV lacks any column listed in ``COLUMN_NAMES``.

    Notes:
        Missing values are replaced with 0 *before* encoding, so a missing
        value in a categorical column becomes its own ``0`` category.
    """
    raw = pd.read_csv(csv_path, delimiter=',', index_col=None, low_memory=False)

    # Select the modelled columns and fill gaps in one expression; this yields
    # a fresh frame instead of mutating a column-selected frame in place.
    df = raw[COLUMN_NAMES].fillna(0)

    df_transformed = pd.get_dummies(df.drop(LABEL_COLUMNS, axis=1), columns=CATEGORICAL_COLUMNS)
    df_labels = df[LABEL_COLUMNS]

    return df_transformed, df_labels


In [106]:
# Read the CSV and build the one-hot-encoded feature frame and the label frame.
df_transformed, df_labels = data_preparation()

In [107]:
df_transformed.iloc[0]

media_gained_decisiondate                  0.000000
media_total_decisiondate                  82.000000
followers_gained_decisiondate             -1.000000
followers_total_decisiondate            3334.000000
following_gained_decisiondate             -3.000000
following_total_decisiondate             687.000000
media_gained_decisiondate_7days           -1.000000
media_total_decisiondate_7days            86.000000
followers_gained_decisiondate_7days        3.000000
followers_total_decisiondate_7days      3330.000000
following_gained_decisiondate_7days        0.000000
following_total_decisiondate_7days       689.000000
media_gained_decisiondate_15days          -1.000000
media_total_decisiondate_15days           88.000000
followers_gained_decisiondate_15days      14.000000
followers_total_decisiondate_15days     3308.000000
following_gained_decisiondate_15days      -1.000000
following_total_decisiondate_15days      688.000000
media_gained_decisiondate_30days           0.000000
media_total_

In [108]:

def getTensorDataset(my_x, my_y):
    """Wrap features and labels in a ``TensorDataset`` of float32 tensors.

    Args:
        my_x: Array-like of numeric features; first axis indexes samples.
        my_y: Array-like of numeric labels with the same first-axis length.

    Returns:
        ``torch.utils.data.TensorDataset`` yielding ``(x, y)`` float32 pairs.

    Notes:
        Inputs are copied into float32 NumPy arrays first. This also accepts
        object-dtype arrays (e.g. ``DataFrame.to_numpy()`` on a frame mixing
        bool dummy columns with float columns), which ``torch.Tensor(...)``
        cannot convert directly.
    """
    # np.array(...) always copies, so the tensors never alias the caller's data.
    tensor_x = torch.as_tensor(np.array(my_x, dtype=np.float32))
    tensor_y = torch.as_tensor(np.array(my_y, dtype=np.float32))
    return torch.utils.data.TensorDataset(tensor_x, tensor_y)



In [109]:

class Expert(nn.Module):
    """Expert sub-network: Linear -> ReLU -> Dropout(0.3) -> Linear.

    Args:
        input_size: Width of the input features.
        output_size: Width of the expert's output.
        hidden_size: Width of the single hidden layer.
    """

    def __init__(self, input_size, output_size, hidden_size):
        super().__init__()
        # Layers are created in the same order as before so parameter names
        # and the random draws used for weight initialisation are unchanged.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map ``x`` through the hidden layer and return the raw linear output."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.fc2(hidden)

class Tower(nn.Module):
    """Task head: Linear -> ReLU -> Dropout(0.4) -> Linear -> Sigmoid.

    Args:
        input_size: Width of the input features.
        output_size: Width of the output.
        hidden_size: Width of the single hidden layer.
    """

    def __init__(self, input_size, output_size, hidden_size):
        super().__init__()
        # Layers are created in the same order as before so parameter names
        # and the random draws used for weight initialisation are unchanged.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.4)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid-squashed outputs in (0, 1) for input ``x``."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.sigmoid(self.fc2(hidden))

class MMOE(nn.Module):
    """Multi-gate mixture-of-experts model.

    All tasks share ``num_experts`` ``Expert`` networks. Each task has its own
    bias-free linear gate that softmax-weights the expert outputs, and its own
    ``Tower`` that maps the mixed representation to one output.

    Args:
        input_size: Width of the input features.
        num_experts: Number of shared experts.
        experts_out: Output width of every expert (and input width of towers).
        experts_hidden: Hidden width inside each expert.
        towers_hidden: Hidden width inside each tower.
        tasks: Number of tasks, i.e. number of gates and towers.
    """

    def __init__(self, input_size, num_experts, experts_out, experts_hidden, towers_hidden, tasks):
        super().__init__()
        self.input_size = input_size
        self.num_experts = num_experts
        self.experts_out = experts_out
        self.experts_hidden = experts_hidden
        self.towers_hidden = towers_hidden
        self.tasks = tasks

        # Normalises gate scores across the expert axis.
        self.softmax = nn.Softmax(dim=1)

        # Creation order (experts, gates, towers) is kept so seeded
        # initialisation produces the same parameters as before.
        self.experts = nn.ModuleList(
            [Expert(input_size, experts_out, experts_hidden) for _ in range(num_experts)]
        )
        self.w_gates = nn.ParameterList(
            [nn.Parameter(torch.randn(input_size, num_experts), requires_grad=True) for _ in range(tasks)]
        )
        self.towers = nn.ModuleList(
            [Tower(experts_out, 1, towers_hidden) for _ in range(tasks)]
        )

    def forward(self, x):
        """Return a list with one tower output per task for input batch ``x``."""
        # Stack expert outputs on a new leading axis: one slice per expert.
        expert_stack = torch.stack([expert(x) for expert in self.experts])

        outputs = []
        for gate_weight, tower in zip(self.w_gates, self.towers):
            gate = self.softmax(x @ gate_weight)
            # Move the expert axis first and add a trailing axis so the gate
            # weights broadcast over each expert's output features, then sum
            # over experts.
            mixed = (gate.t().unsqueeze(2) * expert_stack).sum(dim=0)
            outputs.append(tower(mixed))
        return outputs


In [110]:

def test(loader):
    """Score the notebook-level ``model`` on ``loader`` and return two ROC-AUCs.

    Uses the globals ``model`` and ``device``. Only the first two model
    outputs and the first two label columns are evaluated.

    Args:
        loader: Iterable of ``(x, y)`` batches where ``y[:, 0]`` and
            ``y[:, 1]`` are the targets for task 1 and task 2.

    Returns:
        ``(auc_1, auc_2)``: ROC-AUC of task 1 and task 2 over all batches.

    Raises:
        ValueError: if ``loader`` yields no batches (``roc_auc_score`` may
            also raise it, e.g. when a target has a single class).
    """
    t1_pred, t2_pred, t1_target, t2_target = [], [], [], []
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            yhat = model(x.to(device))

            # Keep one flat CPU tensor per batch instead of a Python list of
            # single-element tensors; the scores themselves are unchanged.
            t1_pred.append(yhat[0].view(-1).cpu())
            t2_pred.append(yhat[1].view(-1).cpu())
            t1_target.append(y[:, 0].cpu())
            t2_target.append(y[:, 1].cpu())

    if not t1_pred:
        raise ValueError("test(): loader produced no batches")

    auc_1 = roc_auc_score(torch.cat(t1_target).numpy(), torch.cat(t1_pred).numpy())
    auc_2 = roc_auc_score(torch.cat(t2_target).numpy(), torch.cat(t2_pred).numpy())
    return auc_1, auc_2




In [111]:

# train_data, train_label, validation_data, validation_label, test_data, test_label, output_info = data_preparation()
#print(output_info)


In [112]:
# Label frame as a NumPy array, ready to be wrapped in a tensor dataset.
df_labels_np = df_labels.to_numpy()

In [113]:
# Pair every feature row with its label row, then hold out 20% for testing.
df_dataset = getTensorDataset(df_transformed.to_numpy(), df_labels_np)
train_size = int(0.8 * len(df_dataset))
test_size = len(df_dataset) - train_size

# NOTE: torch.manual_seed(0) fixes the split, and as a side effect it also
# re-seeds torch's global RNG with 0.
train_dataset, test_dataset = torch.utils.data.random_split(
    df_dataset,
    [train_size, test_size],
    generator=torch.manual_seed(0),
)

# Only the training batches are shuffled; test batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [93]:
df_transformed.to_numpy().shape

(378961, 320)

In [85]:
len(train_loader)

297

In [86]:
len(test_loader)

75

In [118]:
# Size the input layer from the encoded feature frame rather than a literal:
# the column count depends on how many dummy columns get_dummies produced.
model = MMOE(input_size=df_transformed.shape[1], num_experts=6, experts_out=16, experts_hidden=32, towers_hidden=8, tasks=3)
model = model.to(device)
# NOTE(review): tasks=3 builds three gates/towers, but the training loop and
# test() only use the first two outputs — confirm whether the third task
# should be trained or dropped.
lr = 1e-4
n_epochs = 10
loss_fn = nn.BCELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
losses = []    # mean training loss per epoch
val_loss = []


In [120]:
for epoch in range(n_epochs):
    model.train()
    epoch_loss = []
    print("Epoch: {}/{}".format(epoch, n_epochs))
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)

        # Clear gradients from the previous step before the new forward pass.
        optimizer.zero_grad()
        y_hat = model(x)

        # Only the first two label columns / model outputs are trained.
        # BCELoss assumes both targets lie in [0, 1] — TODO confirm for column 1.
        y1, y2 = y[:, 0], y[:, 1]
        y_1, y_2 = y_hat[0], y_hat[1]

        loss1 = loss_fn(y_1, y1.view(-1, 1))
        loss2 = loss_fn(y_2, y2.view(-1, 1))
        loss = loss1 + loss2
        loss.backward()
        optimizer.step()
        epoch_loss.append(loss.item())
    losses.append(np.mean(epoch_loss))

    # No validation loader is defined in this notebook (`val_loader` raised a
    # NameError on a fresh kernel), so per-epoch AUC is monitored on the
    # held-out test_loader. Do not use these numbers for model selection.
    auc1, auc2 = test(test_loader)
    print('train loss: {:.5f}, test task1 auc: {:.5f}, test task2 auc: {:.3f}'.format(np.mean(epoch_loss), auc1, auc2))


In [17]:
# Final ROC-AUC of task 1 and task 2 on the held-out test split.
# NOTE(review): this cell's execution count (17) is lower than that of the
# cells above it, so the output recorded below may come from an earlier run —
# re-run after training to confirm.
auc1, auc2 = test(test_loader)
print('test task1 auc: {:.3f}, test task2 auc: {:.3f}'.format(auc1, auc2))

test task1 auc: 0.853, test task2 auc: 0.874
