## Synthetic user, item, and interaction-history datasets

In [1]:
import os
# Restrict this process to the CUDA device with index 1.
# NOTE(review): presumably this has to run before TensorFlow initialises CUDA
# for the restriction to take effect — confirm before moving this line.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import pandas as pd
import numpy as np

# --- Synthetic data configuration -------------------------------------------
# All sizes live here so the frame shapes and the id ranges sampled for the
# interaction history cannot drift apart.
SEED = 42
N_USERS, N_USER_ATTRS = 1000, 35
N_ITEMS, N_ITEM_ATTRS = 200, 20
N_INTERACTIONS = 3000      # sampled (user, item) pairs before de-duplication
N_RATING_LEVELS = 5        # ratings are integers drawn from 0 .. N_RATING_LEVELS - 1

# A dedicated, seeded generator makes this cell reproducible under
# "Restart Kernel -> Run All" and independent of any global NumPy RNG state.
rng = np.random.default_rng(SEED)

# User attributes ~ N(0, 1), item attributes ~ N(1, 1).
users_df = pd.DataFrame(
    data=rng.normal(0, 1, size=(N_USERS, N_USER_ATTRS)),
    columns=[f'user_attr_{i}' for i in range(N_USER_ATTRS)],
)
items_df = pd.DataFrame(
    data=rng.normal(1, 1, size=(N_ITEMS, N_ITEM_ATTRS)),
    columns=[f'item_attr_{i}' for i in range(N_ITEM_ATTRS)],
)

# Ids are simply the row positions.
users_df['user_id'] = np.arange(N_USERS)
items_df['item_id'] = np.arange(N_ITEMS)

# Interaction history: uniformly sampled (user, item, rating) triples.
# Only the first occurrence of each (user_id, item_id) pair is kept, so the
# final frame may hold fewer than N_INTERACTIONS rows.
history_df = pd.DataFrame({
    'user_id': rng.integers(0, N_USERS, size=N_INTERACTIONS),
    'item_id': rng.integers(0, N_ITEMS, size=N_INTERACTIONS),
    'rating': rng.integers(0, N_RATING_LEVELS, size=N_INTERACTIONS),
}).drop_duplicates(subset=['user_id', 'item_id'], ignore_index=True)

## Response Functions

In [2]:
from simulator.modules import NoiseResponse, ResponseFunctionSim, ConstantResponseHeuristic
from deeptables.models.deeptable import ModelConfig
from deeptables.models import deepnets
from deeptables.models import deeptable


# DeepTables configuration selecting the DeepFM architecture; model artefacts
# are written under ./models/tmp.
deepfm_conf = ModelConfig(
    nets=deepnets.DeepFM,
    optimizer='RMSprop',
    auto_discrete=True,
    home_dir='./models/tmp',
    earlystopping_patience=20,
    dnn_params={'dnn_activation': 'relu'},
)
deepfm = deeptable.DeepTable(config=deepfm_conf)

# Attach user and item attributes to every history row (left joins on the ids).
cross_join_df = (
    history_df
    .merge(users_df, on='user_id', how='left')
    .merge(items_df, on='item_id', how='left')
)

# Train on attribute columns only: the ids and the target itself are removed
# from the feature frame.
feature_frame = cross_join_df.drop(columns=['user_id', 'item_id', 'rating'])
rating_target = cross_join_df['rating']
deepfm.fit(feature_frame, rating_target, epochs=100, batch_size=64)

# Hand-specified response components used next to the trained model.
m1 = NoiseResponse(mu=1.0, sigma=1.0)
m2 = ConstantResponseHeuristic(value=0.0)

# Bundle the noise response, the constant response and the DeepFM model.
response_func = ResponseFunctionSim([m1, m2, deepfm])

06-08 12:13:56 I deeptables.m.deeptable.py 338 - X.Shape=(2979, 55), y.Shape=(2979,), batch_size=64, config=ModelConfig(name='conf-1', nets=['linear', 'fm_nets', 'dnn_nets'], categorical_columns='auto', exclude_columns=[], task='auto', pos_label=None, metrics=['accuracy'], auto_categorize=False, cat_exponent=0.5, cat_remain_numeric=True, auto_encode_label=True, auto_imputation=True, auto_discrete=True, auto_discard_unique=True, apply_gbm_features=False, gbm_params={}, gbm_feature_type='embedding', fixed_embedding_dim=True, embeddings_output_dim=4, embeddings_initializer='uniform', embeddings_regularizer=None, embeddings_activity_regularizer=None, dense_dropout=0, embedding_dropout=0.3, stacking_op='add', output_use_bias=True, apply_class_weight=False, optimizer='RMSprop', loss='auto', dnn_params={'dnn_activation': 'relu'}, autoint_params={'num_attention': 3, 'num_heads': 1, 'dropout_rate': 0, 'use_residual': True}, fgcnn_params={'fg_filters': (14, 16), 'fg_heights': (7, 7), 'fg_pool_he

2022-06-08 12:13:57.088401: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-08 12:13:58.227281: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 26475 MB memory:  -> device: 0, name: A100-SXM4-40GB, pci bus id: 0000:0f:00.0, compute capability: 8.0


06-08 12:13:58 I deeptables.m.deepmodel.py 287 - >>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (55)', 'input_continuous_all: (55)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]
output_dims: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 275)
---------

2022-06-08 12:14:05.086734: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 26: early stopping
06-08 12:14:24 I deeptables.m.deepmodel.py 122 - Training finished.
06-08 12:14:24 I deeptables.m.deeptable.py 370 - Training finished.
06-08 12:14:24 I deeptables.m.deeptable.py 704 - Model has been saved to:./models/tmp/dt_20220608121356_linear_fm_nets_dnn_nets/linear+fm_nets+dnn_nets.h5


In [3]:
from simulator.utils import save

# Persist the assembled response function.
response_model_path = 'models/demo_response.m'
save(response_func, response_model_path)

INFO:tensorflow:Assets written to: ram://a92c2c18-829e-42b2-a1f4-0040c4c82202/assets


## SynEvaRec generator training and saving

In [3]:
from simulator.modules import SDVGenerator

import warnings

# Suppress all warnings from here on (applies to the rest of the session).
warnings.filterwarnings("ignore")

# One 'gaussiancopula' generator per entity type.
user_gen, item_gen = (SDVGenerator(model='gaussiancopula') for _ in range(2))

# The generators are fitted on attribute columns only; the id columns are dropped.
user_attrs = users_df.drop(columns=['user_id'])
item_attrs = items_df.drop(columns=['item_id'])
user_gen.fit(data=user_attrs)
item_gen.fit(data=item_attrs)

In [5]:
# Sanity check: request 5 rows from the fitted user generator and display them.
user_gen.generate(5)

Unnamed: 0,user_attr_0,user_attr_1,user_attr_2,user_attr_3,user_attr_4,user_attr_5,user_attr_6,user_attr_7,user_attr_8,user_attr_9,user_attr_10,user_attr_11,user_attr_12,user_attr_13,user_attr_14,user_attr_15,user_attr_16,user_attr_17,user_attr_18,user_attr_19,user_attr_20,user_attr_21,user_attr_22,user_attr_23,user_attr_24,user_attr_25,user_attr_26,user_attr_27,user_attr_28,user_attr_29,user_attr_30,user_attr_31,user_attr_32,user_attr_33,user_attr_34
0,-0.841159,-1.130349,-0.562767,-1.436658,-0.431905,0.862523,-0.477934,-1.569757,0.065317,-0.317478,0.930838,0.899253,-0.971943,-0.003831,-0.066607,-0.159761,1.120148,1.699936,1.498054,-0.53268,1.661771,0.22192,-0.084877,0.016012,-0.125709,0.651412,-0.535483,0.150738,-0.012269,-1.137382,-0.476427,-2.797294,0.663258,-0.41674,-0.193871
1,-0.830372,0.358775,-0.790639,-0.40558,0.317278,-0.581364,-0.241973,0.01242,-0.876017,-1.523888,0.93497,0.529797,-1.40153,-1.754575,0.763509,-1.114828,-2.029288,-0.428042,-1.166084,-0.093582,0.09047,0.102403,0.727626,-0.378074,-0.636578,-1.313639,1.653801,-1.654276,-2.215451,-1.174355,-0.620082,-0.029469,-0.648945,0.433497,-0.167933
2,-0.216478,-1.453142,0.288821,-1.180521,2.225587,-0.728381,-1.003061,-0.234521,-1.208059,0.083634,-0.617756,-0.935496,-0.422625,-1.227134,0.01635,0.542612,-0.192226,-0.398528,-0.479267,2.380701,-1.954038,-0.600726,-1.394947,-1.377155,0.569841,0.748014,-0.349937,0.195864,-0.617014,-0.340315,-1.584362,-0.227072,-0.131698,1.296526,-0.738848
3,0.308584,0.786508,-1.091464,0.800473,-0.134954,1.975992,2.134923,-0.310742,2.451551,2.867538,0.551122,-1.095965,0.318201,0.199045,0.493003,0.066637,-2.010671,0.082334,1.057195,1.45516,-0.187629,-0.071992,0.377811,-0.302314,-2.470564,2.274113,0.095904,-0.789807,0.060285,1.314964,0.511961,1.6352,-0.127105,-0.203623,0.754358
4,0.670639,0.375217,-0.609513,-0.316834,-0.212012,-0.295041,-0.039992,-0.546652,0.378768,1.085191,-0.553888,1.908566,-1.047519,0.615485,-0.860151,0.13961,1.235945,0.033071,0.318093,1.64837,0.038652,0.341122,1.150417,-0.46223,1.044938,-0.477347,1.489727,1.554991,-0.656179,-0.188854,-0.671738,-0.030108,1.525725,-0.075385,-1.121751


In [6]:
# Sanity check: request 3 rows from the fitted item generator and display them.
item_gen.generate(3)

Unnamed: 0,item_attr_0,item_attr_1,item_attr_2,item_attr_3,item_attr_4,item_attr_5,item_attr_6,item_attr_7,item_attr_8,item_attr_9,item_attr_10,item_attr_11,item_attr_12,item_attr_13,item_attr_14,item_attr_15,item_attr_16,item_attr_17,item_attr_18,item_attr_19
0,1.197979,1.491922,0.038514,1.293404,1.232039,2.690946,1.625152,0.857004,3.51627,1.097954,0.963833,1.820838,0.629233,0.712699,1.446134,1.217148,1.169454,1.154508,1.901708,0.950508
1,1.573775,0.701697,1.798784,0.680811,-0.45352,2.658993,1.283767,0.249473,1.045836,2.980768,2.778132,0.219815,1.219519,1.873755,0.032647,3.095739,0.058625,1.561995,1.912624,0.791787
2,1.172211,0.770769,0.44796,0.303134,0.363194,2.610464,-0.269859,0.877516,3.306723,-0.260894,3.509923,0.347011,2.359311,1.154931,0.288173,1.367866,2.066255,1.233136,1.310583,0.343579


In [8]:
from simulator.utils import save

# Persist both fitted generators, users first, then items.
for fitted_generator, target_path in (
    (user_gen, 'models/demo_user_gen.m'),
    (item_gen, 'models/demo_item_gen.m'),
):
    save(fitted_generator, target_path)