In [1]:
#Load the packages
import numpy
import tensorflow as tf
import os
import random
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool 
import time
from functools import partial
from tqdm import tqdm
%matplotlib inline

In [2]:
DATA_DIR = "../gamedata/original_dataset/"
DATA_FILE = "dd_event_schemas.csv"
DATA_FOR_BPR = "data_for_BPR.csv"
SAVE_DIR = "../gamedata/train_result/"

In [3]:
def process_data(data_path):
    data = pd.read_csv(DATA_DIR+DATA_FILE)
    gameid = data['gameid'].drop_duplicates()
    gameid_list = []
    gameid_list = list(gameid)
    gameid2newid = {old:new+1 for new, old in enumerate(gameid_list)}
    data['new_game_id'] = data['gameid'].map(gameid2newid)

    events = data['eventname'].drop_duplicates()
    events_list = list(events)
    events2id = {old:new+1 for new,old in enumerate(events_list)}
    data['new_event_id'] = data['eventname'].map(events2id)
    
    data.to_csv(DATA_DIR+'data_for_BPR.csv',index=False)
    data=pd.read_csv(DATA_DIR+'data_for_BPR.csv')
    
    game_events = defaultdict(set)
    game_count = -1
    event_count = -1
    with open(data_path,'r') as f:
        header_line = next(f)
        for line in f.readlines():
            _, _, _, _, _, _, gameid, eventid = line.split(",")
            gameid = int(gameid)
            eventid = int(eventid)
            #print(u,i)
            game_events[gameid].add(eventid)
            game_count = max(gameid, game_count)
            event_count = max(eventid, event_count)
    print ("game_count:", game_count)
    print ("event_count:", event_count)
    return game_count,event_count,game_events
    

#data_path = os.path.join('D:\\tmp\\ml-100k', 'u.data')
#user_count, item_count, user_ratings = load_data(data_path)
game_count,event_count,game_events_dict = process_data(DATA_DIR+DATA_FOR_BPR)

game_count: 1108
event_count: 11189


In [4]:
#genereate temp dict for later use
def generate_temp(game_events_dict):
    user_test = dict()
    for u, i_list in game_events_dict.items():
        user_test[u] = random.sample(game_events_dict[u], 1)[0]
    return user_test

temp_dict = generate_temp(game_events_dict)

In [5]:
#generate dataset for training
def generate_train_batch(game_events_dict, temp_dict, event_count, batch_size):
    t = []
    
    #print('generate batch..')
    for b in range(batch_size):
        break_flag1 = 0
        break_flag2 = 0
        u = random.sample(game_events_dict.keys(), 1)[0]
        #print('u:',u)
        i = random.sample(game_events_dict[u], 1)[0]
        while i == temp_dict[u]:
            break_flag1 += 1
            i = random.sample(game_events_dict[u], 1)[0]
            if break_flag1 == 20:
                #game_events_dict.pop('u',None)
                #temp_dict.pop('u',None)
                break
            #print('i:',i)
        j = random.randint(1, event_count)
        while j in game_events_dict[u]:
            #break_flag2 += 1
            j = random.randint(1, event_count)
            #if break_flag2 == 20:
                #game_events_dict.pop('u',None)
                #temp_dict.pop('u',None)
             #   break
            #print('j:',j)
        t.append([u, i, j])
        #print(u,i,j)
    #print('generate batch done!')
    return numpy.asarray(t)

#train_set = generate_train_batch(game_events_dict,temp_dict,event_count)

In [6]:
def generate_test_batch(game_events_dict, temp_dict, event_count):
    for u in game_events_dict.keys():
        t = []
        i = temp_dict[u]
        #for j in range(1, event_count+1):
        for j in range(1, 1001):
            if not (j in game_events_dict[u]):
                t.append([u, i, j])
        yield numpy.asarray(t)

In [None]:
#Algorithm
def bpr_mf(game_count, event_count, hidden_dim):
    u = tf.placeholder(tf.int32, [None])
    i = tf.placeholder(tf.int32, [None])
    j = tf.placeholder(tf.int32, [None])
    with tf.device("/cpu:0"):
        user_emb_w = tf.get_variable("user_emb_w", [game_count+1, hidden_dim], 
                            initializer=tf.random_normal_initializer(0, 0.1))
        item_emb_h = tf.get_variable("item_emb_h", [event_count+1, hidden_dim], 
                                initializer=tf.random_normal_initializer(0, 0.1))
        u_emb = tf.nn.embedding_lookup(user_emb_w, u)
        i_emb = tf.nn.embedding_lookup(item_emb_h, i)
        j_emb = tf.nn.embedding_lookup(item_emb_h, j)
    
    # MF predict: u_i > u_j
    x = tf.reduce_sum(tf.multiply(u_emb, (i_emb - j_emb)), 1, keepdims=True)
    loss1 = - tf.reduce_mean(tf.log(tf.sigmoid(x)))
    # AUC for one user:
    # reasonable iff all (u,i,j) pairs are from the same user
    # 
    # average AUC = mean( auc for each user in test set)
    mf_auc = tf.reduce_mean(tf.to_float(x > 0))
    regulation_rate = 0.0001
    loss2 = regulation_rate * tf.add_n([
            tf.reduce_sum(tf.multiply(u_emb, u_emb)), 
            tf.reduce_sum(tf.multiply(i_emb, i_emb)),
            tf.reduce_sum(tf.multiply(j_emb, j_emb))
        ])
    bprloss = loss1 + loss2
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(bprloss)
    return u, i, j, mf_auc, bprloss, train_op

In [None]:
#Training session
with tf.Graph().as_default(), tf.Session() as session:
    u, i, j, mf_auc, bprloss, train_op = bpr_mf(game_count, event_count, 20)
#     print(u, i, j, mf_auc, bprloss, train_op)
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    print('start epoch')
    for epoch in range(1, 50):
        print ("epoch: ", epoch)
        _batch_bprloss = 0
        for k in tqdm(range(1, 5001)): # uniform samples from training set
            uij = generate_train_batch(game_events_dict, temp_dict,event_count,batch_size=256)
            _bprloss, _train_op = session.run([bprloss, train_op],feed_dict={u:uij[:,0], i:uij[:,1], j:uij[:,2]})
            _batch_bprloss += _bprloss
        print ("bpr_loss: ", _batch_bprloss / k)

        games_count = 0
        _auc_sum = 0.0
        test_bprloss = 0.0
        # each batch will,co return only one user's auc
        for t_uij in generate_test_batch(game_events_dict, temp_dict, event_count):
            _auc, _test_bprloss = session.run([mf_auc, bprloss],feed_dict={u:t_uij[:,0], i:t_uij[:,1], j:t_uij[:,2]})
            games_count += 1
            _auc_sum += _auc
            test_bprloss += _test_bprloss
        print ("test_loss: ", test_bprloss/games_count, "test_auc: ", _auc_sum/games_count)
        print ("")
    variable_names = [v.name for v in tf.trainable_variables()]
    values = session.run(variable_names)
    saver.save(session, SAVE_DIR)
    for k,v in zip(variable_names, values):
        print("Variable: ", k)
        print("Shape: ", v.shape)
        print(v)
print('all done')

  0%|          | 2/5000 [00:00<04:27, 18.68it/s]

start epoch
epoch:  1


100%|██████████| 5000/5000 [00:46<00:00, 108.50it/s]


bpr_loss:  0.7083323358416558


  0%|          | 12/5000 [00:00<00:43, 115.97it/s]

test_loss:  0.7496982880662925 test_auc:  0.5041036438919765

epoch:  2


100%|██████████| 5000/5000 [00:44<00:00, 113.59it/s]


bpr_loss:  0.7072573763012886


In [None]:
data=pd.read_csv(DATA_DIR+'data_for_BPR.csv')

In [37]:
#Recommend events for games
def recommend(gameid):    
    tempdata = data[data['gameid']==gameid]
    new_game_id = tempdata['new_game_id'].drop_duplicates()
    new_game_id = int(new_game_id)
    print('the new id of game',gameid,'is:',new_game_id)
    game_index = new_game_id - 1
    session1 = tf.Session()
    g1_dim = tf.expand_dims(values[0][game_index], 0)
    g1_all = tf.matmul(g1_dim, values[1],transpose_b=True)
    rating_g1 = session1.run(g1_all)
    print ('rating vector is:',rating_g1)
    print("new recommedations:")
    p = numpy.squeeze(rating_g1)
    
    p[numpy.argsort(p)[:-20]] = 0
    for index in range(len(p)):
        if p[index] != 0:
            event_name = data[data['new_event_id']==index].drop_duplicates('eventname')
            print (index, p[index],event_name['eventname'].values)  

In [38]:
#Recommend events for game 59
recommend(59)

the new id of game 59 is: 6
rating vector is: [[ 0.00056908  0.28475577  0.12661853 ... -0.08589959  0.05925079
  -0.05144139]]
new recommedations:
1 0.28475577 ['transaction']
3 0.21875711 ['missionFailed']
4 0.29840305 ['newPlayer']
5 0.22287144 ['levelUp']
9 0.25470904 ['clientDevice']
10 0.23450017 ['missionCompleted']
12 0.28952318 ['gameStarted']
15 0.2659001 ['missionStarted']
158 0.14379299 ['engageResponse']
229 0.18501669 ['uiInteraction']
540 0.15469907 ['adRequest']
929 0.1313952 ['hexagonRemix']
1893 0.14295721 ['playerSwiped']
3052 0.13349694 ['leaderboardProgress']
3291 0.14054212 ['starmapToShip']
6257 0.13372383 ['weaponUpgraded']
7662 0.13457422 ['open_pack_card_given']
8940 0.14834107 ['heroEvolved']
9702 0.13318123 ['pveResume']


In [39]:
#Recommend events for game 3517
recommend(3517)

the new id of game 3517 is: 164
rating vector is: [[-0.02958678  0.1601882   0.16383761 ...  0.08742317  0.03531764
  -0.01973129]]
new recommedations:
1 0.1601882 ['transaction']
2 0.16383761 ['gameEnded']
4 0.3220476 ['newPlayer']
5 0.15975364 ['levelUp']
9 0.28008744 ['clientDevice']
10 0.19038443 ['missionCompleted']
12 0.34213522 ['gameStarted']
15 0.222442 ['missionStarted']
473 0.161226 ['tutTurn3TargetClick']
2331 0.19632903 ['takeHelm']
3664 0.17196904 ['tutorial_select_node_four']
5620 0.15742174 ['purchaseBucksShown']
5955 0.16392037 ['tacticsScreenDisplayed']
6150 0.16323766 ['userAction']
6716 0.1577468 ['tutorialAnalyticsStarted']
7955 0.15834884 ['levelRetry']
8129 0.15641315 ['gup']
8147 0.17845218 ['pvp']
8965 0.17865223 ['levelfail']
9873 0.17794976 ['ws_claimMonthlyCardReward']
