In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import tqdm
from torchsummary import summary
import os


In [None]:
import os
# Debugging aid: with CUDA_LAUNCH_BLOCKING=1 kernel launches run synchronously, so a
# device-side error surfaces at the call that caused it (at the cost of speed).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Restrict this process to GPU 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; data_path (next cell) points inside this mount.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Drive folder used for the input CSV and for every file this notebook writes
# (prepared samples, model checkpoints, pickled predictions).
data_path = '/content/drive/MyDrive/LPOINT'
# Raw online-behaviour log of the competition data set.
act = pd.read_csv(os.path.join(data_path, '제6회 L.POINT Big Data Competition-분석용데이터-01.온라인 행동 정보.csv'))
act

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,clnt_id,sess_id,hit_seq,action_type,biz_unit,sess_dt,hit_tm,hit_pss_tm,trans_id,sech_kwd,tot_pag_view_ct,tot_sess_hr_v,trfc_src,dvc_ctg_nm
0,7809,1,8,5,A03,20190913,01:16,2571103,,,34.0,2663.0,DIRECT,
1,7809,1,4,2,A03,20190913,01:14,2485909,,,34.0,2663.0,DIRECT,
2,7809,1,11,5,A03,20190913,01:17,2646597,,,34.0,2663.0,DIRECT,
3,7809,1,1,2,A03,20190913,00:46,788304,,,34.0,2663.0,DIRECT,
4,7809,1,9,5,A03,20190913,01:17,2617609,,,34.0,2663.0,DIRECT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3196357,31040,1,1,0,A02,20190815,03:47,13525,,다이슨 에어랩,4.0,62.0,unknown,mobile_web
3196358,4129,1,2,0,A02,20190721,01:22,250099,,여성메탈시계,3.0,250.0,unknown,mobile_web
3196359,4129,1,1,0,A02,20190721,01:18,1525,,여성메탈시계,3.0,250.0,unknown,mobile_web
3196360,54403,3,1,0,A02,20190731,20:14,16905,,비비고왕교자,3.0,68.0,unknown,mobile_app


In [None]:
# Order rows by client, session and hit number so that every (clnt_id, sess_id)
# group is in hit_seq order before sequences are cut from its tail.
act = act.sort_values(by=['clnt_id', 'sess_id', 'hit_seq'])
act

Unnamed: 0,clnt_id,sess_id,hit_seq,action_type,biz_unit,sess_dt,hit_tm,hit_pss_tm,trans_id,sech_kwd,tot_pag_view_ct,tot_sess_hr_v,trfc_src,dvc_ctg_nm
3102890,1,1,1,0,A01,20190911,16:14,11880,,과일선물세트,14.0,124.0,unknown,mobile_app
3136925,1,1,2,0,A01,20190911,16:15,22432,,과일선물세트 백화점,14.0,124.0,unknown,mobile_app
3065714,1,1,3,0,A01,20190911,16:15,36140,,과일바구니,14.0,124.0,unknown,mobile_app
3091872,1,2,1,0,A01,20190922,14:09,41584,,초등가을잠바,45.0,424.0,unknown,mobile_app
3142610,1,2,2,0,A01,20190922,14:10,56113,,초등가을점퍼,45.0,424.0,unknown,mobile_app
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3148589,72428,3,3,0,A01,20190914,21:50,839771,,페레가모가방,113.0,1295.0,unknown,mobile_app
3148588,72428,4,1,0,A01,20190914,23:42,243555,,펜디가방,35.0,418.0,unknown,mobile_app
3158259,72428,5,1,0,A01,20190918,06:37,12568,,라인에디션블라우스,14.0,24.0,unknown,mobile_app
1759477,72429,1,1,1,A03,20190919,22:09,839064,,,9.0,984.0,DIRECT,mobile_web


In [None]:
np.unique(act.action_type, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7]),
 array([651638, 307998, 655266, 596765, 172240, 750530,  56249,   5676]))

In [None]:
# tokenizing: integer codes for each categorical column, plus the mapping from
# raw action_type codes to the target classes
token = {
# action_type vocabulary; the keys are the Korean action names (English gloss on the right)
'action_type': {'검색': 0,  # search
                '제품 목록': 1,  # product list
                '제품 세부정보': 2,  # product details
                '장바구니 추가': 3,  # add to cart
                '장바구니 삭제': 4,  # remove from cart
                '결제 시도': 5,  # payment attempt
                '구매 완료': 6,  # purchase completed
                '구매 환불': 7},  # purchase refunded
# no vocabulary is defined for hit_seq
'hit_seq': None,
'biz_unit': {'A01': 0, 
             'A02': 1, 
             'A03': 2},
'trfc_src': {'unknown': 0, 
             'DIRECT': 1, 
             'PUSH': 2, 
             'PORTAL_1': 3, 
             'WEBSITE': 4, 
             'PORTAL_2': 5,
             'PORTAL_3': 6},
'dvc_ctg_nm': {'unknown': 0,
               'mobile_app': 1,
               'mobile_web': 2,
               'PC': 3},
# Alternative target grouping, currently disabled:
# 'target_type': {0: 2,
#                 1: 0, 
#                 2: 2,
#                 3: 2,
#                 4: 1,
#                 5: 2,
#                 6: 1,
#                 7: 1}
         

# Seungjae's label scheme (raw action_type code -> target class):
#   class 0: search, product list, product details, payment attempt
#   class 1: add to cart, purchase completed
#   class 2: remove from cart, purchase refunded
'target_type' : {0:0,
          1:0,
          2:0,
          5:0,
          3:1,
          6:1,
          4:2,
          7:2}
}

In [None]:
# Replace the string categories of biz_unit and trfc_src with their integer codes.
for column in ('biz_unit', 'trfc_src'):
    act[column] = act[column].replace(token[column])

# Missing device categories become 'unknown' before being encoded.
device_names = act['dvc_ctg_nm'].replace(np.nan, 'unknown')
act['dvc_ctg_nm'] = device_names.replace(token['dvc_ctg_nm'])

In [None]:
# weekday: day of week of the session date (Monday=0 ... Sunday=6)
sess_date = pd.to_datetime(act.sess_dt.astype(str), format='%Y%m%d')
# Vectorised datetime accessor instead of a Python lambda applied row by row.
# The cast keeps the column int64, as the per-row version produced.
weekday = sess_date.dt.weekday.astype('int64')
act['weekday'] = weekday

In [None]:
# hour: the first two characters of hit_tm, converted to an integer
from datetime import datetime


def _hour_of(hit_tm):
    """Return the leading two characters of ``hit_tm`` as an int."""
    return int(hit_tm[:2])


act['hour'] = act['hit_tm'].map(_hour_of)

In [None]:
# Categorical columns. Used to select columns before grouping into sessions and,
# from index 1 onwards, as the per-sample side-feature columns of the sample frame.
cat_features = ['action_type', 'hit_seq', 'biz_unit', 'trfc_src', 'dvc_ctg_nm', 'weekday', 'hour']
# Numeric columns; not referenced by any other cell visible in this notebook.
numeric_features = ['hit_pss_tm']

In [None]:
# The embedding layers are fitted on a next-action task: the action_type
# sequence of n_times consecutive hits is used to predict the action_type of
# the following predict_period hit(s).
# A session therefore needs at least n_times + predict_period rows.

n_times = 10
predict_period = 1

X, X_no_seq, y = [], [], []
def make_seq(x, target_col='action_type'):
    """Cut one training sample from the tail of a session and store it.

    Appends to the module-level lists ``X`` (history), ``X_no_seq`` (side
    features) and ``y`` (target); nothing is returned.

    Args:
        x: DataFrame holding the rows of one session, already in hit order.
        target_col: column whose values form both the history and the target.

    Returns:
        None in every case. Sessions shorter than ``n_times + predict_period``
        rows are skipped without touching the lists.
    """
    window = n_times + predict_period
    if len(x) < window:
        return None

    targets = x[target_col]
    side_cols = ['hit_seq', 'biz_unit', 'trfc_src', 'dvc_ctg_nm', 'weekday', 'hour']

    # History: the n_times values immediately before the predicted step(s).
    X.append(targets.iloc[-window:-predict_period])
    # Side features and target are taken from the predicted step(s) themselves.
    X_no_seq.append(x[side_cols].iloc[-predict_period:])
    y.append(targets.iloc[-predict_period:])

In [None]:
# Build one sample per session, i.e. per (clnt_id, sess_id) group.
# make_seq appends to the module-level X / X_no_seq / y lists, so they are
# emptied first; otherwise re-running this cell would add every sample again.
X.clear()
X_no_seq.clear()
y.clear()

(act[['clnt_id', 'sess_id']+cat_features]
 .groupby(['clnt_id', 'sess_id'])
 .apply(make_seq))

In [None]:
# Start the sample frame: one row per collected history, stored as a plain list.
action_histories = [history.values.tolist() for history in X]
df = pd.DataFrame()
df['action_seq'] = pd.Series(action_histories)

In [None]:
# Add each side feature, taking the first row of every per-sample side-feature frame.
for name in cat_features[1:]:
  df[name] = [side[name].values[0] for side in X_no_seq]

In [None]:
# Raw target: the first value of each collected target series.
df['target'] = [trg.values[0] for trg in y]

In [None]:
df.head()

Unnamed: 0,action_seq,hit_seq,biz_unit,trfc_src,dvc_ctg_nm,weekday,hour,target
0,"[5, 5, 5, 5, 6, 3, 5, 5, 5, 5]",43,2,1,0,0,0,5
1,"[5, 2, 2, 5, 5, 5, 5, 5, 5, 5]",68,2,1,0,2,21,6
2,"[5, 5, 5, 5, 5, 5, 6, 1, 2, 1]",20,2,1,0,3,22,2
3,"[0, 1, 2, 5, 5, 5, 1, 2, 5, 0]",29,2,1,0,5,7,0
4,"[2, 0, 0, 0, 1, 2, 0, 0, 5, 2]",12,2,1,0,5,8,5


In [None]:
# Collapse the raw action_type codes into the target classes of token['target_type'].
# The labels are recomputed from the raw targets in `y` instead of from
# df['target'], which makes the cell idempotent: applying the mapping to labels
# that were already mapped would silently turn classes 1 and 2 into class 0.
label_map = token['target_type']
df['target'] = [label_map[int(trg.values[0])] for trg in y]

In [None]:
df.target.unique()

array([2, 1, 0])

In [None]:
# Persist the prepared samples. The frame index is written as the first CSV column,
# and action_seq (a list per row) is stored as text, so it reads back as a string.
df.to_csv(os.path.join(data_path, 'transformer_df3.csv'))

## data loading

In [None]:
# Reload the prepared samples and drop the first CSV column (the saved index).
df = pd.read_csv(os.path.join(data_path, 'transformer_df3.csv')).iloc[:,1:]

In [None]:
df.head()

Unnamed: 0,action_seq,hit_seq,biz_unit,trfc_src,dvc_ctg_nm,weekday,hour,target
0,"[5, 5, 5, 5, 6, 3, 5, 5, 5, 5]",43,2,1,0,0,0,0
1,"[5, 2, 2, 5, 5, 5, 5, 5, 5, 5]",68,2,1,0,2,21,1
2,"[5, 5, 5, 5, 5, 5, 6, 1, 2, 1]",20,2,1,0,3,22,0
3,"[0, 1, 2, 5, 5, 5, 1, 2, 5, 0]",29,2,1,0,5,7,0
4,"[2, 0, 0, 0, 1, 2, 0, 0, 5, 2]",12,2,1,0,5,8,0


In [None]:
df.biz_unit.unique()

array([2, 0, 1])

In [None]:
df.action_seq[0]

'[5, 5, 5, 5, 6, 3, 5, 5, 5, 5]'

In [None]:
# df.biz_unit = df.biz_unit-1

# After a CSV round-trip action_seq comes back as a string such as
# '[5, 5, 5, 5, 6, 3, 5, 5, 5, 5]'; parse it back into a list of ints.
# (The original note said to run this for transformer_df2.csv; the string shown
# for the reloaded frame means it is needed for any frame read from CSV.)
# Values that are already lists are left untouched, so re-running is safe.
import ast
df_action_seq = df.action_seq.apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)
df['action_seq'] = df_action_seq

In [None]:
df.head()

Unnamed: 0,action_seq,hit_seq,biz_unit,trfc_src,dvc_ctg_nm,weekday,hour,target
0,"[5, 5, 5, 5, 6, 3, 5, 5, 5, 5]",43,2,1,0,0,0,2
1,"[5, 2, 2, 5, 5, 5, 5, 5, 5, 5]",68,2,1,0,2,21,1
2,"[5, 5, 5, 5, 5, 5, 6, 1, 2, 1]",20,2,1,0,3,22,2
3,"[0, 1, 2, 5, 5, 5, 1, 2, 5, 0]",29,2,1,0,5,7,2
4,"[2, 0, 0, 0, 1, 2, 0, 0, 5, 2]",12,2,1,0,5,8,2


In [None]:
df.target.unique()

array([0, 1, 2])

In [None]:
df.trfc_src.unique()

array([1, 2, 0, 5, 6, 4, 3])

In [None]:
from sklearn.model_selection import train_test_split

# Random 70/30 split of the samples; random_state makes the split repeatable.
train_df, test_df = train_test_split(df,
                                     train_size=0.7, 
                                     random_state=96)

In [None]:
print(train_df.shape)
print(test_df.shape)

(55695, 8)
(23870, 8)


In [None]:
import pandas as pd
import torch
import torch.utils.data as data
from torchvision import transforms
import ast
from torch.nn.utils.rnn import pad_sequence

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over the prepared sample frame.

    The base class is referenced by its full path rather than through the
    ``data`` alias, because later cells rebind the global name ``data`` to a list
    of tensors; re-running this cell after that would otherwise fail.

    Args:
        df: frame with the columns ``action_seq`` (list of ints), ``biz_unit``,
            ``trfc_src``, ``dvc_ctg_nm``, ``weekday``, ``hour`` and ``target``.
        test: stored on the instance as ``self.test``; not read by any method
            of this class.
    """

    def __init__(
        self, df, test=False
    ):
        self.frame = df
        self.test = test

    def __len__(self):
        """Number of samples (rows of the frame)."""
        return len(self.frame)

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx``.

        Returns:
            Tuple ``(history, biz_unit, trfc_src, dvc_ctg_nm, weekday, hour,
            target)``. ``history`` is a LongTensor built from ``action_seq``;
            the other entries are the row's scalar values. ``hit_seq`` is not
            part of the sample.
        """
        # `row`, not `data`, so the module alias of the same name is not shadowed.
        row = self.frame.iloc[idx]
        history = torch.LongTensor(row.action_seq)

        return (
            history,
            row.biz_unit,
            row.trfc_src,
            row.dvc_ctg_nm,
            row.weekday,
            row.hour,
            row.target,
        )

In [None]:
# Wrap the two splits so they can be batched by a DataLoader.
train_dataset = Dataset(train_df)
test_dataset = Dataset(test_df)

In [None]:
# Training batches are reshuffled every epoch; with shuffle=False the model saw
# exactly the same batches in the same order in every epoch.
train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=128,
            shuffle=True,
            num_workers=os.cpu_count(),
        )

# Evaluation keeps a fixed order, so predictions collected batch by batch stay
# aligned with the rows of test_df.
test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=128,
            shuffle=False,
            num_workers=os.cpu_count(),
        )

In [None]:
# Sanity check: print the first batch produced by the test loader, then stop.
for x in test_loader:
  print(x)
  break

[tensor([[1, 2, 5,  ..., 1, 2, 0],
        [5, 5, 5,  ..., 6, 5, 2],
        [1, 2, 5,  ..., 5, 5, 6],
        ...,
        [3, 3, 4,  ..., 3, 3, 3],
        [5, 5, 5,  ..., 5, 5, 5],
        [2, 5, 5,  ..., 2, 2, 5]]), tensor([2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2,
        2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 2, 2, 1, 2, 0, 2, 2, 2, 0, 1, 2,
        2, 1, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 1]), tensor([2, 2, 1, 0, 1, 1, 2, 1, 1, 1, 2, 0, 1, 1, 1, 0, 0, 1, 2, 0, 1, 1, 1, 1,
        1, 1, 0, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 2, 1, 0, 2, 0, 1, 1, 1, 1, 1, 5, 1, 1, 1,
        1, 1, 1, 2, 1, 0, 2, 2, 1, 0, 1, 1, 2, 4, 1, 0, 1, 0, 1, 2, 1, 0, 0, 1,
        2, 0, 0, 1, 0, 1, 

## Transformer 

In [None]:
# Length of the action history. BST adds one position embedding per step to the
# history, so this has to equal the history length the samples were built with.
sequence_length = 10
# Number of passes over the training split.
num_epochs = 100

# First CUDA device when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [None]:
import pandas as pd
import torch
from tqdm import tqdm
import math
from urllib.request import urlretrieve
from zipfile import ZipFile
import os
import torch.nn as nn
import numpy as np

In [None]:
class BST(nn.Module):
    """Transformer-encoder classifier over an action history plus side features.

    The action history is embedded (action + position embeddings), passed through
    one ``nn.TransformerEncoderLayer``, flattened, concatenated with the embedded
    side features and classified into 3 classes by an MLP.

    Reads the module-level names ``token`` (vocabularies) and ``sequence_length``.

    Fix relative to the first version: the encoder layer is sequence-first by
    default, but it was fed a batch-first tensor, so self-attention ran across the
    samples of a batch instead of across the steps of each history. The input is
    now transposed around the encoder call. Parameter names and shapes are
    unchanged, but weights trained with the old forward pass were fitted to the
    wrong attention pattern and should be retrained.

    Args:
        args: unused; kept for interface compatibility.
    """

    def __init__( self, args=None):
        super(BST, self).__init__()

        # Width of the action/position embeddings; divisible by the 3 attention heads.
        d_model = 63

        def _side_embedding(n_categories):
            # Side features use an embedding width of floor(sqrt(cardinality)).
            return nn.Embedding(n_categories, int(math.sqrt(n_categories)))

        self.embeddings_biz_unit = _side_embedding(len(token['biz_unit']))
        self.embeddings_trfc_src = _side_embedding(len(token['trfc_src']))
        self.embeddings_dvc_ctg_nm = _side_embedding(len(token['dvc_ctg_nm']))
        self.embeddings_weekday = _side_embedding(7)
        self.embeddings_hour = _side_embedding(24)

        ## action type sequence
        self.embeddings_action_type = nn.Embedding(len(token['action_type']), d_model)
        self.embeddings_position = nn.Embedding(sequence_length, d_model)

        # Network
        # (attribute name spelled as in the first version so checkpoints keep their keys)
        self.transfomerlayer = nn.TransformerEncoderLayer(d_model, 3, dropout=0.2)

        side_dim = sum(
            emb.embedding_dim
            for emb in (
                self.embeddings_biz_unit,
                self.embeddings_trfc_src,
                self.embeddings_dvc_ctg_nm,
                self.embeddings_weekday,
                self.embeddings_hour,
            )
        )
        # Flattened encoder output + side features; 10 * 63 + 11 = 641 with the
        # vocabularies and sequence_length defined in this notebook.
        linear_in = sequence_length * d_model + side_dim
        self.linear = nn.Sequential(
            nn.Linear(linear_in, 1024),
            nn.LeakyReLU(),
            nn.Linear(1024, 512),
            nn.LeakyReLU(),
            nn.Linear(512, 256),
            nn.LeakyReLU(),
            nn.Linear(256, 3),
        )

    def encode_input(self,inputs):
        """Embed one batch.

        Args:
            inputs: sequence ``(history, biz_unit, trfc_src, dvc_ctg_nm, weekday,
                hour)`` of integer tensors; ``history`` is indexed per step, the
                others per sample.

        Returns:
            ``(transfomer_features, features)``: the history embeddings with the
            position embeddings added, and the side-feature embeddings
            concatenated along dim 1.
        """
        history, biz_unit, trfc_src, dvc_ctg_nm, weekday, hour = inputs

        # sequence
        history = self.embeddings_action_type(history)

        # Position ids live on the same device as the embedded history, so the
        # model does not depend on the global `device`.
        positions = torch.arange(
            0, sequence_length, 1, dtype=torch.long, device=history.device
        )
        positions = self.embeddings_position(positions)

        transfomer_features = history + positions

        # no sequence
        biz_unit = self.embeddings_biz_unit(biz_unit)
        trfc_src = self.embeddings_trfc_src(trfc_src)
        dvc_ctg_nm = self.embeddings_dvc_ctg_nm(dvc_ctg_nm)
        weekday = self.embeddings_weekday(weekday)
        hour = self.embeddings_hour(hour)

        features = torch.cat((biz_unit, trfc_src, dvc_ctg_nm, weekday, hour), 1)

        return transfomer_features, features

    def forward(self, batch):
        """Return the class logits, one row of 3 values per sample."""
        transfomer_features, features = self.encode_input(batch)

        # The encoder layer (default batch_first=False) attends along dim 0.
        # Move the sequence axis there and back, so attention runs over the
        # steps of each history rather than over the samples of the batch.
        transformer_output = self.transfomerlayer(transfomer_features.transpose(0, 1))
        transformer_output = transformer_output.transpose(0, 1)
        transformer_output = torch.flatten(transformer_output, start_dim=1)

        # Concat with other features
        features = torch.cat((transformer_output, features), dim=1)

        output = self.linear(features)
        return output

In [None]:
# Instantiate the classifier and move its parameters to the selected device.
model = BST()
model = model.to(device)

In [None]:
# Multi-class cross-entropy on the logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

In [None]:
# Train the model. Every 5th epoch the test split is scored and the model is
# saved whenever the accuracy beats the best value seen so far.
# NOTE(review): the checkpoint is selected on the test split, so the reported test
# accuracy of that checkpoint is optimistic; a separate validation split would avoid this.
total_step = len(train_loader)
# Kept outside the epoch loop: when it was reset to 0 at the start of every epoch
# (and never updated), each evaluation overwrote the "best" checkpoint.
best_model_acc = 0
for epoch in range(num_epochs):
    # The evaluation block below switches the model to eval mode. Switch back,
    # otherwise dropout stays disabled for every epoch after the first one.
    model.train()
    train_loss = 0.0
    for i, all_data in enumerate(train_loader):
        # `inputs` rather than `data`: the global name `data` is the
        # torch.utils.data alias imported earlier in this notebook.
        inputs, labels = all_data[:-1], all_data[-1]

        inputs = [t.to(device) for t in inputs]
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels.to(torch.int64))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 1000 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
        # Weight the batch-mean loss by the batch size to get a per-sample average below.
        train_loss += loss.item()*inputs[0].size(0)

    train_loss = train_loss/len(train_loader.sampler)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}')

    if epoch % 5 == 0:
        # Test the model
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for all_data in test_loader:
                inputs, labels = all_data[:-1], all_data[-1]

                inputs = [t.to(device) for t in inputs]
                labels = labels.to(device)

                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            test_acc = 100 * correct / total
            if test_acc > best_model_acc:
                best_model_acc = test_acc
                torch.save(model, os.path.join(data_path, 'Transformer_seq_10_three_target_best_model.pt'))

            print('Test Accuracy of the Transformer model: {} %'.format(test_acc))

Epoch [1/100], Loss: 0.3842
Test Accuracy of the Transformer model: 84.47842480100545 %
Epoch [2/100], Loss: 0.3566
Epoch [3/100], Loss: 0.3506
Epoch [4/100], Loss: 0.3457
Epoch [5/100], Loss: 0.3404
Epoch [6/100], Loss: 0.3342
Test Accuracy of the Transformer model: 84.50775031420193 %
Epoch [7/100], Loss: 0.3267
Epoch [8/100], Loss: 0.3189
Epoch [9/100], Loss: 0.3093
Epoch [10/100], Loss: 0.2977
Epoch [11/100], Loss: 0.2851
Test Accuracy of the Transformer model: 83.50230414746544 %
Epoch [12/100], Loss: 0.2724
Epoch [13/100], Loss: 0.2587
Epoch [14/100], Loss: 0.2454
Epoch [15/100], Loss: 0.2319
Epoch [16/100], Loss: 0.2215
Test Accuracy of the Transformer model: 82.9576874738165 %
Epoch [17/100], Loss: 0.2114
Epoch [18/100], Loss: 0.2005
Epoch [19/100], Loss: 0.1910
Epoch [20/100], Loss: 0.1836
Epoch [21/100], Loss: 0.1778
Test Accuracy of the Transformer model: 82.48847926267281 %
Epoch [22/100], Loss: 0.1688
Epoch [23/100], Loss: 0.1653
Epoch [24/100], Loss: 0.1665
Epoch [25/100]

In [None]:
# Train the model (no intermediate evaluation).
# This continues from the current state of `model` and `optimizer`; it does not
# re-initialise them.
total_step = len(train_loader)
for epoch in range(num_epochs):
    # An earlier evaluation may have left the model in eval mode; training in
    # that state would run without dropout.
    model.train()
    train_loss = 0.0
    for i, all_data in enumerate(train_loader):
        # `inputs` rather than `data`: the global name `data` is the
        # torch.utils.data alias imported earlier in this notebook.
        inputs, labels = all_data[:-1], all_data[-1]

        inputs = [t.to(device) for t in inputs]
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels.to(torch.int64))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 1000 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
        # Weight the batch-mean loss by the batch size to get a per-sample average below.
        train_loss += loss.item()*inputs[0].size(0)

    train_loss = train_loss/len(train_loader.sampler)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}')

In [None]:
# Save the whole model object (not just its state_dict) to Drive.
torch.save(model, os.path.join(data_path, 'Transformer_seq_10_three_target_embedding.pt'))

In [None]:
# Reload the model object saved above.
# NOTE(review): this unpickles a full Python object, so only load files you trust,
# and the BST class must already be defined in the session. Depending on the
# installed PyTorch version an explicit weights_only / map_location argument may
# be needed - confirm against the runtime.
model = torch.load(os.path.join(data_path, 'Transformer_seq_10_three_target_embedding.pt'))

In [None]:
# Accuracy of the model on the training split.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for all_data in train_loader:
        # `inputs` rather than `data`: the global name `data` is the
        # torch.utils.data alias imported earlier in this notebook.
        inputs, labels = all_data[:-1], all_data[-1]

        inputs = [t.to(device) for t in inputs]
        labels = labels.to(device)

        outputs = model(inputs)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Train Accuracy of the Transformer model: {} %'.format(100 * correct / total)) 

Train Accuracy of the Transformer model: 88.14794864889129 %


In [None]:
# Accuracy of the model on the test split; the predicted classes are also
# collected in predict_list, in test_loader order.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    predict_list = []
    for all_data in test_loader:
        # `inputs` rather than `data`: the global name `data` is the
        # torch.utils.data alias imported earlier in this notebook.
        inputs, labels = all_data[:-1], all_data[-1]

        inputs = [t.to(device) for t in inputs]
        labels = labels.to(device)

        outputs = model(inputs)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        predict_list += predicted.cpu().tolist()

    print('Test Accuracy of the Transformer model: {} %'.format(100 * correct / total)) 

Test Accuracy of the Transformer model: 81.63803937997487 %


In [None]:
test_df.head()

Unnamed: 0,action_seq,hit_seq,biz_unit,trfc_src,dvc_ctg_nm,weekday,hour,target
31176,"[1, 2, 5, 5, 1, 2, 0, 1, 2, 0]",41,2,2,0,6,14,2
18522,"[5, 5, 5, 3, 5, 3, 5, 6, 5, 2]",52,2,2,0,4,9,2
73883,"[1, 2, 5, 5, 5, 5, 5, 5, 5, 6]",21,2,1,0,2,20,2
22314,"[2, 2, 1, 2, 2, 1, 2, 2, 1, 2]",17,1,0,2,6,12,2
29136,"[0, 3, 5, 4, 5, 4, 5, 4, 5, 1]",17,2,1,0,6,0,2


In [None]:
np.unique(test_df.target, return_counts=True)   # ground-truth label counts (Junwoo's label scheme)

(array([0, 1, 2]), array([  233,  6296, 17341]))

In [None]:
np.unique(predict_list, return_counts=True) # predicted label counts (Junwoo's label scheme)

(array([0, 1, 2]), array([  157,  6594, 17119]))

In [None]:
import pickle
# Write the collected test predictions (a list of class ids) to Drive.
with open(os.path.join(data_path, "승재님 y predict.pickle"), "wb") as f:
  pickle.dump(predict_list, f)

In [None]:
np.unique(test_df.target, return_counts=True)   # ground-truth label counts (Seungjae's label scheme)

(array([0, 1, 2]), array([15565,  7813,   492]))

In [None]:
np.unique(predict_list, return_counts=True) # predicted label counts (Seungjae's label scheme)

(array([0, 1, 2]), array([15838,  7720,   312]))

# tmp

In [None]:
class Transformer(nn.Module):
    """Scratch transformer classifier over several categorical features per step.

    Each categorical feature gets its own embedding table. The per-step
    embeddings are concatenated, passed through one ``nn.TransformerEncoderLayer``,
    flattened, concatenated with ``user_features`` and classified by an MLP.

    NOTE(review): this class looks unfinished; the points below need confirming
    before it can be used:
      * The default argument values are read from module-level names when the
        ``class`` statement runs, so ``n_classes``, ``output_dim``,
        ``embedding_dim``, ``hidden_size`` and ``num_layers`` must already exist.
      * ``forward`` reads ``user_features`` and ``device`` from module scope.
      * ``d_model`` is fixed at 320 with 3 heads; PyTorch multi-head attention
        requires the model width to be divisible by the number of heads, and the
        concatenated embedding width must equal ``d_model``.
      * The first Linear layer's input size (589) is hard-coded.
      * ``x`` is passed to the encoder layer as built, with no transpose; the
        layer's default layout is sequence-first.

    Args:
        n_classes: iterable with the vocabulary size of each categorical feature.
        n_times: stored on the instance; not used by the visible methods.
        output_dim: number of output logits.
        embedding_dim: width of every per-feature embedding.
        hidden_size: stored on the instance; not used by the visible methods.
        num_layers: stored on the instance; not used by the visible methods.
    """

    def __init__(self, n_classes=n_classes, n_times=n_times,
                 output_dim=output_dim, 
                 embedding_dim=embedding_dim, 
                 hidden_size=hidden_size, 
                 num_layers=num_layers):
        # Was super(Act2Vec, self).__init__(): `self` is not an instance of
        # Act2Vec, so constructing the model raised.
        super().__init__()

        self.n_classes = n_classes
        self.n_times = n_times
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_dim = output_dim

        # One embedding table per categorical feature; index 0 is the padding id.
        self.embed_layers = nn.ModuleList([nn.Embedding(num_embeddings=n_class,
                                                        embedding_dim=self.embedding_dim,
                                                        padding_idx=0) 
                                            for n_class in self.n_classes])
        
        # Network
        d_model = 320
        self.transfomerlayer = nn.TransformerEncoderLayer(d_model, 3, dropout=0.2)   # d_model: the number of expected features in the input
        self.linear = nn.Sequential(
            nn.Linear(
                589,     # transformer output + user features 
                1024,
            ),
            nn.LeakyReLU(),
            nn.Linear(1024, 512),
            nn.LeakyReLU(),
            nn.Linear(512, 256),
            nn.LeakyReLU(),
            nn.Linear(256, output_dim),
        )
    
    def forward(self, x):
        """Return the logits for ``x``.

        ``x`` is indexed as ``x[:, :, i]``, i.e. a 3-D tensor whose last axis
        selects the categorical feature handled by the i-th embedding table.
        """
        x = x.type(torch.LongTensor).to(device)
        embeddeds = [layer(x[:, :, i]) for i, layer in enumerate(self.embed_layers)]

        # Concatenate the per-feature embeddings along the last axis.
        x = torch.cat(embeddeds, dim=-1)
        transformer_output = self.transfomerlayer(x)

        transformer_output = torch.flatten(transformer_output,start_dim=1)

        # NOTE(review): `user_features` is not a parameter; it is looked up in module scope.
        features = torch.cat((transformer_output, user_features),dim=1)

        output = self.linear(features)

        return output
