# 概念

1. DeepFM模型包含FM和DNN两部分：FM部分可以抽取low-order特征，DNN部分可以抽取high-order特征，因此无需像Wide&Deep模型那样进行人工特征工程。
2. 由于输入仅为原始特征，而且FM和DNN共享输入向量特征，DeepFM模型训练速度很快。
3. 不同field特征长度不同，但是子网络输出的向量需具有相同维度
4. 利用FM模型的隐特征向量V作为网络权重初始化来获得子网络输出向量

[架構](https://pic2.zhimg.com/80/v2-a893a331c3556046be1be7771b2cb1a9_720w.jpg)

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error 
from sklearn.model_selection import KFold 
from sklearn.preprocessing import OneHotEncoder
import torch 
import torch.nn as nn
# from torch.nn.functional import binary_cross_entropy
torch.manual_seed(0)

<torch._C.Generator at 0x7fa147de5fb0>

In [2]:
# Location of the user/movie rating file read later in the notebook.
user_item_path = '/content/drive/MyDrive/python_data/社群網路與推薦系統/hw3/data/Movielens/user_movie.dat'

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

# Feature

In [3]:
def get_feature(path):
  """Load a tab-separated (id, feature_id) file into a dense multi-hot matrix.

  Both columns are treated as 1-based indices: the result has one row per id
  up to the largest id in the file and one column per feature id up to the
  largest feature id, with a 1 wherever the file lists that (id, feature_id)
  pair and 0 elsewhere.

  Args:
    path: anything `pandas.read_csv` accepts; the file has no header row.

  Returns:
    A float tensor of shape (max id, max feature_id) on the module-level
    `device`.

  Raises:
    ValueError: if the file has no rows or contains missing values.
  """
  names = ['id', 'feature_id']
  df = pd.read_csv(path, sep='\t', names=names)
  n = int(df['id'].max())
  n_feature = int(df['feature_id'].max())
  feature_mat = torch.zeros(size=(n, n_feature), dtype=torch.float, device=device)

  # One vectorised write instead of a Python-level loop over df.iterrows().
  # astype raises on missing values rather than silently producing garbage indices.
  row_idx = torch.as_tensor(df['id'].astype('int64').to_numpy(), device=device) - 1
  col_idx = torch.as_tensor(df['feature_id'].astype('int64').to_numpy(), device=device) - 1
  feature_mat[row_idx, col_idx] = 1
  return feature_mat

In [4]:
# Directory holding the Movielens side-information files.
folder = '/content/drive/MyDrive/python_data/社群網路與推薦系統/hw3/data/Movielens/'

# One multi-hot matrix per feature file, kept in separate lists for the item
# side and the user side.
item_feature_mats = [
  get_feature(path=folder + name + '.dat')
  for name in ['movie_genre']
]
user_feature_mats = [
  get_feature(path=folder + name + '.dat')
  for name in ['user_age', 'user_occupation']
]

In [5]:
# Join each side's feature groups column-wise into a single matrix.
item_feature_mat = torch.cat(item_feature_mats, dim=1)
user_feature_mat = torch.cat(user_feature_mats, dim=1)
print(f'item feature mat: {item_feature_mat.shape}')
print(f'user feature mat: {user_feature_mat.shape}')

# Row counts give the number of users / items.
n_user = user_feature_mat.size(0)
n_item = item_feature_mat.size(0)

# Total input width: item one-hot + item features + user one-hot + user features.
d = sum([n_item, item_feature_mat.size(1), n_user, user_feature_mat.size(1)])
print(f'd: {d}')

# Column width of every individual feature group.
item_feature_len = [mat.size(1) for mat in item_feature_mats]
user_feature_len = [mat.size(1) for mat in user_feature_mats]

item feature mat: torch.Size([1682, 18])
user feature mat: torch.Size([943, 29])
d: 2672


In [6]:
# Parse the rating file. Every line must hold exactly four tab-separated
# fields: user id, item id, rating, and a fourth value that is ignored.
user_idx = []
item_idx = []
y = []
with open(user_item_path, 'r') as f:
  for line in f:  # stream the file instead of materialising readlines()
    user_id, item_id, rating, _ = line.strip().split('\t')
    user_idx.append(int(user_id) - 1)  # ids in the file are 1-based
    item_idx.append(int(item_id) - 1)
    y.append(int(rating))

user_idx = torch.tensor(user_idx, dtype=torch.long, device=device)
item_idx = torch.tensor(item_idx, dtype=torch.long, device=device)
row_idx = torch.arange(len(y), device=device)

# Build the one-hot id blocks for all samples at once rather than allocating
# two tensors per rating line.
user_onehot = torch.zeros(size=(len(y), n_user), dtype=torch.float, device=device)
user_onehot[row_idx, user_idx] = 1
item_onehot = torch.zeros(size=(len(y), n_item), dtype=torch.float, device=device)
item_onehot[row_idx, item_idx] = 1

# Sample layout: [user one-hot | item one-hot | user features | item features]
X = torch.cat([user_onehot, item_onehot, user_feature_mat[user_idx], item_feature_mat[item_idx]], dim=1)
del user_onehot, item_onehot  # free the temporaries; X holds its own copy

y = torch.tensor(y, dtype=torch.float)

# The keyword was renamed from `sparse` to `sparse_output` in newer
# scikit-learn releases; try the new spelling first and fall back.
try:
  encoder = OneHotEncoder(sparse_output=False)
except TypeError:
  encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y.view(-1, 1).numpy())
y_onehot = torch.tensor(y_onehot, dtype=torch.float)  # one column per distinct rating value

In [7]:
# Sanity check: design-matrix shape, then target shape.
print(f'{X.shape}')
print(f'{y.shape}')

torch.Size([100000, 2672])
torch.Size([100000])


# Model

In [8]:
class DeepFM(nn.Module):
  """DeepFM: an FM component and an MLP that share per-field embeddings.

  The input is a wide matrix whose columns are the concatenation of several
  fields. Each field is projected to a k-dimensional vector; those vectors
  feed both the second-order FM interaction term and the MLP. The final score
  is sigmoid(first-order linear term + FM term + MLP output).

  Args:
    fields: sequence with the column width of every field, in input order.
    k: embedding size per field.
    hidden_dims: widths of the MLP hidden layers (may be empty).
    dropout: dropout probability after every hidden layer.
    n_class: number of outputs produced by the MLP.
  """

  def __init__(self, fields, k=5, hidden_dims=(16, 16), dropout=0.2, n_class=5):
    super().__init__()
    self.fields = fields
    self.k = k
    # Copy so the module never aliases a caller-owned or shared default sequence.
    self.hidden_dims = list(hidden_dims)

    # --- FM ---
    d = sum(fields)
    self.FM_w = nn.Linear(d, 1, bias=False)  # first-order term
    # One [field_width, k] projection per field. Created on the default device
    # like every other parameter; `.to(...)` on the module moves them together.
    self.embedding_ws = nn.ParameterList(
      [nn.Parameter(torch.randn(size=(width, k), dtype=torch.float)) for width in fields]
    )

    # --- DNN ---
    layers = []
    input_dim = k * len(fields)
    for hidden_dim in self.hidden_dims:
      layers.append(nn.Linear(input_dim, hidden_dim))
      layers.append(nn.BatchNorm1d(hidden_dim))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(p=dropout))
      input_dim = hidden_dim

    # Use the tracked width so an empty `hidden_dims` still yields a valid head.
    layers.append(nn.Linear(input_dim, n_class))
    self.dnn = nn.Sequential(*layers)

  def Dense_Embedding(self, X):
    """Project every field's column slice of X to k dims; returns [n, n_fields, k]."""
    embedded = []
    start = 0
    for width, weight in zip(self.fields, self.embedding_ws):
      embedded.append(torch.matmul(X[:, start:start + width], weight))  # [n, k]
      start += width
    return torch.stack(embedded, dim=1)

  def FM(self, X):
    """Second-order interaction term over field embeddings X [n, n_fields, k]; returns [n, 1].

    Computes 0.5 * sum_k((sum_f e_f)^2 - sum_f e_f^2).
    """
    square_of_sum = torch.sum(X, dim=1) ** 2  # [n, k]
    sum_of_square = torch.sum(X ** 2, dim=1)  # [n, k]
    return 0.5 * torch.sum(square_of_sum - sum_of_square, dim=1, keepdim=True)

  def DNN(self, X):
    """Flatten the field embeddings to [n, k * n_fields] and run the MLP; returns [n, n_class]."""
    flat = X.reshape(-1, self.k * len(self.fields))
    return self.dnn(flat)

  def forward(self, X):
    """Return sigmoid scores of shape [n, n_class] for the wide input X [n, sum(fields)]."""
    dense_X = self.Dense_Embedding(X)
    FM_y = self.FM(dense_X)
    DNN_y = self.DNN(dense_X)
    # The two [n, 1] FM terms broadcast across the n_class MLP outputs.
    y = self.FM_w(X) + FM_y + DNN_y
    return torch.sigmoid(y)

# Training

In [9]:
# Width of every input field: user one-hot, item one-hot, then each user and
# item feature group.
fields = [n_user, n_item, *user_feature_len, *item_feature_len]
print(fields)

# Hyperparameters.
k = 10                          # embedding size per field
hidden_dims = [128, 128, 128]   # MLP hidden widths
lr = 1e-2                       # Adam learning rate
n_epoch = 100                   # passes over the folds
p = 0.3                         # dropout probability

model = DeepFM(fields=fields, k=k, hidden_dims=hidden_dims, dropout=p).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
kf = KFold(n_splits=5)

[943, 1682, 8, 21, 18]


In [10]:
# Each epoch walks the K folds once: one full-batch optimiser step on the
# fold's training split, then an RMSE measurement on its held-out split.
# NOTE(review): `model` and `optimizer` are not re-created per fold, so over
# one pass of the folds every sample is trained on; the RMSE printed below is
# therefore not a true held-out estimate.
for epoch in range(n_epoch):

  RMSEs = []
  for train_indice, test_indice in kf.split(X):
    train_X, test_X = X[train_indice], X[test_indice]

    '''training process'''
    model.train()
    optimizer.zero_grad()
    output = model(X=train_X)
    # Targets live on the CPU, so the predictions are moved there for the loss.
    loss = criterion(output.cpu(), y_onehot[train_indice])
    loss.backward()
    optimizer.step()

    '''testing process'''
    model.eval()
    with torch.no_grad():
      output = model(X=test_X)
      out = output.cpu()
      # Predicted rating = index of the highest-scoring class, shifted to 1-based.
      out_rank = torch.argmax(out, dim=1) + 1
      # Take the root explicitly: the `squared=False` switch of
      # mean_squared_error is deprecated in newer scikit-learn releases.
      rmse = np.sqrt(mean_squared_error(out_rank, y[test_indice]))
      RMSEs.append(rmse)

  if ((epoch + 1) % 10) == 0:
    print(f'epoch: {epoch+1}')
    print(f'training loss: {round(loss.item(), 2)}')  # loss of the last fold only
    print(f'avg RMSEs: {round(np.mean(RMSEs), 2)}')

epoch: 10
training loss: 0.96
avg RMSEs: 1.2
epoch: 20
training loss: 0.63
avg RMSEs: 1.2
epoch: 30
training loss: 0.54
avg RMSEs: 1.17
epoch: 40
training loss: 0.5
avg RMSEs: 1.12
epoch: 50
training loss: 0.46
avg RMSEs: 1.06
epoch: 60
training loss: 0.44
avg RMSEs: 1.02
epoch: 70
training loss: 0.43
avg RMSEs: 1.0
epoch: 80
training loss: 0.42
avg RMSEs: 0.99
epoch: 90
training loss: 0.41
avg RMSEs: 0.99
epoch: 100
training loss: 0.4
avg RMSEs: 0.97


In [11]:
# First ten predicted ratings from the last evaluated fold.
out_rank[:10]

tensor([4, 1, 4, 3, 4, 5, 5, 2, 5, 3])

In [12]:
# First ten true ratings of the last evaluated fold, for comparison.
y[test_indice][:10]

tensor([4., 1., 1., 2., 4., 5., 3., 2., 5., 3.])

In [13]:
# for name, param in model.named_parameters():
#     if param.requires_grad:
#         print(name)