# Params:

* w0 [1,1]
* n: n_fields
* z_i: [k+1, 1], the embedding of the i-th field
* W_i: [k+1, d_i], where d_i = end_i - start_i + 1 is the width of the i-th field

# 概念
* 針對每個field (每組特徵，如 user_age, user_occupation，即 n_fields = 2)
* 針對field i, 用Wi weight matrix將X[starti, endi] embed成 [w, v1, v2, v3.....vk] = zi
* 再利用z = [w0, z1, z2, ... zn]做fully-connected layer兩次後做sigmoid函數得y


In [None]:
import numpy as np
import torch 
import torch.nn as nn
import pandas as pd
from sklearn.metrics import mean_squared_error 
from sklearn.model_selection import KFold 

In [None]:
# Tab-separated ratings file; each line is split into four fields further down
# (user id, item id, rating and one trailing column that is ignored).
user_item_path = '/content/drive/MyDrive/python_data/社群網路與推薦系統/hw3/data/Movielens/user_movie.dat'

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

# Feature Matrix

In [None]:
def get_feature(path):
  """Build a dense multi-hot feature matrix from a two-column TSV file.

  Every line of the file holds a 1-based ``id`` and a 1-based ``feature_id``
  separated by a tab (no header row). For each line, entry
  ``[id - 1, feature_id - 1]`` of the result is set to 1; all other entries
  stay 0.

  :param path: location of the tab-separated file.
  :return: tensor of shape ``(max id, max feature_id)`` with ``dtype=float``
    (torch.float64), allocated on the module-level ``device``.
  :raises ValueError: if the file yields no rows.
  """
  names = ['id', 'feature_id']
  df = pd.read_csv(path, sep='\t', names=names)
  if df.empty:
    # Without this guard the failure would surface as an opaque
    # "cannot convert float NaN to integer" from int(max()) below.
    raise ValueError(f'no (id, feature_id) rows found in {path!r}')

  n = int(df['id'].max())
  n_feature = int(df['feature_id'].max())
  feature_mat = torch.zeros(size=(n, n_feature), dtype=float, device=device)

  # Convert the 1-based ids to 0-based indices once and set every entry with a
  # single indexed assignment. The previous iterrows() loop issued one tensor
  # write per row, which is especially slow when the tensor lives on the GPU.
  row_idx = torch.as_tensor(df['id'].astype('int64').to_numpy() - 1, device=device)
  col_idx = torch.as_tensor(df['feature_id'].astype('int64').to_numpy() - 1, device=device)
  feature_mat[row_idx, col_idx] = 1
  return feature_mat

In [None]:
# Directory holding the Movielens side-information files (one .dat per feature group).
folder = '/content/drive/MyDrive/python_data/社群網路與推薦系統/hw3/data/Movielens/'

# Item-side feature groups: one dense matrix per file, in the order listed.
item_feature_mats = []
for file in ('movie_genre', 'movie_movie(knn)'):
  path = f'{folder}{file}.dat'
  item_feature_mat = get_feature(path=path)
  item_feature_mats += [item_feature_mat]

# User-side feature groups, loaded the same way.
user_feature_mats = []
for file in ('user_age', 'user_occupation'):
  path = f'{folder}{file}.dat'
  user_feature_mat = get_feature(path=path)
  user_feature_mats += [user_feature_mat]

In [None]:
# Join the per-file matrices column-wise so each item / user has one feature row.
item_feature_mat = torch.cat(item_feature_mats, 1)
user_feature_mat = torch.cat(user_feature_mats, 1)

# Row counts give the number of users / items (ids are 1-based row numbers).
n_user = user_feature_mat.size(0)
n_item = item_feature_mat.size(0)

# Total input width: one-hot item id + item features + one-hot user id + user features.
d = sum((n_item, item_feature_mat.size(1), n_user, user_feature_mat.size(1)))

print(f'item feature mat: {item_feature_mat.shape}')
print(f'user feature mat: {user_feature_mat.shape}')
print(f'd: {d}')

In [None]:
# Build the design matrix X and the label vector y from the ratings file.
#
# Each non-blank line must hold exactly four tab-separated fields:
# user id, item id, rating and one trailing field that is ignored.
# Row layout of X: [one-hot user id | one-hot item id | user features | item features].
user_rows = []  # 0-based user index per rating
item_rows = []  # 0-based item index per rating
y = []
with open(user_item_path, 'r') as f:
  for line in f:  # stream the file instead of materialising it with readlines()
    line = line.strip()
    if not line:
      continue  # tolerate blank lines, e.g. an empty line at the end of the file
    user_id, item_id, rating, _ = line.split('\t')
    user_rows.append(int(user_id) - 1)
    item_rows.append(int(item_id) - 1)
    y.append(int(rating))

# Validate on the host: an out-of-range index inside a CUDA indexing kernel
# triggers a device-side assert instead of a catchable IndexError.
if user_rows and not (-n_user <= min(user_rows) and max(user_rows) < n_user):
  raise IndexError('user id in the ratings file is out of range for user_feature_mat')
if item_rows and not (-n_item <= min(item_rows) and max(item_rows) < n_item):
  raise IndexError('item id in the ratings file is out of range for item_feature_mat')

user_idx = torch.tensor(user_rows, dtype=torch.long, device=device)
item_idx = torch.tensor(item_rows, dtype=torch.long, device=device)
sample_idx = torch.arange(len(y), device=device)

# One indexed assignment per block replaces the per-rating allocation of two
# dense one-hot tensors and a 1-row torch.cat.
user_onehot = torch.zeros(size=(len(y), n_user), dtype=float, device=device)
user_onehot[sample_idx, user_idx] = 1
item_onehot = torch.zeros(size=(len(y), n_item), dtype=float, device=device)
item_onehot[sample_idx, item_idx] = 1

X = torch.cat([user_onehot, item_onehot, user_feature_mat[user_idx], item_feature_mat[item_idx]], dim=1)
y = torch.tensor(y, dtype=float)  # labels stay on the CPU, as before

# Model

In [None]:
class FeaturesEmbedding(nn.Module):
  """One shared embedding table serving several categorical fields.

  The table has ``sum(field_dims)`` rows. Field ``i`` owns the contiguous row
  range that starts at ``offsets[i]``, so a per-field index is turned into a
  table row by adding that field's offset.

  :param field_dims: sequence with the number of distinct values of each field.
  :param embed_dim: size of every embedding vector.
  """

  def __init__(self, field_dims, embed_dim):
    super().__init__()
    self.embedding = torch.nn.Embedding(sum(field_dims), embed_dim)
    # Start row of each field: exclusive prefix sums of field_dims.
    # np.long was removed in NumPy 1.24 (AttributeError) and in NumPy 2.x is
    # the platform C long; np.int64 is explicit and always available.
    self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.int64)
    torch.nn.init.xavier_uniform_(self.embedding.weight.data)

  def forward(self, x):
    """
    :param x: Long tensor of size ``(batch_size, num_fields)`` holding, for
      each field, an index local to that field.
    :return: Float tensor of size ``(batch_size, num_fields, embed_dim)``.
    """
    # new_tensor gives the offsets the dtype and device of x; unsqueeze makes
    # them broadcast over the batch dimension.
    x = x + x.new_tensor(self.offsets).unsqueeze(0)
    return self.embedding(x)

In [None]:
class MultiLayerPerceptron(nn.Module):
  """Feed-forward stack of Linear -> BatchNorm1d -> ReLU -> Dropout stages.

  :param input_dim: width of the input vectors.
  :param embed_dims: iterable with the output width of each hidden stage.
  :param dropout: drop probability used by every Dropout layer.
  :param output_layer: when true, a final ``Linear(last_width, 1)`` is appended.
  """

  def __init__(self, input_dim, embed_dims, dropout, output_layer=True):
    super().__init__()
    # Consecutive pairs of widths describe each hidden stage's (in, out) sizes.
    widths = [input_dim, *embed_dims]
    stack = []
    for fan_in, fan_out in zip(widths, widths[1:]):
      stack += [
        torch.nn.Linear(fan_in, fan_out),
        torch.nn.BatchNorm1d(fan_out),
        torch.nn.ReLU(),
        torch.nn.Dropout(p=dropout),
      ]
    if output_layer:
      # Single-unit head on top of the last hidden width.
      stack.append(torch.nn.Linear(widths[-1], 1))
    self.mlp = torch.nn.Sequential(*stack)

  def forward(self, x):
    """
    :param x: Float tensor of size ``(batch_size, input_dim)``
    :return: output of the layer stack applied to ``x``
    """
    return self.mlp(x)


In [None]:
class FactorizationSupportedNeuralNetworkModel(nn.Module):
  """
  A pytorch implementation of the Factorisation-machine supported Neural Network (FNN).

  Every field index is embedded, the per-field embeddings are concatenated
  into one vector, and an MLP followed by a sigmoid maps that vector to a
  single score per sample.

  Reference:
      W Zhang, et al. Deep Learning over Multi-field Categorical Data - A Case Study on User Response Prediction, 2016.

  :param field_dims: sequence with the number of distinct values of each field.
  :param embed_dim: embedding size used for every field.
  :param mlp_dims: hidden-layer widths passed to the MLP.
  :param dropout: dropout probability passed to the MLP.
  """

  def __init__(self, field_dims, embed_dim, mlp_dims, dropout):
    super().__init__()
    n_fields = len(field_dims)
    self.embedding = FeaturesEmbedding(field_dims, embed_dim)
    # Width of the concatenated per-field embeddings fed to the MLP.
    self.embed_output_dim = n_fields * embed_dim
    self.mlp = MultiLayerPerceptron(self.embed_output_dim, mlp_dims, dropout)

  def forward(self, x):
    """
    :param x: Long tensor of size ``(batch_size, num_fields)``
    :return: sigmoid of the MLP output with dimension 1 squeezed away
    """
    field_vectors = self.embedding(x)
    # Flatten (batch, fields, embed_dim) into (batch, fields * embed_dim).
    flat = field_vectors.view(-1, self.embed_output_dim)
    logits = self.mlp(flat)
    return logits.squeeze(1).sigmoid()

In [None]:
# class FNN(nn.Module):
#   def __init__(self, k, d)