In [1]:
import numpy as np
import torch 
import torch.nn as nn
import pandas as pd
from sklearn.metrics import mean_squared_error 
from sklearn.model_selection import KFold 
torch.manual_seed(0)

<torch._C.Generator at 0x7f2e911bee70>

In [2]:
user_item_path = '/content/drive/MyDrive/python_data/社群網路與推薦系統/hw3/data/Movielens/user_movie.dat'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Feature Matrix

In [3]:
def get_feature(path):
  names = ['id', 'feature_id']
  df = pd.read_csv(path, sep= '\t', names= names)
  n = int(df['id'].max())
  n_feature = int(df['feature_id'].max())
  feature_mat = torch.zeros(size= (n, n_feature), dtype= torch.float, device= device)
  for i, row in df.iterrows():
    feature_mat[int(row['id'])-1, int(row['feature_id'])-1] = 1 
  return feature_mat

In [4]:
item_feature_mats = []
user_feature_mats = []
folder = '/content/drive/MyDrive/python_data/社群網路與推薦系統/hw3/data/Movielens/'
for file in ['movie_genre', 'movie_movie(knn)']:
  path = folder + file + '.dat'
  item_feature_mat = get_feature(path= path)
  item_feature_mats.append(item_feature_mat)
for file in ['user_age', 'user_occupation']:
  path = folder + file + '.dat'
  user_feature_mat = get_feature(path= path)
  user_feature_mats.append(user_feature_mat)

In [5]:
item_feature_mat = torch.cat(item_feature_mats, dim= 1)
user_feature_mat = torch.cat(user_feature_mats, dim= 1)
print(f'item feature mat: {item_feature_mat.shape}')
print(f'user feature mat: {user_feature_mat.shape}')
n_user = user_feature_mat.shape[0]
n_item = item_feature_mat.shape[0]
d = n_item + item_feature_mat.shape[1] + n_user + user_feature_mat.shape[1]
print(f'd: {d}')

item_feature_len = [mat.shape[1] for mat in item_feature_mats]
user_feature_len = [mat.shape[1] for mat in user_feature_mats]

item feature mat: torch.Size([1682, 19])
user feature mat: torch.Size([943, 29])
d: 2673


In [6]:
rows = []
y= []
with open(user_item_path, 'r') as f:
  for line in f.readlines():
    user_temp = torch.zeros(size= (1, n_user), dtype= torch.float, device= device)
    item_temp = torch.zeros(size= (1, n_item), dtype= torch.float, device= device)
    user_id, item_id, rating, _= line.strip().split('\t')
    user_temp[0,int(user_id)-1] = 1
    item_temp[0,int(item_id)-1] = 1
    row = torch.cat([user_temp, item_temp, user_feature_mat[int(user_id)-1].unsqueeze(dim= 0), item_feature_mat[int(item_id)-1].unsqueeze(dim= 0)], dim= 1)
    rows.append(row)
    y.append(int(rating))

X = torch.cat(rows, dim= 0)
y= torch.tensor(y, dtype=torch.float) # tensor

In [7]:
# encoder = OneHotEncoder()

# Params:

* w0 [1,1]
* n: n_fields
* zi: [k+1, 1] for ith field
* w0_i : [k+1, (endi -starti +1)]

# 概念
* 針對每個field (每組特徵 如user age, user_accupation, 即n_fields = 2)
* 針對field i, 用Wi weight matrix將X[starti, endi] embed成 [w, v1, v2, v3.....vk] = zi
* 再利用z = [w0, z1, z2, ... zn]做fully-connected layer兩次後做sigmoid函數得y


# Model

In [8]:
class FNN(nn.Module):
  def __init__(self, k, fields, hidden_dim, p= 0.3):
    super(FNN, self).__init__()
    self.k = k
    self.fields = fields 
    self.n_fields = len(fields)
    self.d = sum(fields)
    self.p = p
    self.w0 = torch.nn.Parameter(torch.zeros(size= (1,1), dtype= torch.float, device= device))
    self.param_list= nn.ModuleList()
    for field in fields:
      # self.param_list.append(torch.nn.Parameter(torch.randn(size= (field, k+1), dtype= torch.float, device= device)))
      self.param_list.append(nn.Linear(field, k+1, bias= False))
    # self.params = torch.nn.ParameterList(self.param_list.append(self.w0))
    self.fc1 = nn.Linear(in_features= (self.n_fields * (k+1)), out_features= hidden_dim, bias= True)
    self.fc2 = nn.Linear(in_features= hidden_dim, out_features= hidden_dim, bias= True)
    self.fc3 = nn.Linear(in_features= hidden_dim, out_features= 1, bias= True)

  def forward(self, x):
    
    zs = []
    count = 0 
    for i, field in enumerate(self.fields):
      # zi = torch.matmul(x[:, count:(count+field)], self.param_list[i]) # zi: [n_sample, k+1]
      zi= self.param_list[i](x[:, count:(count+field)])
      zs.append(zi)
      count += field

    z = self.w0 + torch.cat(zs, dim= 1)
    z = self.fc1(z)
    z = nn.ReLU()(z)
    z = nn.Dropout(p= self.p)(z)
    z = self.fc2(z)
    z = nn.ReLU()(z)
    z = nn.Dropout(p= self.p)(z)
    z = self.fc3(z)
    # z = nn.Sigmoid()(z)
    # z = torch.argmax(z, dim= 1)
    return z

-----

# Train

In [9]:
fields = [n_user, n_item] + user_feature_len + item_feature_len
print(fields)

k = 10
hidden_dim = 32
lr = 1e-2
# n_epoch = 100
p = 0

test_kf = KFold(n_splits=5, shuffle= True, random_state=42)
val_kf = KFold(n_splits=8, shuffle= True, random_state=42)

model = FNN(k= k, fields= fields, hidden_dim= hidden_dim, p= p).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr= lr)
criterion = nn.MSELoss()

[943, 1682, 8, 21, 18, 1]


In [10]:
# for epoch in range(n_epoch):

RMSEs = []
for rest_indice, test_indice in test_kf.split(X):
  rest_X = X[rest_indice]
  test_X = X[test_indice]
  for train_indice, val_indice in val_kf.split(rest_X):
    train_X, val_X = rest_X[train_indice], rest_X[val_indice]
    '''training process'''
    model.train()
    optimizer.zero_grad()
    output = model(x= train_X)
    loss = criterion(output.squeeze(dim= 1).cpu(), y[train_indice])
    # loss = criterion(output.cpu(), y[train_indice])
    loss.backward()
    optimizer.step()
    rmse = mean_squared_error(output.squeeze(dim= 1).detach().cpu(), y[train_indice], squared= False)
    # print(f'training rmse: {round(rmse,2)}')

    '''testing process'''
    model.eval()
    with torch.no_grad():
      output = model(x= test_X)
      rmse = mean_squared_error(output.squeeze(dim= 1).cpu(), y[test_indice], squared= False)
      # print(f'test rmse: {round(rmse, 2)}')
      RMSEs.append(rmse)  

  # if ((epoch+1)% 10) == 0:
  #   print(f'epoch: {epoch+1}')
  #   print(f'training loss: {round(loss.item(), 2)}')
  #   print(f'avg RMSEs: {round(np.mean(RMSEs), 2)}')
print(f'avg RMSEs: {round(np.mean(RMSEs), 2)}')

avg RMSEs: 1.5399999618530273


In [11]:
output

tensor([[3.3525],
        [3.2944],
        [3.4088],
        ...,
        [3.4272],
        [3.5017],
        [3.4148]], device='cuda:0')

In [12]:
y[test_indice]

tensor([1., 4., 3.,  ..., 4., 3., 1.])

In [13]:
model(x=X).shape

torch.Size([100000, 1])