In [1]:
import pandas as pd
import numpy as np
import data

In [2]:
# Load the MovieLens-1M ratings file. The fields are delimited by '::' and the
# column names are supplied here through `names`.
# A separator longer than one character is interpreted as a regular expression
# and is only supported by the Python parser, so that engine is requested
# explicitly instead of relying on pandas' fallback (which emits a ParserWarning).
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
df = pd.read_csv("./ml-1m/ratings.dat", sep='::', names=r_cols, engine='python')

  


In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=1981)

# Renumber the rows of each split from 0. drop=True discards the old index
# directly instead of materialising it as an "index" column and deleting it
# afterwards (which would also fail if the frame already had such a column).
df_train = df_train.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)

In [4]:
# Turn each split into an (inputs, targets) pair with the project-local
# `data.create_dataset` helper; the train split is processed first, then test.
(train_x, train_y), (test_x, test_y) = (
    data.create_dataset(split) for split in (df_train, df_test)
)

In [5]:
import torch
from torch import optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [6]:
# Restrict the CUDA devices visible to this process to device index 3.
import os
os.environ.update({"CUDA_VISIBLE_DEVICES": "3"})

In [7]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Seed the default generator; when running on CUDA, the CUDA generators are
# then seeded separately with their own value.
torch.manual_seed(315)
if device == "cuda":
    torch.cuda.manual_seed_all(912)

# Bare expression so the notebook displays the selected device.
device

'cuda'

In [8]:
class MovielensDataset(Dataset):
    """
    Map-style dataset pairing feature rows with their targets
    (subclass of torch.utils.data.Dataset).

    Args:
        X: sized, indexable container of feature rows; sample ``i`` is ``X[i]``.
        y: sized, indexable container of targets; sample ``i`` is ``y[i]``.

    Raises:
        ValueError: if ``X`` and ``y`` do not contain the same number of samples.
    """
    def __init__(self, X, y):
        # Fail fast on misaligned inputs: otherwise a shorter `y` only surfaces
        # as an IndexError on some late sample, and a longer `y` is silently
        # ignored past len(X).
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same number of samples, got {} and {}".format(len(X), len(y))
            )
        self.X = X
        self.y = y

    def __len__(self):
        # len() gives the size of the first axis for tensors and also works
        # for any other sized sequence, unlike Tensor.size(0).
        return len(self.X)

    def __getitem__(self, index):
        # One sample: (feature row, target).
        return self.X[index], self.y[index]

In [9]:
# Convert the training inputs and targets to float tensors, then wrap them in
# the dataset class so they can be served through a DataLoader.
train_features = torch.FloatTensor(train_x)
train_targets = torch.FloatTensor(train_y)
train_dataset = MovielensDataset(X=train_features, y=train_targets)

In [10]:
class FactorizationMachine(nn.Module):
    """
    Second-order factorization machine:

        y(x) = w_0 + sum_i w_i * x_i + sum_{i<j} <v_i, v_j> * x_i * x_j

    where v_i is the i-th row of the factor matrix V.

    Args:
        field_dims: number of input features (width of each input row).
        latent_dims: length of the latent vector learned for each feature.
    """

    def __init__(self, field_dims, latent_dims):
        super(FactorizationMachine, self).__init__()

        # Global bias, shape (1,).
        self.w_0 = nn.Parameter(nn.init.normal_(torch.zeros((1, ))), requires_grad=True)
        # Linear weights, shape (1, field_dims).
        self.w_i = nn.Parameter(nn.init.normal_(torch.zeros((1, field_dims)), std=1.0/field_dims), requires_grad = True)
        # Factor matrix, shape (field_dims, latent_dims).
        self.V = nn.Parameter(nn.init.normal_(torch.zeros((field_dims, latent_dims)), std=1.0/latent_dims), requires_grad = True)

    def forward(self, x):
        """
        Compute the FM prediction for a batch.

        Args:
            x: float tensor of shape (batch, field_dims).

        Returns:
            Tensor of shape (batch, 1) with one prediction per row.
        """
        linear = self.w_0 + torch.matmul(x, self.w_i.T)  # (batch, 1)

        # Pairwise term in linear time, per latent factor f:
        #   sum_{i<j} <v_i, v_j> x_i x_j
        #     = 0.5 * sum_f [ (sum_i v_if x_i)^2 - sum_i v_if^2 x_i^2 ]
        # The square must be taken per factor BEFORE summing over factors, and
        # the subtracted term squares x and V element-wise before the product.
        square_of_sum = torch.matmul(x, self.V) ** 2        # (batch, latent_dims)
        sum_of_square = torch.matmul(x ** 2, self.V ** 2)   # (batch, latent_dims)
        interaction = 0.5 * torch.sum(square_of_sum - sum_of_square, dim=1, keepdim=True)  # (batch, 1)

        return linear + interaction

    def init_weight(self):
        """No-op placeholder; the parameters are initialised in __init__."""
        pass

In [11]:
# Model, optimiser and loss for training.
# The model is moved to the `device` chosen earlier in the notebook rather than
# calling .cuda() unconditionally, so the cell also runs on a CPU-only machine.
model = FactorizationMachine(field_dims = train_x.shape[1], latent_dims = 20).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_function = nn.MSELoss()
batch_size = 64
n_epochs = 300

In [12]:
# Scalar logging through tensorboardX; event files are written under the run
# directory named below.
from tensorboardX import SummaryWriter

run_dir = "runs/FactorizationMachine_MatrixVer"
writer = SummaryWriter(logdir=run_dir)

In [13]:
# Train for n_epochs; after every epoch, score the held-out set and log the
# summed training loss and the test RMSE to TensorBoard.

# Built once: with shuffle=True the loader draws a new permutation every time
# it is iterated, so it does not need to be re-created per epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation data is converted once instead of on every epoch. Targets are
# flattened so the RMSE subtraction is element-wise: subtracting a 1-D target
# array from (N, 1) predictions would broadcast to an (N, N) matrix.
test_features = torch.FloatTensor(test_x)
test_targets = np.asarray(test_y).reshape(-1)
eval_batch_size = 4096  # rows per forward pass during evaluation, to bound memory use

for epoch_id in range(n_epochs):
    model.train()
    total_loss = 0.0
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        y_pred = model(X)
        loss = loss_function(y_pred.view(-1), y.view(-1))
        loss.backward()
        optimizer.step()
        # .item() yields a plain float; accumulating the tensor itself would
        # keep a device tensor (and its autograd history) alive for each batch.
        total_loss += loss.item()

    model.eval()
    # No gradients are needed for scoring; predictions are produced in chunks
    # and gathered on the CPU.
    with torch.no_grad():
        y_test = torch.cat([
            model(chunk.to(device)).view(-1).cpu()
            for chunk in torch.split(test_features, eval_batch_size)
        ]).numpy()
    rmse = float(np.sqrt(np.mean((y_test - test_targets) ** 2)))

    # Tag strings are kept exactly as before so existing runs keep their series.
    writer.add_scalar("loss/ Train_loss", total_loss, epoch_id)
    writer.add_scalar("performace/RMSE", rmse, epoch_id)

    # Report every 10th epoch (the loop variable is epoch_id; `epoch` was undefined).
    if (epoch_id + 1) % 10 == 0:
        print ('Epoch {} of {}, training Loss: {:.4f}, RMSE: {:.4f}'.format(epoch_id + 1, n_epochs, total_loss, rmse))

KeyboardInterrupt: 