In [30]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
import tqdm

In [31]:
import pandas as pd
from sklearn import model_selection
# Apply and divide Train-Data and Test-Data.
df = pd.read_csv("ml-20m/ml-20m/ratings.csv")

In [48]:
# Variable 'X' is (userID, movieID) pair.
X = df[["userId", "movieId"]].values
Y = df[["rating"]].values
# Y = df[['rating']].astype(int)
df[["rating"]] = df[["rating"]].astype(int)
print("Format of Y : ", Y)
print("df : ", df)

Format of Y :  [[3]
 [3]
 [3]
 ...
 [3]
 [5]
 [2]]
df :            userId  movieId  rating   timestamp
0              1        2       3  1112486027
1              1       29       3  1112484676
2              1       32       3  1112484819
3              1       47       3  1112484727
4              1       50       3  1112484580
...          ...      ...     ...         ...
20000258  138493    68954       4  1258126920
20000259  138493    69526       4  1259865108
20000260  138493    69644       3  1260209457
20000261  138493    70286       5  1258126944
20000262  138493    71619       2  1255811136

[20000263 rows x 4 columns]


In [49]:
# Divide Train-Data and Test-Data as 9 to 1.
train_X, test_X, train_Y, test_Y\
    = model_selection.train_test_split(X, Y, test_size=0.1)
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

In [50]:
# Convert float32 of tensor that the 'X' is ID, and Integer,
# and the 'Y' is Real-Number.
train_dataset = TensorDataset(
    # torch.tensor(train_X, dtype=torch.int64),
    # torch.tensor(train_Y, dtype=torch.float32)
    torch.tensor(train_X, dtype=torch.int64),
    torch.tensor(train_Y, dtype=torch.float32),
)

test_dataset = TensorDataset(
    # torch.tensor(test_X, dtype=torch.int64),
    # torch.tensor(test_Y, dtype=torch.float32)
    torch.tensor(test_X, dtype=torch.int64),
    torch.tensor(test_Y, dtype=torch.float32),
)

In [51]:
train_loader = DataLoader(
    train_dataset, batch_size=1024, num_workers=4, shuffle=True
)

test_loader = DataLoader(
    train_dataset, batch_size=1024, num_workers=4
)

In [52]:
class MatrixFactorization(nn.Module):
    def __init__(self, max_user, max_item, k=20):
        super().__init__()
        self.max_user = max_user
        self.max_item = max_item
        self.user_emb = nn.Embedding(max_user, k, 0)
        self.item_emb = nn.Embedding(max_item, k, 0)
        
    def forward(self, x):
        user_idx = x[:, 0]
        item_idx = x[:, 1]
        user_feature = self.user_emb(user_idx)
        item_feature = self.item_emb(item_idx)
        
        # 'user_feature * item_feature' is the dimension (batch_size, k),
        # so it becames each of internal-Samples when you get the value 
        # of 'sum' of variable 'k'.
        out = torch.sum(user_feature * item_feature, 1)
        
        # Adjust range [0, 5].
        out = torch.sigmoid(out) * 5
        # out = nn.functional.sigmoid(out) * 5
        return out

In [57]:
max_user, max_item = X.max(0)
# Cast 'np.int64' types to the Python standard type, 'int'.
# max_user = max_user.type(dtype)
# max_item = max_item.type(dtype)
# It occurs error when you 
max_user = float(max_user)
max_item = float(max_item)
print(max_user)
print(max_item)
net = MatrixFactorization(max_user + 1, max_item + 1)

138493.0
131262.0


TypeError: new() received an invalid combination of arguments - got (float, int), but expected one of:
 * (*, torch.device device)
      didn't match because some of the arguments have invalid types: (!float!, !int!)
 * (torch.Storage storage)
 * (Tensor other)
 * (tuple of ints size, *, torch.device device)
 * (object data, *, torch.device device)


In [40]:
def eval_net(net, loader, score_fn=nn.functional.l1_loss, device="cpu"):
    ys = []
    ypreds = []
    
    for x, y in loader:
        x = x.to(device)
        ys.append(y)
        
        with torch.no_grad():
            ypred = net(x).to("cpu").view(-1)
        ypreds.append(ypred)
    
    score = score_fn(torch.cat(ys).squeeze(), torch.cat(ypreds))
    return score.item()