In [None]:
import os
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from MetaMF import *

In [None]:
# Seed every random number generator this notebook touches with the same fixed value (1).
random.seed(1)  # Python's built-in generator; random.shuffle in getbatches draws from it
np.random.seed(1)  # NumPy's legacy global generator
torch.manual_seed(1)#set random seed for cpu
torch.cuda.manual_seed(1)#set random seed for current gpu
torch.cuda.manual_seed_all(1)#set random seed for all gpus

In [None]:
# Restrict this process to the GPU with index 0.
# NOTE(review): this runs after `import torch`; it should still take effect as long as
# CUDA has not been initialised yet — confirm no earlier cell touches the GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
# Flag consulted by the later cells to decide whether tensors and the model go to the GPU.
use_cuda = torch.cuda.is_available()
use_cuda  # bare expression: echoes the flag as the cell's output

# Read Dataset

In [None]:
def _read_id_list(path):
    """Return the integers stored one per line in ``path``; blank lines are skipped."""
    with open(path, 'r') as f:
        # int() tolerates surrounding whitespace, so the trailing newline needs no explicit strip.
        return [int(line) for line in f if line.strip()]


def _read_ratings(path):
    """Return ``(user, item, rating)`` tuples parsed from the tab-separated file ``path``.

    Only the first three tab-separated fields of each line are used; blank lines are skipped.
    """
    records = []
    with open(path, 'r') as f:
        for line in f:
            if not line.strip():
                continue
            fields = line.split('\t')
            records.append((int(fields[0]), int(fields[1]), float(fields[2])))
    return records


def readdata(filename1, filename2, filename3, filename4, filename5):
    """Load the id lists and the three rating splits from disk.

    Parameters
    ----------
    filename1 : path of the user list file, one integer id per line.
    filename2 : path of the item list file, one integer id per line.
    filename3 : path of the training ratings file.
    filename4 : path of the validation ratings file.
    filename5 : path of the test ratings file.

    Rating files hold one ``user<TAB>item<TAB>rating`` record per line.

    Returns
    -------
    (userlist, itemlist, traindata, validdata, testdata)
        Two lists of ints followed by three lists of ``(int, int, float)`` tuples,
        each in file order.

    Raises
    ------
    OSError
        If a file cannot be opened.
    ValueError, IndexError
        If a non-blank line is malformed.
    """
    userlist = _read_id_list(filename1)
    itemlist = _read_id_list(filename2)
    traindata = _read_ratings(filename3)
    validdata = _read_ratings(filename4)
    testdata = _read_ratings(filename5)
    return userlist, itemlist, traindata, validdata, testdata

In [None]:
# Input files for readdata, in its argument order:
#   filename1 = user list, filename2 = item list,
#   filename3 / filename4 / filename5 = train / valid / test ratings.
# Exactly one group of five assignments should be active; the active group uses the
# `db` files, and the commented groups below switch to the `ht-ml`, `ml` or `ciao` files.
filename1 = 'data/db.userlist'
filename2 = 'data/db.itemlist'
filename3 = 'data/db.train.rating'
filename4 = 'data/db.valid.rating'
filename5 = 'data/db.test.rating'
# Alternative: `ht-ml` files
#filename1 = 'data/ht-ml.userlist'
#filename2 = 'data/ht-ml.itemlist'
#filename3 = 'data/ht-ml.train.rating'
#filename4 = 'data/ht-ml.valid.rating'
#filename5 = 'data/ht-ml.test.rating'
# Alternative: `ml` files
#filename1 = 'data/ml.userlist'
#filename2 = 'data/ml.itemlist'
#filename3 = 'data/ml.train.rating'
#filename4 = 'data/ml.valid.rating'
#filename5 = 'data/ml.test.rating'
# Alternative: `ciao` files
#filename1 = 'data/ciao.userlist'
#filename2 = 'data/ciao.itemlist'
#filename3 = 'data/ciao.train.rating'
#filename4 = 'data/ciao.valid.rating'
#filename5 = 'data/ciao.test.rating'

In [None]:
# Load the user/item id lists and the train/valid/test rating tuples from the files selected above.
userlist, itemlist, traindata, validdata, testdata = readdata(filename1, filename2, filename3, filename4, filename5)  

# Utility Functions

In [None]:
def batchtoinput(batch, use_cuda):
    """Convert a batch of ``(user, item, rating)`` examples into three tensors.

    Parameters
    ----------
    batch : iterable of indexable examples; positions 0, 1 and 2 hold user, item and rating.
    use_cuda : when true, every returned tensor is moved to the GPU with ``.cuda()``.

    Returns
    -------
    (users, items, ratings)
        ``users`` and ``items`` are int64 tensors, ``ratings`` is a float32 tensor,
        each holding one entry per example in batch order.
    """
    # Walk the batch exactly once, keeping only the three fields we need.
    rows = [(row[0], row[1], row[2]) for row in batch]
    user_ids = torch.tensor([r[0] for r in rows], dtype=torch.int64)
    item_ids = torch.tensor([r[1] for r in rows], dtype=torch.int64)
    scores = torch.tensor([r[2] for r in rows], dtype=torch.float32)
    if not use_cuda:
        return user_ids, item_ids, scores
    return user_ids.cuda(), item_ids.cuda(), scores.cuda()

In [None]:
def getbatches(traindata, batch_size, use_cuda, shuffle):
    """Yield ``traindata`` as consecutive mini-batches already converted by ``batchtoinput``.

    Parameters
    ----------
    traindata : list of examples; it is copied, so the caller's list is never reordered.
    batch_size : number of examples per batch; the final batch may be smaller.
    use_cuda : forwarded to ``batchtoinput``.
    shuffle : when true, the copy is shuffled with ``random.shuffle`` before batching.

    Yields
    ------
    Whatever ``batchtoinput`` returns for each slice of ``batch_size`` examples.
    """
    examples = traindata.copy()
    if shuffle:
        random.shuffle(examples)
    n_batches = int(np.ceil(len(examples) / batch_size))
    offset = 0
    for _ in range(0, n_batches):
        chunk = examples[offset:offset + batch_size]
        yield batchtoinput(chunk, use_cuda)
        offset += batch_size

In [None]:
def weights_init(m):
    """Initialise one module in place; meant to be passed to ``net.apply``.

    Modules whose class name contains ``'Linear'`` get Xavier-normal weights and,
    when they have a bias, a bias filled with zeros. Every other module is left untouched.

    Parameters
    ----------
    m : the module to initialise.
    """
    if 'Linear' not in m.__class__.__name__:
        return
    nn.init.xavier_normal_(m.weight.data)
    # Layers created with bias=False expose `bias` as None; skip them instead of crashing.
    bias = getattr(m, 'bias', None)
    if bias is not None:
        nn.init.constant_(bias.data, 0)

In [None]:
def get_eval(ratlist, predlist, output=False):
    """Compute the mean absolute error and mean squared error between ratings and predictions.

    Parameters
    ----------
    ratlist : array-like of true ratings.
    predlist : array-like of predicted ratings, same shape as ``ratlist``.
    output : when true, also write the per-example absolute errors to ``maelist.dat``
        and the per-example squared errors to ``mselist.dat`` in the current working
        directory, one value per line (existing files are overwritten).

    Returns
    -------
    (mae, mse)
        The means of the absolute and squared differences.
    """
    # asarray lets callers pass plain lists as well as NumPy arrays (arrays are not copied).
    diff = np.asarray(ratlist) - np.asarray(predlist)
    abs_err = np.abs(diff)
    sq_err = np.square(diff)
    if output:
        with open('maelist.dat', 'w') as f:
            f.writelines(str(value) + '\n' for value in abs_err)
        with open('mselist.dat', 'w') as f:
            f.writelines(str(value) + '\n' for value in sq_err)
    return np.mean(abs_err), np.mean(sq_err)

# Train Model

In [None]:
# Build the network from the number of users and items.
# NOTE(review): `model` is not defined in this notebook; presumably it comes from
# `from MetaMF import *` — confirm its constructor signature there.
net = model(len(userlist), len(itemlist))
# Pass weights_init to net.apply so the custom initialisation is run over the network's modules.
net.apply(weights_init)
if use_cuda:
    net.cuda()

In [None]:
# Adam over all parameters of `net`; keep exactly one of the two lines active.
# The two variants differ only in learning rate (0.0001 vs 0.001).
optimizer = optim.Adam(net.parameters(), lr=0.0001, weight_decay=0.001)#for MetaMF
#optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.001)#for NeuMF   

In [None]:
# Mini-batch size used for training, validation and test batching; keep one line active.
batch_size = 64#for MetaMF
#batch_size = 256#for NeuMF
# Number of passes over the training data (a checkpoint is saved after each one).
epoches = 100

In [None]:
# Train for `epoches` passes; after each pass save a checkpoint and report validation MAE/MSE.
print(time.localtime())
# torch.save does not create missing directories; make sure the target exists up front
# so the first epoch's work is not lost to a FileNotFoundError.
os.makedirs('checkpoint', exist_ok=True)
for epoch in range(epoches):
    net.train()#switch to train mode
    error = 0.0
    num = 0
    for k, (users, items, ratings) in enumerate(getbatches(traindata, batch_size, use_cuda, True)):
        optimizer.zero_grad()
        pred = net(users, items)
        loss = net.loss(pred, ratings)
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), 5)#cap the total gradient norm at 5
        optimizer.step()
        # Weight by batch size to undo the per-batch averaging (assumes net.loss returns a
        # batch mean, as the original comment states). .item() yields a Python float, so the
        # running sum is accumulated in double precision.
        error += loss.item()*len(users)
        num += len(users)
        if (k+1)%1000 == 0:
            print(error/num)#running mean training loss, every 1000 batches
    print('Epoch {}/{} - Training Loss: {:.3f}'.format(epoch+1,epoches,error/num))
    # Saves the whole module object (pickled), one file per epoch.
    torch.save(net, 'checkpoint/epoch_'+str(epoch+1)+'.model')
    net.eval()#switch to test mode
    ratlist = []
    predlist= []
    # Validation needs no gradients; no_grad avoids building the autograd graph for every batch.
    with torch.no_grad():
        for k, (users, items, ratings) in enumerate(getbatches(validdata, batch_size, use_cuda, False)):
            pred = net(users, items)
            predlist.extend(pred.tolist())
            ratlist.extend(ratings.tolist())
    mae, mse = get_eval(np.array(ratlist), np.array(predlist))
    print('MAE: {:.5f}'.format(mae))
    print('MSE: {:.5f}'.format(mse))
    print(time.localtime())
print(time.localtime())

# Test Model

In [None]:
# Replace `net` with a model restored from a saved checkpoint.
# NOTE(review): 'checkpoint/epoch_.model' contains no epoch number, whereas the training loop
# saves 'checkpoint/epoch_<N>.model' — insert the epoch to evaluate before running this cell.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code; only load
# checkpoints you created yourself. Newer PyTorch releases may also need weights_only=False
# to load a whole pickled module — confirm against the installed version.
net = torch.load('checkpoint/epoch_.model')

In [None]:
# Evaluate the loaded model on the test split and report MAE / MSE.
print(time.localtime())
net.eval()#switch to test mode
ratlist = []
predlist= []
# Inference needs no gradients; no_grad avoids building the autograd graph for every batch.
with torch.no_grad():
    for k, (users, items, ratings) in enumerate(getbatches(testdata, batch_size, use_cuda, False)):
        pred = net(users, items)
        predlist.extend(pred.tolist())
        ratlist.extend(ratings.tolist())
mae, mse = get_eval(np.array(ratlist), np.array(predlist))
print('MAE: {:.5f}'.format(mae))
print('MSE: {:.5f}'.format(mse))
print(time.localtime())