In [1]:
import warnings
# Silences every warning category for the rest of the session, so any
# deprecation notices raised by libraries used in later cells are hidden too.
warnings.filterwarnings('ignore')

In [2]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, SubsetRandomSampler

In [3]:
import numpy as np
import random
import pandas as pd

In [4]:
from Dataset import Data
from bprModel import BPR

In [5]:
def load_data(path='train.csv'):
    """Read user -> item interactions from a CSV file.

    The first column is read as the user id and the second column as a
    whitespace-separated string of item ids. The header row is consumed by
    ``pd.read_csv``; any further columns are ignored.

    Args:
        path: CSV file to read. Defaults to ``'train.csv'``, the file the
            notebook originally hard-coded.

    Returns:
        A tuple ``(num_users, num_items, user_items)`` where ``num_users`` is
        the largest user id + 1, ``num_items`` is the largest item id + 1 and
        ``user_items`` maps each user id to its list of item ids.

    Raises:
        ValueError: if the file contains no data rows.
    """
    df = pd.read_csv(path)
    if df.empty:
        raise ValueError(f"{path!r} contains no user rows")

    user_items = {}
    max_item_id = -1
    # Pick the columns by position with .iloc. Integer keys on a Series that
    # is indexed by column names (what iterrows() yields) are label lookups
    # with a deprecated positional fallback.
    for user, items in zip(df.iloc[:, 0], df.iloc[:, 1]):
        item_ids = [int(x) for x in items.split()]
        user_items[int(user)] = item_ids
        # default=-1 keeps a blank item string from aborting the whole load.
        max_item_id = max(max_item_id, max(item_ids, default=-1))

    num_users = max(user_items) + 1
    num_items = max_item_id + 1
    return num_users, num_items, user_items

In [6]:
def train(loader, model, optimizer, epochs, batch_size, device):
    """Run the train/validation loop and report the per-epoch losses.

    Args:
        loader: mapping with ``'train'`` and ``'val'`` entries. Each entry is
            iterated to yield ``(batch_u, batch_i, batch_j)`` tensor triples,
            and its ``.dataset.get_neg()`` is called once before every pass
            (presumably to resample negative items -- confirm in Dataset.py).
        model: ``torch.nn.Module`` whose ``model(batch_u, batch_i, batch_j)``
            call returns a scalar loss tensor.
        optimizer: optimizer over ``model``'s parameters.
        epochs: last epoch index. The loop runs epochs ``0..epochs``
            inclusive, i.e. ``epochs + 1`` passes over the data.
        batch_size: divisor applied to the mean batch loss before reporting.
        device: torch device the model and every batch are moved to.

    Returns:
        ``(train_history, val_history)``: two lists of floats holding, for
        each epoch, the mean batch loss divided by ``batch_size``.
    """
    trainLoss = []
    valLoss = []

    # Module.to() moves the parameters in place, so doing it once is enough.
    model.to(device)

    # NOTE(review): range(epochs + 1) performs one more pass than `epochs`
    # suggests; kept as-is so existing runs train for the same duration.
    for epoch in range(epochs + 1):
        epoch_losses = {'train': [], 'val': []}
        for phase in ('train', 'val'):
            is_train = phase == 'train'
            model.train(is_train)

            # Use the loaders that were passed in, not a notebook global.
            phase_loader = loader[phase]
            phase_loader.dataset.get_neg()

            # Validation needs no autograd graph; disabling it saves memory.
            with torch.set_grad_enabled(is_train):
                for batch_u, batch_i, batch_j in phase_loader:
                    batch_u = batch_u.to(device)
                    batch_i = batch_i.to(device)
                    batch_j = batch_j.to(device)

                    loss = model(batch_u, batch_i, batch_j)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    # .item() yields a detached Python float on the host.
                    epoch_losses[phase].append(loss.item())

        # An empty phase yields nan from mean(), matching the original.
        # NOTE(review): dividing by batch_size gives a per-sample value only
        # if the model returns a loss summed over a full batch -- confirm.
        avg_train_batch_loss = torch.tensor(epoch_losses['train']).mean().item() / batch_size
        avg_val_batch_loss = torch.tensor(epoch_losses['val']).mean().item() / batch_size

        trainLoss.append(avg_train_batch_loss)
        valLoss.append(avg_val_batch_loss)

        print(f"Epoch : {epoch} | Avg. train batch loss = {avg_train_batch_loss:.4f} | Avg. val batch loss = {avg_val_batch_loss:.4f}\n")

    return trainLoss, valLoss

In [7]:
# Parse the interaction file once; the sizes are (max id + 1) for users and items.
user_size, item_size, user_items = load_data()

In [8]:
# Triples per DataLoader batch; also handed to the BPR constructor and used by
# train() as the divisor when reporting the average loss.
batch_size = 3000
# train() iterates range(epochs + 1), so this setting runs 56 passes (0..55).
epochs = 55
# Passed to the BPR constructor; presumably the latent-factor dimension of the
# user/item embeddings -- confirm in bprModel.
embedding_size = 128

In [9]:
# Prefer the first CUDA device when one is visible, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [10]:
dataset = Data(user_size, item_size, user_items)
validation_split = 0.1
shuffle_dataset = True

# Shuffle the sample indices with a fixed seed, then hold out the first
# `split` of them for validation and train on the remainder.
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset:
    np.random.seed(233)
    np.random.shuffle(indices)
val_indices = indices[:split]
train_indices = indices[split:]

# Both loaders wrap the same `dataset` object; only the index subset each
# sampler draws from differs.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
validation_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)

data_loaders = dict(train=train_loader, val=validation_loader)
data_lengths = dict(train=len(train_indices), val=len(val_indices))

In [11]:
# Build the BPR model imported from bprModel. Arguments are positional; their
# meaning is inferred from the variable names -- confirm against BPR.__init__.
model = BPR(user_size, item_size, embedding_size, batch_size, device)

In [12]:
# Adam over all model parameters, with weight decay acting as an L2 penalty.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

In [13]:
# The trailing semicolon keeps the notebook from echoing the call's value.
train(
    loader=data_loaders,
    model=model,
    optimizer=optimizer,
    epochs=epochs,
    batch_size=batch_size,
    device=device,
);

Epoch : 0 | Avg. train batch loss = 0.6866 | Avg. val batch loss = 0.6189

Epoch : 1 | Avg. train batch loss = 0.5067 | Avg. val batch loss = 0.3542

Epoch : 2 | Avg. train batch loss = 0.3383 | Avg. val batch loss = 0.2985

Epoch : 3 | Avg. train batch loss = 0.3025 | Avg. val batch loss = 0.2786

Epoch : 4 | Avg. train batch loss = 0.2786 | Avg. val batch loss = 0.2607

Epoch : 5 | Avg. train batch loss = 0.2585 | Avg. val batch loss = 0.2418

Epoch : 6 | Avg. train batch loss = 0.2420 | Avg. val batch loss = 0.2340

Epoch : 7 | Avg. train batch loss = 0.2270 | Avg. val batch loss = 0.2258

Epoch : 8 | Avg. train batch loss = 0.2164 | Avg. val batch loss = 0.2174

Epoch : 9 | Avg. train batch loss = 0.2068 | Avg. val batch loss = 0.2084

Epoch : 10 | Avg. train batch loss = 0.1995 | Avg. val batch loss = 0.2064

Epoch : 11 | Avg. train batch loss = 0.1933 | Avg. val batch loss = 0.1960

Epoch : 12 | Avg. train batch loss = 0.1851 | Avg. val batch loss = 0.1921

Epoch : 13 | Avg. trai

In [15]:
# Pickles the whole module object, not just its weights, so loading
# 'bpr.pth' later requires the BPR class to be importable from bprModel.
# NOTE(review): saving model.state_dict() is the more portable option, but it
# changes the file format -- check what the consumers of bpr.pth expect.
torch.save(model, 'bpr.pth')