In [1]:
'''
Created on Aug 8, 2016
Processing datasets. 

@author: Xiangnan He (xiangnanhe@gmail.com)
'''
import scipy.sparse as sp
import numpy as np

class Dataset(object):
    '''
    classdocs
    '''

    def __init__(self, path):
        '''
        Constructor
        '''
        self.trainMatrix = self.load_rating_file_as_matrix(path + ".train.rating")
        self.testRatings = self.load_rating_file_as_list(path + ".test.rating")
        self.testNegatives = self.load_negative_file(path + ".test.negative")
        assert len(self.testRatings) == len(self.testNegatives)
        
        self.num_users, self.num_items = self.trainMatrix.shape
        
    def load_rating_file_as_list(self, filename):
        ratingList = []
        with open(filename, "r") as f:
            line = f.readline()
            while line != None and line != "":
                arr = line.split("\t")
                user, item = int(arr[0]), int(arr[1])
                ratingList.append([user, item])
                line = f.readline()
        return ratingList
    
    def load_negative_file(self, filename):
        negativeList = []
        with open(filename, "r") as f:
            line = f.readline()
            while line != None and line != "":
                arr = line.split("\t")
                negatives = []
                for x in arr[1: ]:
                    negatives.append(int(x))
                negativeList.append(negatives)
                line = f.readline()
        return negativeList
    
    def load_rating_file_as_matrix(self, filename):
        '''
        Read .rating file and Return dok matrix.
        The first line of .rating file is: num_users\t num_items
        '''
        # Get number of users and items
        num_users, num_items = 0, 0
        with open(filename, "r") as f:
            line = f.readline()
            while line != None and line != "":
                arr = line.split("\t")
                u, i = int(arr[0]), int(arr[1])
                num_users = max(num_users, u)
                num_items = max(num_items, i)
                line = f.readline()
        # Construct matrix
        mat = sp.dok_matrix((num_users+1, num_items+1), dtype=np.float32)
        with open(filename, "r") as f:
            line = f.readline()
            while line != None and line != "":
                arr = line.split("\t")
                user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
                if (rating > 0):
                    mat[user, item] = 1.0
                line = f.readline()    
        return mat

In [2]:
def load_rating_file_as_list(filename):
        ratingList = []
        with open(filename, "r") as f:
            line = f.readline()
            while line != None and line != "":
                arr = line.split("\t")
                user, item = int(arr[0]), int(arr[1])
                ratingList.append([user, item])
                line = f.readline()
        return ratingList
    
def load_negative_file(filename):
    negativeList = []
    with open(filename, "r") as f:
        line = f.readline()
        while line != None and line != "":
            arr = line.split("\t")
            negatives = []
            for x in arr[1: ]:
                negatives.append(int(x))
            negativeList.append(negatives)
            line = f.readline()
    return negativeList

In [3]:
def get_train_instances(train, num_negatives):
    user_input, item_input, labels = [],[],[]
    num_users = train.shape[0]
    num_items = train.shape[1]
    for (u, i) in train.keys():
        # positive instance
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        # negative instances
        for t in range(num_negatives):
            j = np.random.randint(num_items)
            while train[u, j] != 0:
                j = np.random.randint(num_items)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

In [4]:
path = "../data/processed/ml-1m/ml-1m"

data = Dataset(path)

In [5]:
train = data.trainMatrix
test = data.testRatings
neg = data.testNegatives

In [6]:
len(neg), len(test), len(train)

(6040, 6040, 994169)

In [7]:
import sys
sys.path.append('/home/alexabades/recsys')

from src.data.DataLoader import MovieLensDataset
data_path = "../data/processed/ml-1m/ml-1m"
data1 = MovieLensDataset()
data1.load_processed_data(data_path)

train1 = data1.trainMatrix
test1 = data1.testRatings
neg1 = data1.testNegatives

In [8]:
len(neg1), len(test1), len(train1)

(6040, 6040, 994169)

In [6]:
user_input, item_input, labels = get_train_instances(train, 4)

In [7]:
import torch

user_input = torch.tensor(user_input, dtype=torch.long)
item_input = torch.tensor(item_input, dtype=torch.long)
labels = torch.tensor(labels, dtype=torch.float)

In [32]:
import yaml
def load_config(config_path):
    with open(config_path, "r") as file:
        return yaml.safe_load(file)


# Load the configuration
config = load_config("../config/nfc/ml-1m-1.yaml")

In [20]:
layers = [64,32,16,8]
learning_rate =  0.001
optimizer =  "adam"
batch_size =  20
num_epochs =  1
num_negative_instances =  4
dropout =  0 
# size of the matrix factorization embedings
num_factors =  8
num_users, num_items = (6040, 3706)

In [9]:
import sys
sys.path.append('/home/alexabades/recsys')
from src.models.nfc.nfc import NFC

model = NFC(num_users=num_users, num_items=num_items, mf_dim=num_factors, layers=layers)
model

NFC(
  (MF_Embedding_User): Embedding(6040, 8)
  (MF_Embedding_Item): Embedding(3706, 8)
  (MLP_Embedding_User): Embedding(6040, 32)
  (MLP_Embedding_Item): Embedding(3706, 32)
  (MLP_layers): ModuleList(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): Linear(in_features=32, out_features=16, bias=True)
    (2): Linear(in_features=16, out_features=8, bias=True)
  )
  (predict_layer): Linear(in_features=16, out_features=1, bias=True)
)

In [18]:
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor, nn, optim

# Assuming user_input, item_input, and labels are your full dataset tensors
dataset = TensorDataset(user_input, item_input, labels)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [28]:
cuda_available = torch.cuda.is_available()
if cuda_available:
  num_gpus = torch.cuda.device_count()
  print("CUDA avaliable: ", cuda_available)
  print("Number of GPUs available:", num_gpus)
else:
    print("CUDA not available on your machine.")

CUDA avaliable:  True
Number of GPUs available: 1


In [29]:
current_gpu = torch.cuda.current_device()
gpu_properties = torch.cuda.get_device_properties(current_gpu)
print("Current GPU Index:", current_gpu)
print("GPU Properties:", gpu_properties)


Current GPU Index: 0
GPU Properties: _CudaDeviceProperties(name='NVIDIA GeForce GTX 1650', major=7, minor=5, total_memory=4095MB, multi_processor_count=16)


In [31]:
for i in range(num_gpus):
    print("GPU #", i, ": ", torch.cuda.get_device_name(i))
    print("Memory Allocated:", round(torch.cuda.memory_allocated(i)/1024**3, 1), 'GB')
    print("Memory Cached:   ", round(torch.cuda.memory_reserved(i)/1024**3, 2), 'GB')


GPU # 0 :  NVIDIA GeForce GTX 1650
Memory Allocated: 0.0 GB
Memory Cached:    0.0 GB


In [54]:

loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
model.train()
for epoch in range(num_epochs): 

  for batch in dataloader:
    user_input = batch[0]
    item_input = batch[1]
    labels = batch[2]
    labels = labels.view(-1,1)
    print(user_input.shape)
    # Forward pass.
    output = model(user_input, item_input)
    
    # Compute loss.
    loss = loss_fn(output, labels)
    
    # Clean up gradients from the model.
    optimizer.zero_grad()
    
    # Compute gradients based on the loss from the current batch (backpropagation).
    loss.backward()
    print(loss.backward())
    
    # Take one optimizer step using the gradients computed in the previous step.
    optimizer.step()
    
    # Add evaluation
    break

torch.Size([256])


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Assuming user_input, item_input, and labels are your full dataset tensors
dataset = TensorDataset(user_input, item_input, labels)
dataloader = DataLoader(dataset, batch_size=256, shuffle=True)


for user_batch, item_batch, label_batch in dataloader:
    print(user_batch.shape)
    # Forward pass through your model
    model.eval()
    with torch.no_grad():
      mock_out = model(user_batch, item_batch)
    
        
        # Calculate loss and update model parameters
        # ...


In [None]:
model.eval()
with torch.no_grad():
  mock_out = model(user_input, item_input)


In [None]:
import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Example learning rate


In [49]:
from src.data.DataLoader import MovieLensDataset

In [50]:
data = MovieLensDataset()

TypeError: MovieLensDataset.__init__() missing 1 required positional argument: 'path'