In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
import numpy as np

import sys
sys.path.insert(0, '..')  # Add the parent directory to the search path
sys.path.insert(0, '.')  # Add the parent directory to the search path
from bivaecf.recom_bivaecf import BiVAECF
from bivaecf.dataset import Dataset
from torch.utils.data import DataLoader


In [3]:
# Preprocess Inputs
movies_path = "../data/movies.csv"
ratings_path = "../data/ratings.csv"
movie_data = pd.read_csv(movies_path, index_col=["movieId"], header=0)
ratings_data = pd.read_csv(ratings_path)
merged_data = pd.merge(movie_data, ratings_data, on='movieId', how='inner')
# Preprocess data
merged_data.drop('genres', axis=1, inplace=True)
merged_data.drop(['timestamp', 'title'], axis=1, inplace=True)

# Get unique values from column 'A' and sort them
unique_values = merged_data['movieId'].unique()
sorted_values = sorted(unique_values)
movie_map = dict()
id = 0
for i in sorted_values:
    movie_map[i] = id
    id += 1

# Get unique values from column 'A' and sort them
unique_values = merged_data['userId'].unique()
sorted_values = sorted(unique_values)
user_map = dict()
id = 0
for i in sorted_values:
    user_map[i] = id
    id += 1


In [3]:
# Setup Params

# Model parameters
LATENT_DIM = 50
ENCODER_DIMS = [100]
ACT_FUNC = "tanh"
LIKELIHOOD = "pois"
NUM_EPOCHS = 500
BATCH_SIZE = 128
LEARNING_RATE = 0.001
SEED=10
train_set = Dataset.from_uir(merged_data.itertuples(index=False), seed=SEED)


In [4]:
bivae = BiVAECF(
    k=LATENT_DIM,
    encoder_structure=ENCODER_DIMS,
    act_fn=ACT_FUNC,
    likelihood=LIKELIHOOD,
    n_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    seed=SEED,
    use_gpu=torch.cuda.is_available(),
    verbose=True
)



In [5]:
# Define custom PyTorch dataset
class MovieLensDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe.values.astype(np.float32)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]
# Split data into training and test sets
train, test = train_test_split(merged_data, test_size=0.2)
train_dataset = MovieLensDataset(train)
test_dataset = MovieLensDataset(test)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
batch_size = 128


In [9]:
for batch in train_loader:

    break
    pass

In [9]:
bivae.get_item_vectors().shape

(610, 50)

In [8]:
bivae.get_user_vectors().shape

(9724, 50)

In [16]:
comb = get_batch_similarity(batch)
comb.shape

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  movie_ids = np.array(batch[:,0], dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  user_ids = np.array(batch[:,1], dtype=np.int)


(128, 50)

In [11]:
comb.shape
comb

array([[ 3.8409349e-02, -7.2367452e-02, -2.7437152e-02, ...,
        -1.9638252e-01, -2.4818715e-03,  7.7284584e-03],
       [ 2.4900427e-02, -7.8474395e-03,  8.8169344e-02, ...,
        -2.1769682e-01,  9.2799542e-03,  5.5049271e-03],
       [ 1.9433698e-02, -1.2074784e-01, -1.8828329e-02, ...,
        -3.8801131e-01, -1.2008947e-04,  9.3307532e-04],
       ...,
       [ 1.9086964e-04,  4.7220770e-02,  9.1929451e-02, ...,
         1.7609963e-02, -4.1097621e-03,  2.4722693e-02],
       [ 6.2871381e-04, -7.1380823e-03, -2.0608272e-01, ...,
        -2.4622622e-01, -6.5181111e-03, -8.6321682e-03],
       [ 7.8078285e-03, -1.9613123e-02, -2.5556281e-01, ...,
         1.5935700e-02,  1.6151632e-03, -1.0005354e-02]], dtype=float32)

In [8]:
bivae.load("weights/BiVAECF/2024-03-17_11-08-01-044494.pkl")

<bivaecf.recom_bivaecf.BiVAECF at 0x7f541f06cbe0>

In [7]:
bivae.fit(train_set)


  0%|          | 0/500 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# bivae.fit(train_set)
# bivae.save("weights")
combined_rank_matrix = np.matmul(bivae.get_user_vectors(), bivae.get_item_vectors().T)
combined_rank_matrix.shape
all_methods = [method for method in dir(bivae) if callable(getattr(bivae, method))]
all_methods
