# Boltzmann Machines

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

## Importing the dataset

1. **engine="python"**

The engine parameter specifies the parser engine to use. "python" means the Python engine will be used instead of the default C engine. This can be helpful for more complex separators, like "::", which the C engine may not handle well.


2. **encoding="latin-1"**

The encoding parameter specifies the character encoding to use when reading the file. "latin-1" (also known as ISO-8859-1) is commonly used for Western European languages and characters. This encoding is often used if the dataset contains special characters that might not be handled by UTF-8 encoding.

In [2]:
movies = pd.read_csv(
    "ml-1m/movies.dat", sep="::", header=None, engine="python", encoding="latin-1"
)
users = pd.read_csv(
    "ml-1m/users.dat", sep="::", header=None, engine="python", encoding="latin-1"
)
ratings = pd.read_csv(
    "ml-1m/ratings.dat", sep="::", header=None, engine="python", encoding="latin-1"
)

## Preparing the training set and the test set

In [3]:
training_set = pd.read_csv("ml-100k/u1.base", sep="\t")
training_set = np.array(training_set, dtype="int")
test_set = pd.read_csv("ml-100k/u1.test", sep="\t")
test_set = np.array(test_set, dtype="int")

## Getting the number of users and movies

In [4]:
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

## Converting the data into an array with users in lines and movies in columns

In [5]:
# Define a function to convert the dataset into a matrix format where each row represents a user and each column represents a movie.
# Each entry in this matrix represents the user's rating for that specific movie.
def convert(data):
    new_data = (
        []
    )  # Initialize an empty list to store the transformed data for each user.

    # Loop through each user by their ID (from 1 up to the number of users).
    for id_users in range(1, nb_users + 1):

        # Extract the movie IDs that the current user has rated.
        # Filter rows where the user ID matches the current user and get the corresponding movie IDs (column index 1).
        id_movies = data[:, 1][data[:, 0] == id_users]

        # Extract the ratings given by the current user.
        # Filter rows where the user ID matches the current user and get the corresponding ratings (column index 2).
        id_ratings = data[:, 2][data[:, 0] == id_users]

        # Initialize an array of zeros for the number of movies.
        # Each entry in the array represents the user's rating for a movie (0 means no rating).
        ratings = np.zeros(nb_movies)

        # Populate the ratings array for the current user by setting the rating values for movies they rated.
        # The movie IDs are adjusted by -1 to match Python's zero-based indexing.
        ratings[id_movies - 1] = id_ratings

        # Append the user's rating array (list of ratings) to new_data.
        # Each entry in new_data represents one user's movie ratings.
        new_data.append(list(ratings))

    # Return the completed list, where each entry is a user's ratings for all movies.
    return new_data


# Convert the training and test sets to matrix format for model compatibility.
training_set = convert(training_set)
test_set = convert(test_set)

## Converting the data into Torch tensors

In [6]:
# Convert the training and test sets from Python lists to PyTorch FloatTensors.
# PyTorch requires data in tensor format for efficient operations on GPUs.
# Converting to FloatTensor enables PyTorch's neural network and matrix operations on the data.
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

## Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)

In [7]:
# Modify the training set values to prepare data for a binary classification or specific rating model.
# This transformation reassigns ratings to simplify the input for a neural network:
# - Ratings of 0 (no rating) are changed to -1, so the model can distinguish them from actual ratings.
# - Ratings of 1 and 2 are changed to 0, possibly indicating "dislike" or "neutral" feedback.
# - Ratings of 3 or higher are changed to 1, possibly indicating "like" or "positive" feedback.
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1

# Apply the same transformation to the test set.
# The same adjustments ensure the model interprets test data in the same way as training data.
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

## Creating the architecture of the Neural Network

In [8]:
# Define the Restricted Boltzmann Machine (RBM) class.
# This class will create and train an RBM, a generative stochastic neural network with visible and hidden units.
class RBM:
    def __init__(self, nv, nh):
        # Initialize the RBM with random weights and biases.
        # nv: Number of visible nodes (units), i.e., features or inputs (in this case, movies).
        # nh: Number of hidden nodes (units), used to learn hidden patterns.

        # Initialize the weight matrix W with random values from a normal distribution.
        # W is a matrix of weights with dimensions [nh x nv], connecting visible and hidden layers.
        self.W = torch.randn(nh, nv)

        # Initialize the bias vector for hidden units (a) with random values.
        # This is a 1 x nh matrix (row vector), where each hidden node has its own bias.
        self.a = torch.randn(1, nh)

        # Initialize the bias vector for visible units (b) with random values.
        # This is a 1 x nv matrix (row vector), where each visible node has its own bias.
        self.b = torch.randn(1, nv)

    # Sample hidden units based on visible units (input data).
    def sample_h(self, x):
        # x: The visible layer input (users' ratings for movies).

        # Compute the activation for the hidden layer by multiplying x with W's transpose.
        # This gives a weighted sum for each hidden unit based on the visible inputs.
        wx = torch.mm(x, self.W.t())

        # Add the hidden layer bias 'a' to each weighted sum.
        # expand_as(wx) ensures the bias is expanded to match wx's dimensions, so it can be added element-wise.
        activation = wx + self.a.expand_as(wx)

        # Apply the sigmoid function to convert activation values to probabilities.
        # p_h_given_v: The probability that each hidden unit is activated given the visible units.
        p_h_given_v = torch.sigmoid(activation)

        # Return the activation probabilities and a sampled state.
        # torch.bernoulli(p_h_given_v) samples binary states (0 or 1) from the probabilities.
        return p_h_given_v, torch.bernoulli(p_h_given_v)

    # Sample visible units based on hidden units (reconstruction step).
    def sample_v(self, y):
        # y: The hidden layer input (binary states of hidden units).

        # Compute the activation for the visible layer by multiplying y with W.
        # This gives a weighted sum for each visible unit based on the hidden states.
        wy = torch.mm(y, self.W)

        # Add the visible layer bias 'b' to each weighted sum.
        # expand_as(wy) ensures the bias is expanded to match wy's dimensions, so it can be added element-wise.
        activation = wy + self.b.expand_as(wy)

        # Apply the sigmoid function to convert activation values to probabilities.
        # p_v_given_h: The probability that each visible unit is activated given the hidden units.
        p_v_given_h = torch.sigmoid(activation)

        # Return the activation probabilities and a sampled state.
        # torch.bernoulli(p_v_given_h) samples binary states (0 or 1) from the probabilities.
        return p_v_given_h, torch.bernoulli(p_v_given_h)

    # Train the RBM using Contrastive Divergence, an approximation method for training RBMs.
    def train(self, v0, vk, ph0, phk):
        # v0: Initial visible layer input (the original data).
        # vk: Reconstructed visible layer after k sampling steps (contrastive divergence step).
        # ph0: Probabilities of the hidden units given v0 (initial hidden activations).
        # phk: Probabilities of the hidden units given vk (hidden activations after k steps).

        # Update the weights using the difference in correlation between initial and reconstructed states.
        # The weight update is proportional to the difference between v0*ph0 and vk*phk.
        # This follows the contrastive divergence rule to approximate the negative log-likelihood gradient.
        self.W += torch.mm(v0.t(), ph0).t() - torch.mm(vk.t(), phk).t()

        # Update the visible layer biases 'b'.
        # This update uses the difference between the initial visible layer (v0) and the reconstructed (vk).
        self.b += torch.sum((v0 - vk), 0)

        # Update the hidden layer biases 'a'.
        # This update uses the difference between the probabilities of initial and reconstructed hidden units.
        self.a += torch.sum((ph0 - phk), 0)


# Define the dimensions for the RBM and the batch size for training.
nv = len(
    training_set[0]
)  # Number of visible units, based on the number of movies (columns in the dataset).
nh = 100  # Number of hidden units, a hyperparameter for the model that determines latent factors learned.
batch_size = 100  # Batch size used during training to update weights with mini-batches.

# Instantiate the RBM model with the defined number of visible and hidden units.
rbm = RBM(nv, nh)

## Training the RBM

In [9]:
# Define the number of training epochs.
# nb_epoch: The number of times the entire training set will be passed through the model.
nb_epoch = 10

# Start the training loop, where the model will learn over a specified number of epochs.
for epoch in range(1, nb_epoch + 1):

    # Initialize variables to track the total training loss and the count of processed batches.
    train_loss = 0  # To accumulate the loss for the epoch.
    s = 0.0  # Counter to keep track of the number of processed batches.

    # Loop through users in batches for training.
    # Batch processing helps the model learn more efficiently, especially when working with large datasets.
    for id_user in range(0, nb_users - batch_size, batch_size):

        # Select a batch of users from the training set.
        # vk: Visible layer used for the reconstruction steps (updated through Contrastive Divergence).
        # v0: Initial visible layer (original data), used to compare with vk for calculating the loss.
        vk = training_set[id_user : id_user + batch_size]
        v0 = training_set[id_user : id_user + batch_size]

        # Compute the initial probabilities of hidden units given the visible units (input data).
        # ph0: The initial hidden layer activations (used for weight updates later in training).
        ph0, _ = rbm.sample_h(v0)

        # Perform k-step Contrastive Divergence for this batch.
        # This iterative process helps the model approximate the expected values for training.
        for k in range(10):
            # Sample hidden layer based on the visible layer vk.
            # hk: Hidden layer activations given vk.
            _, hk = rbm.sample_h(vk)

            # Sample the visible layer based on the hidden layer hk (reconstruction step).
            # vk is updated to approximate the original data based on hidden representations.
            _, vk = rbm.sample_v(hk)

            # Ensure that ratings which were initially missing (v0 < 0) remain unchanged in vk.
            # This preserves the original 'unknown' ratings, so they aren't incorrectly filled during training.
            vk[v0 < 0] = v0[v0 < 0]

        # After k iterations, compute the final probabilities of the hidden units given the reconstructed visible units.
        # phk: Final hidden layer activations after k steps (used for weight updates).
        phk, _ = rbm.sample_h(vk)

        # Update the weights and biases in the RBM using the original and reconstructed data.
        # This training step is based on the difference between initial and final states.
        rbm.train(v0, vk, ph0, phk)

        # Calculate the training loss for this batch.
        # This is based on the mean absolute difference between original and reconstructed ratings.
        # Only consider rated movies (where v0 >= 0) for loss calculation to ignore missing values.
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))

        # Increment the batch counter.
        s += 1.0

    # Print the average training loss for this epoch.
    # The loss helps monitor how well the model is learning to reconstruct the input data.
    print("epoch: " + str(epoch) + " loss: " + str(train_loss / s))

epoch: 1 loss: tensor(0.3472)
epoch: 2 loss: tensor(0.2580)
epoch: 3 loss: tensor(0.2332)
epoch: 4 loss: tensor(0.2503)
epoch: 5 loss: tensor(0.2453)
epoch: 6 loss: tensor(0.2481)
epoch: 7 loss: tensor(0.2447)
epoch: 8 loss: tensor(0.2452)
epoch: 9 loss: tensor(0.2486)
epoch: 10 loss: tensor(0.2417)


## Testing the RBM

In [10]:
# Initialize variables to calculate the test loss and keep track of the number of tested users.
test_loss = 0  # To accumulate the test loss across all users.
s = 0.0  # Counter to count the users with actual ratings for testing.

# Loop through each user in the test set to evaluate the model's performance on unseen data.
for id_user in range(nb_users):

    # Select the user’s data from both the training and test sets.
    # v: The user’s visible data from the training set (ratings used to activate hidden units).
    # vt: The user’s data from the test set (actual ratings, used to calculate test loss).
    v = training_set[id_user : id_user + 1]
    vt = test_set[id_user : id_user + 1]

    # Check if the user has any rated movies in the test set.
    # If there are no ratings (vt < 0), skip this user to avoid meaningless calculations.
    if len(vt[vt >= 0]) > 0:

        # Sample the hidden units based on the training data.
        # h: Hidden layer activations given the training ratings v.
        _, h = rbm.sample_h(v)

        # Reconstruct the visible units based on the hidden units.
        # v: Reconstructed visible layer (ratings) using hidden activations.
        _, v = rbm.sample_v(h)

        # Calculate the mean absolute error between the test set ratings and reconstructed ratings.
        # This difference measures how close the model’s predictions are to the actual ratings.
        # Only consider rated movies (where vt >= 0) to avoid including missing data in the loss calculation.
        test_loss += torch.mean(torch.abs(vt[vt >= 0] - v[vt >= 0]))

        # Increment the counter for users with valid ratings in the test set.
        s += 1.0

# Calculate and print the average test loss.
# The average test loss is the total test loss divided by the number of users with ratings, giving an indication
# of the model’s performance in reconstructing unseen data. A lower loss indicates better performance.
print("test loss: " + str(test_loss / s))

test loss: tensor(0.2459)
