Convolution Auto Encoder to generate representaion for the users and item.
Based on the papers, `PixelCAE` and `Joint Deep Modeling of Users and Items Using Reviews for Recommendation`

In [38]:
import pandas as pd
import numpy as np
import pickle
import torch.nn as nn
import torch

Reading data 

In [55]:
# pickle load user_dataset, business_dataset, user_lookup, business_lookup
with open('user_dataset.pkl', 'rb') as f:   
    user_dataset = pickle.load(f)

with open('business_dataset.pkl', 'rb') as f:
    business_dataset = pickle.load(f)

with open('user_lookup.pkl', 'rb') as f:
    user_lookup = pickle.load(f)

with open('business_lookup.pkl', 'rb') as f:
    business_lookup = pickle.load(f)

In [40]:
print("Loading Word2Vec model...")
from gensim.models import Word2Vec
loaded_model = Word2Vec.load("word2vec.model")
print("Word2Vec model loaded.")

Loading Word2Vec model...
Word2Vec model loaded.


In [41]:
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

stop_words = stopwords.words('english')

def tokenize_filter(sentence):
    return [token for token in word_tokenize(sentence) if token not in stop_words 
            and token not in string.punctuation ]

In [135]:
# get embedding of filtered sentence using average of word vectors
def get_embedding(filtered_sentence,model, avg=1,sum=0):
   

    # get embedding of words
    if avg:
        embeddings = []
        for word in filtered_sentence:
            try:
                embeddings.append(model.wv[word])
            except KeyError:
                embeddings.append(np.zeros((model.vector_size,)))

        # get average of embedding of words
        avg_embedding = np.mean(embeddings, axis=0)
        return avg_embedding

    if sum:
        embeddings = []
        for word in filtered_sentence:
            try:
                embeddings.append(model.wv[word])
            except KeyError:
                embeddings.append(np.zeros((model.vector_size,)))

        # get sum of embedding of words
        sum_embedding = np.sum(embeddings, axis=0)
        return sum_embedding


Using the new `user_dataset`, `business_dataset`, `user_lookup` and `business_lookup` we can create our Custom data loader

In [144]:
def get_review_matrix(w2vmodel, review, time_index, scaling_factor=0.2):
    """ Given a review, generate the review matrix and apply time-decay to the review matrix

    Parameters
    ----------
    w2vmodel: word2vec model
    review: review to generate the review matrix
    time_stamp: timestamp of the review
    scaling_factor: scaling factor for time-decay

    Returns
    -------
    review_matrix: review matrix of the review
    """
    filtered_sentence = tokenize_filter(review)
    review_matrix = get_embedding(filtered_sentence,w2vmodel)
    # apply time-decay
    review_matrix = review_matrix * np.exp(-scaling_factor * 10-time_index)
    # reshape review matrix to review matrix.shape[0]/10,10
    review_matrix = review_matrix.reshape(int(review_matrix.shape[0]/10),10)


    return review_matrix


In [137]:
def get_user_matrix(user_dataset, user_lookup, user_id, w2vmodel,):
    """
    Create a user matrix for a user_id in user_dataset

    Parameters
    ----------
    user_dataset : dataframe with latest 10 entries of users
    business_dataset : dataframe with latest 10 entries of businesses
    user_lookup : dict of user_id and its start and end index in user_dataset
    business_lookup : dict of business_id and its start and end index in business_dataset
    w2vmodel : word2vec model

    Returns
    -------
    user_matrix : numpy array of shape (10,10,10)
    """
    # get the start and end index of user_id in user_dataset
    start_index, end_index = user_lookup[user_id]
    # get the latest 10 entries of user_id
    data = user_dataset.iloc[start_index:end_index]
    user_matrix = [np.array(get_review_matrix(w2vmodel,review,i)) for i,review in enumerate(data.review)]
    user_matrix = np.array(user_matrix)
    return user_matrix


In [138]:
def get_item_matrix(business_dataset,business_lookup,business_id,w2vmodel,):
    """
    Create an item matrix for a business_id in business_dataset

    Parameters
    ----------
    business_dataset : dataframe with latest 10 entries of businesses
    business_lookup : dict of business_id and its start and end index in business_dataset
    w2vmodel : word2vec model

    Returns
    -------
    item_matrix : numpy array of shape (10,10,10)
    """
    # get the start and end index of business_id in business_dataset
    start_index, end_index = business_lookup[business_id]
    # get the latest 10 entries of business_id
    data = business_dataset.iloc[start_index:end_index]
    item_matrix = [np.array(get_review_matrix(w2vmodel,review,i)) 
                   for i,review in enumerate(data.review)]
    item_matrix = np.array(item_matrix)
    return item_matrix

In [46]:
from torch.utils.data import Dataset, DataLoader
class CustomDataset(Dataset):
    def __init__(self, user_dataset,business_dataset,user_lookup,business_lookup,
                 w2vmodel,) -> None:
        super().__init__()
        self.user_dataset = user_dataset
        self.business_dataset = business_dataset
        self.user_lookup = user_lookup
        self.business_lookup = business_lookup
        self.w2vmodel = w2vmodel
        self.items = list(self.user_lookup.items())

    def __len__(self):
        return len(self.items)
    
    def __getitem__(self, idx):
        user_id = self.items[idx][0]
        
        # get user_matrix 
        user_matrix = get_user_matrix(self.user_dataset, self.user_lookup, user_id, self.w2vmodel)
        data = self.user_dataset.iloc[user_lookup[user_id][0]:user_lookup[user_id][1]]
        # get item_matrix for each business_id
        item_matrix = []
        for business_id in data.business_id:
            item_matrix.append(get_item_matrix(self.business_dataset, self.business_lookup, business_id, self.w2vmodel))
        item_matrix = np.array(item_matrix)
        return user_matrix, item_matrix





In [343]:
class UserDataset(Dataset):
    def __init__(self, user_dataset,user_ids,user_lookup,w2vmodel):
        super().__init__()
        self.user_dataset = user_dataset
        self.user_ids = user_ids
        self.user_lookup = user_lookup
        self.w2vmodel = w2vmodel
    

    def __len__(self):
        return len(self.user_ids)
    
    def __getitem__(self, idx):
        user_id = self.user_ids[idx]
        
        # get user_matrix 
        user_matrix = get_user_matrix(self.user_dataset, self.user_lookup, user_id, self.w2vmodel)
        return user_matrix, user_id
                  

In [344]:
class BusinessDataset(Dataset):
    def __init__(self, business_dataset,business_ids,business_lookup,w2vmodel):
        super().__init__()
        self.business_dataset = business_dataset
        self.business_lookup = business_lookup
        self.w2vmodel = w2vmodel
        self.business_ids = business_ids

    def __len__(self):
        return len(self.business_ids)
    
    def __getitem__(self, idx):
        business_id = self.business_ids[idx]
        # get item_matrix for each business_id
        item_matrix = get_item_matrix(self.business_dataset, self.business_lookup, business_id, self.w2vmodel)
        return item_matrix, business_id

Splitting the Data in `Train 80%`, `Test 10%` and `Validate 10%`

In [141]:
def create_datasets_and_loaders(dataset, lookup, model, kind, batch_size=64):
    num_items = len(lookup)
    ids = list(lookup.keys())

    train_size = int(0.8 * num_items)
    validation_size = int(0.1 * num_items)
    test_size = num_items - train_size - validation_size

    train_start_index = 0
    train_end_index = train_size

    validation_start_index = train_size
    validation_end_index = train_size + validation_size

    test_start_index = train_size + validation_size
    test_end_index = num_items

    if kind == "user":
        train_dataset       = UserDataset(dataset, ids[train_start_index:train_end_index],
                                           lookup, model)
        validation_dataset  = UserDataset(dataset, ids[validation_start_index:validation_end_index],
                                           lookup, model)
        test_dataset        = UserDataset(dataset, ids[test_start_index:test_end_index],
                                           lookup, model)

    elif kind == "business":
        train_dataset       = BusinessDataset(dataset, ids[train_start_index:train_end_index],
                                               lookup, model)
        validation_dataset  = BusinessDataset(dataset, ids[validation_start_index:validation_end_index],
                                               lookup, model)
        test_dataset        = BusinessDataset(dataset, ids[test_start_index:test_end_index],
                                               lookup, model)

    train_dataloader        = DataLoader(train_dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)
    validation_dataloader   = DataLoader(validation_dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)
    test_dataloader         = DataLoader(test_dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

    return train_dataloader, validation_dataloader, test_dataloader

Now let's create a Convolutional Auto Encoder to learn represnetation of items.

In [219]:
class user_CAE(nn.Module):
    def __init__(self, input_dim):
        super(user_CAE, self).__init__()
        self.input_dim = input_dim
        self.encoder = nn.Sequential(
            nn.Conv2d(10, 32, kernel_size=2 if input_dim[0]==5 else 5, stride=1,
                       padding=2 if input_dim[0] == 20 else 1), 
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=2 if input_dim[0]==5 else 5, stride=1,
                       padding=2 if input_dim[0] == 20 else 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32,64, kernel_size=2, stride=1,
                       padding=0 if input_dim[0] == 20 else 1),
            nn.ReLU(),
            nn.Conv2d(64,64, kernel_size=2, stride=1,
                       padding=0 if input_dim[0] == 20 else (1 if input_dim[0]==10 else (0,1))),
            nn.ReLU(),  
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 100),
        )

        

    def forward(self, x):
        
        # reshape according to the shape of the input
        x = self.encoder(x)
        x = x.reshape(x.shape[0],10*10)
        return x


In [304]:
class user_decoder(nn.Module):
    def __init__(self,output_dim):
        super(user_decoder, self).__init__()
        self.output_dim = output_dim
        self.decoder = nn.Sequential(
            nn.Linear(100, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Unflatten(1, (64, 2, 2)),  # reverse of Flatten
            nn.ConvTranspose2d(64, 64,
                                kernel_size=4 if output_dim[0]==10 else ((8,4) if output_dim[0]==20 else (2,4)),
                                stride=2, padding=1),  # reverse of MaxPool2d
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=2 , stride=1, padding= 1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 
                               kernel_size=3 if output_dim[0]==10 else ((8,6) if output_dim[0]==20 else (4,6)), 
                               stride=2, padding=0 if output_dim[0]==10 else 1),
            nn.ReLU(),
            nn.Conv2d(32, 32, 
                      kernel_size=2 if output_dim[0]==10 else ((3,3) if output_dim[0]==20 else (2,3)), 
                      stride=1, padding=1 if output_dim[0]==10 else (0)),
            nn.ReLU(),
            nn.Conv2d(32,10,kernel_size=1,stride=1,padding=0),
        )

    def forward(self, x):
        x = self.decoder(x)
        x = x.reshape(x.shape[0],10,*self.output_dim)
        return x


In [28]:
class item_CAE(nn.Module):
    def __init__(self):
        super(item_CAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(10, 32, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32,64, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64,64, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 100),
        )
    
    def forward(self, x):
        embedding = torch.empty(0).cuda()
        for relevant_item in x:
            for item in relevant_item:
                item = item.unsqueeze(0)
                embedding = torch.cat((embedding, self.encoder(item)), 0)
        
        # Apply weighted time decay and average pooling to the embeddings
        embedding = embedding.reshape(64,10,100)
        embedding = embedding * torch.exp(-0.2 * 10 - torch.arange(10)).unsqueeze(0).unsqueeze(-1).cuda()
        embedding = torch.mean(embedding, 1)
        return embedding
        
        


In [29]:
class item_decoder(nn.Module):
    def __init__(self):
        super(item_decoder, self).__init__()
        self.decoder = nn.Sequential(
            nn.Linear(100, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Unflatten(1, (64, 2, 2)),  # reverse of Flatten
            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),  # reverse of MaxPool2d
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32,10,kernel_size=1,stride=1,padding=0),
        )

    def forward(self, x):
        embeddings= torch.empty(0).cuda()
        for item in x:
            embeddings = torch.cat((embeddings, self.decoder(item)), 0)
    
        embeddings = embeddings.reshape(64,10,10,10)
        return embeddings

Let's train the model. Based on reconstrution loss and regularization terms

In [128]:
def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d or type(m) == nn.ConvTranspose2d:
        torch.nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            m.bias.data.fill_(0.01)

In [129]:
def reconstruction_loss_with_regularization(x, y, model, lambda_l1=0.01):
    """Compute reconstruction loss with L1 regularization

    Parameters
    ----------
    x : noisy input
    y : clean input
    model : autoencoder model
    lambda_l1 : regularization parameter

    Returns
    -------
    loss : reconstruction loss with L1 regularization as tensor
    """

    # Compute MSE
    mse_loss = nn.MSELoss()
    mse = mse_loss(x, y)

    # Compute L1 regularization
    l1_reg = torch.tensor(0., requires_grad=True)
    for name, param in model.named_parameters():
        if 'weight' in name:
            l1_reg = l1_reg + torch.norm(param, 1)
    
    # Combine MSE and L1 regularization
    loss = mse + lambda_l1 * l1_reg
    
    return loss

In [75]:
user_encoder_instance = user_CAE()
user_decoder_instance = user_decoder()
""" 
item_encoder_instance = item_CAE()
item_decoder_instance = item_decoder() """

item_encoder_instance = user_CAE()
item_decoder_instance = user_decoder()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

user_encoder_instance.to(device)
user_decoder_instance.to(device)

item_encoder_instance.to(device)
item_decoder_instance.to(device)


user_decoder(
  (decoder): Sequential(
    (0): Linear(in_features=100, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=256, bias=True)
    (3): ReLU()
    (4): Unflatten(dim=1, unflattened_size=(64, 2, 2))
    (5): ConvTranspose2d(64, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(64, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (10): ReLU()
    (11): Conv2d(32, 32, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(32, 10, kernel_size=(1, 1), stride=(1, 1))
  )
)

In [76]:
from tqdm import tqdm

user_encoder_instance.apply(init_weights)
user_decoder_instance.apply(init_weights)

user_optimizer = torch.optim.RMSprop(list(user_encoder_instance.parameters()) + list(user_decoder_instance.parameters()), lr=0.001)

# train the model
epochs = 10
best_user_loss = float('inf')  # Initialize with a high value

for epoch in range(epochs):
    # Training phase
    pbar = tqdm(user_train_dataloader, desc=f"Epoch {epoch+1}/{epochs} [Train]")
    total_train_loss = 0
    total_train_variance = 0
    for user_matrix in pbar:
        user_matrix = user_matrix.to(device)

        # Compute variance of user_matrix
        variance = user_matrix.var()
        total_train_variance += variance.item()

        # zero the parameter gradients
        user_optimizer.zero_grad()

        # forward + backward + optimize
        user_embedding = user_encoder_instance(user_matrix)
        user_output = user_decoder_instance(user_embedding)
        user_loss = reconstruction_loss_with_regularization(user_output, user_matrix, user_encoder_instance)
        total_train_loss += user_loss.item()

        user_loss.backward()
        user_optimizer.step()

    avg_train_loss = total_train_loss / len(user_train_dataloader)
    avg_train_variance = total_train_variance / len(user_train_dataloader)
    pbar.set_postfix({"Avg Training loss": avg_train_loss, "Avg Input variance": avg_train_variance})

    # Validation phase
    pbar = tqdm(user_validation_dataloader, desc=f"Epoch {epoch+1}/{epochs} [Validate]")
    total_validation_loss = 0
    total_validation_variance = 0
    with torch.no_grad():
        for user_matrix in pbar:
            user_matrix = user_matrix.to(device)

            # Compute variance of user_matrix
            variance = user_matrix.var()
            total_validation_variance += variance.item()

            # forward
            user_embedding = user_encoder_instance(user_matrix)
            user_output = user_decoder_instance(user_embedding)
            user_loss = reconstruction_loss_with_regularization(user_output, user_matrix, user_encoder_instance)
            total_validation_loss += user_loss.item()

    avg_validation_loss = total_validation_loss / len(user_validation_dataloader)
    avg_validation_variance = total_validation_variance / len(user_validation_dataloader)
    pbar.set_postfix({"Avg Validation loss": avg_validation_loss, "Avg Input variance": avg_validation_variance})

    print(f"Epoch: {epoch+1}, Avg Training loss: {avg_train_loss}, Avg Validation loss: {avg_validation_loss}")

    # Save the best model
    if avg_validation_loss < best_user_loss:
        best_user_loss = avg_validation_loss
        torch.save(user_encoder_instance.state_dict(), "best_user_encoder_model.pth")
        torch.save(user_decoder_instance.state_dict(), "best_user_decoder_model.pth")

Epoch 1/10 [Train]:   0%|          | 0/172 [00:00<?, ?it/s]

Epoch 1/10 [Train]: 100%|██████████| 172/172 [06:16<00:00,  2.19s/it]
Epoch 1/10 [Validate]: 100%|██████████| 22/22 [00:39<00:00,  1.81s/it]


Epoch: 1, Avg Training loss: 3.69585053276184, Avg Validation loss: 0.5642752159725536


Epoch 2/10 [Train]: 100%|██████████| 172/172 [05:46<00:00,  2.02s/it]
Epoch 2/10 [Validate]: 100%|██████████| 22/22 [00:41<00:00,  1.88s/it]


Epoch: 2, Avg Training loss: 0.542634176653485, Avg Validation loss: 0.5200417339801788


Epoch 3/10 [Train]: 100%|██████████| 172/172 [05:43<00:00,  2.00s/it]
Epoch 3/10 [Validate]: 100%|██████████| 22/22 [00:40<00:00,  1.84s/it]


Epoch: 3, Avg Training loss: 0.5226871378199999, Avg Validation loss: 0.5132199769670313


Epoch 4/10 [Train]:  23%|██▎       | 40/172 [01:22<04:31,  2.06s/it]


KeyboardInterrupt: 

In [322]:
def train_model(encoder_instance, decoder_instance, train_dataloader, validation_dataloader, device, name,epochs=10):
    from tqdm import tqdm

    encoder_instance.apply(init_weights)
    decoder_instance.apply(init_weights)

    optimizer = torch.optim.RMSprop(list(encoder_instance.parameters()) + list(decoder_instance.parameters()), lr=0.001)

    best_loss = float('inf')  # Initialize with a high value
    training_stats = []

    for epoch in range(epochs):
        # Training phase
        pbar = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs} [Train]")
        total_train_loss = 0
        total_train_variance = 0
        epochs_no_improve = 0
        for matrix in pbar:
            matrix = matrix.to(device).float()

            # Compute variance of matrix
            variance = matrix.var()
            total_train_variance += variance.item()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            embedding = encoder_instance(matrix)
            output = decoder_instance(embedding)
            loss = reconstruction_loss_with_regularization(output, matrix, encoder_instance)
            total_train_loss += loss.item()

            loss.backward()
            optimizer.step()

        avg_train_loss = total_train_loss / len(train_dataloader)
        avg_train_variance = total_train_variance / len(train_dataloader)
        print(f"Avg Training loss: {avg_train_loss}, Avg Input variance: {avg_train_variance}")
        # Validation phase
        pbar = tqdm(validation_dataloader, desc=f"Epoch {epoch+1}/{epochs} [Validate]")
        total_validation_loss = 0
        total_validation_variance = 0
        with torch.no_grad():
            for matrix in pbar:
                matrix = matrix.to(device).float()

                # Compute variance of matrix
                variance = matrix.var()
                total_validation_variance += variance.item()

                # forward
                embedding = encoder_instance(matrix)
                output = decoder_instance(embedding)
                loss = reconstruction_loss_with_regularization(output, matrix, encoder_instance)
                total_validation_loss += loss.item()

        avg_validation_loss = total_validation_loss / len(validation_dataloader)
        avg_validation_variance = total_validation_variance / len(validation_dataloader)
        print(f"Avg Validation loss: {avg_validation_loss}, Avg Input variance: {avg_validation_variance}")

        print(f"Epoch: {epoch+1}, Avg Training loss: {avg_train_loss}, Avg Validation loss: {avg_validation_loss}")

        # Save the best model
        if avg_validation_loss < best_loss:
            best_loss = avg_validation_loss
            torch.save(encoder_instance.state_dict(), f"AE_model_weights\\{name}_best_encoder_model.pth")
            torch.save(decoder_instance.state_dict(), f"AE_model_weights\\{name}_best_decoder_model.pth")
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve == 2:
                print("Early stopping!")
                break

        # Store the metrics
        training_stats.append({
            "name": name,
            'epoch': epoch + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_validation_loss,
            'Training Var': avg_train_variance,
            'Valid. Var': avg_validation_variance
        })

    return training_stats

In [305]:
# Test which of the 6 w2v models performs best

# get the list of files in the directory
import os
files = [("word2vec_model_200_5.model",(20,10)),("word2vec_model_200_10.model",(20,10)),
         ("word2vec_model_50_5.model",(5,10)),("word2vec_model_50_10.model",(5,10)),]



# iterate through the list of files
for file, dimension in files:

    file_name=file.split('.')[0]
    # load the model
    print(f"Loading {file}...")
    loaded_model = Word2Vec.load(f"word2vec_model/{file}")
    print(f"{file} loaded.")

    # get the shape of the embedding
    embedding_dim = loaded_model.wv.vector_size

    # create dataset and dataloader
    user_train_dataloader, user_validation_dataloader, user_test_dataloader = create_datasets_and_loaders(user_dataset, user_lookup, loaded_model, "user", batch_size=64)

    # create model instances
    user_encoder_instance = user_CAE(input_dim = dimension)
    user_decoder_instance = user_decoder(output_dim = dimension)

    # move the model to GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    user_encoder_instance.to(device)
    user_decoder_instance.to(device)

    # train the model
    training_stats = train_model(user_encoder_instance, user_decoder_instance, user_train_dataloader, user_validation_dataloader, device, file_name)

Loading word2vec_model_200_5.model...
word2vec_model_200_5.model loaded.


Epoch 1/10 [Train]: 100%|██████████| 172/172 [05:38<00:00,  1.97s/it]


Avg Training loss: 3.705801231570022, Avg Input variance: 0.00032106780485645326


Epoch 1/10 [Validate]: 100%|██████████| 22/22 [00:40<00:00,  1.83s/it]


Avg Validation loss: 0.5672207420522516, Avg Input variance: 0.00033068730871574104
Epoch: 1, Avg Training loss: 3.705801231570022, Avg Validation loss: 0.5672207420522516


Epoch 2/10 [Train]: 100%|██████████| 172/172 [06:05<00:00,  2.12s/it]


Avg Training loss: 0.5428564108388368, Avg Input variance: 0.000321116432280897


Epoch 2/10 [Validate]: 100%|██████████| 22/22 [00:47<00:00,  2.14s/it]


Avg Validation loss: 0.5227399522607977, Avg Input variance: 0.000330227648109113
Epoch: 2, Avg Training loss: 0.5428564108388368, Avg Validation loss: 0.5227399522607977


Epoch 3/10 [Train]: 100%|██████████| 172/172 [06:32<00:00,  2.28s/it]


Avg Training loss: 0.5229091360125431, Avg Input variance: 0.00032108639949678206


Epoch 3/10 [Validate]: 100%|██████████| 22/22 [00:46<00:00,  2.12s/it]


Avg Validation loss: 0.515880365263332, Avg Input variance: 0.0003305195330705663
Epoch: 3, Avg Training loss: 0.5229091360125431, Avg Validation loss: 0.515880365263332


Epoch 4/10 [Train]: 100%|██████████| 172/172 [06:22<00:00,  2.22s/it]


Avg Training loss: 0.5196466186018878, Avg Input variance: 0.00032107687205330706


Epoch 4/10 [Validate]: 100%|██████████| 22/22 [00:42<00:00,  1.91s/it]


Avg Validation loss: 0.514691645448858, Avg Input variance: 0.00033108126213350755
Epoch: 4, Avg Training loss: 0.5196466186018878, Avg Validation loss: 0.514691645448858


Epoch 5/10 [Train]: 100%|██████████| 172/172 [06:11<00:00,  2.16s/it]


Avg Training loss: 0.5190753247155699, Avg Input variance: 0.0003211013099644333


Epoch 5/10 [Validate]: 100%|██████████| 22/22 [00:42<00:00,  1.91s/it]


Avg Validation loss: 0.5144824087619781, Avg Input variance: 0.00033213274251796645
Epoch: 5, Avg Training loss: 0.5190753247155699, Avg Validation loss: 0.5144824087619781


Epoch 6/10 [Train]: 100%|██████████| 172/172 [05:49<00:00,  2.03s/it]


Avg Training loss: 0.5189736482015875, Avg Input variance: 0.0003210876661902402


Epoch 6/10 [Validate]: 100%|██████████| 22/22 [00:41<00:00,  1.90s/it]


Avg Validation loss: 0.5144463371146809, Avg Input variance: 0.0003318480700148608
Epoch: 6, Avg Training loss: 0.5189736482015875, Avg Validation loss: 0.5144463371146809


Epoch 7/10 [Train]: 100%|██████████| 172/172 [05:45<00:00,  2.01s/it]


Avg Training loss: 0.5189552913571513, Avg Input variance: 0.00032110023650687273


Epoch 7/10 [Validate]: 100%|██████████| 22/22 [00:42<00:00,  1.95s/it]


Avg Validation loss: 0.5144101137464697, Avg Input variance: 0.00033137914777564055
Epoch: 7, Avg Training loss: 0.5189552913571513, Avg Validation loss: 0.5144101137464697


Epoch 8/10 [Train]: 100%|██████████| 172/172 [05:53<00:00,  2.05s/it]


Avg Training loss: 0.5189043408216432, Avg Input variance: 0.00032110644695971727


Epoch 8/10 [Validate]: 100%|██████████| 22/22 [00:44<00:00,  2.02s/it]


Avg Validation loss: 0.5136213383891366, Avg Input variance: 0.000330858240803619
Epoch: 8, Avg Training loss: 0.5189043408216432, Avg Validation loss: 0.5136213383891366


Epoch 9/10 [Train]: 100%|██████████| 172/172 [05:48<00:00,  2.03s/it]


Avg Training loss: 0.5189342488383137, Avg Input variance: 0.00032107378603202875


Epoch 9/10 [Validate]: 100%|██████████| 22/22 [00:42<00:00,  1.95s/it]


Avg Validation loss: 0.5140841738744215, Avg Input variance: 0.00033075182397045535
Epoch: 9, Avg Training loss: 0.5189342488383137, Avg Validation loss: 0.5140841738744215


Epoch 10/10 [Train]: 100%|██████████| 172/172 [05:46<00:00,  2.01s/it]


Avg Training loss: 0.5189331787270169, Avg Input variance: 0.000321039785651306


Epoch 10/10 [Validate]: 100%|██████████| 22/22 [00:40<00:00,  1.84s/it]


Avg Validation loss: 0.5140498849478635, Avg Input variance: 0.00033133701072074473
Epoch: 10, Avg Training loss: 0.5189331787270169, Avg Validation loss: 0.5140498849478635
Loading word2vec_model_200_10.model...
word2vec_model_200_10.model loaded.


Epoch 1/10 [Train]: 100%|██████████| 172/172 [05:47<00:00,  2.02s/it]


Avg Training loss: 3.7206491228452947, Avg Input variance: 0.000494411175591373


Epoch 1/10 [Validate]: 100%|██████████| 22/22 [00:40<00:00,  1.83s/it]


Avg Validation loss: 0.5664361986246976, Avg Input variance: 0.0005110968605467034
Epoch: 1, Avg Training loss: 3.7206491228452947, Avg Validation loss: 0.5664361986246976


Epoch 2/10 [Train]: 100%|██████████| 172/172 [05:37<00:00,  1.96s/it]


Avg Training loss: 0.5430344301600789, Avg Input variance: 0.0004944449246977456


Epoch 2/10 [Validate]: 100%|██████████| 22/22 [00:40<00:00,  1.83s/it]


Avg Validation loss: 0.5220563520084728, Avg Input variance: 0.000510880182528953
Epoch: 2, Avg Training loss: 0.5430344301600789, Avg Validation loss: 0.5220563520084728


Epoch 3/10 [Train]: 100%|██████████| 172/172 [06:12<00:00,  2.16s/it]


Avg Training loss: 0.523087474149327, Avg Input variance: 0.0004944494400216743


Epoch 3/10 [Validate]: 100%|██████████| 22/22 [00:50<00:00,  2.29s/it]


Avg Validation loss: 0.5152169113809412, Avg Input variance: 0.0005115472330627116
Epoch: 3, Avg Training loss: 0.523087474149327, Avg Validation loss: 0.5152169113809412


Epoch 4/10 [Train]: 100%|██████████| 172/172 [07:05<00:00,  2.48s/it]


Avg Training loss: 0.5198247439639513, Avg Input variance: 0.0004943610850226156


Epoch 4/10 [Validate]: 100%|██████████| 22/22 [00:51<00:00,  2.32s/it]


Avg Validation loss: 0.5140294297174974, Avg Input variance: 0.0005096207645361904
Epoch: 4, Avg Training loss: 0.5198247439639513, Avg Validation loss: 0.5140294297174974


Epoch 5/10 [Train]: 100%|██████████| 172/172 [07:09<00:00,  2.50s/it]


Avg Training loss: 0.5192530803209128, Avg Input variance: 0.0004944160778947218


Epoch 5/10 [Validate]: 100%|██████████| 22/22 [00:50<00:00,  2.28s/it]


Avg Validation loss: 0.5138226774605837, Avg Input variance: 0.0005113714300519364
Epoch: 5, Avg Training loss: 0.5192530803209128, Avg Validation loss: 0.5138226774605837


Epoch 6/10 [Train]: 100%|██████████| 172/172 [07:09<00:00,  2.50s/it]


Avg Training loss: 0.5191518757925477, Avg Input variance: 0.0004943960070810389


Epoch 6/10 [Validate]: 100%|██████████| 22/22 [00:50<00:00,  2.31s/it]


Avg Validation loss: 0.5137845277786255, Avg Input variance: 0.0005103122808081521
Epoch: 6, Avg Training loss: 0.5191518757925477, Avg Validation loss: 0.5137845277786255


Epoch 7/10 [Train]: 100%|██████████| 172/172 [07:03<00:00,  2.46s/it]


Avg Training loss: 0.5191333876099697, Avg Input variance: 0.0004944489183524342


Epoch 7/10 [Validate]: 100%|██████████| 22/22 [00:50<00:00,  2.29s/it]


Avg Validation loss: 0.5137398486787622, Avg Input variance: 0.0005108991701325232
Epoch: 7, Avg Training loss: 0.5191333876099697, Avg Validation loss: 0.5137398486787622


Epoch 8/10 [Train]: 100%|██████████| 172/172 [07:03<00:00,  2.46s/it]


Avg Training loss: 0.5190902584513952, Avg Input variance: 0.0004943795380355834


Epoch 8/10 [Validate]: 100%|██████████| 22/22 [00:50<00:00,  2.31s/it]


Avg Validation loss: 0.5161042511463165, Avg Input variance: 0.0005110770897855135
Epoch: 8, Avg Training loss: 0.5190902584513952, Avg Validation loss: 0.5161042511463165


Epoch 9/10 [Train]: 100%|██████████| 172/172 [07:02<00:00,  2.46s/it]


Avg Training loss: 0.5191307507975157, Avg Input variance: 0.0004944321496390499


Epoch 9/10 [Validate]: 100%|██████████| 22/22 [00:50<00:00,  2.30s/it]


Avg Validation loss: 0.5164426836100492, Avg Input variance: 0.0005108372332126072
Epoch: 9, Avg Training loss: 0.5191307507975157, Avg Validation loss: 0.5164426836100492


Epoch 10/10 [Train]: 100%|██████████| 172/172 [07:06<00:00,  2.48s/it]


Avg Training loss: 0.5191305491120316, Avg Input variance: 0.0004944434948874702


Epoch 10/10 [Validate]: 100%|██████████| 22/22 [00:50<00:00,  2.29s/it]


Avg Validation loss: 0.5165692296895114, Avg Input variance: 0.0005114381691038778
Epoch: 10, Avg Training loss: 0.5191305491120316, Avg Validation loss: 0.5165692296895114
Loading word2vec_model_50_5.model...
word2vec_model_50_5.model loaded.


Epoch 1/10 [Train]: 100%|██████████| 172/172 [07:02<00:00,  2.46s/it]


Avg Training loss: 3.628225565996281, Avg Input variance: 0.001265217014046949


Epoch 1/10 [Validate]: 100%|██████████| 22/22 [00:49<00:00,  2.27s/it]


Avg Validation loss: 0.4129491001367569, Avg Input variance: 0.0012916530672968788
Epoch: 1, Avg Training loss: 3.628225565996281, Avg Validation loss: 0.4129491001367569


Epoch 2/10 [Train]: 100%|██████████| 172/172 [07:08<00:00,  2.49s/it]


Avg Training loss: 0.3958437730406606, Avg Input variance: 0.0012651991463255483


Epoch 2/10 [Validate]: 100%|██████████| 22/22 [00:49<00:00,  2.27s/it]


Avg Validation loss: 0.38063851676203986, Avg Input variance: 0.0012961009951223705
Epoch: 2, Avg Training loss: 0.3958437730406606, Avg Validation loss: 0.38063851676203986


Epoch 3/10 [Train]: 100%|██████████| 172/172 [07:00<00:00,  2.45s/it]


Avg Training loss: 0.38132787565159243, Avg Input variance: 0.001265137966689762


Epoch 3/10 [Validate]: 100%|██████████| 22/22 [00:49<00:00,  2.27s/it]


Avg Validation loss: 0.37566492232409393, Avg Input variance: 0.0012947704156183383
Epoch: 3, Avg Training loss: 0.38132787565159243, Avg Validation loss: 0.37566492232409393


Epoch 4/10 [Train]: 100%|██████████| 172/172 [07:05<00:00,  2.47s/it]


Avg Training loss: 0.3789527203107989, Avg Input variance: 0.0012652350821109966


Epoch 4/10 [Validate]: 100%|██████████| 22/22 [00:52<00:00,  2.37s/it]


Avg Validation loss: 0.3747992745854638, Avg Input variance: 0.0012992639570835638
Epoch: 4, Avg Training loss: 0.3789527203107989, Avg Validation loss: 0.3747992745854638


Epoch 5/10 [Train]: 100%|██████████| 172/172 [07:03<00:00,  2.47s/it]


Avg Training loss: 0.3785365717702134, Avg Input variance: 0.0012651670529225537


Epoch 5/10 [Validate]: 100%|██████████| 22/22 [00:49<00:00,  2.23s/it]


Avg Validation loss: 0.3746426525441083, Avg Input variance: 0.0012939565825614739
Epoch: 5, Avg Training loss: 0.3785365717702134, Avg Validation loss: 0.3746426525441083


Epoch 6/10 [Train]: 100%|██████████| 172/172 [06:54<00:00,  2.41s/it]


Avg Training loss: 0.3784627455265023, Avg Input variance: 0.001265350878807218


Epoch 6/10 [Validate]: 100%|██████████| 22/22 [00:48<00:00,  2.23s/it]


Avg Validation loss: 0.3746159781109203, Avg Input variance: 0.0012935549073683267
Epoch: 6, Avg Training loss: 0.3784627455265023, Avg Validation loss: 0.3746159781109203


Epoch 7/10 [Train]: 100%|██████████| 172/172 [07:00<00:00,  2.45s/it]


Avg Training loss: 0.3784488391044528, Avg Input variance: 0.0012652061014641943


Epoch 7/10 [Validate]: 100%|██████████| 22/22 [00:49<00:00,  2.27s/it]


Avg Validation loss: 0.37462595105171204, Avg Input variance: 0.0012962959907864306
Epoch: 7, Avg Training loss: 0.3784488391044528, Avg Validation loss: 0.37462595105171204


Epoch 8/10 [Train]: 100%|██████████| 172/172 [06:50<00:00,  2.39s/it]


Avg Training loss: 0.3784133434988732, Avg Input variance: 0.0012652155493797605


Epoch 8/10 [Validate]: 100%|██████████| 22/22 [00:49<00:00,  2.25s/it]


Avg Validation loss: 0.37415470860221167, Avg Input variance: 0.0012928726342083378
Epoch: 8, Avg Training loss: 0.3784133434988732, Avg Validation loss: 0.37415470860221167


Epoch 9/10 [Train]: 100%|██████████| 172/172 [07:31<00:00,  2.63s/it]


Avg Training loss: 0.37844726703194687, Avg Input variance: 0.001265137959244596


Epoch 9/10 [Validate]: 100%|██████████| 22/22 [00:58<00:00,  2.64s/it]


Avg Validation loss: 0.3736256279728629, Avg Input variance: 0.0012962978798896074
Epoch: 9, Avg Training loss: 0.37844726703194687, Avg Validation loss: 0.3736256279728629


Epoch 10/10 [Train]: 100%|██████████| 172/172 [08:09<00:00,  2.84s/it]


Avg Training loss: 0.378446169544098, Avg Input variance: 0.0012652562196139073


Epoch 10/10 [Validate]: 100%|██████████| 22/22 [00:55<00:00,  2.53s/it]


Avg Validation loss: 0.37388110567222943, Avg Input variance: 0.0012899333325384016
Epoch: 10, Avg Training loss: 0.378446169544098, Avg Validation loss: 0.37388110567222943
Loading word2vec_model_50_10.model...
word2vec_model_50_10.model loaded.


Epoch 1/10 [Train]: 100%|██████████| 172/172 [08:06<00:00,  2.83s/it]


Avg Training loss: 3.633414086387601, Avg Input variance: 0.0019161451403538934


Epoch 1/10 [Validate]: 100%|██████████| 22/22 [00:57<00:00,  2.61s/it]


Avg Validation loss: 0.41547739099372516, Avg Input variance: 0.0019598463285629723
Epoch: 1, Avg Training loss: 3.633414086387601, Avg Validation loss: 0.41547739099372516


Epoch 2/10 [Train]: 100%|██████████| 172/172 [08:09<00:00,  2.85s/it]


Avg Training loss: 0.3964935671104941, Avg Input variance: 0.0019161233869327103


Epoch 2/10 [Validate]: 100%|██████████| 22/22 [00:55<00:00,  2.53s/it]


Avg Validation loss: 0.3830977149985053, Avg Input variance: 0.0019606967477805233
Epoch: 2, Avg Training loss: 0.3964935671104941, Avg Validation loss: 0.3830977149985053


Epoch 3/10 [Train]: 100%|██████████| 172/172 [07:24<00:00,  2.58s/it]


Avg Training loss: 0.38197629361651664, Avg Input variance: 0.0019162573816449663


Epoch 3/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.80s/it]


Avg Validation loss: 0.37809842689470813, Avg Input variance: 0.001959797492335466
Epoch: 3, Avg Training loss: 0.38197629361651664, Avg Validation loss: 0.37809842689470813


Epoch 4/10 [Train]: 100%|██████████| 172/172 [08:59<00:00,  3.14s/it]


Avg Training loss: 0.379601638504239, Avg Input variance: 0.0019163978374075838


Epoch 4/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.81s/it]


Avg Validation loss: 0.3772396729751067, Avg Input variance: 0.001973788391544738
Epoch: 4, Avg Training loss: 0.379601638504239, Avg Validation loss: 0.3772396729751067


Epoch 5/10 [Train]: 100%|██████████| 172/172 [08:40<00:00,  3.03s/it]


Avg Training loss: 0.37918488785277965, Avg Input variance: 0.0019161905362379066


Epoch 5/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.81s/it]


Avg Validation loss: 0.3770838691429658, Avg Input variance: 0.0019685930508950896
Epoch: 5, Avg Training loss: 0.37918488785277965, Avg Validation loss: 0.3770838691429658


Epoch 6/10 [Train]: 100%|██████████| 172/172 [08:40<00:00,  3.02s/it]


Avg Training loss: 0.37911079512086027, Avg Input variance: 0.0019162847805324248


Epoch 6/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.80s/it]


Avg Validation loss: 0.37706200913949445, Avg Input variance: 0.0019697675280357626
Epoch: 6, Avg Training loss: 0.37911079512086027, Avg Validation loss: 0.37706200913949445


Epoch 7/10 [Train]: 100%|██████████| 172/172 [08:39<00:00,  3.02s/it]


Avg Training loss: 0.3790971054587253, Avg Input variance: 0.0019162566878908683


Epoch 7/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.78s/it]


Avg Validation loss: 0.3770960122346878, Avg Input variance: 0.0019707258854230695
Epoch: 7, Avg Training loss: 0.3790971054587253, Avg Validation loss: 0.3770960122346878


Epoch 8/10 [Train]: 100%|██████████| 172/172 [08:39<00:00,  3.02s/it]


Avg Training loss: 0.37902588969053225, Avg Input variance: 0.0019164995918440264


Epoch 8/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.79s/it]


Avg Validation loss: 0.38135168091817334, Avg Input variance: 0.001966949610505253
Epoch: 8, Avg Training loss: 0.37902588969053225, Avg Validation loss: 0.38135168091817334


Epoch 9/10 [Train]: 100%|██████████| 172/172 [08:39<00:00,  3.02s/it]


Avg Training loss: 0.3790941527763078, Avg Input variance: 0.0019163395085935156


Epoch 9/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.80s/it]


Avg Validation loss: 0.382068085399541, Avg Input variance: 0.001963627937419171
Epoch: 9, Avg Training loss: 0.3790941527763078, Avg Validation loss: 0.382068085399541


Epoch 10/10 [Train]: 100%|██████████| 172/172 [08:39<00:00,  3.02s/it]


Avg Training loss: 0.3790949761520985, Avg Input variance: 0.0019161932442478056


Epoch 10/10 [Validate]: 100%|██████████| 22/22 [01:01<00:00,  2.79s/it]

Avg Validation loss: 0.3821541545065967, Avg Input variance: 0.001968536139677533
Epoch: 10, Avg Training loss: 0.3790949761520985, Avg Validation loss: 0.3821541545065967





In [154]:
training_stats

[{'name': 'word2vec_model_100_5',
  'epoch': 1,
  'Training Loss': 3.702573890602866,
  'Valid. Loss': 0.5644270290027965,
  'Training Var': 0.0006341415519696162,
  'Valid. Var': 0.0006514405567114326},
 {'name': 'word2vec_model_100_5',
  'epoch': 2,
  'Training Loss': 0.5430761939564417,
  'Valid. Loss': 0.5202192041006956,
  'Training Var': 0.0006341679389913414,
  'Valid. Var': 0.0006524893529289825},
 {'name': 'word2vec_model_100_5',
  'epoch': 3,
  'Training Loss': 0.5231285705122837,
  'Valid. Loss': 0.5133956670761108,
  'Training Var': 0.0006342082491501906,
  'Valid. Var': 0.0006500724306203086},
 {'name': 'word2vec_model_100_5',
  'epoch': 4,
  'Training Loss': 0.5198649736337884,
  'Valid. Loss': 0.5122098218310963,
  'Training Var': 0.000634200663541357,
  'Valid. Var': 0.0006513336451131512},
 {'name': 'word2vec_model_100_5',
  'epoch': 5,
  'Training Loss': 0.5192937258371088,
  'Valid. Loss': 0.5120027552951466,
  'Training Var': 0.0006342344392134353,
  'Valid. Var': 0

Now let's train the item embedding generator using the Business Dataloader

In [323]:
item_encoder_instance = user_CAE(input_dim = (5,10))
item_decoder_instance = user_decoder(output_dim = (5,10))

# move the model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
item_encoder_instance.to(device)
item_decoder_instance.to(device)

# create dataset and dataloader
item_train_dataloader, item_validation_dataloader, item_test_dataloader = create_datasets_and_loaders(business_dataset, business_lookup, loaded_model, "business", batch_size=128)


# train the model
training_stats = train_model(item_encoder_instance, item_decoder_instance, item_train_dataloader, item_validation_dataloader, device, "item")

Epoch 1/10 [Train]:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 1/10 [Train]: 100%|██████████| 67/67 [06:38<00:00,  5.95s/it]


Avg Training loss: 8.57864819889638, Avg Input variance: 0.0019937266731189923


Epoch 1/10 [Validate]: 100%|██████████| 9/9 [00:54<00:00,  6.11s/it]


Avg Validation loss: 0.579171982076433, Avg Input variance: 0.0019558508259554705
Epoch: 1, Avg Training loss: 8.57864819889638, Avg Validation loss: 0.579171982076433


Epoch 2/10 [Train]: 100%|██████████| 67/67 [06:42<00:00,  6.01s/it]


Avg Training loss: 0.4843931060228775, Avg Input variance: 0.0019918647368869454


Epoch 2/10 [Validate]: 100%|██████████| 9/9 [00:55<00:00,  6.20s/it]


Avg Validation loss: 0.43669845660527545, Avg Input variance: 0.0019546463283606702
Epoch: 2, Avg Training loss: 0.4843931060228775, Avg Validation loss: 0.43669845660527545


Epoch 3/10 [Train]: 100%|██████████| 67/67 [07:03<00:00,  6.33s/it]


Avg Training loss: 0.4213167900469766, Avg Input variance: 0.0019924546689239903


Epoch 3/10 [Validate]: 100%|██████████| 9/9 [00:54<00:00,  6.06s/it]


Avg Validation loss: 0.410196539428499, Avg Input variance: 0.0019432866894122628
Epoch: 3, Avg Training loss: 0.4213167900469766, Avg Validation loss: 0.410196539428499


Epoch 4/10 [Train]: 100%|██████████| 67/67 [07:48<00:00,  7.00s/it]


Avg Training loss: 0.3989711372709986, Avg Input variance: 0.0019936220626583074


Epoch 4/10 [Validate]: 100%|██████████| 9/9 [01:03<00:00,  7.06s/it]


Avg Validation loss: 0.38926118943426347, Avg Input variance: 0.0019406687675250901
Epoch: 4, Avg Training loss: 0.3989711372709986, Avg Validation loss: 0.38926118943426347


Epoch 5/10 [Train]: 100%|██████████| 67/67 [08:02<00:00,  7.20s/it]


Avg Training loss: 0.3887824967725953, Avg Input variance: 0.0019924996208760945


Epoch 5/10 [Validate]: 100%|██████████| 9/9 [01:00<00:00,  6.78s/it]


Avg Validation loss: 0.3889416688018375, Avg Input variance: 0.00194791277560095
Epoch: 5, Avg Training loss: 0.3887824967725953, Avg Validation loss: 0.3889416688018375


Epoch 6/10 [Train]: 100%|██████████| 67/67 [07:22<00:00,  6.60s/it]


Avg Training loss: 0.3840321213451784, Avg Input variance: 0.001994121675518578


Epoch 6/10 [Validate]: 100%|██████████| 9/9 [00:53<00:00,  5.94s/it]


Avg Validation loss: 0.37927401396963334, Avg Input variance: 0.0019545620824727747
Epoch: 6, Avg Training loss: 0.3840321213451784, Avg Validation loss: 0.37927401396963334


Epoch 7/10 [Train]: 100%|██████████| 67/67 [07:02<00:00,  6.31s/it]


Avg Training loss: 0.3815356928910782, Avg Input variance: 0.001992264986663723


Epoch 7/10 [Validate]: 100%|██████████| 9/9 [00:54<00:00,  6.00s/it]


Avg Validation loss: 0.38393472962909275, Avg Input variance: 0.001959556571414901
Epoch: 7, Avg Training loss: 0.3815356928910782, Avg Validation loss: 0.38393472962909275


Epoch 8/10 [Train]: 100%|██████████| 67/67 [07:05<00:00,  6.35s/it]


Avg Training loss: 0.3804243304836216, Avg Input variance: 0.0019939909793380926


Epoch 8/10 [Validate]: 100%|██████████| 9/9 [00:53<00:00,  5.98s/it]


Avg Validation loss: 0.3767971032195621, Avg Input variance: 0.0019407485249555772
Epoch: 8, Avg Training loss: 0.3804243304836216, Avg Validation loss: 0.3767971032195621


Epoch 9/10 [Train]: 100%|██████████| 67/67 [07:02<00:00,  6.30s/it]


Avg Training loss: 0.37971982982621266, Avg Input variance: 0.0019933937113982307


Epoch 9/10 [Validate]: 100%|██████████| 9/9 [00:54<00:00,  6.07s/it]


Avg Validation loss: 0.38269079393810695, Avg Input variance: 0.0019463516720053223
Epoch: 9, Avg Training loss: 0.37971982982621266, Avg Validation loss: 0.38269079393810695


Epoch 10/10 [Train]: 100%|██████████| 67/67 [07:03<00:00,  6.32s/it]


Avg Training loss: 0.37950242544288065, Avg Input variance: 0.0019920306582587644


Epoch 10/10 [Validate]: 100%|██████████| 9/9 [00:53<00:00,  5.98s/it]

Avg Validation loss: 0.37617361214425826, Avg Input variance: 0.0019529778769032822
Epoch: 10, Avg Training loss: 0.37950242544288065, Avg Validation loss: 0.37617361214425826





In [324]:
with open('training_datas\\item_training_stats.pkl', 'wb') as f:
    pickle.dump(training_stats, f)

In [341]:
# Obtain model performance on test set

# load the best model
user_encoder_instance = user_CAE(input_dim = (5,10))    
user_decoder_instance = user_decoder(output_dim = (5,10))

user_encoder_instance.load_state_dict(torch.load("AE_model_weights\\word2vec_model_50_5_best_encoder_model.pth"))
user_decoder_instance.load_state_dict(torch.load("AE_model_weights\\word2vec_model_50_5_best_decoder_model.pth"))

# move the model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
user_encoder_instance.to(device)
user_decoder_instance.to(device)

# set the model to evaluation mode
user_encoder_instance.eval()
user_decoder_instance.eval()

# create test dataloader
loaded_model = Word2Vec.load("word2vec_model/word2vec_model_50_5.model")
_, _, user_test_dataloader = create_datasets_and_loaders(user_dataset, user_lookup, loaded_model, "user", batch_size=64)

# using user_test_dataloader to get the test loss
total_test_loss = 0
total_test_variance = 0

with torch.no_grad():
    for user_matrix in user_test_dataloader:
        user_matrix = user_matrix.to(device).float()

        # Compute variance of user_matrix
        variance = user_matrix.var()
        total_test_variance += variance.item()

        # forward
        user_embedding = user_encoder_instance(user_matrix)
        user_output = user_decoder_instance(user_embedding)
        user_loss = reconstruction_loss_with_regularization(user_output, user_matrix, user_encoder_instance)
        total_test_loss += user_loss.item()

avg_test_loss = total_test_loss / len(user_test_dataloader)
avg_test_variance = total_test_variance / len(user_test_dataloader)
print(f"Avg Test loss: {avg_test_loss}, Avg Input variance: {avg_test_variance}")

# load the best model
item_encoder_instance = user_CAE(input_dim = (5,10))
item_decoder_instance = user_decoder(output_dim = (5,10))

item_encoder_instance.load_state_dict(torch.load("AE_model_weights\\item_best_encoder_model.pth"))
item_decoder_instance.load_state_dict(torch.load("AE_model_weights\\item_best_decoder_model.pth"))

# move the model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
item_encoder_instance.to(device)
item_decoder_instance.to(device)

# set the model to evaluation mode
item_encoder_instance.eval()
item_decoder_instance.eval()

# create test dataloader
_, _, item_test_dataloader = create_datasets_and_loaders(business_dataset, business_lookup, loaded_model, "business", batch_size=128)

# using item_test_dataloader to get the test loss
total_test_loss = 0
total_test_variance = 0

with torch.no_grad():
    for item_matrix in item_test_dataloader:
        item_matrix = item_matrix.to(device).float()

        # Compute variance of item_matrix
        variance = item_matrix.var()
        total_test_variance += variance.item()

        # forward
        item_embedding = item_encoder_instance(item_matrix)
        item_output = item_decoder_instance(item_embedding)
        item_loss = reconstruction_loss_with_regularization(item_output, item_matrix, item_encoder_instance)
        total_test_loss += item_loss.item()

avg_test_loss = total_test_loss / len(item_test_dataloader)
avg_test_variance = total_test_variance / len(item_test_dataloader)
print(f"Avg Test loss: {avg_test_loss}, Avg Input variance: {avg_test_variance}")



Avg Test loss: 0.3736098110675812, Avg Input variance: 0.0012796026421710849
Avg Test loss: 0.37574903832541573, Avg Input variance: 0.0013512691819212502


Test 1 with window - 10
1. Avg Test loss: 0.37445858120918274, Avg Input variance: 0.001936642124994912
2. Avg Test loss: 0.3762293689780765, Avg Input variance: 0.0020360151279924642


Obtaining embeddings for all user_ids and item_ids

In [346]:
# We need to create a new Dataset and DataLoader which contains all the users and businesses

full_user_dataset = UserDataset(user_dataset,list(user_lookup.keys()),user_lookup,
                                w2vmodel=loaded_model)

full_business_dataset = BusinessDataset(business_dataset,list(business_lookup.keys()),
                                        business_lookup,w2vmodel=loaded_model)

full_user_dataloader = DataLoader(full_user_dataset, batch_size=64, shuffle=False, num_workers=0)
full_business_dataloader = DataLoader(full_business_dataset, batch_size=128, shuffle=False, num_workers=0) 

In [349]:
# Obtain the embeddings for all users and businesses

# load the best model
user_encoder_instance = user_CAE(input_dim = (5,10))
user_decoder_instance = user_decoder(output_dim = (5,10))

user_encoder_instance.load_state_dict(torch.load("AE_model_weights\\word2vec_model_50_5_best_encoder_model.pth"))
user_decoder_instance.load_state_dict(torch.load("AE_model_weights\\word2vec_model_50_5_best_decoder_model.pth"))

# move the model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

user_encoder_instance.to(device)
user_decoder_instance.to(device)

# set the model to evaluation mode
user_encoder_instance.eval()

# create user_embeddings
user_embeddings = {}

with torch.no_grad():
    for user_matrix, user_id in full_user_dataloader:
        user_matrix = user_matrix.to(device).float()

        # forward
        user_embedding = user_encoder_instance(user_matrix)
        
        # iterate over the batch
        for id, embedding in zip(user_id, user_embedding):
            # move the embedding to cpu and convert to numpy array
            embedding = embedding.cpu().numpy()

            # add the embedding to the dictionary
            user_embeddings[id.item()] = embedding

In [350]:
item_encoder_instance = user_CAE(input_dim = (5,10))

item_encoder_instance.load_state_dict(torch.load("AE_model_weights\\item_best_encoder_model.pth"))

# move the model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

item_encoder_instance.to(device)

# set the model to evaluation mode
item_encoder_instance.eval()

# create item_embeddings
item_embeddings = {}

with torch.no_grad():
    for item_matrix, item_id in full_business_dataloader:
        item_matrix = item_matrix.to(device).float()

        # forward
        item_embedding = item_encoder_instance(item_matrix)
        
        # iterate over the batch
        for id, embedding in zip(item_id, item_embedding):
            # move the embedding to cpu and convert to numpy array
            embedding = embedding.cpu().numpy()

            # add the embedding to the dictionary
            item_embeddings[id.item()] = embedding

In [357]:
with open('user_item_embeddings\\user_embeddings.pkl','wb') as f:
    pickle.dump(user_embeddings,f)

with open('user_item_embeddings\\item_embeddings.pkl','wb') as f:
    pickle.dump(item_embeddings,f)