In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch

# Load the dataset (Excel workbook fetched from the UCI repository; needs network access)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx"
data = pd.read_excel(url)

# Preprocess the data.
# Every step below returns a new frame instead of modifying a column slice in
# place; the in-place edits on a slice are what made pandas emit
# SettingWithCopyWarning for the dropna / astype / constant-column statements.
kept_columns = ['CustomerID', 'StockCode', 'Description', 'Quantity', 'UnitPrice', 'date', 'Country']
data = (
    data
    # Parsed timestamp of each invoice line
    .assign(date=lambda df: pd.to_datetime(df['InvoiceDate']))
    .loc[:, kept_columns]
    # Rows without a customer cannot be attributed to a user
    .dropna(subset=['CustomerID'])
    # Integer customer ids, plus an implicit-feedback flag (every row is one interaction)
    .assign(CustomerID=lambda df: df['CustomerID'].astype(int), interaction=1)
    # Rename columns for consistency
    .rename(columns={'CustomerID': 'user_id', 'StockCode': 'item_id'})
)

# Create mappings for item descriptions (original item_id -> Description).
# An item_id that occurs on several rows keeps the description of its last row.
item_description_mapping = data.set_index('item_id')['Description'].to_dict()

# Extract popular products for each location:
# total quantity sold per (Country, item_id), ordered by country and then by
# descending quantity so each country's best sellers come first.
popular_products = (
    data.groupby(['Country', 'item_id'])['Quantity']
    .sum()
    .reset_index()
    .sort_values(by=['Country', 'Quantity'], ascending=[True, False])
)

# Create a mapping of popular products for each location:
# country -> list of item_ids, most purchased first.
location_popular_products = {
    country: country_rows['item_id'].tolist()
    for country, country_rows in popular_products.groupby('Country')
}

# Convert item_id to indices: each distinct item_id gets a consecutive integer,
# numbered in order of first appearance in `data`.
distinct_items = data['item_id'].unique()
item_mapping = dict(zip(distinct_items, range(len(distinct_items))))  # item_id -> index
inverse_item_mapping = dict(enumerate(distinct_items))  # index -> item_id (inverse mapping)
data['item_id'] = data['item_id'].map(item_mapping)

# Split data into training, validation, and test sets (60% / 20% / 20% of the rows)
from sklearn.model_selection import train_test_split  # also imported at the top of this cell
train_data, temp_data = train_test_split(data, test_size=0.4, random_state=42)
# The held-out 40% is halved into validation and test
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# Prepare training tensor
# user_id -> row index, numbered in order of first appearance in the training split
user_mapping = {user: idx for idx, user in enumerate(train_data['user_id'].unique())}
num_users = len(user_mapping)

input_dim = len(item_mapping)
context_dim = 10  # We'll use the top 10 popular products as context

# One row per training user: multi-hot purchased items, and a location context vector
train_tensor = torch.zeros((num_users, input_dim))
context_tensor = torch.zeros((num_users, context_dim))

# Build each country's context vector once. The previous code rebuilt an
# input_dim-long list (with a list membership test per entry) for every single
# row and then kept only its first context_dim entries; computing just those
# entries per country yields the same values at a fraction of the cost.
# NOTE(review): `item` ranges over the integers 0..context_dim-1, whereas
# location_popular_products was filled before item_id was remapped to indices
# and therefore holds the original item ids. The membership test compares two
# different identifier spaces -- confirm the intended context encoding (any
# change must be mirrored where the context is built at inference time).
country_context = {}
for country in train_data['Country'].unique():
    top_products = location_popular_products[country][:10]
    country_context[country] = torch.tensor(
        [1 if item in top_products else 0 for item in range(context_dim)],
        dtype=torch.float32,
    )

for row in train_data.itertuples():
    user_idx = user_mapping[row.user_id]
    # item_id already holds the integer item index at this point
    train_tensor[user_idx, row.item_id] = 1
    # A user with rows in several countries keeps the context of the last row seen
    context_tensor[user_idx] = country_context[row.Country]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.dropna(subset=['CustomerID'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['CustomerID'] = data['CustomerID'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['interaction'] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class VAE(nn.Module):
    """Variational autoencoder conditioned on a context vector.

    The context is concatenated to the input before encoding and to the latent
    sample before decoding, so both halves of the network see it.

    Args:
        input_dim: Width of the vector being reconstructed.
        hidden_dim: Width of the single hidden layer in encoder and decoder.
        latent_dim: Width of the latent code.
        context_dim: Width of the context vector.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, context_dim):
        super().__init__()
        # Encoder: [x ; context] -> hidden -> (mu, logvar)
        self.fc1 = nn.Linear(input_dim + context_dim, hidden_dim)
        self.fc2_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc2_logvar = nn.Linear(hidden_dim, latent_dim)
        # Decoder: [z ; context] -> hidden -> reconstruction
        self.fc3 = nn.Linear(latent_dim + context_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, input_dim)

    def encode(self, x):
        """Return ``(mu, logvar)`` of the approximate posterior for ``x``.

        ``x`` must already have the context concatenated to it.
        """
        hidden = self.fc1(x).relu()
        mu = self.fc2_mu(hidden)
        logvar = self.fc2_logvar(hidden)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps`` sampled from a standard normal."""
        std = (0.5 * logvar).exp()
        noise = torch.randn_like(std)
        return mu + noise * std

    def decode(self, z):
        """Map ``z`` (latent sample with context appended) to values in [0, 1]."""
        hidden = self.fc3(z).relu()
        return self.fc4(hidden).sigmoid()

    def forward(self, x, context):
        """Encode, sample and decode one batch.

        Returns:
            Tuple ``(reconstruction, mu, logvar)``.
        """
        encoder_input = torch.cat((x, context), dim=1)
        mu, logvar = self.encode(encoder_input)
        latent = self.reparameterize(mu, logvar)
        decoder_input = torch.cat((latent, context), dim=1)
        reconstruction = self.decode(decoder_input)
        return reconstruction, mu, logvar

def loss_function(recon_x, x, mu, logvar):
    """Return the summed VAE loss: reconstruction error plus KL divergence.

    Args:
        recon_x: Reconstructed probabilities in [0, 1], same shape as ``x``.
        x: Target values for the binary cross-entropy.
        mu: Mean of the approximate posterior.
        logvar: Log-variance of the approximate posterior.

    Returns:
        Scalar tensor; both terms are summed (not averaged) over all elements.
    """
    reconstruction_term = nn.functional.binary_cross_entropy(
        input=recon_x, target=x, reduction='sum'
    )
    # KL divergence between N(mu, exp(logvar)) and the standard normal
    kl_elements = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_term = -0.5 * torch.sum(kl_elements)
    return reconstruction_term + kl_term

# Hyperparameters
hidden_dim = 256  # width of the hidden layer in both encoder and decoder
latent_dim = 20   # width of the latent code

# Seed torch's global generator before any weights are created. Without it the
# layer initialisation (and every random draw made afterwards from the same
# generator) changes on each Restart & Run All. 42 matches the random_state
# already used for the train/validation/test split.
torch.manual_seed(42)

# Initialize model
vae = VAE(input_dim, hidden_dim, latent_dim, context_dim)

# Loss and optimizer
optimizer = optim.Adam(vae.parameters(), lr=0.001)


In [None]:
# Training loop: full passes over the training users in fixed-order mini-batches
num_epochs = 50
batch_size = 64
num_train_rows = len(train_tensor)

for epoch in range(num_epochs):
    vae.train()
    train_loss = 0  # sum of the (already summed) batch losses for this epoch
    for start in range(0, num_train_rows, batch_size):
        rows = slice(start, start + batch_size)  # the final batch may be shorter
        batch_x, batch_context = train_tensor[rows], context_tensor[rows]

        optimizer.zero_grad()
        recon_batch, mu, logvar = vae(batch_x, batch_context)
        loss = loss_function(recon_batch, batch_x, mu, logvar)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Report the average loss per training row
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss / len(train_tensor)}')



Epoch 1/50, Loss: 166.9732585044859
Epoch 2/50, Loss: 166.62394762006093
Epoch 3/50, Loss: 166.27410784193466
Epoch 4/50, Loss: 165.73321176705855
Epoch 5/50, Loss: 165.970474853798
Epoch 6/50, Loss: 165.5745596884578
Epoch 7/50, Loss: 165.34919682647683
Epoch 8/50, Loss: 163.78335231734238
Epoch 9/50, Loss: 163.2353421029685
Epoch 10/50, Loss: 162.76798981228222
Epoch 11/50, Loss: 162.19341408497698
Epoch 12/50, Loss: 161.79707867642009
Epoch 13/50, Loss: 161.23699145699828
Epoch 14/50, Loss: 160.55122158413212
Epoch 15/50, Loss: 160.05802058710313
Epoch 16/50, Loss: 159.85442534955652
Epoch 17/50, Loss: 159.46750354435937
Epoch 18/50, Loss: 158.68693330346946
Epoch 19/50, Loss: 158.0278507597105
Epoch 20/50, Loss: 157.6109414560608
Epoch 21/50, Loss: 156.85996475237374
Epoch 22/50, Loss: 156.38450238176884
Epoch 23/50, Loss: 155.71518919938364
Epoch 24/50, Loss: 155.18352744856725
Epoch 25/50, Loss: 154.49213196407885
Epoch 26/50, Loss: 154.19226815450696
Epoch 27/50, Loss: 154.25603

In [None]:
import torch

# Function to generate recommendations for a new user based on location
def generate_recommendations_for_new_user(vae, location, item_mapping, location_popular_products, context_dim, top_k=10):
    """Recommend items for a user with no purchase history, using only their location.

    The model is fed an all-zero interaction vector together with the location's
    context vector, and the highest-scoring reconstructed items are returned.

    Everything the function needs is taken from its arguments; it no longer
    depends on the notebook globals ``input_dim``, ``inverse_item_mapping`` or ``np``.

    Args:
        vae: Model called as ``vae(x, context)`` and returning a tuple whose
            first element holds the item scores, shape ``(1, len(item_mapping))``.
        location: Key into ``location_popular_products``.
        item_mapping: Dict of original item id -> integer item index.
        location_popular_products: Dict of location -> item list, most popular first.
        context_dim: Length of the context vector the model was built with.
        top_k: Number of items to return (default 10); capped at the item count.

    Returns:
        List of original item ids, best score first.

    Raises:
        KeyError: If ``location`` has no entry in ``location_popular_products``.
    """
    if location not in location_popular_products:
        raise KeyError(f"No popular-product list available for location {location!r}")

    num_items = len(item_mapping)
    index_to_item = {idx: item for item, idx in item_mapping.items()}

    vae.eval()

    # Create location context with the correct dimensionality. The cutoff of 10
    # and the membership test deliberately mirror how the context rows are
    # built for training, so the model sees the same encoding at inference.
    # NOTE(review): `item` is an integer position 0..context_dim-1 while the
    # lists in location_popular_products appear to hold original item ids --
    # confirm both sides use the same identifier space.
    top_popular = location_popular_products[location][:10]
    location_context = [1 if item in top_popular else 0 for item in range(context_dim)]
    location_context_tensor = torch.tensor(location_context, dtype=torch.float32).unsqueeze(0)

    # A new user has no interactions: an all-zero vector of the right width
    new_user_vector = torch.zeros((1, num_items))

    # Generate recommendations
    with torch.no_grad():
        recon_vector, _, _ = vae(new_user_vector, location_context_tensor)

    # Flatten explicitly (a bare squeeze() would collapse a single-item catalogue
    # to a scalar), then take the best-scoring indices in descending order.
    scores = recon_vector.reshape(-1)
    k = min(max(top_k, 0), scores.numel())
    top_indices = torch.topk(scores, k).indices.tolist()

    # Map item indices back to item IDs
    recommended_items = [index_to_item[idx] for idx in top_indices]

    return recommended_items

# Example usage: recommendations for a first-time user, given only their country
new_user_location = "United Kingdom"  # must be a key of location_popular_products
recommended_items = generate_recommendations_for_new_user(
    vae,
    new_user_location,
    item_mapping,
    location_popular_products,
    context_dim,  # context width the model was built with
)
# Map item IDs back to descriptions for display
actual_recommended_items = [
    item_description_mapping[stock_code] for stock_code in recommended_items
]

print(f"Top 10 recommended items for a new user in {new_user_location}: {actual_recommended_items}")


Top 10 recommended items for a new user in United Kingdom: ['CINDERELLA CHANDELIER ', 'DOORMAT UNION FLAG', 'EDWARDIAN PARASOL BLACK', 'RECORD FRAME 7" SINGLE SIZE ', 'RECYCLED ACAPULCO MAT GREEN', 'RECYCLED ACAPULCO MAT PINK', 'SET OF 16 VINTAGE BLACK CUTLERY', 'CHILLI LIGHTS', 'CARRIAGE', 'FLOOR CUSHION ELEPHANT CARNIVAL']
