In [16]:
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

# Read the raw sales records
df = pd.read_csv("../data/sample_sales.csv")

# Every order is treated as its own "customer": order_id is copied into customer_id.
# NOTE(review): this replaces any customer_id column the CSV may already carry — confirm intended.
df = df.assign(customer_id=df['order_id'])

# One encoder per entity type; each maps raw values to contiguous integer codes
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

# Attach the integer codes plus a constant implicit-feedback label (1 = observed purchase)
df = df.assign(
    user=user_encoder.fit_transform(df['customer_id']),
    item=item_encoder.fit_transform(df['product_name']),
    interaction=1,
)


In [17]:
class InteractionDataset(Dataset):
    """Dataset of (user, item, label) triples held as tensors.

    The ``user``, ``item`` and ``interaction`` columns of ``dataframe`` are
    converted once, at construction time, into three tensors: ``users`` and
    ``items`` as ``torch.long``, ``labels`` as ``torch.float32``.
    """

    def __init__(self, dataframe):
        def column_as_tensor(column, dtype):
            # Copies the column's values into a new tensor of the requested dtype
            return torch.tensor(dataframe[column].values, dtype=dtype)

        self.users = column_as_tensor('user', torch.long)
        self.items = column_as_tensor('item', torch.long)
        self.labels = column_as_tensor('interaction', torch.float32)

    def __len__(self):
        # One sample per row of the source frame
        return self.users.size(0)

    def __getitem__(self, idx):
        sample = (self.users[idx], self.items[idx], self.labels[idx])
        return sample

# Wrap the interaction frame and serve it in shuffled mini-batches of 64
dataset = InteractionDataset(dataframe=df)
loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)


In [18]:
import torch.nn as nn

class MFModel(nn.Module):
    """Matrix-factorisation scorer: one embedding table per entity type.

    Parameters
    ----------
    num_users : int
        Number of rows in the user embedding table.
    num_items : int
        Number of rows in the item embedding table.
    embedding_dim : int, default 20
        Width of both embedding tables.
    """

    def __init__(self, num_users, num_items, embedding_dim=20):
        super().__init__()
        self.user_embed = nn.Embedding(num_users, embedding_dim)
        self.item_embed = nn.Embedding(num_items, embedding_dim)

    def forward(self, user, item):
        """Score each (user, item) pair as the dot product of its two embeddings.

        ``user`` and ``item`` are index tensors into the respective tables;
        the element-wise product is summed over dimension 1.
        """
        pair_product = self.user_embed(user) * self.item_embed(item)
        return pair_product.sum(dim=1)


In [19]:
# Seed PyTorch's global RNG before anything random happens in this cell, so the
# embedding initialisation and the DataLoader shuffle order (both drawn from the
# global generator) are repeatable on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Embedding tables are sized from the number of distinct integer codes in df
model = MFModel(num_users=df['user'].nunique(), num_items=df['item'].nunique())
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# NOTE(review): the 'interaction' label is the constant 1 for every row, so the
# model only ever sees positive examples; scores for unobserved (user, item)
# pairs are unconstrained. Consider adding sampled negatives — TODO confirm.
epochs = 5
for epoch in range(epochs):
    # Running sum of the per-batch mean losses (not an average over samples)
    total_loss = 0.0
    for users, items, labels in loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        preds = model(users, items)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")


Epoch 1: Loss = 1379.9381
Epoch 2: Loss = 626.8506
Epoch 3: Loss = 322.5917
Epoch 4: Loss = 185.5694
Epoch 5: Loss = 110.9011


In [20]:
# Recommend top-N products for a user
def recommend_products(user_id_raw, top_n=5):
    """Return the names of the highest-scoring products for one user.

    Parameters
    ----------
    user_id_raw
        Raw user identifier, in the same form that ``user_encoder`` was fitted on.
    top_n : int, default 5
        Maximum number of products to return. Capped at the number of known
        products, so asking for more than exist returns all of them.

    Returns
    -------
    numpy.ndarray
        Product names from ``item_encoder.inverse_transform``, ordered from
        highest to lowest model score.

    Raises
    ------
    Whatever ``user_encoder.transform`` raises for an identifier it was not
    fitted on (``ValueError`` for scikit-learn's ``LabelEncoder``).

    Notes
    -----
    Every known product is scored; products the user already interacted with
    are not filtered out.
    """
    user_id = int(user_encoder.transform([user_id_raw])[0])

    # Use the encoder's own class list as the catalogue, since the same encoder
    # decodes the winning indices below.
    num_items = len(item_encoder.classes_)
    item_ids = torch.arange(num_items)
    # Repeat the user's code once per item without building a Python list
    user_ids = torch.full_like(item_ids, user_id)

    with torch.no_grad():
        scores = model(user_ids, item_ids)

    # torch.topk raises if k exceeds the number of scores, so cap it
    k = min(top_n, num_items)
    top_items = torch.topk(scores, k).indices.numpy()
    product_names = item_encoder.inverse_transform(top_items)
    return product_names

# Demo call: 1001 is a sample identifier — swap in one that exists in your data
example_recommendations = recommend_products(user_id_raw=1001)
print("Recommended for user_id=1001:", example_recommendations)


Recommended for user_id=1001: ['Lentils 1kg' 'Tomatoes 1kg' 'Sandwich' 'Jeans' 'Paracetamol']
