In [6]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [7]:
# Load the raw investment records (semicolon-delimited CSV).
investments = pd.read_csv("syntheticDataGenerators/investment/invest_data.csv", sep=';')

# Rating = the share of a user's total invested amount that this row represents,
# so a user's ratings add up to 1 across their rows.
user_totals = investments.groupby('user_id')['investment_amount'].sum()
# Vectorised lookup of each row's user total (replaces a slow row-wise apply()).
# NOTE(review): a user whose total is 0 would produce NaN/inf ratings here —
# confirm the data never contains such users before training on `rating`.
investments['rating'] = investments['investment_amount'] / investments['user_id'].map(user_totals)

# Encode user and basket IDs as contiguous integers, numbered in order of
# first appearance, so they can be used as embedding indices.
user2idx = {user_id: idx for idx, user_id in enumerate(investments['user_id'].unique())}
basket2idx = {basket: idx for idx, basket in enumerate(investments['basket_name'].unique())}

investments['user_idx'] = investments['user_id'].map(user2idx)
investments['basket_idx'] = investments['basket_name'].map(basket2idx)

In [8]:
class RecommenderNet(nn.Module):
    """Embedding-based scorer for (user, item) pairs.

    Each user and each item gets a learned embedding vector; the two vectors
    are concatenated and passed through a small MLP ending in a sigmoid, so
    every score lies in (0, 1).

    Args:
        num_users: Number of distinct user indices (size of the user table).
        num_items: Number of distinct item indices (size of the item table).
        embedding_size: Dimensionality of each embedding vector.
    """

    def __init__(self, num_users, num_items, embedding_size=50):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)
        self.fc = nn.Sequential(
            nn.Linear(embedding_size * 2, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, user_ids, item_ids):
        """Score each (user, item) pair.

        Args:
            user_ids: 1-D integer tensor of user indices.
            item_ids: 1-D integer tensor of item indices, same length as
                ``user_ids``.

        Returns:
            1-D float tensor with one score per input pair.
        """
        user_vecs = self.user_embedding(user_ids)
        item_vecs = self.item_embedding(item_ids)
        combined = torch.cat([user_vecs, item_vecs], dim=1)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would
        # also remove the batch axis when the batch holds a single pair,
        # returning a 0-dim scalar instead of a length-1 vector.
        return self.fc(combined).squeeze(-1)

In [9]:
# Prepare data: inputs are (user index, basket index) pairs, target is the rating.
X = investments[['user_idx', 'basket_idx']].values
y = investments['rating'].values.astype(np.float32)

# Hold out 20% of the rows for validation; the split itself is seeded.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to tensors (column 0 = user index, column 1 = basket index).
train_users = torch.tensor(X_train[:, 0])
train_items = torch.tensor(X_train[:, 1])
train_ratings = torch.tensor(y_train)

val_users = torch.tensor(X_val[:, 0])
val_items = torch.tensor(X_val[:, 1])
val_ratings = torch.tensor(y_val)

# Seed torch's RNG before building the model so that the random weight
# initialisation (and therefore the losses printed below) is reproducible
# across kernel restarts, matching the seeded train/validation split.
torch.manual_seed(42)

# Model
model = RecommenderNet(num_users=len(user2idx), num_items=len(basket2idx))
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop: one full-batch gradient step per epoch, followed by a
# gradient-free evaluation on the validation rows.
epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(train_users, train_items)
    loss = criterion(outputs, train_ratings)
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        val_preds = model(val_users, val_items)
        val_loss = criterion(val_preds, val_ratings)

    print(f"Epoch {epoch+1}: Train Loss = {loss.item():.4f}, Val Loss = {val_loss.item():.4f}")

Epoch 1: Train Loss = 0.1261, Val Loss = 0.0671
Epoch 2: Train Loss = 0.0642, Val Loss = 0.0334
Epoch 3: Train Loss = 0.0308, Val Loss = 0.0177
Epoch 4: Train Loss = 0.0162, Val Loss = 0.0124
Epoch 5: Train Loss = 0.0118, Val Loss = 0.0119
Epoch 6: Train Loss = 0.0119, Val Loss = 0.0131
Epoch 7: Train Loss = 0.0133, Val Loss = 0.0144
Epoch 8: Train Loss = 0.0147, Val Loss = 0.0152
Epoch 9: Train Loss = 0.0154, Val Loss = 0.0152
Epoch 10: Train Loss = 0.0154, Val Loss = 0.0147


In [10]:
def recommend_for_users(user_ids, top_n=5):
    """Print the highest-scoring baskets each user has not invested in yet.

    Relies on the notebook-level ``model``, ``user2idx``, ``basket2idx`` and
    ``investments`` objects.

    Args:
        user_ids: Iterable of raw user IDs; each must be a key of ``user2idx``.
        top_n: Maximum number of recommendations to print per user. Fewer are
            printed when the user has fewer unseen baskets than this.

    Returns:
        None. Results are written to stdout.

    Raises:
        KeyError: If a user ID is not present in ``user2idx``.
    """
    model.eval()
    all_baskets = list(basket2idx.keys())

    for user_id in user_ids:
        user_idx = user2idx[user_id]

        # A set makes the "already invested" filter a constant-time lookup
        # per basket instead of a scan over an array.
        already_invested = set(
            investments.loc[investments['user_id'] == user_id, 'basket_name']
        )
        unseen_baskets = [b for b in all_baskets if b not in already_invested]

        if not unseen_baskets:
            # Nothing to score: an empty index tensor would default to a float
            # dtype and be rejected by the embedding lookup.
            print(f"\n🔍 Top-{top_n} Recommendations for User {user_id}:")
            print("→ (no unseen baskets left to recommend)")
            continue

        basket_indices = torch.tensor([basket2idx[b] for b in unseen_baskets])
        user_tensor = torch.tensor([user_idx] * len(basket_indices))

        with torch.no_grad():
            # Flatten so a single candidate still yields a 1-D tensor,
            # whatever shape the model returns for a batch of one.
            preds = model(user_tensor, basket_indices).reshape(-1)

        # topk raises if k exceeds the number of candidates, so clamp it.
        k = min(top_n, len(unseen_baskets))
        top = torch.topk(preds, k)
        top_baskets = [
            (unseen_baskets[pos], score)
            for pos, score in zip(top.indices.tolist(), top.values.tolist())
        ]

        print(f"\n🔍 Top-{top_n} Recommendations for User {user_id}:")
        for basket, score in top_baskets:
            print(f"→ {basket} (score: {score:.2f})")


In [18]:
# Draw ten distinct user IDs at random (sampling without replacement).
# Note: this sample is not what gets passed to the call below.
random_user_ids = np.random.choice(list(user2idx), size=10, replace=False)
# Print recommendations for five fixed user IDs.
recommend_for_users(user_ids=[1001, 1002, 1003, 1004, 1005])


🔍 Top-5 Recommendations for User 1001:
→ Great World software (score: 0.34)
→ Investment Banking (score: 0.31)
→ Oceania valuecreators (score: 0.29)
→ Strong value growth Europe (score: 0.28)
→ Software Americas, small comp (score: 0.27)

🔍 Top-5 Recommendations for User 1002:
→ Great World software (score: 0.40)
→ MooseBit underdog (score: 0.25)
→ Investment Banking (score: 0.24)
→ Cross over well performing (score: 0.23)
→ Oceania valuecreators (score: 0.23)

🔍 Top-5 Recommendations for User 1003:
→ Great World software (score: 0.44)
→ Investment Banking (score: 0.38)
→ Pharmaceuticals EU (score: 0.29)
→ Australian Health 3 (score: 0.27)
→ Air related companies world (score: 0.26)

🔍 Top-5 Recommendations for User 1004:
→ Great World software (score: 0.38)
→ MooseBit underdog (score: 0.25)
→ Investment Banking (score: 0.24)
→ German Broadcasting (score: 0.23)
→ Pharmaceuticals EU (score: 0.23)

🔍 Top-5 Recommendations for User 1005:
→ Great World software (score: 0.34)
→ Investment 