In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw investment records (semicolon-separated CSV).
investments = pd.read_csv("syntheticDataGenerators/investment/invest_data.csv", sep=';')

# Rating = the share of a user's total invested amount that this row represents,
# so one user's ratings sum to 1 (and lie in [0, 1] for non-negative amounts).
# Mapping each row's user_id onto the per-user totals and dividing once is the
# vectorized equivalent of a row-by-row apply, without the Python-level loop.
# NOTE(review): a user whose total is 0 yields NaN/inf ratings here — confirm
# the data cannot contain such users before training on this column.
user_totals = investments.groupby('user_id')['investment_amount'].sum()
investments['rating'] = investments['investment_amount'] / investments['user_id'].map(user_totals)

# Encode user and basket IDs as contiguous integers (in order of first
# appearance) so they can be used as embedding indices.
user2idx = {user_id: idx for idx, user_id in enumerate(investments['user_id'].unique())}
basket2idx = {basket: idx for idx, basket in enumerate(investments['basket_name'].unique())}

investments['user_idx'] = investments['user_id'].map(user2idx)
investments['basket_idx'] = investments['basket_name'].map(basket2idx)

In [None]:
class RecommenderNet(nn.Module):
    """Embedding-based scorer for (user, item) pairs.

    Each user and each item gets a learned embedding vector. The two vectors
    are concatenated and passed through a small MLP whose final sigmoid keeps
    every score in (0, 1).

    Args:
        num_users: Number of distinct user indices (rows of the user table).
        num_items: Number of distinct item indices (rows of the item table).
        embedding_size: Width of each embedding vector.
    """

    def __init__(self, num_users, num_items, embedding_size=50):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)
        self.fc = nn.Sequential(
            nn.Linear(embedding_size * 2, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, user_ids, item_ids):
        """Score each (user, item) pair.

        Args:
            user_ids: 1-D integer tensor of user indices.
            item_ids: 1-D integer tensor of item indices, same length as
                ``user_ids``.

        Returns:
            1-D tensor with one score per input pair.
        """
        user_vecs = self.user_embedding(user_ids)
        item_vecs = self.item_embedding(item_ids)
        combined = torch.cat([user_vecs, item_vecs], dim=1)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would
        # also drop the batch axis when the batch holds a single pair, turning
        # the result into a 0-d scalar that breaks topk and 1-D loss targets.
        return self.fc(combined).squeeze(-1)

In [None]:
# Prepare data: features are (user_idx, basket_idx) pairs, target is the rating.
X = investments[['user_idx', 'basket_idx']].values
y = investments['rating'].values.astype(np.float32)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to tensors. Index tensors are cast to int64 explicitly so the
# embedding lookups do not depend on whatever dtype was inferred for X.
train_users = torch.tensor(X_train[:, 0], dtype=torch.long)
train_items = torch.tensor(X_train[:, 1], dtype=torch.long)
train_ratings = torch.tensor(y_train)

val_users = torch.tensor(X_val[:, 0], dtype=torch.long)
val_items = torch.tensor(X_val[:, 1], dtype=torch.long)
val_ratings = torch.tensor(y_val)

# Model. Seed torch first: the split above is already seeded, but without this
# the random weight initialisation makes the losses differ on every run.
torch.manual_seed(42)
model = RecommenderNet(num_users=len(user2idx), num_items=len(basket2idx))
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop: each epoch is one full-batch gradient step over the training
# set, followed by a no-grad evaluation on the validation set.
epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(train_users, train_items)
    loss = criterion(outputs, train_ratings)
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        val_preds = model(val_users, val_items)
        val_loss = criterion(val_preds, val_ratings)

    # The reported train loss is the value computed before this epoch's step.
    print(f"Epoch {epoch+1}: Train Loss = {loss.item():.4f}, Val Loss = {val_loss.item():.4f}")

Epoch 1: Train Loss = 0.1345, Val Loss = 0.0707
Epoch 2: Train Loss = 0.0701, Val Loss = 0.0361
Epoch 3: Train Loss = 0.0349, Val Loss = 0.0196
Epoch 4: Train Loss = 0.0187, Val Loss = 0.0135
Epoch 5: Train Loss = 0.0130, Val Loss = 0.0125
Epoch 6: Train Loss = 0.0121, Val Loss = 0.0134
Epoch 7: Train Loss = 0.0131, Val Loss = 0.0147
Epoch 8: Train Loss = 0.0143, Val Loss = 0.0156
Epoch 9: Train Loss = 0.0151, Val Loss = 0.0159
Epoch 10: Train Loss = 0.0153, Val Loss = 0.0158


In [5]:
def recommend_for_users(user_ids, top_n=5):
    """Print the highest-scoring baskets each user has not invested in yet.

    Relies on the notebook-level ``model``, ``investments``, ``user2idx`` and
    ``basket2idx`` objects.

    Args:
        user_ids: Iterable of user IDs (keys of ``user2idx``).
        top_n: Maximum number of baskets to print per user. Fewer are printed
            when the user has fewer unseen baskets than this.

    Raises:
        KeyError: If a user ID is not present in ``user2idx``.
    """
    model.eval()
    all_baskets = list(basket2idx.keys())

    for user_id in user_ids:
        user_idx = user2idx[user_id]

        # A set gives constant-time membership tests for the filter below.
        already_invested = set(
            investments.loc[investments['user_id'] == user_id, 'basket_name']
        )
        unseen_baskets = [b for b in all_baskets if b not in already_invested]

        print(f"\n🔍 Top-{top_n} Recommendations for User {user_id}:")

        # Nothing left to score: an empty index tensor would default to a
        # float dtype and be rejected by the embedding lookup.
        if not unseen_baskets:
            print("→ (no unseen baskets left to recommend)")
            continue

        basket_indices = torch.tensor(
            [basket2idx[b] for b in unseen_baskets], dtype=torch.long
        )
        user_tensor = torch.tensor(
            [user_idx] * len(unseen_baskets), dtype=torch.long
        )

        with torch.no_grad():
            # reshape(-1) keeps preds 1-D even if the model returns a 0-d
            # scalar for a single candidate basket.
            preds = model(user_tensor, basket_indices).reshape(-1)

        # topk raises if k exceeds the number of candidates, so clamp it.
        k = max(0, min(top_n, len(unseen_baskets)))
        top_indices = torch.topk(preds, k).indices.tolist()

        for i in top_indices:
            print(f"→ {unseen_baskets[i]} (score: {preds[i].item():.2f})")


In [6]:
# Sample up to 10 distinct users. The size is clamped because sampling without
# replacement raises ValueError when fewer than 10 users exist, and a seeded
# generator makes the same users come back on every Restart & Run All.
sample_rng = np.random.default_rng(42)
random_user_ids = sample_rng.choice(
    list(user2idx.keys()),
    size=min(10, len(user2idx)),
    replace=False,
)
recommend_for_users(random_user_ids)


🔍 Top-5 Recommendations for User 1481:
→ Strong value growth Europe (score: 0.32)
→ Showbizz companies (score: 0.30)
→ Software, Global , small mcap (score: 0.28)
→ Nordic healthcare Facilities (score: 0.27)
→ Cross over well performing (score: 0.27)

🔍 Top-5 Recommendations for User 1970:
→ Nordic healthcare Facilities (score: 0.24)
→ Strong value growth Europe (score: 0.21)
→ Software, Global , small mcap (score: 0.20)
→ European fishing (score: 0.18)
→ Hotels Germany (score: 0.18)

🔍 Top-5 Recommendations for User 1466:
→ Strong value growth Europe (score: 0.35)
→ Software, Global , small mcap (score: 0.29)
→ Showbizz companies (score: 0.28)
→ Nordic healthcare Facilities (score: 0.27)
→ Well performed companies (score: 0.26)

🔍 Top-5 Recommendations for User 1985:
→ Strong value growth Europe (score: 0.31)
→ Software, Global , small mcap (score: 0.26)
→ my best choice ever (score: 0.24)
→ Investment Banking (score: 0.24)
→ Food producents Finland (score: 0.23)

🔍 Top-5 Recommendat