In [29]:
import pandas as pd
from surprise import SVD, Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

In [30]:
# 1. Data loading
investments = pd.read_csv('syntheticDataGenerators/investment/invest_data.csv', sep=';')

# 2. Calculate total investment amount for each user
user_totals = investments.groupby('user_id')['investment_amount'].sum()

# 3. Create a new column for normalized ratings
investments['normalized_rating'] = investments.apply(
    lambda row: row['investment_amount'] / user_totals[row['user_id']], 
    axis=1
)

# 4. Now use these normalized ratings with Surprise
reader = Reader(rating_scale=(0, 1))  # Since our ratings are now between 0 and 1
data = Dataset.load_from_df(investments[['user_id', 'basket_name', 'normalized_rating']], reader)

# 5. Divide data into training och test(20%) dataset
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# 6. Train SVD-model
model_SVD = SVD()
model_SVD.fit(trainset)

# 7. Test model
predictions = model_SVD.test(testset)

# 8. Evaluate with RMSE, MAE and FCP
print("SVD model:")
print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
rmse = accuracy.rmse(predictions)
print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
mae = accuracy.mae(predictions)
print("FCP:Fraction of Concordant Pairs. Higher values mean better accuracy.")
fcp = accuracy.fcp(predictions)

# 9. Make recommendation to a specific user
user_id = 1001
all_baskets = investments['basket_name'].unique()
already_invested = investments[investments['user_id'] == user_id]['basket_name'].unique()
baskets_to_predict = [b for b in all_baskets if b not in already_invested]

# 10. Predict user's intresses for new baskets
user_predictions = [model_SVD.predict(user_id, basket) for basket in baskets_to_predict]
top_recommendations = sorted(user_predictions, key=lambda x: x.est, reverse=True)[:5]

print(f"\nTopp 5 rekommenderade baskets för användare {user_id}:")
for pred in top_recommendations:
    print(f"→ {pred.iid} (förväntad rating: {pred.est:.2f})")

SVD model:
RMSE: Root Mean Squared Error. Lower values mean better accuracy.
RMSE: 0.0928
MAE: Mean Absolute Error. Lower values mean better accuracy.
MAE:  0.0733
FCP:Fraction of Concordant Pairs. Higher values mean better accuracy.
FCP:  0.5338

Topp 5 rekommenderade baskets för användare 1001:
→ Air related companies world (förväntad rating: 0.46)
→ Techs seeking value upgrade (förväntad rating: 0.43)
→ Food producents Norway (förväntad rating: 0.40)
→ UK fintech (förväntad rating: 0.35)
→ Technology stars of value (förväntad rating: 0.35)


In [31]:
# 6. Train KNN-model
model_KNN = KNNBasic(k=10)
model_KNN.fit(trainset)

# 7. Test model
predictions = model_KNN.test(testset)

# 8. Evaluate with RMSE, MAE and FCP
print("KNN model:")
print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
rmse = accuracy.rmse(predictions)
print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
mae = accuracy.mae(predictions)
print("FCP:Fraction of Concordant Pairs. Higher values mean better accuracy.")
fcp = accuracy.fcp(predictions)

# 9. Make recommendation to a specific user
user_id = 1001
all_baskets = investments['basket_name'].unique()
already_invested = investments[investments['user_id'] == user_id]['basket_name'].unique()
baskets_to_predict = [b for b in all_baskets if b not in already_invested]

# 10. Predict user's intresses for new baskets
user_predictions = [model_KNN.predict(user_id, basket) for basket in baskets_to_predict]
top_recommendations = sorted(user_predictions, key=lambda x: x.est, reverse=True)[:5]

print(f"\nTopp 5 rekommenderade baskets för användare {user_id}:")
for pred in top_recommendations:
    print(f"→ {pred.iid} (förväntad rating: {pred.est:.2f})")

Computing the msd similarity matrix...
Done computing similarity matrix.
KNN model:
RMSE: Root Mean Squared Error. Lower values mean better accuracy.
RMSE: 0.0657
MAE: Mean Absolute Error. Lower values mean better accuracy.
MAE:  0.0510
FCP:Fraction of Concordant Pairs. Higher values mean better accuracy.
FCP:  0.5406

Topp 5 rekommenderade baskets för användare 1001:
→ Technology stars of value (förväntad rating: 0.29)
→ Well traded with profit margin (förväntad rating: 0.25)
→ Financial World  nu funds (förväntad rating: 0.22)
→ Healthcare Northern Europe (förväntad rating: 0.21)
→ Financial companies Europé (förväntad rating: 0.20)


In [28]:
for raw_rating in data.raw_ratings:
    user_id, item_id, rating, _ = raw_rating
    print(f"User: {user_id}, Item: {item_id}, Rating: {rating}")

User: 1001, Item: Global healthcare, Rating: 0.2998720104399327
User: 1001, Item: World Software companies, Rating: 0.2498933420332773
User: 1001, Item: Global utilities, Rating: 0.19991467362662182
User: 1001, Item: Renewable energy world ALL, Rating: 0.14993600521996636
User: 1001, Item: Basic materials World, Rating: 0.10038396868020177
User: 1002, Item: Global utilities, Rating: 0.3
User: 1002, Item: Basic materials World, Rating: 0.2
User: 1002, Item: Australia tech index, Rating: 0.15
User: 1002, Item: Renewable energy world ALL, Rating: 0.15
User: 1002, Item: BIG INDEX Global, Rating: 0.2
User: 1003, Item: Global utilities, Rating: 0.3
User: 1003, Item: Basic materials World, Rating: 0.2
User: 1003, Item: Global healthcare, Rating: 0.15
User: 1003, Item: Real estate Europe, Rating: 0.15
User: 1003, Item: Financial World  nu funds, Rating: 0.2
User: 1004, Item: Growth Rockets, Rating: 0.3
User: 1004, Item: Swedish fintech, Rating: 0.2
User: 1004, Item: Pers 14, Rating: 0.15
User: