In [419]:
import pandas as pd
import numpy as np
import torch
from torchvision import transforms
from PIL import Image
import requests
from io import BytesIO
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import random

In [420]:
artworks = pd.read_csv("../dataset/artworks.csv", delimiter=";")
users = pd.read_csv("../dataset/users.csv", delimiter=";")
artworks.fillna(-1, inplace=True)
users.fillna(-1, inplace=True)
artworks = artworks.drop_duplicates(subset="title", keep="first")
users = users.drop_duplicates(subset="name", keep="first")
user_to_index = {username: idx for idx, username in enumerate(users["name"])}
artwork_to_index = {title: idx for idx, title in enumerate(artworks["title"])}

users["user_id"] = users["name"].map(user_to_index)
artworks["artwork_id"] = artworks["title"].map(artwork_to_index)

print(len(users))
print(len(artworks))

759
766


In [421]:
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [422]:
def preprocess_image(image_url):
    try:
        response = requests.get(image_url)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content)).convert("RGB")
            image = preprocess(image)
            return image.unsqueeze(0)
        else:
            print(f"Errore nel caricamento dell'immagine dall'URL: {image_url}")
            return torch.zeros(
                (1, 3, 224, 224)
            )
    except Exception as e:
        print(f"Errore nel preprocessamento dell'immagine dall'URL {image_url}: {e}")
        return

In [423]:
resnet50 = models.resnet50(pretrained=True)
feature_extractor = torch.nn.Sequential(*list(resnet50.children())[:-1])


def extract_visual_features_from_url(image_url):
    image = preprocess_image(image_url)
    with torch.no_grad():
        features = feature_extractor(image)
    return features.squeeze()



In [424]:
embedding_dim = 32
num_users = len(user_to_index) +1
num_artworks = len(artwork_to_index)+1

In [425]:
user_embeddings = torch.nn.Embedding(num_users, embedding_dim)
artwork_embeddings = torch.nn.Embedding(num_artworks, embedding_dim)
user_social_embeddings = torch.nn.Embedding(num_users+7, embedding_dim)
artwork_similarity_embeddings = torch.nn.Embedding(num_artworks, embedding_dim)
visual_embedding_layer = nn.Linear(2048, embedding_dim)

torch.nn.init.xavier_uniform_(user_embeddings.weight)
torch.nn.init.xavier_uniform_(artwork_embeddings.weight)
torch.nn.init.xavier_uniform_(user_social_embeddings.weight)
torch.nn.init.xavier_uniform_(artwork_similarity_embeddings.weight)

Parameter containing:
tensor([[-0.0779, -0.0214, -0.0793,  ..., -0.0090,  0.0213,  0.0261],
        [ 0.0302,  0.0659, -0.0661,  ..., -0.0346,  0.0863,  0.0772],
        [ 0.0437, -0.0020, -0.0599,  ..., -0.0176, -0.0463,  0.0286],
        ...,
        [-0.0610, -0.0388,  0.0544,  ..., -0.0848, -0.0380,  0.0151],
        [-0.0023, -0.0462,  0.0405,  ...,  0.0810,  0.0745,  0.0223],
        [-0.0595, -0.0095,  0.0497,  ..., -0.0826,  0.0325,  0.0034]],
       requires_grad=True)

In [426]:
def personalized_decay(k, a, b):
    a_tensor = torch.tensor(a)
    k_tensor = torch.tensor(k - 1)
    return 1 / (torch.exp(a_tensor * k_tensor) + b)

In [427]:
def compute_score(user, artwork, past_artworks, a, b, artwork_visual_features):
    user_emb = user_embeddings(user)
    artwork_emb = artwork_embeddings(artwork)
    similarity_emb = artwork_similarity_embeddings(artwork)

    visual_features = artwork_visual_features
    visual_emb = visual_embedding_layer(visual_features)

    long_term_pref = torch.sum(user_emb * (artwork_emb + visual_emb), dim=-1)

    short_term_sim = 0
    for k, past_artwork in enumerate(past_artworks, 1):
        past_artwork_tensor = torch.tensor([past_artwork])
        past_artwork_emb = artwork_embeddings(past_artwork_tensor)
        past_similarity_emb = artwork_similarity_embeddings(past_artwork_tensor)
        decay_factor = personalized_decay(k, a, b)
        short_term_sim += decay_factor * torch.sum(
            (past_artwork_emb + past_similarity_emb) * (artwork_emb + similarity_emb),
            dim=-1,
        )

    user_social_emb = user_social_embeddings(user)
    artwork_owner_emb = user_social_embeddings(artwork)
    social_sim = torch.sum(user_social_emb * artwork_owner_emb, dim=-1)

    score = long_term_pref + short_term_sim + social_sim
    return score

In [428]:
def s_bpr_loss(pos_score, neg_score):
    return -torch.mean(F.logsigmoid(pos_score - neg_score))

In [429]:
def create_interactions(users_df, artworks_df, num_past_works=2):
    interactions = []

    for _, user_row in users_df.iterrows():
        user_id = user_row["user_id"]
        user_name = user_row["name"]
        non_user_artworks = artworks_df[artworks_df["author"] != user_name]
        user_artworks = artworks_df[artworks_df["author"] == user_name]

        if len(non_user_artworks) > 0 and len(user_artworks) > 0:
            pos_artwork = non_user_artworks.sample(1).iloc[0]["artwork_id"]
            neg_artwork = non_user_artworks.sample(1).iloc[0]["artwork_id"]
            past_artworks = list(
                user_artworks["artwork_id"].sample(
                    min(len(user_artworks), num_past_works)
                )
            )
            interactions.append((user_id, pos_artwork, neg_artwork, past_artworks))

    return interactions

interactions = create_interactions(users, artworks)

In [430]:
def get_interaction_data(interactions, idx):
    user_id, pos_artwork_id, neg_artwork_id, past_artwork_ids = interactions[idx]
    return user_id, pos_artwork_id, neg_artwork_id, past_artwork_ids

In [431]:
def interaction_generator(interactions):
    for interaction in interactions:
        yield get_interaction_data(interactions, interaction)

In [432]:
def get_interactions_length(interactions):
    return len(interactions)

In [433]:
def custom_collate_fn(batch):
    users, pos_artworks, neg_artworks, past_artworks = zip(*batch)
    return (
        torch.tensor(users),
        torch.tensor(pos_artworks),
        torch.tensor(neg_artworks),
        past_artworks,
    )

In [434]:
def create_data_loader(interactions, batch_size=64, shuffle=True):
    data_length = get_interactions_length(interactions)
    indices = list(range(data_length))

    if shuffle:
        random.shuffle(indices)

    def data_generator():
        batch = []
        for idx in indices:
            batch.append(get_interaction_data(interactions, idx))
            if len(batch) == batch_size:
                yield custom_collate_fn(batch)
                batch = []
        if batch:
            yield custom_collate_fn(batch)

    return data_generator()

In [435]:
def train_model(epochs, learning_rate, interactions, a, b):
    optimizer = torch.optim.Adam(
        [
            {"params": user_embeddings.parameters()},
            {"params": artwork_embeddings.parameters()},
            {"params": user_social_embeddings.parameters()},
            {"params": artwork_similarity_embeddings.parameters()},
            {"params": visual_embedding_layer.parameters()},
        ],
        lr=learning_rate,
    )

    for epoch in range(epochs):
        print(f"Epoch: {epoch}")
        total_loss = 0
        data_loader = create_data_loader(interactions)
        for batch in data_loader:
            users, pos_artworks, neg_artworks, past_artworks = batch
            optimizer.zero_grad()
            batch_loss = 0
            for i in range(len(users)):
                user = users[i].unsqueeze(0)
                pos_artwork = pos_artworks[i].unsqueeze(0)
                neg_artwork = neg_artworks[i].unsqueeze(0)
                past_artwork = past_artworks[i]
                pos_artwork_row = artworks.loc[
                    artworks["artwork_id"] == pos_artwork.item()
                ]
                neg_artwork_row = artworks.loc[
                    artworks["artwork_id"] == neg_artwork.item()
                ]
                if pos_artwork_row.empty or neg_artwork_row.empty:
                    continue
                pos_visual_features = extract_visual_features_from_url(
                    pos_artwork_row["img"].values[0]
                )
                neg_visual_features = extract_visual_features_from_url(
                    neg_artwork_row["img"].values[0]
                )
                pos_score = compute_score(
                    user, pos_artwork, past_artwork, a, b, pos_visual_features
                )
                neg_score = compute_score(
                    user, neg_artwork, past_artwork, a, b, neg_visual_features
                )
                loss = s_bpr_loss(pos_score, neg_score)
                batch_loss += loss
            batch_loss /= len(users)
            batch_loss.backward()
            optimizer.step()
            total_loss += batch_loss.item()

        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

In [None]:
train_model(epochs=10, learning_rate=0.001, interactions=interactions, a=0.1, b=0.5)

In [448]:
def user_has_interacted(user_id, artwork_id, interactions):
    for interaction in interactions:
        if (
            interaction[0] == user_id
            and interaction[1] == artwork_id
        ):
            return True
    return False

In [469]:
def get_past_artworks(user_id, interactions):
    past_artworks = []
    for interaction in interactions:
        if interaction[0] == user_id:
            past_artworks.append(interaction[1])
    return past_artworks

In [472]:
def recommend_for_user(user_id, interactions, k):
    user_tensor = torch.tensor([user_id]).long()
    artwork_scores = []

    for _, artwork_row in artworks.iterrows():
        artwork_id = artwork_row["artwork_id"]

        if user_has_interacted(user_id, artwork_id, interactions):
            continue

        artwork_tensor = torch.tensor([artwork_id]).long()

        artwork_visual_features = extract_visual_features_from_url(artwork_row["img"])

        past_artworks = get_past_artworks(
            user_id, interactions
        )  
        score = compute_score(
            user_tensor,
            artwork_tensor,
            past_artworks,
            a=0.1,
            b=0.5,
            artwork_visual_features=artwork_visual_features,
        )
        artwork_scores.append((artwork_id, score.item()))
    top_recommendations = sorted(artwork_scores, key=lambda x: x[1], reverse=True)[:k]
    return [artwork_id for artwork_id, _ in top_recommendations]

In [474]:
def top_k_artworks_by_popularity_score(k):
    artworks["popularity_score"] = (
        artworks["number_of_views"] * 0.2
        + artworks["likes"] * 0.5
        + artworks["number_of_comments"] * 0.3
    )
    top_k_artworks = artworks.nlargest(k, "popularity_score")
    return top_k_artworks.index.tolist()

In [475]:
def precision_at_k(actual, predicted, k):
    actual_set = set(actual)
    predicted_at_k = predicted[:k]
    return len(set(predicted_at_k) & actual_set) / k


def recall_at_k(actual, predicted, k):
    actual_set = set(actual)
    predicted_at_k = predicted[:k]
    return len(set(predicted_at_k) & actual_set) / len(actual_set)


def ndcg_at_k(actual, predicted, k):
    actual_set = set(actual)
    predicted_at_k = predicted[:k]
    dcg = sum(
        [
            1.0 / np.log2(i + 2) if predicted_at_k[i] in actual_set else 0.0
            for i in range(k)
        ]
    )
    idcg = sum([1.0 / np.log2(i + 2) for i in range(min(len(actual), k))])
    return dcg / idcg if idcg > 0 else 0.0

In [478]:
test = recommend_for_user(0, interactions, 200)
print(test)

[365, 0, 237, 614, 128, 80, 580, 475, 156, 322, 617, 381, 424, 488, 652, 30, 139, 524, 474, 731, 6, 83, 380, 73, 378, 464, 612, 416, 97, 419, 697, 269, 224, 253, 674, 748, 453, 632, 158, 194, 433, 510, 396, 622, 350, 384, 177, 81, 342, 459, 695, 174, 249, 715, 304, 636, 107, 48, 121, 523, 339, 568, 235, 112, 627, 154, 728, 675, 319, 264, 561, 472, 250, 657, 305, 710, 329, 185, 759, 23, 247, 43, 346, 100, 96, 324, 398, 50, 458, 283, 621, 490, 521, 60, 17, 691, 164, 291, 357, 37, 25, 111, 541, 724, 583, 677, 649, 88, 191, 289, 415, 3, 316, 656, 311, 457, 144, 123, 709, 683, 181, 412, 101, 281, 720, 566, 349, 563, 239, 562, 400, 331, 666, 303, 551, 193, 201, 560, 286, 295, 444, 740, 630, 353, 150, 160, 758, 550, 557, 445, 356, 654, 34, 565, 672, 738, 440, 597, 198, 296, 624, 741, 341, 212, 216, 306, 168, 279, 233, 540, 573, 504, 483, 219, 375, 608, 737, 108, 146, 671, 385, 70, 221, 454, 187, 465, 231, 173, 660, 589, 761, 712, 114, 567, 14, 512, 274, 155, 606, 450]


In [482]:
views_set = top_k_artworks_by_popularity_score(100)
precision = precision_at_k(views_set, test, 50)
recall = recall_at_k(views_set, test, 50)
ndcg = ndcg_at_k(views_set, test, 50)
print(f"Precision: {precision}, Recall: {recall}, NDCG: {ndcg}")

Precision: 0.08, Recall: 0.04, NDCG: 0.06225101582759056
