In [41]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Dense,
    Concatenate,
    Input,
    Embedding,
    Lambda,
    TextVectorization,
    LSTM,
    Normalization,
    GlobalAveragePooling2D,
    GlobalAveragePooling1D
)
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import numpy as np

In [3]:
# Width of the learned feature vectors: used below as the `units` of the Dense
# projections and as the `output_dim` of the Embedding layers.
embedding_dim = 200
# NOTE(review): not referenced anywhere in the visible notebook — confirm whether
# it is still needed.
pu_dim = 400
# Passed as `max_words` to create_text_embedding, where it caps the
# TextVectorization vocabulary and sizes the Embedding table.
max_text_words = 5000
# `input_dim` of the Embedding applied to the artwork comments input.
max_comment_words = 1000
# NOTE(review): custom_triplet_loss declares its own default margin of 0.2 and the
# visible call site never passes this constant — confirm which one is authoritative.
margin = 0.2

In [4]:
# ImageNet-pretrained ResNet50 backbone without its classification head; spatial
# pooling is left to extract_features.
resnet = ResNet50(weights="imagenet", include_top=False, pooling=None)


def extract_features(image):
    """Return the spatially averaged ResNet50 feature map of `image`.

    The input is passed through the module-level `resnet` backbone and the
    resulting feature map is reduced by a newly created GlobalAveragePooling2D
    layer.
    """
    backbone_output = resnet(image)
    return GlobalAveragePooling2D()(backbone_output)

In [5]:
def create_text_embedding(text_input, max_words, output_dim):
    """Build a pooled text-embedding branch on top of `text_input`.

    A new TextVectorization layer (capped at `max_words` tokens) turns the input
    into token ids, a new Embedding layer maps them to `output_dim`-sized
    vectors, and GlobalAveragePooling1D averages those vectors.

    NOTE(review): the vectorizer is neither adapted nor given a vocabulary inside
    this function — confirm its vocabulary is populated before training.
    """
    token_ids = TextVectorization(max_tokens=max_words)(text_input)
    token_vectors = Embedding(input_dim=max_words, output_dim=output_dim)(token_ids)
    return GlobalAveragePooling1D()(token_vectors)

In [6]:
# Single Normalization layer instance; the cells below call this same object on
# every numeric user and artwork input.
# NOTE(review): no `adapt(...)` call or explicit mean/variance appears in the
# visible notebook — confirm the statistics are set before training.
normalization_layer = Normalization()

In [7]:
# One Keras Input per user attribute. Scalar attributes are declared with shape
# (1,); the followers / follow lists are declared with shape (None,).
# NOTE(review): the name, place and list inputs are later handed to
# create_text_embedding, which starts with a TextVectorization layer, yet no dtype
# is set on these Inputs — confirm the intended dtype (presumably string).
input_user_name = Input(shape=(1,), name="input_user_name")
input_user_place = Input(shape=(1,), name="input_user_place")
input_user_inscription_date = Input(shape=(1,), name="input_user_inscription_date")
input_user_page_views = Input(shape=(1,), name="input_user_page_views")
input_user_followers = Input(shape=(1,), name="input_user_followers")
input_user_follow = Input(shape=(1,), name="input_user_follow")
input_user_favourites = Input(shape=(1,), name="input_user_favourites")
input_user_comments_made = Input(shape=(1,), name="input_user_comments_made")
input_user_comments_received = Input(shape=(1,), name="input_user_comments_received")
input_user_followers_list = Input(shape=(None,), name="input_user_followers_list")
input_user_follow_list = Input(shape=(None,), name="input_user_follow_list")

# Every numeric attribute is passed through the same shared normalization_layer.
normalized_inscription_date = normalization_layer(input_user_inscription_date)
normalized_page_views = normalization_layer(input_user_page_views)
normalized_followers = normalization_layer(input_user_followers)
normalized_follow = normalization_layer(input_user_follow)
normalized_favourites = normalization_layer(input_user_favourites)
normalized_comments_made = normalization_layer(input_user_comments_made)
normalized_comments_received = normalization_layer(input_user_comments_received)

# Each normalized scalar is then projected to embedding_dim by its own Dense layer
# with SELU activation (the variable names are reused for the projected tensors).
normalized_inscription_date = Dense(embedding_dim, activation='selu')(normalized_inscription_date)
normalized_page_views = Dense(embedding_dim, activation="selu")(normalized_page_views)
normalized_followers = Dense(embedding_dim, activation="selu")(normalized_followers)
normalized_follow = Dense(embedding_dim, activation="selu")(normalized_follow)
normalized_favourites = Dense(embedding_dim, activation="selu")(normalized_favourites)
normalized_comments_made = Dense(embedding_dim, activation="selu")(
    normalized_comments_made
)
normalized_comments_received = Dense(embedding_dim, activation="selu")(
    normalized_comments_received
)

# Concatenate the four text embeddings and the seven projected numeric attributes
# into a single user feature tensor.
# NOTE(review): `user_features` is not consumed by the score / loss tensors built
# in the later visible cells — confirm whether it is meant to feed the model output.
user_features = Concatenate()(
    [
        create_text_embedding(input_user_name, max_text_words, embedding_dim),
        create_text_embedding(input_user_place, max_text_words, embedding_dim),
        normalized_inscription_date,
        normalized_page_views,
        normalized_followers,
        normalized_follow,
        normalized_favourites,
        normalized_comments_made,
        normalized_comments_received,
        create_text_embedding(input_user_followers_list, max_text_words, embedding_dim),
        create_text_embedding(input_user_follow_list, max_text_words, embedding_dim),
    ]
)

In [8]:
# One Keras Input per artwork attribute: a 224x224 RGB image, scalar metadata
# declared with shape (1,), and a variable-length comments input.
input_art_img = Input(shape=(224, 224, 3), name="input_art_img")
input_art_title = Input(shape=(1,), name="input_art_title")
input_art_author = Input(shape=(1,), name="input_art_author")
input_art_likes = Input(shape=(1,), name="input_art_likes")
input_art_comments = Input(shape=(None,), name="input_art_comments")
input_art_views = Input(shape=(1,), name="input_art_views")
input_art_date = Input(shape=(1,), name="input_art_date")

# Pooled ResNet50 features of the artwork image.
art_image_features = extract_features(input_art_img)

# Title and author go through the vectorize -> embed -> average-pool text branch.
art_title_embedding = create_text_embedding(
    input_art_title, max_text_words, embedding_dim
)
art_author_embedding = create_text_embedding(
    input_art_author, max_text_words, embedding_dim
)
# Comments skip TextVectorization: the input is fed straight into an Embedding and
# then summarised by an LSTM.
# NOTE(review): this presumably expects integer token ids below max_comment_words,
# while prepare_inputs appends the raw `comment_1` column value — verify.
art_comments_embedding = Embedding(
    input_dim=max_comment_words, output_dim=embedding_dim
)(input_art_comments)
art_comments_embedding = LSTM(embedding_dim)(art_comments_embedding)

# Concatenate image, text and numeric features. Unlike the user branch, the
# normalized scalars are concatenated directly, without a Dense projection.
# NOTE(review): `art_features` is not consumed by the score / loss tensors built
# in the later visible cells — confirm whether it is meant to feed the model output.
art_features = Concatenate()(
    [
        art_image_features,
        art_title_embedding,
        art_author_embedding,
        normalization_layer(input_art_likes),
        normalization_layer(input_art_views),
        normalization_layer(input_art_date),
        art_comments_embedding,
    ]
)

In [31]:
def custom_reduce_sum(x, y):
    """Return the sum over axis 1 of the element-wise product of `x` and `y`."""
    elementwise_product = x * y
    return K.sum(elementwise_product, axis=1)

In [32]:
def custom_triplet_loss(score_i, score_j, margin=0.2):
    """Hinge on the score gap: element-wise max(0, margin + score_j - score_i).

    The result is zero wherever `score_i` exceeds `score_j` by at least `margin`.
    """
    violation = margin + score_j - score_i
    return K.maximum(0.0, violation)

In [36]:
# def create_triplet_loss_layer(user_features, art_features_i, art_features_j):
#     score_i = Lambda(lambda x: K.sum(x[0] * x[1], axis=1))(
#         [user_features, art_features_i]
#     )
#     score_j = Lambda(lambda x: K.sum(x[0] * x[1], axis=1))(
#         [user_features, art_features_j]
#     )
#     loss = Lambda(lambda x: K.sigmoid(x[1] - x[0]))([score_j, score_i])
#     return loss

In [42]:
# Image inputs of the ranking triple: the user's profile (`pu`), an item `i` and
# an item `j`. All three are declared as a single 224x224 RGB image.
# NOTE(review): preprocess_data builds a stack of several profile images per user
# for "input_pu" — confirm the intended rank of this input.
input_pu = Input(shape=(224, 224, 3), name="input_pu")
input_i = Input(shape=(224, 224, 3), name="input_i")
input_j = Input(shape=(224, 224, 3), name="input_j")

# Profile images: extract_features is mapped over the first axis of the input.
# NOTE(review): the Keras backend `map_fn` is documented with (fn, elems, name,
# dtype); `fn_output_signature` is a tf.map_fn argument — verify this call runs on
# the installed version.
# NOTE(review): output_shape is declared as (None, embedding_dim); confirm it
# matches the feature width actually returned by extract_features.
pu_features = Lambda(
    lambda x: K.map_fn(
        lambda y: extract_features(y), x, fn_output_signature=tf.float32
    ),
    output_shape=(None, embedding_dim),
)(input_pu)
i_features = extract_features(input_i)
j_features = extract_features(input_j)

# Two Dense(selu) layers whose weights are shared by the profile, i and j towers.
dense_layer_1 = Dense(embedding_dim, activation="selu", name="dense_layer_1")
dense_layer_2 = Dense(embedding_dim, activation="selu", name="dense_layer_2")

# The same two layers are applied to the profile features (again through map_fn)
# and directly to the i / j features.
reduced_pu = Lambda(
    lambda x: K.map_fn(
        lambda y: dense_layer_2(dense_layer_1(y)), x, fn_output_signature=tf.float32
    ),
    output_shape=(None, embedding_dim),
)(pu_features)
reduced_i = dense_layer_2(dense_layer_1(i_features))
reduced_j = dense_layer_2(dense_layer_1(j_features))

# Summarise the profile by concatenating its mean and its max over axis 1.
average_pooled_pu = Lambda(lambda x: K.mean(x, axis=1), output_shape=(embedding_dim,))(
    reduced_pu
)
max_pooled_pu = Lambda(lambda x: K.max(x, axis=1), output_shape=(embedding_dim,))(
    reduced_pu
)
pooled_pu = Concatenate()([average_pooled_pu, max_pooled_pu])

# Three Dense(selu) layers (300 -> 200 -> 200) produce the final profile vector.
# The last width is hard-coded to 200; it is later multiplied element-wise with
# reduced_i / reduced_j, whose width is embedding_dim (currently also 200).
pu_dense_1 = Dense(300, activation="selu", name="pu_dense_1")(pooled_pu)
pu_dense_2 = Dense(200, activation="selu", name="pu_dense_2")(pu_dense_1)
final_pu = Dense(200, activation="selu", name="pu_dense_3")(pu_dense_2)

In [37]:
# Dot-product score between the final profile vector and each candidate item.
score_i = Lambda(lambda x: custom_reduce_sum(x[0], x[1]))([final_pu, reduced_i])
score_j = Lambda(lambda x: custom_reduce_sum(x[0], x[1]))([final_pu, reduced_j])
# NOTE(review): custom_triplet_loss is declared as (score_i, score_j) but receives
# [score_j, score_i] here, so this tensor is max(0, margin + score_i - score_j).
# The model is later compiled with "binary_crossentropy" against all-ones labels;
# confirm that this argument order and that loss are intended together before
# changing either one in isolation.
loss = Lambda(lambda x: custom_triplet_loss(x[0], x[1]))([score_j, score_i])

In [38]:
# Assemble the trainable model: every user, artwork and triple input is listed,
# and the single output is the `loss` tensor.
# NOTE(review): "input_art_img" is listed here, but prepare_inputs does not build
# an entry with that name — confirm how this input is meant to be fed.
curatornet = Model(
    inputs=[
        input_user_name,
        input_user_place,
        input_user_inscription_date,
        input_user_page_views,
        input_user_followers,
        input_user_follow,
        input_user_favourites,
        input_user_comments_made,
        input_user_comments_received,
        input_user_followers_list,
        input_user_follow_list,
        input_art_img,
        input_art_title,
        input_art_author,
        input_art_likes,
        input_art_comments,
        input_art_views,
        input_art_date,
        input_pu,
        input_i,
        input_j,
    ],
    outputs=loss,
)

# NOTE(review): the model output is a hinge value produced by custom_triplet_loss,
# not a probability, yet it is scored with binary cross-entropy — confirm this
# pairing is intended.
curatornet.compile(optimizer="adam", loss="binary_crossentropy")
curatornet.summary()
# NOTE(review): in notebook order this save happens before the `fit` call in the
# final cell, so the file holds the untrained model — confirm whether a
# post-training save is wanted.
curatornet.save("curatornet.h5")

In [43]:
# Load the user and artwork tables; both files are semicolon-separated.
user_df = pd.read_csv("../dataset/users.csv", sep=";")
artwork_df = pd.read_csv("../dataset/artworks.csv", sep=";")

In [44]:
def preprocess_image(image_url, timeout=10):
    """Download an image and convert it into a batched, scaled RGB array.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block the whole
            preprocessing loop indefinitely.

    Returns:
        Array of shape (1, 224, 224, 3) with RGB values divided by 255.

    Raises:
        FileNotFoundError: If the server answers with a status other than 200.
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...); propagated unchanged.
    """
    response = requests.get(image_url, timeout=timeout)
    if response.status_code != 200:
        raise FileNotFoundError(
            f"Impossibile scaricare l'immagine dall'URL: {image_url}"
        )

    image = Image.open(BytesIO(response.content))
    image = image.convert("RGB").resize((224, 224))
    image_array = np.array(image) / 255.0
    # Add a leading axis so the result has the (1, height, width, channels) layout.
    return np.expand_dims(image_array, axis=0)

In [45]:
def create_triples(user_df, artwork_df, random_state=None):
    """Build one (Pu, i, j) triple for every eligible user.

    A user's own artworks are the rows of `artwork_df` whose "author" equals the
    user's "name". They are shuffled once; the last shuffled row becomes the
    held-out positive `i` and all remaining rows form the profile `Pu`, so `i`
    never appears inside `Pu`. The negative `j` is drawn from the artworks of
    other authors.

    Users are skipped when they have fewer than two own artworks or when there
    is no artwork by another author to use as a negative.

    Args:
        user_df: DataFrame with a "name" column.
        artwork_df: DataFrame with an "author" column.
        random_state: Optional seed (int) or NumPy random state / generator used
            for every draw. None keeps the unseeded behaviour.

    Returns:
        List of (Pu, i, j) tuples where `Pu` is a DataFrame and `i` / `j` are
        one-row DataFrames.
    """
    # One shared RNG makes an integer seed reproducible without forcing every
    # draw to restart from the same state.
    rng = random_state
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)

    triples = []
    for user_name in user_df["name"]:
        is_own = artwork_df["author"] == user_name
        own_artworks = artwork_df[is_own]
        other_artworks = artwork_df[~is_own]
        if len(own_artworks) < 2 or other_artworks.empty:
            continue

        shuffled = own_artworks.sample(frac=1, random_state=rng)
        profile = shuffled.iloc[:-1]
        # Double brackets keep the positive as a one-row DataFrame.
        positive = shuffled.iloc[[-1]]
        negative = other_artworks.sample(random_state=rng)
        triples.append((profile, positive, negative))
    return triples

In [63]:
def preprocess_data(triples, image_preprocessor):
    """Run `image_preprocessor` on the images referenced by each triple.

    For every (Pu, i, j) triple the "img" values of `Pu` are preprocessed and
    stacked into one array, while the first "img" value of `i` and of `j` is
    preprocessed and given an extra leading axis.

    Returns:
        List of tuples (Pu_images, i_image, j_image, Pu, first row of i,
        first row of j), in the order of `triples`.
    """

    def _load_single(frame):
        # Preprocess the first image of a frame and add a leading axis.
        return np.expand_dims(image_preprocessor(frame["img"].values[0]), axis=0)

    def _process(triple):
        profile, positive, negative = triple
        profile_images = np.array(
            [image_preprocessor(url) for url in profile["img"].values]
        )
        positive_image = _load_single(positive)
        negative_image = _load_single(negative)
        return (
            profile_images,
            positive_image,
            negative_image,
            profile,
            positive.iloc[0],
            negative.iloc[0],
        )

    return [_process(triple) for triple in triples]

In [64]:
def prepare_inputs(processed_triples, users_df):
    """Collect the processed triples into a dict of arrays keyed by input name.

    For each triple the user row is looked up in `users_df` by matching "name"
    against the author of item `i`; user columns and the metadata of item `i`
    are appended under their input names, next to the three image arrays.

    Returns:
        Dict mapping each input name to a NumPy array with one entry per triple.
    """
    image_keys = ("input_pu", "input_i", "input_j")
    # (input name, users_df column) pairs, in output order.
    user_fields = (
        ("input_user_name", "name"),
        ("input_user_place", "place"),
        ("input_user_inscription_date", "inscription_date"),
        ("input_user_page_views", "number_page_views"),
        ("input_user_followers", "number_followers"),
        ("input_user_follow", "number_follow"),
        ("input_user_favourites", "number_favourites"),
        ("input_user_comments_made", "number_comments_made"),
        ("input_user_comments_received", "number_comments_receveid"),
        ("input_user_followers_list", "followers_part_1"),
        ("input_user_follow_list", "follow_part_1"),
    )
    # (input name, artwork metadata field) pairs, in output order.
    art_fields = (
        ("input_art_title", "title"),
        ("input_art_author", "author"),
        ("input_art_likes", "likes"),
        ("input_art_comments", "comment_1"),
        ("input_art_views", "number_of_views"),
        ("input_art_date", "date"),
    )

    collected = {key: [] for key in image_keys}
    collected.update({key: [] for key, _ in user_fields})
    collected.update({key: [] for key, _ in art_fields})

    for pu_images, i_image, j_image, _pu_meta, i_meta, _j_meta in processed_triples:
        user_row = users_df[users_df["name"] == i_meta["author"]].iloc[0]

        for key, images in zip(image_keys, (pu_images, i_image, j_image)):
            collected[key].append(images)
        for key, column in user_fields:
            collected[key].append(user_row[column])
        for key, field in art_fields:
            collected[key].append(i_meta[field])

    return {key: np.array(values) for key, values in collected.items()}

In [65]:
# End-to-end run: sample the triples, download and preprocess their images,
# assemble the dict of model inputs, then fit for one epoch.
# NOTE(review): the traceback recorded under this cell reports prepare_inputs()
# receiving 3 positional arguments, which does not match the 2-argument call
# below — presumably a stale output from an earlier version; re-run to confirm.
triples = create_triples(user_df, artwork_df)
preprocessed_triples = preprocess_data(triples, preprocess_image)
inputs = prepare_inputs(preprocessed_triples, user_df)
# All-ones target vector with one entry per triple.
labels = np.ones(len(triples))
curatornet.fit(inputs, labels, epochs=1)

TypeError: prepare_inputs() takes 2 positional arguments but 3 were given