# Add painting features
This notebook attaches painting-level features (such as type, style, and century) to each object embedding used for retrieval.

### 0. Import libraries

In [None]:
import re
import copy
import json
import polars as pl

# Directory prefixes for the notebook's input/output files. They are spliced
# directly in front of file names with f-strings, so the trailing "/" is required.
INPUT_PATH_EMBEDDINGS = "../../data/embeddings/"
INPUT_PATH_OBJECTS_DATA = "../../data/processed/"

### 1. Load paintings features

In [None]:
# Accented characters that are folded to their plain ASCII letter when
# building the join key.
_ACCENT_TO_ASCII = str.maketrans(
    {
        "è": "e",
        "é": "e",
        "ó": "o",
        "á": "a",
        "ü": "u",
        "ô": "o",
        "ä": "a",
        "à": "a",
    }
)


def _normalize_description(description: str) -> str:
    """Turn an object description into the whitespace-free join key."""
    # Drop the final character (presumably the closing period -- TODO confirm)
    # and turn the remaining periods into " | " separators.
    separated = description[:-1].replace(".", " | ").strip()
    # All whitespace is removed, so only the bare "|" separators survive.
    compact = re.sub(r"\s+", "", separated)
    return compact.lower().translate(_ACCENT_TO_ASCII)


def load_paintings_objects() -> "pl.DataFrame":
    """Load the annotated paintings and return one row per (painting, object).

    Reads ``paintings_with_filtered_objects.json`` from
    ``INPUT_PATH_OBJECTS_DATA``. In each annotation the ``"objects"`` mapping
    is replaced by the list of its keys (the object labels), and a parallel
    ``"object_description"`` list is built from each object's
    ``"description"`` entry. Both lists are exploded together, so every
    output row holds a single object of a single painting together with the
    painting's remaining fields.

    Returns:
        A polars DataFrame with the annotation fields, plus
        ``object_description`` and ``text_no_spaces`` (the normalized
        description used as a join key).
    """
    # The descriptions contain accented characters, so decode explicitly as
    # UTF-8 instead of relying on the platform's default encoding.
    with open(
        f"{INPUT_PATH_OBJECTS_DATA}paintings_with_filtered_objects.json",
        encoding="utf-8",
    ) as f:
        all_annotations = json.load(f)

    # Build fresh dicts rather than mutating the loaded data: existing keys
    # keep their position and "object_description" is appended when new.
    paintings_objects = [
        {
            **annotation,
            "objects": list(annotation["objects"]),
            "object_description": [
                obj_data["description"]
                for obj_data in annotation["objects"].values()
            ],
        }
        for annotation in all_annotations
    ]

    return (
        pl.from_dicts(paintings_objects)
        .explode("objects", "object_description")
        .with_columns(
            pl.col("object_description")
            .map_elements(_normalize_description, return_dtype=pl.String)
            .alias("text_no_spaces")
        )
    )

In [None]:
# Build the per-object feature table once; later cells join it onto the embeddings.
object_features = load_paintings_objects()
# Bare expression so the notebook displays the table as the cell output.
object_features

### 2. Add features to CLIP embeddings

In [None]:
# Load the CLIP embeddings, then add `text_no_spaces`: the `text` column with
# every run of whitespace removed, used as the join key against
# `object_features`.
clip_embeddings = pl.read_json(
    f"{INPUT_PATH_EMBEDDINGS}clip_embeddings_test_clip_full_1e_6_diff_lr_not_frozen.json"
)
clip_embeddings = clip_embeddings.with_columns(
    text_no_spaces=pl.col("text").map_elements(
        lambda text: re.sub(r"\s+", "", text),
        return_dtype=pl.String,
    )
)
clip_embeddings

In [None]:
# Left join keeps every embedding row, matched to its object via the
# whitespace-free description.
clip_embeddings_with_features = clip_embeddings.join(
    object_features, on="text_no_spaces", how="left"
)
# NOTE(review): the (index == 661, id == 6969) pair is a hard-coded exclusion,
# presumably a duplicate match produced by the join -- confirm.
clip_embeddings_with_features = (
    clip_embeddings_with_features
    .filter(~((pl.col("index") == 661) & (pl.col("id") == 6969)))
    .drop("text", "text_no_spaces")
    .rename({"id": "painting_id", "objects": "label"})
)
clip_embeddings_with_features

In [None]:
# Save the feature-enriched CLIP embeddings in the embeddings directory.
clip_embeddings_with_features.write_json(
    f"{INPUT_PATH_EMBEDDINGS}clip_embeddings_test_clip_full_1e_6_diff_lr_not_frozen_features.json"
)

### 3. Add features to projected embeddings

In [None]:
# Load the projected embeddings, then add `text_no_spaces`: the `text` column
# with every run of whitespace removed, used as the join key against
# `object_features`.
projected_embeddings = pl.read_json(
    f"{INPUT_PATH_EMBEDDINGS}baseline_embeddings_test_projections_text_embedding_enhanced.json"
)
projected_embeddings = projected_embeddings.with_columns(
    text_no_spaces=pl.col("text").map_elements(
        lambda text: re.sub(r"\s+", "", text),
        return_dtype=pl.String,
    )
)
projected_embeddings

In [None]:
# Left join keeps every projected-embedding row, matched to its object via the
# whitespace-free description.
projected_embeddings_with_features = projected_embeddings.join(
    object_features, on="text_no_spaces", how="left"
)
# NOTE(review): hard-coded exclusion of a single row, identified by an exact
# float match on `probability` together with id 6969 -- presumably a duplicate
# match produced by the join; confirm.
projected_embeddings_with_features = (
    projected_embeddings_with_features
    .filter(
        ~((pl.col("probability") == 0.5384060740470886) & (pl.col("id") == 6969))
    )
    .drop("text", "text_no_spaces", "id")
    .rename({"objects": "label"})
)
projected_embeddings_with_features

In [None]:
# Save the feature-enriched *projected* embeddings. This cell previously wrote
# `clip_embeddings_with_features` here, so the projected output file received
# the CLIP data and the projected result was never saved.
projected_embeddings_with_features.write_json(
    f"{INPUT_PATH_EMBEDDINGS}baseline_embeddings_test_projections_text_embedding_enhanced_features.json"
)