# Add style and type features to the retrieved paintings
This notebook adds the type and style to the retrieved paintings when these were not initially available. These characteristics are retrieved from WikiArt and the Web Gallery of Art.

### 0. Import libraries

In [None]:
import json

import polars as pl
from tqdm import tqdm

from preprocess_data_utils import *

# Relative paths to the raw and intermediate data directories; they are
# concatenated with file names below, hence the trailing slashes.
RAW_DATA_PATH = "../../data/raw/"
INTERMEDIATE_DATA_PATH = "../../data/intermediate/"

### 1. Load and preprocess data

#### 1.1. Wikiart data about the style and type of paintings

In [None]:
# Read the processed Wikiart features, then convert them to a NumPy array so
# the rows can be iterated and indexed by position in the matching step.
wikiart_csv_path = INTERMEDIATE_DATA_PATH + "wikiart_paintings/wikiart_processed.csv"
wikiart_features_df = pl.read_csv(wikiart_csv_path)
wikiart_features = wikiart_features_df.to_numpy()
# Keep the DataFrame as the cell's last expression so the notebook displays it.
wikiart_features_df

#### 1.2. Web Gallery of Art data about the style and type of paintings

In [None]:
# Read the processed Web Gallery of Art features, then convert them to a NumPy
# array so the rows can be iterated and indexed by position in the matching step.
wga_csv_path = INTERMEDIATE_DATA_PATH + "wga_paintings/wga_processed.csv"
wga_features_df = pl.read_csv(wga_csv_path)
wga_features = wga_features_df.to_numpy()
# Keep the DataFrame as the cell's last expression so the notebook displays it.
wga_features_df

#### 1.3. MET data

In [None]:
# Read the processed MET paintings, then convert them to a NumPy array so the
# rows can be iterated and indexed by position in the matching step.
met_csv_path = INTERMEDIATE_DATA_PATH + "met_paintings/met_processed.csv"
met_paintings_df = pl.read_csv(met_csv_path)
met_paintings = met_paintings_df.to_numpy()
# Keep the DataFrame as the cell's last expression so the notebook displays it.
met_paintings_df

### 2. Get style and type for MET paintings available in the Wikiart or Web Gallery of Art datasets

In [None]:
def match_painting(painting, painting_features, found_paintings):
    """Append an enriched record for `painting` if it matches `painting_features`.

    The comparison is delegated to `is_same_painting`, which receives
    `painting[1]`, `painting[2]`, `painting_features[0]` and
    `painting_features[1]`.

    Args:
        painting: Positionally indexed row; positions 0, 3, 4, 5 and 6 are
            copied into the record as id, year, description, title and artist.
        painting_features: Positionally indexed feature row; position 2 is
            stored as the type and position 3, when present, as the style.
        found_paintings: List that receives the new record (mutated in place).

    Returns:
        The value returned by `is_same_painting`, unchanged.
    """
    is_match = is_same_painting(painting[1], painting[2], painting_features[0], painting_features[1])
    if not is_match:
        return is_match

    # Feature rows with fewer than four entries carry no style information.
    if len(painting_features) >= 4:
        style = painting_features[3]
    else:
        style = None

    record = {
        "id": painting[0],
        "title": painting[5],
        "artist": painting[6],
        "year": painting[3],
        "type": painting_features[2],
        "style": style,
        "description": painting[4],
    }
    found_paintings.append(record)

    return is_match

In [None]:
# Enriched records: each MET painting contributes exactly one entry, either
# through `match_painting` (on a match) or through the fallback below.
found_paintings = []

# Wikiart is searched before Web Gallery of Art, so a Wikiart match takes
# precedence and the WGA rows are not scanned for that painting.
feature_sources = (wikiart_features, wga_features)

for painting in tqdm(met_paintings):
    # `match_painting` appends the enriched record itself when it finds a
    # match; `any` stops at the first match, so at most one record is added.
    found = any(
        match_painting(painting, painting_features, found_paintings)
        for features in feature_sources
        for painting_features in features
    )

    if not found:
        # No match in either source: keep the painting with empty type/style.
        found_paintings.append(
            {
                "id": painting[0],
                "title": painting[5],
                "artist": painting[6],
                "year": painting[3],
                "type": None,
                "style": None,
                "description": painting[4],
            }
        )

In [None]:
# Persist the enriched MET records as indented JSON in the intermediate data folder.
enhanced_json_path = INTERMEDIATE_DATA_PATH + "met_paintings/met_paintings_enhanced_data.json"
with open(enhanced_json_path, "w") as output_file:
    json.dump(found_paintings, fp=output_file, indent=4)