In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import anndata as ad
from PIL import Image
from sklearn.cluster import KMeans
import scanpy as sc
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
from colorsys import rgb_to_hsv
from itertools import combinations

# Load the AnnData

In [2]:
# Locate the HSV-annotated AnnData file relative to the current working directory.
subfolder_name, file_name = (
    "adata_baroque_paintings",
    "adata_baroque_paintings_trial6_HSV.h5ad",
)
parent_folder = os.getcwd()

# Despite its name, `save_path` is the path that is *read* in this cell.
save_path = os.path.join(parent_folder, subfolder_name, file_name)

# Load the stored paintings dataset into memory.
adata_baroque_paintings = sc.read_h5ad(save_path)

In [3]:
# Display the AnnData summary: its dimensions and the names of the .obs columns.
adata_baroque_paintings

AnnData object with n_obs × n_vars = 286 × 120
    obs: 'Painting_name', 'Width', 'Height', 'Commission_Year', 'Completion_Year', 'Painter', 'sacred', 'Jesuit_Art', 'Baroque_Art', 'Laplacian_Contrast', 'Gradient_Contrast', 'Hybrid_Contrast', 'File Name', 'color_prop_1', 'color_prop_2', 'color_prop_3', 'color_prop_4', 'color_prop_5', 'color_prop_6', 'color_prop_7', 'color_prop_8', 'color_prop_9', 'color_prop_10', 'color_prop_11', 'color_prop_12', 'color_prop_13', 'color_prop_14', 'color_prop_15', 'color_prop_16', 'color_prop_17', 'color_prop_18', 'color_prop_19', 'color_prop_20', 'not_sacred', 'not_Baroque_Art', 'not_Jesuit_Art'

In [4]:
# Inspect the feature matrix .X (NumPy abbreviates the printout of large arrays).
adata_baroque_paintings.X

array([[2.90000000e+01, 2.40000000e+01, 2.30000000e+01, ...,
        1.15942029e-01, 2.38341969e-01, 7.56862745e-01],
       [2.00000000e+01, 2.10000000e+01, 1.50000000e+01, ...,
        1.17117117e-01, 4.51219512e-01, 3.21568627e-01],
       [3.30000000e+01, 3.00000000e+01, 1.80000000e+01, ...,
        1.11111111e-01, 2.83018868e-02, 8.31372549e-01],
       ...,
       [6.10000000e+01, 8.70000000e+01, 1.00000000e+02, ...,
        1.59420290e-01, 1.13861386e-01, 7.92156863e-01],
       [2.35000000e+02, 2.25000000e+02, 1.32000000e+02, ...,
        2.63888889e-01, 1.67832168e-01, 5.60784314e-01],
       [9.10000000e+01, 1.41000000e+02, 1.26000000e+02, ...,
        2.22222222e-01, 1.02439024e-01, 8.03921569e-01]], shape=(286, 120))

In [5]:
# List the feature names, i.e. the column labels of .X.
adata_baroque_paintings.var_names

Index(['color_1_R', 'color_1_G', 'color_1_B', 'color_2_R', 'color_2_G',
       'color_2_B', 'color_3_R', 'color_3_G', 'color_3_B', 'color_4_R',
       ...
       'color_17_V', 'color_18_H', 'color_18_S', 'color_18_V', 'color_19_H',
       'color_19_S', 'color_19_V', 'color_20_H', 'color_20_S', 'color_20_V'],
      dtype='object', length=120)

In [6]:
# Render .X as a labelled table: one row per painting, one column per colour feature.
pd.DataFrame(
    data=adata_baroque_paintings.X,
    columns=adata_baroque_paintings.var_names,
    index=adata_baroque_paintings.obs_names,
)

Unnamed: 0_level_0,color_1_R,color_1_G,color_1_B,color_2_R,color_2_G,color_2_B,color_3_R,color_3_G,color_3_B,color_4_R,...,color_17_V,color_18_H,color_18_S,color_18_V,color_19_H,color_19_S,color_19_V,color_20_H,color_20_S,color_20_V
Painting_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,29.0,24.0,23.0,35.0,28.0,26.0,42.0,33.0,27.0,50.0,...,0.552941,0.107639,0.393443,0.478431,0.123016,0.245614,0.670588,0.115942,0.238342,0.756863
2,20.0,21.0,15.0,23.0,24.0,17.0,30.0,27.0,17.0,42.0,...,0.615686,0.120567,0.348148,0.529412,0.118519,0.401786,0.439216,0.117117,0.451220,0.321569
3,33.0,30.0,18.0,115.0,103.0,79.0,47.0,38.0,22.0,100.0,...,0.384314,0.117284,0.160714,0.658824,0.034799,0.733871,0.486275,0.111111,0.028302,0.831373
4,74.0,49.0,28.0,84.0,56.0,32.0,62.0,48.0,34.0,50.0,...,0.474510,0.083333,0.083770,0.749020,0.083333,0.119205,0.592157,0.100000,0.021834,0.898039
5,51.0,42.0,33.0,44.0,37.0,29.0,58.0,47.0,35.0,36.0,...,0.709804,0.104167,0.042328,0.741176,0.115942,0.185484,0.486275,0.101852,0.115385,0.611765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,42.0,38.0,36.0,33.0,29.0,30.0,49.0,56.0,47.0,63.0,...,0.784314,0.084142,0.497585,0.811765,0.047101,0.696970,0.517647,0.173333,0.168919,0.580392
283,169.0,192.0,204.0,182.0,202.0,211.0,146.0,171.0,185.0,156.0,...,0.517647,0.108631,0.708861,0.619608,0.581481,0.445545,0.396078,0.111111,0.524272,0.403922
284,61.0,87.0,100.0,82.0,93.0,92.0,72.0,99.0,114.0,69.0,...,0.188235,0.080247,0.369863,0.286275,0.115942,0.547619,0.658824,0.159420,0.113861,0.792157
285,235.0,225.0,132.0,175.0,154.0,22.0,226.0,215.0,122.0,244.0,...,0.576471,0.170833,0.481928,0.650980,0.166667,0.411765,0.333333,0.263889,0.167832,0.560784


In [7]:
# Show the per-painting metadata table stored in .obs.
adata_baroque_paintings.obs

Unnamed: 0_level_0,Painting_name,Width,Height,Commission_Year,Completion_Year,Painter,sacred,Jesuit_Art,Baroque_Art,Laplacian_Contrast,...,color_prop_14,color_prop_15,color_prop_16,color_prop_17,color_prop_18,color_prop_19,color_prop_20,not_sacred,not_Baroque_Art,not_Jesuit_Art
Painting_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,David with the Head of Goliath,91.3,110.4,1595,1605,Michelangelo Merisi Caravaggio,1,0,1,0.010107,...,0.016725,0.016200,0.015300,0.014675,0.014350,0.007175,0.006875,0,0,1
2,Santa Catalina,128.0,166.0,1598,1599,Michelangelo Merisi Caravaggio,1,0,1,0.007477,...,0.007775,0.007125,0.007075,0.005350,0.004650,0.003400,0.003400,0,0,1
3,Imposición de la casulla a san Ildefonso,55.0,33.0,1600,1600,Peter Paul Rubens,1,0,1,0.015918,...,0.032725,0.019925,0.019325,0.018600,0.018125,0.011225,0.004700,0,0,1
4,La Sagrada Familia,23.0,35.0,1600,1600,Peter Paul Rubens,1,0,1,0.009649,...,0.035075,0.029500,0.024050,0.013775,0.005925,0.004725,0.004000,0,0,1
5,Vulcano y el fuego,126.0,140.0,1600,1600,Peter Paul Rubens,0,0,1,0.012177,...,0.020775,0.018350,0.016900,0.015575,0.009400,0.007875,0.005100,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,La Berceuse,0.0,0.0,1888,1888,Vincent van Gogh,0,0,0,0.050316,...,0.030575,0.028325,0.027550,0.027175,0.026075,0.024800,0.013250,1,1,1
283,Self_protrait_VanGogh,0.0,0.0,1889,1889,Vincent van Gogh,0,0,0,0.053616,...,0.027450,0.023825,0.023425,0.016750,0.015975,0.015425,0.013925,1,1,1
284,Selfportrait_with_grey_felt_hat,0.0,0.0,1887,1888,Vincent van Gogh,0,0,0,0.053712,...,0.033100,0.031175,0.029200,0.028050,0.025850,0.020450,0.009925,1,1,1
285,Vase_with_fourteen_sunflowers,0.0,0.0,1887,1888,Vincent van Gogh,0,0,0,0.033626,...,0.026850,0.023350,0.022350,0.020475,0.019725,0.011175,0.005175,1,1,1


# Calculating the Color Complementarity Score

In [9]:
# Record of a ChatGPT prompt describing the intended score. The bare string is the
# cell's last expression, so it is echoed verbatim (typos included) as the cell output.
# NOTE(review): the prompt asks for a proportion-weighted average over all pairs of
# colours, whereas the computation cell compares every colour only against the
# dominant one and takes a simple mean -- confirm which definition is intended.
"""
Chat GPT Prompt:
I want to use cosine similarity in order to calculate a color complementary score for each of the rows (paintins) stored in the AnData
adata_baroque_paintings. THe cosine similarity is based on the last 60 columns of the .var, especifically on those columns that end in _H 
(for hue) and _S (for saturation), where _H goes from 0 to 1, but in reality it should be converted into an angle format since the hue is
considered the value of the angle of the vector if the color were to be plotted on a color wheel, and _S, which goes from 0 to 1, is the 
length or mangitude of the vector. The cosine similarity should give a value of -1 if the colors are totally complimentary, and a value 
of 1 if the colors are not complimentary at all (they are the same color). Similarly, for a given painting (row), it should calculate 
the cosine similarity for all the possible combinations of colors (combinations of 2 colors drawn out of 20), and find the weighted 
average of all of them as the ultimate complimentary score. The weighted average should be done using the proportions of each color in
the last columns of the .obs layer of the AnnData (the columns with the color proportions start with "color_prop_").
"""

'\nChat GPT Prompt:\nI want to use cosine similarity in order to calculate a color complementary score for each of the rows (paintins) stored in the AnData\nadata_baroque_paintings. THe cosine similarity is based on the last 60 columns of the .var, especifically on those columns that end in _H \n(for hue) and _S (for saturation), where _H goes from 0 to 1, but in reality it should be converted into an angle format since the hue is\nconsidered the value of the angle of the vector if the color were to be plotted on a color wheel, and _S, which goes from 0 to 1, is the \nlength or mangitude of the vector. The cosine similarity should give a value of -1 if the colors are totally complimentary, and a value \nof 1 if the colors are not complimentary at all (they are the same color). Similarly, for a given painting (row), it should calculate \nthe cosine similarity for all the possible combinations of colors (combinations of 2 colors drawn out of 20), and find the weighted \naverage of all of

In [10]:
def _dominant_color_complementarity(x, y, weights):
    """Score how far one painting's colours sit from its dominant colour on the hue wheel.

    Every colour is treated as a 2-D vector ``(x, y)`` on the colour wheel. The
    colour with the largest weight is taken as the dominant one; each remaining
    colour is compared with it through cosine similarity, rescaled so that
    0 means "same direction" and 1 means "opposite direction" (complementary).
    The result is the *unweighted* mean of those rescaled values: the weights
    are used only to drop absent colours and to pick the dominant colour, and
    colours are not compared pairwise with one another.

    Because the cosine is normalised by both vector lengths, saturation
    influences the result only through the zero-length checks.

    Parameters
    ----------
    x, y : 1-D float arrays
        Cartesian components of the colour vectors of one painting.
    weights : 1-D float array
        Proportion of the painting covered by each colour, aligned with ``x``/``y``.

    Returns
    -------
    float
        Score in [0, 1], or ``nan`` when fewer than two colours have a positive
        weight, when the dominant colour has zero length (no defined hue), or
        when no other colour has a non-zero length.
    """
    present = np.flatnonzero(weights > 0)
    if present.size < 2:
        return np.nan

    x, y, weights = x[present], y[present], weights[present]
    main = np.argmax(weights)  # first maximum wins on ties

    lengths = np.sqrt(x**2 + y**2)
    if lengths[main] == 0:
        return np.nan

    # Compare against every other colour that has a defined direction.
    others = lengths != 0
    others[main] = False
    if not others.any():
        return np.nan

    dots = x[main] * x[others] + y[main] * y[others]
    cos_sim = dots / (lengths[main] * lengths[others])
    # Rounding can leave |cos| marginally above 1, which would push the
    # rescaled score outside [0, 1]; clamp before rescaling.
    cos_sim = np.clip(cos_sim, -1.0, 1.0)

    # Map cosine from [-1, 1] to [0, 1] (0 = same hue, 1 = opposite hue).
    return float(np.mean((1 - cos_sim) / 2))


# 1. Extract Hue and Saturation column indices.
#    A single pass over var_names avoids rebuilding the full name list for
#    every column lookup.
hue_idx = [i for i, c in enumerate(adata_baroque_paintings.var_names) if c.endswith("_H")]
sat_idx = [i for i, c in enumerate(adata_baroque_paintings.var_names) if c.endswith("_S")]

hue_cols = [adata_baroque_paintings.var_names[i] for i in hue_idx]
sat_cols = [adata_baroque_paintings.var_names[i] for i in sat_idx]

# 2. Extract Hue and Saturation values
H = adata_baroque_paintings.X[:, hue_idx]     # (n_obs, n_colors_max)
S = adata_baroque_paintings.X[:, sat_idx]     # (n_obs, n_colors_max)

# 3. Convert Hue (fraction of a full turn) to angles in radians
theta = H * 2 * np.pi

# 4. Color vectors in 2D: saturation is the length, hue the direction
Xv = S * np.cos(theta)
Yv = S * np.sin(theta)

# 5. Extract color proportions as a plain float array
prop_cols = [c for c in adata_baroque_paintings.obs.columns if c.startswith("color_prop_")]
prop_df = adata_baroque_paintings.obs[prop_cols].to_numpy(dtype=float)  # (n_obs, n_colors_max)

# The hue, saturation and proportion columns are combined purely by position,
# so make sure position k refers to the same colour in all three.
hue_ids = [c[: -len("_H")] for c in hue_cols]                          # "color_7_H"    -> "color_7"
sat_ids = [c[: -len("_S")] for c in sat_cols]                          # "color_7_S"    -> "color_7"
prop_ids = [c.replace("color_prop_", "color_", 1) for c in prop_cols]  # "color_prop_7" -> "color_7"
if not (hue_ids == sat_ids == prop_ids):
    raise ValueError(
        "Hue, saturation and proportion columns do not describe the same colours "
        "in the same order; the complementarity score would mix up colours."
    )

# 6. One score per painting (row)
complement_scores = [
    _dominant_color_complementarity(Xv[row], Yv[row], prop_df[row])
    for row in range(adata_baroque_paintings.n_obs)
]

# 7. Store in .obs
adata_baroque_paintings.obs["color_complementarity_score"] = complement_scores

print("Complementarity scores computed and stored in adata.obs['color_complementarity_score']")

Complementarity scores computed and stored in adata.obs['color_complementarity_score']


In [11]:
# Inspect the per-painting scores that were just written to .obs.
adata_baroque_paintings.obs["color_complementarity_score"]

Painting_index
1      0.042089
2      0.083127
3      0.082595
4      0.005416
5      0.003148
         ...   
282    0.119284
283    0.372845
284    0.552021
285    0.009948
286    0.357886
Name: color_complementarity_score, Length: 286, dtype: float64

In [12]:
# Pair each painting's title with its score in a compact two-column table.
# Both Series share the .obs index, so the rows stay aligned per painting.
compplementarity_data = {}
compplementarity_data["Painting"] = adata_baroque_paintings.obs["Painting_name"]
compplementarity_data["Complementarity Score"] = adata_baroque_paintings.obs[
    "color_complementarity_score"
]

complementary_scores_df = pd.DataFrame(data=compplementarity_data)

In [13]:
# Display the painting / score table.
complementary_scores_df

Unnamed: 0_level_0,Painting,Complementarity Score
Painting_index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,David with the Head of Goliath,0.042089
2,Santa Catalina,0.083127
3,Imposición de la casulla a san Ildefonso,0.082595
4,La Sagrada Familia,0.005416
5,Vulcano y el fuego,0.003148
...,...,...
282,La Berceuse,0.119284
283,Self_protrait_VanGogh,0.372845
284,Selfportrait_with_grey_felt_hat,0.552021
285,Vase_with_fourteen_sunflowers,0.009948


In [14]:
# Smallest complementarity score in the dataset (Series.min skips NaN by default).
adata_baroque_paintings.obs["color_complementarity_score"].min()

0.0007483487199736752

In [15]:
# Look up the painting with the lowest score: idxmin() returns the index label
# of the minimum (NaN scores are skipped by default), and .loc fetches that row.
row = complementary_scores_df.loc[complementary_scores_df["Complementarity Score"].idxmin()]
print(row)

Painting                 View of the Gardens of the Villa Medici, Rome
Complementarity Score                                         0.000748
Name: 92, dtype: object


In [16]:
# Ten paintings with the lowest scores (ascending sort, first 10 rows).
# Note: `row` is reused here and now holds a 10-row DataFrame, not a single row.
row = complementary_scores_df.sort_values("Complementarity Score").head(10)
print(row)

                                                       Painting  \
Painting_index                                                    
92                View of the Gardens of the Villa Medici, Rome   
261                                    The_Visit_to_the_Nursery   
25                                                   San Felipe   
147                                      Saint Anthony of Padua   
69                                                      A Sybil   
22                                          La muerte de Séneca   
100                                Mariana de Austria in Prayer   
143                                                       Fable   
17                                                    San Pedro   
70              Antonia de Ipeñarrieta y Galdós, y su hijo Luis   

                Complementarity Score  
Painting_index                         
92                           0.000748  
261                          0.000764  
25                           0.0009

In [17]:
# Ten paintings with the highest scores (ascending sort, last 10 rows, so the
# maximum is printed last). sort_values places NaN scores at the end by default,
# so any NaN rows would show up in this tail.
row = complementary_scores_df.sort_values("Complementarity Score").tail(10)
print(row)

                                   Painting  Complementarity Score
Painting_index                                                    
173               Christ carrying the Cross               0.629662
257                Francois_Henri_dHarcourt               0.703186
231             Victoire duchess de Nemours               0.726360
280                             Eugene_Boch               0.729769
36                          The Lamentation               0.733402
185                                 Colbert               0.754471
246                              Cabra-cega               0.784562
80                  Philip III on Horseback               0.793758
213                         Empress Eugenie               0.835184
202                          Saint Augustin               0.873547


In [18]:
# Re-display .obs to confirm the score was appended as a new column.
adata_baroque_paintings.obs

Unnamed: 0_level_0,Painting_name,Width,Height,Commission_Year,Completion_Year,Painter,sacred,Jesuit_Art,Baroque_Art,Laplacian_Contrast,...,color_prop_15,color_prop_16,color_prop_17,color_prop_18,color_prop_19,color_prop_20,not_sacred,not_Baroque_Art,not_Jesuit_Art,color_complementarity_score
Painting_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,David with the Head of Goliath,91.3,110.4,1595,1605,Michelangelo Merisi Caravaggio,1,0,1,0.010107,...,0.016200,0.015300,0.014675,0.014350,0.007175,0.006875,0,0,1,0.042089
2,Santa Catalina,128.0,166.0,1598,1599,Michelangelo Merisi Caravaggio,1,0,1,0.007477,...,0.007125,0.007075,0.005350,0.004650,0.003400,0.003400,0,0,1,0.083127
3,Imposición de la casulla a san Ildefonso,55.0,33.0,1600,1600,Peter Paul Rubens,1,0,1,0.015918,...,0.019925,0.019325,0.018600,0.018125,0.011225,0.004700,0,0,1,0.082595
4,La Sagrada Familia,23.0,35.0,1600,1600,Peter Paul Rubens,1,0,1,0.009649,...,0.029500,0.024050,0.013775,0.005925,0.004725,0.004000,0,0,1,0.005416
5,Vulcano y el fuego,126.0,140.0,1600,1600,Peter Paul Rubens,0,0,1,0.012177,...,0.018350,0.016900,0.015575,0.009400,0.007875,0.005100,1,0,1,0.003148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,La Berceuse,0.0,0.0,1888,1888,Vincent van Gogh,0,0,0,0.050316,...,0.028325,0.027550,0.027175,0.026075,0.024800,0.013250,1,1,1,0.119284
283,Self_protrait_VanGogh,0.0,0.0,1889,1889,Vincent van Gogh,0,0,0,0.053616,...,0.023825,0.023425,0.016750,0.015975,0.015425,0.013925,1,1,1,0.372845
284,Selfportrait_with_grey_felt_hat,0.0,0.0,1887,1888,Vincent van Gogh,0,0,0,0.053712,...,0.031175,0.029200,0.028050,0.025850,0.020450,0.009925,1,1,1,0.552021
285,Vase_with_fourteen_sunflowers,0.0,0.0,1887,1888,Vincent van Gogh,0,0,0,0.033626,...,0.023350,0.022350,0.020475,0.019725,0.011175,0.005175,1,1,1,0.009948


# Saving the AnnData

In [19]:
# Persist the AnnData, now including obs["color_complementarity_score"], to a new
# .h5ad file under <current working directory>/adata_baroque_paintings/.
# `os` is already imported in the notebook's first cell, so it is not re-imported here.
parent_folder = os.getcwd()
subfolder_name = "adata_baroque_paintings"
file_name = "adata_baroque_paintings_trial6_complementarity.h5ad"

full_folder_path = os.path.join(parent_folder, subfolder_name)
os.makedirs(full_folder_path, exist_ok=True)  # no error if the folder already exists

save_path = os.path.join(full_folder_path, file_name)
adata_baroque_paintings.write(save_path)
print(f"AnnData saved to: {save_path}")

AnnData saved to: C:\Users\santy\Documents\Princeton University\COURSES\7. FALL SEMESTER 2025\SML 312 - STATISTICS PROJECT\Final Project\Data Set - Baroque Paintings\adata_baroque_paintings\adata_baroque_paintings_trial6_complementarity.h5ad
