In [6]:
import os
import cv2
import numpy as np
import pandas as pd
import anndata as ad
from PIL import Image
from sklearn.cluster import KMeans
import scanpy as sc
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
from colorsys import rgb_to_hsv

# Loading the AnnData

In [7]:
# Build the path to the stored AnnData file (a subfolder of the current
# working directory) and read it into memory.
subfolder_name = "adata_baroque_paintings"
file_name = "adata_baroque_paintings_trial6.h5ad"
parent_folder = os.getcwd()

save_path = os.path.join(
    os.path.join(parent_folder, subfolder_name),
    file_name,
)
adata_baroque_paintings = sc.read_h5ad(save_path)

In [8]:
# Show the AnnData summary (matrix dimensions and the names of the obs fields).
adata_baroque_paintings

AnnData object with n_obs × n_vars = 286 × 60
    obs: 'Painting_name', 'Width', 'Height', 'Commission_Year', 'Completion_Year', 'Painter', 'sacred', 'Jesuit_Art', 'Baroque_Art', 'Laplacian_Contrast', 'Gradient_Contrast', 'Hybrid_Contrast', 'File Name', 'color_prop_1', 'color_prop_2', 'color_prop_3', 'color_prop_4', 'color_prop_5', 'color_prop_6', 'color_prop_7', 'color_prop_8', 'color_prop_9', 'color_prop_10', 'color_prop_11', 'color_prop_12', 'color_prop_13', 'color_prop_14', 'color_prop_15', 'color_prop_16', 'color_prop_17', 'color_prop_18', 'color_prop_19', 'color_prop_20', 'not_sacred', 'not_Baroque_Art', 'not_Jesuit_Art'

In [9]:
# Inspect the data matrix stored in .X (one row per observation).
adata_baroque_paintings.X

array([[ 29,  24,  23, ..., 193, 179, 147],
       [ 20,  21,  15, ...,  82,  71,  45],
       [ 33,  30,  18, ..., 212, 210, 206],
       ...,
       [ 61,  87, 100, ..., 202, 201, 179],
       [235, 225, 132, ..., 129, 143, 119],
       [ 91, 141, 126, ..., 198, 205, 184]], shape=(286, 60))

In [10]:
# List the variable names, i.e. the column labels of .X.
adata_baroque_paintings.var_names

Index(['color_1_R', 'color_1_G', 'color_1_B', 'color_2_R', 'color_2_G',
       'color_2_B', 'color_3_R', 'color_3_G', 'color_3_B', 'color_4_R',
       'color_4_G', 'color_4_B', 'color_5_R', 'color_5_G', 'color_5_B',
       'color_6_R', 'color_6_G', 'color_6_B', 'color_7_R', 'color_7_G',
       'color_7_B', 'color_8_R', 'color_8_G', 'color_8_B', 'color_9_R',
       'color_9_G', 'color_9_B', 'color_10_R', 'color_10_G', 'color_10_B',
       'color_11_R', 'color_11_G', 'color_11_B', 'color_12_R', 'color_12_G',
       'color_12_B', 'color_13_R', 'color_13_G', 'color_13_B', 'color_14_R',
       'color_14_G', 'color_14_B', 'color_15_R', 'color_15_G', 'color_15_B',
       'color_16_R', 'color_16_G', 'color_16_B', 'color_17_R', 'color_17_G',
       'color_17_B', 'color_18_R', 'color_18_G', 'color_18_B', 'color_19_R',
       'color_19_G', 'color_19_B', 'color_20_R', 'color_20_G', 'color_20_B'],
      dtype='object')

# Adding HSV Values

In [11]:
# Append an HSV (hue, saturation, value) version of every palette colour to
# the AnnData, next to the existing RGB columns.
#
# The widened object is built through the public AnnData constructor rather
# than by overwriting the private `_var` attribute, so anndata itself checks
# that X, obs and var agree in shape. HSV columns left over from a previous
# run of this cell are dropped first, which makes the cell safe to re-run:
# it always produces the same result instead of failing or stacking a
# second copy of the HSV columns.

# Palette size: one "color_<i>_R" variable exists per colour.
current_var_names = adata_baroque_paintings.var_names
n_colors = sum(
    name.startswith("color_") and name.endswith("_R")
    for name in current_var_names
)
if n_colors == 0:
    raise ValueError(
        "No 'color_<i>_R' variables found; there is nothing to convert to HSV."
    )

# Names of the columns that will hold the HSV values, colour by colour.
hsv_columns = [
    f"color_{i}_{channel}"
    for i in range(1, n_colors + 1)
    for channel in "HSV"
]

# Keep every existing variable except HSV columns from an earlier run.
keep_mask = ~current_var_names.isin(hsv_columns)

rgb_df = pd.DataFrame(
    np.asarray(adata_baroque_paintings.X)[:, keep_mask],
    columns=current_var_names[keep_mask],
)

# One (n_obs, 3) block of HSV values per palette colour.
hsv_values = []
for i in range(1, n_colors + 1):
    # colorsys works on channels in [0, 1]; the stored RGB values are 0-255.
    r = rgb_df[f"color_{i}_R"].to_numpy() / 255.0
    g = rgb_df[f"color_{i}_G"].to_numpy() / 255.0
    b = rgb_df[f"color_{i}_B"].to_numpy() / 255.0

    hsv = np.array(
        [rgb_to_hsv(*channels) for channels in zip(r, g, b)],
        dtype=float,
    ).reshape(-1, 3)  # keeps the (n, 3) shape even when there are no rows
    hsv_values.append(hsv)

# Building the final matrix: blocks side by side, in the order of hsv_columns.
hsv_matrix = np.hstack(hsv_values)
assert hsv_matrix.shape == (rgb_df.shape[0], 3 * n_colors)

# Variable annotations: the kept rows followed by one (empty) row per HSV column.
hsv_var_df = pd.DataFrame(index=hsv_columns)
var_df = pd.concat([adata_baroque_paintings.var.loc[keep_mask], hsv_var_df])

# NOTE: the RGB columns stay on the 0-255 scale while the HSV columns are in
# [0, 1]; rescale before feeding X to scale-sensitive methods.
#
# obs-aligned and unstructured annotations are carried over. Var-aligned
# containers (layers, varm, varp) are tied to the old variable axis and are
# not carried over; the loaded object's summary lists none.
adata_baroque_paintings = ad.AnnData(
    X=np.hstack([rgb_df.to_numpy(), hsv_matrix]),
    obs=adata_baroque_paintings.obs.copy(),
    var=var_df,
    uns=dict(adata_baroque_paintings.uns),
    obsm=dict(adata_baroque_paintings.obsm),
    obsp=dict(adata_baroque_paintings.obsp),
)


In [12]:
# Confirm that the data matrix and the variable annotations grew together.
x_shape = adata_baroque_paintings.X.shape
n_variables = adata_baroque_paintings.var.shape[0]

print("New .X shape:", x_shape)
print("New number of variables:", n_variables)

New .X shape: (286, 120)
New number of variables: 120


In [13]:
# Show the AnnData summary again to confirm the new number of variables.
adata_baroque_paintings

AnnData object with n_obs × n_vars = 286 × 120
    obs: 'Painting_name', 'Width', 'Height', 'Commission_Year', 'Completion_Year', 'Painter', 'sacred', 'Jesuit_Art', 'Baroque_Art', 'Laplacian_Contrast', 'Gradient_Contrast', 'Hybrid_Contrast', 'File Name', 'color_prop_1', 'color_prop_2', 'color_prop_3', 'color_prop_4', 'color_prop_5', 'color_prop_6', 'color_prop_7', 'color_prop_8', 'color_prop_9', 'color_prop_10', 'color_prop_11', 'color_prop_12', 'color_prop_13', 'color_prop_14', 'color_prop_15', 'color_prop_16', 'color_prop_17', 'color_prop_18', 'color_prop_19', 'color_prop_20', 'not_sacred', 'not_Baroque_Art', 'not_Jesuit_Art'

In [14]:
# List the variable names, which now also include the HSV columns.
adata_baroque_paintings.var_names

Index(['color_1_R', 'color_1_G', 'color_1_B', 'color_2_R', 'color_2_G',
       'color_2_B', 'color_3_R', 'color_3_G', 'color_3_B', 'color_4_R',
       ...
       'color_17_V', 'color_18_H', 'color_18_S', 'color_18_V', 'color_19_H',
       'color_19_S', 'color_19_V', 'color_20_H', 'color_20_S', 'color_20_V'],
      dtype='object', length=120)

In [15]:
# View the data matrix as a labelled table: one row per observation,
# one column per variable.
feature_table = pd.DataFrame(
    data=adata_baroque_paintings.X,
    columns=adata_baroque_paintings.var_names,
    index=adata_baroque_paintings.obs_names,
)
feature_table

Unnamed: 0_level_0,color_1_R,color_1_G,color_1_B,color_2_R,color_2_G,color_2_B,color_3_R,color_3_G,color_3_B,color_4_R,...,color_17_V,color_18_H,color_18_S,color_18_V,color_19_H,color_19_S,color_19_V,color_20_H,color_20_S,color_20_V
Painting_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,29.0,24.0,23.0,35.0,28.0,26.0,42.0,33.0,27.0,50.0,...,0.552941,0.107639,0.393443,0.478431,0.123016,0.245614,0.670588,0.115942,0.238342,0.756863
2,20.0,21.0,15.0,23.0,24.0,17.0,30.0,27.0,17.0,42.0,...,0.615686,0.120567,0.348148,0.529412,0.118519,0.401786,0.439216,0.117117,0.451220,0.321569
3,33.0,30.0,18.0,115.0,103.0,79.0,47.0,38.0,22.0,100.0,...,0.384314,0.117284,0.160714,0.658824,0.034799,0.733871,0.486275,0.111111,0.028302,0.831373
4,74.0,49.0,28.0,84.0,56.0,32.0,62.0,48.0,34.0,50.0,...,0.474510,0.083333,0.083770,0.749020,0.083333,0.119205,0.592157,0.100000,0.021834,0.898039
5,51.0,42.0,33.0,44.0,37.0,29.0,58.0,47.0,35.0,36.0,...,0.709804,0.104167,0.042328,0.741176,0.115942,0.185484,0.486275,0.101852,0.115385,0.611765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,42.0,38.0,36.0,33.0,29.0,30.0,49.0,56.0,47.0,63.0,...,0.784314,0.084142,0.497585,0.811765,0.047101,0.696970,0.517647,0.173333,0.168919,0.580392
283,169.0,192.0,204.0,182.0,202.0,211.0,146.0,171.0,185.0,156.0,...,0.517647,0.108631,0.708861,0.619608,0.581481,0.445545,0.396078,0.111111,0.524272,0.403922
284,61.0,87.0,100.0,82.0,93.0,92.0,72.0,99.0,114.0,69.0,...,0.188235,0.080247,0.369863,0.286275,0.115942,0.547619,0.658824,0.159420,0.113861,0.792157
285,235.0,225.0,132.0,175.0,154.0,22.0,226.0,215.0,122.0,244.0,...,0.576471,0.170833,0.481928,0.650980,0.166667,0.411765,0.333333,0.263889,0.167832,0.560784


# Saving the AnnData

In [16]:
# Write the AnnData to disk inside the working directory, under a file name
# different from the one that was loaded, so the input file is not overwritten.
# `os` is already imported in the notebook's first cell, so it is not
# re-imported here (imports are kept in one place).
parent_folder = os.getcwd()
subfolder_name = "adata_baroque_paintings"
file_name = "adata_baroque_paintings_trial6_HSV.h5ad"

full_folder_path = os.path.join(parent_folder, subfolder_name)
# exist_ok=True keeps the cell re-runnable once the folder exists.
os.makedirs(full_folder_path, exist_ok=True)

save_path = os.path.join(full_folder_path, file_name)
adata_baroque_paintings.write(save_path)
print(f"AnnData saved to: {save_path}")

AnnData saved to: C:\Users\santy\Documents\Princeton University\COURSES\7. FALL SEMESTER 2025\SML 312 - STATISTICS PROJECT\Final Project\Data Set - Baroque Paintings\adata_baroque_paintings\adata_baroque_paintings_trial6_HSV.h5ad
