In [9]:
import numpy as np
import scipy
import sklearn
from sklearn.metrics import precision_score, accuracy_score
from sklearn.model_selection import KFold
import pandas as pd
import sympy
import torch
from torch.nn.functional import one_hot
import torch_explain as te
from torch_explain.nn.functional import entropy_logic_loss
from torch_explain.logic.nn import entropy
from torch_explain.nn import concepts
from torch_explain.logic.metrics import test_explanation, complexity
from natsort import natsorted, index_natsorted
import random

# **Data preprocessing**

---


In [10]:

df = pd.read_excel("../DATA/ColorsDataset.xlsx", engine="openpyxl")


In [11]:
df.columns = df.iloc[0]
df = df[1:].reset_index(drop=True)
df = df.iloc[index_natsorted(df[df.columns[0]])]

In [12]:
df[df.columns[0]] = df[df.columns[0]].str.replace('.jpg', '', regex=False)

desired_order = ['DIEGO_VELAZQUEZ_', 'JAN_VERMEER_', 'CLAUDE_MONET_', 'PIERRE-AUGUSTE_RENOIR_', 'VINCENT_VAN_GOGH_', 'PAUL_GAUGUIN_']

df[['Artist', 'Suffix']] = df[df.columns[0]].str.extract(r'([a-zA-Z_-]+)_?(\d*)')
df['Suffix'] = pd.to_numeric(df['Suffix'], errors='coerce').fillna(0).astype(int)
artist_dtype = pd.CategoricalDtype(categories=desired_order, ordered=True)
df['Artist'] = df['Artist'].astype(artist_dtype)

df = df.sort_values(by=['Artist', 'Suffix'])
df[df.columns[0]] = df['Artist'].astype(str)  + df['Suffix'].astype(str)
df.loc[df['Suffix'] == 0, df.columns[0]] = df.loc[df['Suffix'] == 0, 'Artist']


df = df.drop(columns=['Artist', 'Suffix'])

In [13]:
artist_mapping = {
    'DIEGO_VELAZQUEZ': 'v',
    'CLAUDE_MONET': 'm',
    'JAN_VERMEER': 'j',
    'PIERRE-AUGUSTE_RENOIR': 'rn',
    'PAUL_GAUGUIN': 'gg',
    'VINCENT_VAN_GOGH': 'vg'
}

def shorten_name(name):
    for artist, abbreviation in artist_mapping.items():
        if artist in name:
            suffix = name.split('_')[-1]
            return abbreviation + suffix if suffix.isdigit() else abbreviation
    return name

df[df.columns[0]] = df[df.columns[0]].apply(shorten_name)

In [14]:
df = df.fillna(0)

new_df = df.copy()
new_df['Sum'] = new_df.drop(columns=[df.columns[0]]).sum(axis=1)
number_columns = new_df.columns[1:-1]

new_df[number_columns] = new_df[number_columns].div(new_df['Sum'], axis=0)
df = new_df.drop(columns=['Sum'])

  df = df.fillna(0)


In [15]:
df["Baroque"] = 0
df['Impressionist'] = 0
df['Post-Impressionist'] = 0

for index, row in df.iterrows():
    name = row[df.columns[0]]
    if name.startswith('v') and not name.startswith('vg'):
        df.loc[index, 'Baroque'] = 1
    elif name.startswith(('j')):
        df.loc[index, 'Baroque'] = 1
    elif name.startswith(('m', 'rn')):
        df.loc[index, 'Impressionist'] = 1
    elif name.startswith(('vg', 'gg')):
        df.loc[index, 'Post-Impressionist'] = 1

In [16]:
df

Unnamed: 0,id,black,dark_grey,grey,light_grey,white,red,orange,yellow,green,...,dark_orange,dark_yellow,dark_green,dark_turquoise,dark_blue,dark_purple,dark_pink,Baroque,Impressionist,Post-Impressionist
36,v1,0.450608,0.009245,0.001306,0.005326,0.014270,0.000000,0.003517,0.000000,0.0,...,0.343383,0.124510,0.000000,0.000000,0.000000,0.0,0.0,1,0,0
47,v2,0.412119,0.148876,0.004113,0.000301,0.000000,0.000000,0.024679,0.000000,0.0,...,0.357745,0.000000,0.000000,0.000000,0.000000,0.0,0.0,1,0,0
58,v3,0.481255,0.018845,0.002005,0.005112,0.000501,0.010525,0.031676,0.000000,0.0,...,0.035686,0.000000,0.000000,0.000000,0.000000,0.0,0.0,1,0,0
69,v4,0.514659,0.005884,0.013263,0.032210,0.008875,0.000000,0.014360,0.000000,0.0,...,0.160351,0.017651,0.003889,0.000000,0.000000,0.0,0.0,1,0,0
70,v5,0.549920,0.051022,0.011327,0.005513,0.013432,0.000000,0.012831,0.003508,0.0,...,0.206596,0.016439,0.000000,0.000000,0.000000,0.0,0.0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,gg47,0.198427,0.228675,0.194797,0.000202,0.000000,0.001916,0.002319,0.000000,0.0,...,0.021375,0.002420,0.000000,0.000000,0.000000,0.0,0.0,0,0,1
152,gg48,0.055932,0.153539,0.058724,0.021037,0.000199,0.027418,0.028415,0.000000,0.0,...,0.092423,0.009372,0.000000,0.000000,0.103290,0.0,0.0,0,0,1
153,gg49,0.387202,0.078571,0.017262,0.003274,0.000000,0.004067,0.043552,0.000000,0.0,...,0.309226,0.026290,0.000099,0.000000,0.000496,0.0,0.0,0,0,1
155,gg50,0.126181,0.221438,0.055384,0.036790,0.000597,0.077359,0.059362,0.000000,0.0,...,0.251268,0.001690,0.002188,0.000298,0.000000,0.0,0.0,0,0,1


After all this preprocessing we have the dataset we wanted with the color frequencies and the good short names.

In [17]:
df.reset_index()
filtered_dfs = []

prefixes = ['v', 'j', 'm', 'rn', 'vg', 'gg']
final_dataset = pd.DataFrame()
eliminated_rows = pd.DataFrame()

# **LEN work**

---

In [None]:
x_train = torch.tensor(df.drop(columns=[df.columns[0],'Baroque', 'Impressionist', 'Post-Impressionist']).values, dtype=torch.float)
y_train = torch.tensor(df[['Baroque', 'Impressionist', 'Post-Impressionist']].values, dtype=torch.float32)
concept_names = df.columns[1:-3].tolist()
class_names = df.columns[-3:].tolist()

kf = KFold(n_splits=10, shuffle=True, random_state=42)

fold_results = {
    "loss": [],
    "precision": [],
    "accuracy": [],
    "explanation_accuracy": [],
    "Baroque Accuracy": [],
    "Baroque Complexity": [],
    "Impressionist Accuracy": [],
    "Impressionist Complexity": [],
    "Post-Impressionist Accuracy": [],
    "Post-Impressionist Complexity": [],
}

trained_models = []
model_explanations = []

for fold_idx, fold in enumerate(kf.split(x_train)):
    print(f"Fold {fold_idx + 1}")

    final_dataset = pd.DataFrame()
    eliminated_rows = pd.DataFrame()

    for prefix in prefixes:
      if prefix == 'v':
          group = df[df[df.columns[0]].str.startswith('v') & ~df[df.columns[0]].str.startswith('vg')]
      else:
          group = df[df[df.columns[0]].str.startswith(prefix)]

      sampled_rows = group.sample(frac=0.8, random_state=42+fold_idx)
      eliminated_group = group.loc[~group.index.isin(sampled_rows.index)]

      final_dataset = pd.concat([final_dataset, sampled_rows], ignore_index=True)
      eliminated_rows = pd.concat([eliminated_rows, eliminated_group], ignore_index=True)

    final_dataset = final_dataset.iloc[index_natsorted(final_dataset[final_dataset.columns[0]])]
    final_dataset.drop_duplicates(inplace=True)

    eliminated_rows = eliminated_rows.iloc[index_natsorted(eliminated_rows[eliminated_rows.columns[0]])]
    eliminated_rows.drop_duplicates(inplace=True)


    common_elements = df[df.columns[0]][df[df.columns[0]].isin(final_dataset[final_dataset.columns[0]])].values
    original_indices = df.index[df[df.columns[0]].isin(common_elements)].tolist()

    train_mask = torch.zeros(len(df), dtype=torch.bool)
    train_mask[original_indices] = True

    test_mask = ~train_mask

    layers = [
        te.nn.EntropyLinear(x_train.shape[1], 120, n_classes=3, temperature=0.3),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(120, 60),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(60, 1),
    ]
    model = torch.nn.Sequential(*layers)

    loss_form = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)

    for epoch in range(2000):
        optimizer.zero_grad()
        y_pred = model(x_train).squeeze(-1)
        loss = loss_form(y_pred[train_mask], y_train[train_mask].to(torch.float))
        loss.backward()
        optimizer.step()

    model_path = f"../MODELS/STYLES/model_fold_{fold_idx + 1}.pth"
    torch.save(model.state_dict(), model_path)
    trained_models.append(model_path)

    with torch.no_grad():
        y_test_pred_logits = model(x_train).squeeze(-1)[test_mask]
        y_test_pred = (torch.sigmoid(y_test_pred_logits) > 0.5).to(torch.int)

        test_loss = loss_form(y_test_pred_logits, y_train[test_mask].to(torch.float))
        fold_results["loss"].append(test_loss.item())

        global_explanations, local_explanations = entropy.explain_classes(
            model,
            x_train,
            y_train,
            train_mask=train_mask,
            test_mask=test_mask,
            c_threshold=0.1,
            y_threshold=0.1,
            concept_names=concept_names,
            class_names=class_names,
            max_minterm_complexity=10,
            simplify=True
        )

        model_explanations.append({
            "global": global_explanations,
        })

        precision = precision_score(y_train[test_mask].numpy(), y_test_pred.numpy(), average="micro")
        accuracy = accuracy_score(y_train[test_mask].numpy(), y_test_pred.numpy())
        fold_results["precision"].append(precision)
        fold_results["accuracy"].append(accuracy)
        fold_results["Baroque Accuracy"].append(global_explanations['0']['explanation_accuracy'])
        fold_results["Baroque Complexity"].append(global_explanations['0']['explanation_complexity'])
        fold_results["Impressionist Accuracy"].append(global_explanations['1']['explanation_accuracy'])
        fold_results["Impressionist Complexity"].append(global_explanations['1']['explanation_complexity'])
        fold_results["Post-Impressionist Accuracy"].append(global_explanations['2']['explanation_accuracy'])
        fold_results["Post-Impressionist Complexity"].append(global_explanations['2']['explanation_complexity'])



        print(f"Fold {fold_idx + 1} Test Loss: {test_loss.item():.4f}")
        print(f"Fold {fold_idx + 1} Precision: {precision:.4f}")
        print(f"Fold {fold_idx + 1} Accuracy: {accuracy:.4f}")
        print(f"Fold {fold_idx + 1} Baroque Accuracy: {global_explanations['0']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} Baroque Complexity: {global_explanations['0']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} Impressionist Accuracy: {global_explanations['1']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} Impressionist Complexity: {global_explanations['1']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} Post-Impressionist Accuracy: {global_explanations['2']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} Post-Impressionist Complexity: {global_explanations['2']['explanation_complexity']:.4f}")

mean_loss = np.mean(fold_results["loss"])
std_loss = np.std(fold_results["loss"])
mean_precision = np.mean(fold_results["precision"])
mean_accuracy = np.mean(fold_results["accuracy"])
mean_Bar_accuracy = np.mean(fold_results["Baroque Accuracy"])
mean_Bar_complexity = np.mean(fold_results["Baroque Complexity"])
mean_Imp_accuracy = np.mean(fold_results["Impressionist Accuracy"])
mean_Imp_complexity = np.mean(fold_results["Impressionist Complexity"])
mean_Pos_accuracy = np.mean(fold_results["Post-Impressionist Accuracy"])
mean_Pos_complexity = np.mean(fold_results["Post-Impressionist Complexity"])

Fold 1
Fold 1 Test Loss: 0.4426
Fold 1 Precision: 0.6889
Fold 1 Accuracy: 0.5800
Fold 1 Baroque Accuracy: 0.7600
Fold 1 Baroque Complexity: 8.0000
Fold 1 Impressionist Accuracy: 0.6400
Fold 1 Impressionist Complexity: 23.0000
Fold 1 Post-Impressionist Accuracy: 0.6600
Fold 1 Post-Impressionist Complexity: 10.0000
Fold 2
Fold 2 Test Loss: 0.4436
Fold 2 Precision: 0.7436
Fold 2 Accuracy: 0.5600
Fold 2 Baroque Accuracy: 0.7400
Fold 2 Baroque Complexity: 5.0000
Fold 2 Impressionist Accuracy: 0.7000
Fold 2 Impressionist Complexity: 9.0000
Fold 2 Post-Impressionist Accuracy: 0.6600
Fold 2 Post-Impressionist Complexity: 5.0000
Fold 3
Fold 3 Test Loss: 0.4607
Fold 3 Precision: 0.7391
Fold 3 Accuracy: 0.6200
Fold 3 Baroque Accuracy: 0.7400
Fold 3 Baroque Complexity: 7.0000
Fold 3 Impressionist Accuracy: 0.6800
Fold 3 Impressionist Complexity: 6.0000
Fold 3 Post-Impressionist Accuracy: 0.7200
Fold 3 Post-Impressionist Complexity: 12.0000
Fold 4
Fold 4 Test Loss: 0.4772
Fold 4 Precision: 0.7297
F

In [None]:
print("\nFinal Results:")
print(f"Average Loss: {mean_loss:.4f} ± {std_loss:.4f}")
print(f"Average Precision: {mean_precision:.4f}")
print(f"Average Accuracy: {mean_accuracy:.4f}")
print(f"Average Baroque Accuracy: {mean_Bar_accuracy:.4f}")
print(f"Average Baroque Complexity: {mean_Bar_complexity:.4f}")
print(f"Average Impressionist Accuracy: {mean_Imp_accuracy:.4f}")
print(f"Average Impressionist Complexity: {mean_Imp_complexity:.4f}")
print(f"Average Post-Impressionist Accuracy: {mean_Pos_accuracy:.4f}")
print(f"Average Post-Impressionist Complexity: {mean_Pos_complexity:.4f}")


Final Results:
Average Loss: 0.4183 ± 0.0421
Average Precision: 0.7478
Average Accuracy: 0.5980
Average Baroque Accuracy: 0.7120
Average Baroque Complexity: 7.7000
Average Impressionist Accuracy: 0.6820
Average Impressionist Complexity: 6.8000
Average Post-Impressionist Accuracy: 0.7240
Average Post-Impressionist Complexity: 6.4000


In [None]:
print("\nExplicaciones de cada modelo:")
for i, explanations in enumerate(model_explanations, start=1):
    print(f"\nModelo Fold {i}:")
    print("Explicaciones Globales:")
    print(explanations["global"])


Explicaciones de cada modelo:

Modelo Fold 1:
Explicaciones Globales:
{'0': {'explanation': 'black & ~grey & ~light_grey & ~red & ~orange & ~yellow & ~pale_blue & ~dark_yellow', 'name': 'Baroque', 'explanation_accuracy': 0.76, 'explanation_complexity': np.int64(8)}, '1': {'explanation': '(pale_red & ~light_yellow) | (dark_red & ~orange) | (light_grey & ~orange & ~light_yellow & ~light_turquoise)', 'name': 'Impressionist', 'explanation_accuracy': 0.68, 'explanation_complexity': np.int64(8)}, '2': {'explanation': 'orange | yellow | light_yellow | light_turquoise | dark_green | (pale_red & ~pale_orange & ~dark_red)', 'name': 'Post-Impressionist', 'explanation_accuracy': 0.7, 'explanation_complexity': np.int64(8)}}

Modelo Fold 2:
Explicaciones Globales:
{'0': {'explanation': 'black & ~orange & ~pale_blue & ~dark_red & ~dark_yellow & ~dark_blue', 'name': 'Baroque', 'explanation_accuracy': 0.58, 'explanation_complexity': np.int64(6)}, '1': {'explanation': '(pale_orange & dark_red) | (light

Here I write the colors that "in theory" are representative of each style:

Baroque: Deep reds(dark_red), golds(yellow/orange), dark greens, browns, blacks and vivid whites

Impressionist: Soft pastels(yellows or pinks), blues, lavenders(purple), peach(orange), light greens

Post-Impressionist: Vivid oranges, cobalt blue, chrome yellow, emerald green