In [29]:
import numpy as np
import scipy
import sklearn
from sklearn.metrics import precision_score, accuracy_score
from sklearn.model_selection import KFold
import pandas as pd
import sympy
import torch
from torch.nn.functional import one_hot
import torch_explain as te
from torch_explain.nn.functional import entropy_logic_loss
from torch_explain.logic.nn import entropy
from torch_explain.nn import concepts
from torch_explain.logic.metrics import test_explanation, complexity
from natsort import natsorted, index_natsorted
import random
import csv
import re
from collections import defaultdict

# **Data preprocessing**

---

In [30]:
input_file = "../DATA/Dataset.pl"

In [31]:
output_file = "../DATA/DatasetConverted.csv"

data = defaultdict(lambda: defaultdict(float))
colours = set()

with open(input_file, "r") as file:
    for line in file:
        match = re.match(r"colour_painting\(([^,]+), ([^,]+), ([0-9.eE+-]+)\)\.", line.strip())
        if match:
            row, column, value = match.groups()
            data[row][column] = float(value)
            colours.add(column)

colours = sorted(colours)

with open(output_file, "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["id"] + colours) 

    for row in sorted(data.keys()):
        row_values = [data[row].get(colour, 0.0) for colour in colours]
        writer.writerow([row] + row_values)

print(f"CSV Generated: {output_file}")

CSV Generated: ../DATA/DatasetConverted.csv


In [32]:
df_1 = pd.read_csv("../DATA/DatasetConverted.csv", sep=',', on_bad_lines='warn')
df_2 = pd.read_csv("../DATA/GenresQArtDataset.csv", sep=';', on_bad_lines='warn')

df_1 = df_1.sort_values(by= df_1.columns[0])
df_2 = df_2.sort_values(by= df_2.columns[0])

df_1["turquoise"] = df_1["turquoise"] + df_1["turquoise "]
df_1.drop(columns=['turquoise '], inplace=True)

In [33]:
df_1

Unnamed: 0,id,black,blue,dark_blue,dark_green,dark_grey,dark_orange,dark_pink,dark_purple,dark_red,...,pale_orange,pale_pink,pale_purple,pale_red,pale_turquoise,pale_yellow,red,turquoise,white,yellow
0,gg1,0.066713,0.0,0.000000,0.000000,0.132228,0.008888,0.000000,0.000000,0.011685,...,0.037052,0.000999,0.000000,0.226406,0.000000,0.000899,0.000000,0.0,0.000000,0.000000
1,gg10,0.074353,0.0,0.000000,0.007626,0.084086,0.076159,0.000000,0.000000,0.036123,...,0.055790,0.000000,0.000000,0.054285,0.000000,0.002910,0.058198,0.0,0.000100,0.008730
2,gg11,0.107608,0.0,0.003504,0.021321,0.156456,0.021121,0.000000,0.000000,0.003604,...,0.072973,0.000000,0.000000,0.014915,0.000000,0.012513,0.003704,0.0,0.017317,0.000000
3,gg12,0.014120,0.0,0.001293,0.012529,0.176295,0.010540,0.000000,0.000000,0.005469,...,0.044844,0.000000,0.000000,0.027543,0.008452,0.153127,0.000696,0.0,0.089689,0.018992
4,gg13,0.181918,0.0,0.000000,0.118222,0.058812,0.219398,0.000000,0.000000,0.042763,...,0.072667,0.000000,0.000000,0.004087,0.000000,0.002891,0.010666,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,vg5,0.136886,0.0,0.050463,0.032333,0.057514,0.199436,0.000000,0.000000,0.007353,...,0.043211,0.000000,0.000101,0.000101,0.004130,0.019541,0.000101,0.0,0.001108,0.038074
86,vg6,0.118155,0.0,0.000298,0.000000,0.111706,0.031548,0.015278,0.007738,0.053869,...,0.067262,0.003671,0.049405,0.053571,0.000000,0.008730,0.004266,0.0,0.018750,0.057738
87,vg7,0.054092,0.0,0.004276,0.004972,0.109575,0.203639,0.000000,0.000000,0.149846,...,0.055185,0.000000,0.000000,0.008452,0.007855,0.115044,0.000199,0.0,0.002784,0.002585
88,vg8,0.000101,0.0,0.000000,0.000201,0.031175,0.002816,0.000000,0.000000,0.001911,...,0.260157,0.000000,0.000000,0.049175,0.002212,0.033789,0.018403,0.0,0.097144,0.001207


In [34]:
df_2

Unnamed: 0,id,historypainting,landscape,portrait,genre painting,stilllife,mountains,glasses,water,table,jewelry,buildings,weapons,flowers,person,people,trees
75,gg1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0
84,gg10,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1
85,gg11,0,1,0,0,0,1,0,1,0,0,1,0,1,0,033,1
86,gg12,0,1,0,0,0,1,0,0,0,0,1,0,1,1,033,1
87,gg13,0,0,0,1,0,1,1,1,0,0,0,0,1,0,033,05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,vg5,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
65,vg6,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,05
66,vg7,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0
67,vg8,0,0,0,1,0,0,1,0,1,0,0,0,0,0,033,05


In [35]:
df_1["historypainting"] = df_2["historypainting"]
df_1["landscape"] = df_2["landscape"]
df_1["portrait"] = df_2["portrait"]
df_1["genre painting"] = df_2["genre painting"]
df_1.rename(columns={'genre painting': 'genre_painting'}, inplace=True)
df_1["stilllife"] = df_2["stilllife"]
df = df_1

In [36]:
df.head(50)

Unnamed: 0,id,black,blue,dark_blue,dark_green,dark_grey,dark_orange,dark_pink,dark_purple,dark_red,...,pale_yellow,red,turquoise,white,yellow,historypainting,landscape,portrait,genre_painting,stilllife
0,gg1,0.066713,0.0,0.0,0.0,0.132228,0.008888,0.0,0.0,0.011685,...,0.000899,0.0,0.0,0.0,0.0,0,0,0,1,0
1,gg10,0.074353,0.0,0.0,0.007626,0.084086,0.076159,0.0,0.0,0.036123,...,0.00291,0.058198,0.0,0.0001,0.00873,0,0,0,1,0
2,gg11,0.107608,0.0,0.003504,0.021321,0.156456,0.021121,0.0,0.0,0.003604,...,0.012513,0.003704,0.0,0.017317,0.0,0,0,0,1,0
3,gg12,0.01412,0.0,0.001293,0.012529,0.176295,0.01054,0.0,0.0,0.005469,...,0.153127,0.000696,0.0,0.089689,0.018992,1,0,0,0,0
4,gg13,0.181918,0.0,0.0,0.118222,0.058812,0.219398,0.0,0.0,0.042763,...,0.002891,0.010666,0.0,0.0,0.0,0,0,0,1,0
5,gg14,0.000804,0.0,0.000201,0.038311,0.03087,0.062544,0.0,0.0,0.013977,...,0.064857,0.002715,0.0,0.037104,0.081649,0,0,0,1,0
6,gg15,0.051208,0.0,0.00179,0.085711,0.086606,0.070796,0.0,0.0,0.002884,...,0.02287,0.000398,0.0,0.005369,9.9e-05,0,0,0,1,0
7,gg2,0.119613,0.0,0.009162,0.008256,0.144382,0.036549,0.0,0.0,0.006746,...,0.015002,0.014096,0.0,0.118506,0.001812,0,0,0,1,0
8,gg3,0.007743,0.0,0.000402,0.03027,0.087993,0.019107,0.0,0.0,0.001207,...,0.06788,0.015386,0.0,0.016492,0.004324,0,0,1,0,0
9,gg4,0.026914,0.0,0.0,0.0001,0.081539,0.005582,0.0,0.0,0.002691,...,0.011264,0.021132,0.0,0.005781,0.0,0,0,0,1,0


# **LEN work**

---

In [37]:
x_train = torch.tensor(df.drop(columns=[df.columns[0],'historypainting', 'landscape', 'portrait','genre_painting','stilllife']).values, dtype=torch.float32)
y_train = torch.tensor(df[['historypainting', 'landscape', 'portrait','genre_painting','stilllife']].values, dtype=torch.float32)
concept_names = df.columns[1:36].tolist()
class_names = df.columns[36:].tolist()

kf = KFold(n_splits=10, shuffle=True, random_state=42)

fold_results = {
    "loss": [],
    "precision": [],
    "accuracy": [],
    "explanation_accuracy": [],
    "history Accuracy": [],
    "history Complexity": [],
    "landscape Accuracy": [],
    "landscape Complexity": [],
    "portrait Accuracy": [],
    "portrait Complexity": [],
    "genre Accuracy": [],
    "genre Complexity": [],
    "stilllife Accuracy": [],
    "stilllife Complexity": [],
}

trained_models = []
model_explanations = []

for fold_idx, fold in enumerate(kf.split(x_train)):
    print(f"Fold {fold_idx + 1}")

    final_dataset = pd.DataFrame()
    eliminated_rows = pd.DataFrame()
    
    for class_idx, class_name in enumerate(class_names):

        class_rows = df[y_train[:, class_idx].cpu().numpy() == 1]

        sampled_rows = class_rows.sample(frac=0.8, random_state=fold_idx + 42)
        
        eliminated_group = class_rows.drop(index=sampled_rows.index)
        

        
        final_dataset = pd.concat([final_dataset, sampled_rows], ignore_index=True)
        eliminated_rows = pd.concat([eliminated_rows, eliminated_group], ignore_index=True)

    
    final_dataset = final_dataset.iloc[index_natsorted(final_dataset[final_dataset.columns[0]])]
    final_dataset.drop_duplicates(inplace=True)

    eliminated_rows = eliminated_rows.iloc[index_natsorted(eliminated_rows[eliminated_rows.columns[0]])]
    eliminated_rows.drop_duplicates(inplace=True)

    common_elements = df[df.columns[0]][df[df.columns[0]].isin(final_dataset[final_dataset.columns[0]])].values
    original_indices = df.index[df[df.columns[0]].isin(common_elements)].tolist()

    train_mask = torch.zeros(len(df), dtype=torch.bool)
    train_mask[original_indices] = True

    test_mask = ~train_mask
    """""

    prefixes = ['v', 'j', 'm', 'rn', 'vg', 'gg']
    
    for prefix in prefixes:
      if prefix == 'v':
          group = df[df[df.columns[0]].str.startswith('v') & ~df[df.columns[0]].str.startswith('vg')]
      else:
          group = df[df[df.columns[0]].str.startswith(prefix)]

      sampled_rows = group.sample(frac=0.8, random_state=42+fold_idx)
      eliminated_group = group.loc[~group.index.isin(sampled_rows.index)]

      final_dataset = pd.concat([final_dataset, sampled_rows], ignore_index=True)
      eliminated_rows = pd.concat([eliminated_rows, eliminated_group], ignore_index=True)

    final_dataset = final_dataset.iloc[index_natsorted(final_dataset[final_dataset.columns[0]])]
    final_dataset.drop_duplicates(inplace=True)

    eliminated_rows = eliminated_rows.iloc[index_natsorted(eliminated_rows[eliminated_rows.columns[0]])]
    eliminated_rows.drop_duplicates(inplace=True)


    common_elements = df[df.columns[0]][df[df.columns[0]].isin(final_dataset[final_dataset.columns[0]])].values
    original_indices = df.index[df[df.columns[0]].isin(common_elements)].tolist()

    train_mask = torch.zeros(len(df), dtype=torch.bool)
    train_mask[original_indices] = True

    test_mask = ~train_mask
    """""

    layers = [
        te.nn.EntropyLinear(x_train.shape[1], 120, n_classes=5, temperature=0.4),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(120, 60),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(60, 1),
    ]
    model = torch.nn.Sequential(*layers)

    loss_form = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

    for epoch in range(2000):
        optimizer.zero_grad()
        y_pred = model(x_train).squeeze(-1)
        loss = loss_form(y_pred[train_mask], y_train[train_mask].to(torch.float))
        loss.backward()
        optimizer.step()

    model_path = f"../MODELS/GENRESBYCOLOR/model_fold_{fold_idx + 1}.pth"
    torch.save(model.state_dict(), model_path)
    trained_models.append(model_path)

    with torch.no_grad():
        y_test_pred_logits = model(x_train).squeeze(-1)[test_mask]
        y_test_pred = (torch.sigmoid(y_test_pred_logits) > 0.5).to(torch.int)

        test_loss = loss_form(y_test_pred_logits, y_train[test_mask].to(torch.float))
        fold_results["loss"].append(test_loss.item())

        global_explanations, local_explanations = entropy.explain_classes(
            model,
            x_train,
            y_train,
            train_mask=train_mask,
            test_mask=test_mask,
            c_threshold=0.1,
            y_threshold=0.2,
            concept_names=concept_names,
            class_names=class_names,
            max_minterm_complexity=10,
            simplify=True
        )

        model_explanations.append({
            "global": global_explanations,
        })

        precision = precision_score(y_train[test_mask].numpy(), y_test_pred.numpy(), average="micro")
        accuracy = accuracy_score(y_train[test_mask].numpy(), y_test_pred.numpy())
        fold_results["precision"].append(precision)
        fold_results["accuracy"].append(accuracy)
        fold_results["history Accuracy"].append(global_explanations['0']['explanation_accuracy'])
        fold_results["history Complexity"].append(global_explanations['0']['explanation_complexity'])
        fold_results["landscape Accuracy"].append(global_explanations['1']['explanation_accuracy'])
        fold_results["landscape Complexity"].append(global_explanations['1']['explanation_complexity'])
        fold_results["portrait Accuracy"].append(global_explanations['2']['explanation_accuracy'])
        fold_results["portrait Complexity"].append(global_explanations['2']['explanation_complexity'])
        fold_results["genre Accuracy"].append(global_explanations['3']['explanation_accuracy'])
        fold_results["genre Complexity"].append(global_explanations['3']['explanation_complexity'])
        fold_results["stilllife Accuracy"].append(global_explanations['4']['explanation_accuracy'])
        fold_results["stilllife Complexity"].append(global_explanations['4']['explanation_complexity'])



        print(f"Fold {fold_idx + 1} Test Loss: {test_loss.item():.4f}")
        print(f"Fold {fold_idx + 1} Precision: {precision:.4f}")
        print(f"Fold {fold_idx + 1} Accuracy: {accuracy:.4f}")
        print(f"Fold {fold_idx + 1} history Accuracy: {global_explanations['0']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} history Complexity: {global_explanations['0']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} landscape Accuracy: {global_explanations['1']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} landscape Complexity: {global_explanations['1']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} portrait Accuracy: {global_explanations['2']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} portrait Complexity: {global_explanations['2']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} genre Accuracy: {global_explanations['3']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} genre Complexity: {global_explanations['3']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} stilllife Accuracy: {global_explanations['4']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} stilllife Complexity: {global_explanations['4']['explanation_complexity']:.4f}")

mean_loss = np.mean(fold_results["loss"])
std_loss = np.std(fold_results["loss"])
mean_precision = np.mean(fold_results["precision"])
mean_accuracy = np.mean(fold_results["accuracy"])
mean_his_accuracy = np.mean(fold_results["history Accuracy"])
mean_his_complexity = np.mean(fold_results["history Complexity"])
mean_land_accuracy = np.mean(fold_results["landscape Accuracy"])
mean_land_complexity = np.mean(fold_results["landscape Complexity"])
mean_por_accuracy = np.mean(fold_results["portrait Accuracy"])
mean_por_complexity = np.mean(fold_results["portrait Complexity"])
mean_gen_accuracy = np.mean(fold_results["genre Accuracy"])
mean_gen_complexity = np.mean(fold_results["genre Complexity"])
mean_still_accuracy = np.mean(fold_results["stilllife Accuracy"])
mean_still_complexity = np.mean(fold_results["stilllife Complexity"])

Fold 1


Fold 1 Test Loss: 0.6065
Fold 1 Precision: 0.2857
Fold 1 Accuracy: 0.0526
Fold 1 history Accuracy: 0.0000
Fold 1 history Complexity: 0.0000
Fold 1 landscape Accuracy: 0.0000
Fold 1 landscape Complexity: 0.0000
Fold 1 portrait Accuracy: 0.8421
Fold 1 portrait Complexity: 5.0000
Fold 1 genre Accuracy: 0.5789
Fold 1 genre Complexity: 5.0000
Fold 1 stilllife Accuracy: 0.0000
Fold 1 stilllife Complexity: 0.0000
Fold 2
Fold 2 Test Loss: 0.7013
Fold 2 Precision: 0.3636
Fold 2 Accuracy: 0.2105
Fold 2 history Accuracy: 0.0000
Fold 2 history Complexity: 0.0000
Fold 2 landscape Accuracy: 0.7368
Fold 2 landscape Complexity: 11.0000
Fold 2 portrait Accuracy: 0.0000
Fold 2 portrait Complexity: 0.0000
Fold 2 genre Accuracy: 0.5263
Fold 2 genre Complexity: 10.0000
Fold 2 stilllife Accuracy: 0.0000
Fold 2 stilllife Complexity: 0.0000
Fold 3
Fold 3 Test Loss: 0.7338
Fold 3 Precision: 0.3333
Fold 3 Accuracy: 0.1053
Fold 3 history Accuracy: 0.0000
Fold 3 history Complexity: 0.0000
Fold 3 landscape Accurac

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 9 Test Loss: nan
Fold 9 Precision: 0.0000
Fold 9 Accuracy: 0.0000
Fold 9 history Accuracy: 0.0000
Fold 9 history Complexity: 0.0000
Fold 9 landscape Accuracy: 0.0000
Fold 9 landscape Complexity: 0.0000
Fold 9 portrait Accuracy: 0.0000
Fold 9 portrait Complexity: 0.0000
Fold 9 genre Accuracy: 0.0000
Fold 9 genre Complexity: 0.0000
Fold 9 stilllife Accuracy: 0.0000
Fold 9 stilllife Complexity: 0.0000
Fold 10
Fold 10 Test Loss: nan
Fold 10 Precision: 0.0000
Fold 10 Accuracy: 0.0000
Fold 10 history Accuracy: 0.0000
Fold 10 history Complexity: 0.0000
Fold 10 landscape Accuracy: 0.0000
Fold 10 landscape Complexity: 0.0000
Fold 10 portrait Accuracy: 0.0000
Fold 10 portrait Complexity: 0.0000
Fold 10 genre Accuracy: 0.0000
Fold 10 genre Complexity: 0.0000
Fold 10 stilllife Accuracy: 0.0000
Fold 10 stilllife Complexity: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [38]:
train_mask

tensor([ True,  True,  True, False,  True,  True, False,  True,  True,  True,
         True,  True, False,  True,  True,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True, False,  True,  True,  True,  True, False,  True,
         True,  True, False,  True,  True,  True, False,  True,  True,  True,
        False,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True, False,  True,  True,  True,  True,  True,
        False, False, False,  True,  True,  True,  True,  True,  True,  True,
        False,  True, False,  True,  True,  True,  True, False, False, False])

In [39]:
print("\nFinal Results:")
print(f"Average Loss: {mean_loss:.4f} ± {std_loss:.4f}")
print(f"Average Precision: {mean_precision:.4f}")
print(f"Average Accuracy: {mean_accuracy:.4f}")
print(f"Average history Accuracy: {mean_his_accuracy:.4f}")
print(f"Average history Complexity: {mean_his_complexity:.4f}")
print(f"Average landscape Accuracy: {mean_land_accuracy:.4f}")
print(f"Average landscape Complexity: {mean_land_complexity:.4f}")
print(f"Average portrait Accuracy: {mean_por_accuracy:.4f}")
print(f"Average portrait Complexity: {mean_por_complexity:.4f}")
print(f"Average genre Accuracy: {mean_gen_accuracy:.4f}")
print(f"Average genre Complexity: {mean_gen_complexity:.4f}")
print(f"Average stilllife Accuracy: {mean_still_accuracy:.4f}")
print(f"Average stilllife Complexity: {mean_still_complexity:.4f}")


Final Results:
Average Loss: nan ± nan
Average Precision: 0.2663
Average Accuracy: 0.1053
Average history Accuracy: 0.0000
Average history Complexity: 0.0000
Average landscape Accuracy: 0.3842
Average landscape Complexity: 3.4000
Average portrait Accuracy: 0.1737
Average portrait Complexity: 0.7000
Average genre Accuracy: 0.3789
Average genre Complexity: 4.0000
Average stilllife Accuracy: 0.0000
Average stilllife Complexity: 0.0000


In [40]:
print("\nExplicaciones de cada modelo:")
for i, explanations in enumerate(model_explanations, start=1):
    print(f"\nModelo Fold {i}:")
    print("Explicaciones Globales:")
    print(explanations["global"])


Explicaciones de cada modelo:

Modelo Fold 1:
Explicaciones Globales:
{'0': {'explanation': '', 'name': 'historypainting', 'explanation_accuracy': 0.0, 'explanation_complexity': 0}, '1': {'explanation': '', 'name': 'landscape', 'explanation_accuracy': 0.0, 'explanation_complexity': 0}, '2': {'explanation': 'pale_orange & ~black & ~dark_orange & ~grey & ~orange', 'name': 'portrait', 'explanation_accuracy': 0.8421052631578947, 'explanation_complexity': 5}, '3': {'explanation': 'dark_green | (~black & ~light_blue & ~light_orange & ~pale_yellow)', 'name': 'genre_painting', 'explanation_accuracy': 0.5789473684210527, 'explanation_complexity': 5}, '4': {'explanation': '', 'name': 'stilllife', 'explanation_accuracy': 0.0, 'explanation_complexity': 0}}

Modelo Fold 2:
Explicaciones Globales:
{'0': {'explanation': '', 'name': 'historypainting', 'explanation_accuracy': 0.0, 'explanation_complexity': 0}, '1': {'explanation': '(light_green & ~dark_grey) | (light_grey & ~white) | (pale_yellow & ~d

We do not get explanations with accuracy over 0 % in many cases. That means that the explanations generated with the training set do not work in the test set. Meaning that, with these features one cannot classify the genres with precision, while with our other concept dataset, that can be seen on the other test, it can be done.