In [2]:
import numpy as np
import scipy
import sklearn
from sklearn.metrics import precision_score, accuracy_score
from sklearn.model_selection import KFold
import pandas as pd
import sympy
import torch
from torch.nn.functional import one_hot
import torch_explain as te
from torch_explain.nn.functional import entropy_logic_loss
from torch_explain.logic.nn import entropy
from torch_explain.nn import concepts
from torch_explain.logic.metrics import test_explanation, complexity
from natsort import natsorted, index_natsorted
import random

# **Data preprocessing**

---

In [3]:
df = pd.read_csv("../DATA/GenresQArtDataset.csv", sep=';', on_bad_lines='warn')

In [4]:
df.rename(columns={'genre painting': 'genre_painting'}, inplace=True)



df['people_no'] = ((df['people'] == "0")).astype(int)
df['people_few'] = ((df['people'] == "0,33")).astype(int)
df['people_group'] = ((df['people'] == "0,66")).astype(int)
df['people_lot'] = (df['people'] == "1").astype(int)

df.drop(columns=['people'], inplace=True)

df['trees_no'] = (df['trees'] == "0").astype(int)
df['trees_few'] = (df['trees'] == "0,5").astype(int)
df['trees_lot'] = (df['trees'] == "1").astype(int)

df.drop(columns=['trees'], inplace=True)

# **LEN work**

---

In [None]:
x_train = torch.tensor(df.drop(columns=[df.columns[0],'historypainting', 'landscape', 'portrait','genre_painting','stilllife']).values, dtype=torch.float)
y_train = torch.tensor(df[['historypainting', 'landscape', 'portrait','genre_painting','stilllife']].values, dtype=torch.float32)
concept_names = df.columns[6:].tolist()
class_names = df.columns[1:6].tolist()


kf = KFold(n_splits=10, shuffle=True, random_state=42)

fold_results = {
    "loss": [],
    "precision": [],
    "accuracy": [],
    "explanation_accuracy": [],
    "history Accuracy": [],
    "history Complexity": [],
    "landscape Accuracy": [],
    "landscape Complexity": [],
    "portrait Accuracy": [],
    "portrait Complexity": [],
    "genre Accuracy": [],
    "genre Complexity": [],
    "stilllife Accuracy": [],
    "stilllife Complexity": [],
}

trained_models = []
model_explanations = []

for fold_idx, fold in enumerate(kf.split(x_train)):
    print(f"Fold {fold_idx + 1}")

    final_dataset = pd.DataFrame()
    eliminated_rows = pd.DataFrame()

    for class_idx, class_name in enumerate(class_names):

        class_rows = df[y_train[:, class_idx].cpu().numpy() == 1] 

        
        sampled_rows = class_rows.sample(frac=0.8, random_state=fold_idx + 42)
        eliminated_group = class_rows.drop(index=sampled_rows.index)

        
        final_dataset = pd.concat([final_dataset, sampled_rows], ignore_index=True)
        eliminated_rows = pd.concat([eliminated_rows, eliminated_group], ignore_index=True)

    
    final_dataset = final_dataset.iloc[index_natsorted(final_dataset[final_dataset.columns[0]])]
    final_dataset.drop_duplicates(inplace=True)

    eliminated_rows = eliminated_rows.iloc[index_natsorted(eliminated_rows[eliminated_rows.columns[0]])]
    eliminated_rows.drop_duplicates(inplace=True)

    common_elements = df[df.columns[0]][df[df.columns[0]].isin(final_dataset[final_dataset.columns[0]])].values
    original_indices = df.index[df[df.columns[0]].isin(common_elements)].tolist()

    train_mask = torch.zeros(len(df), dtype=torch.bool)
    train_mask[original_indices] = True

    test_mask = ~train_mask

    layers = [
        te.nn.EntropyLinear(x_train.shape[1], 120, n_classes=5, temperature=0.3),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(120, 60),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(60, 1),
    ]
    model = torch.nn.Sequential(*layers)

    loss_form = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)

    for epoch in range(2000):
        optimizer.zero_grad()
        y_pred = model(x_train).squeeze(-1)
        loss = loss_form(y_pred[train_mask], y_train[train_mask].to(torch.float))
        loss.backward()
        optimizer.step()

    model_path = f"../MODELS/GENRES/model_fold_{fold_idx + 1}.pth"
    torch.save(model.state_dict(), model_path)
    trained_models.append(model_path)

    with torch.no_grad():
        y_test_pred_logits = model(x_train).squeeze(-1)[test_mask]
        y_test_pred = (torch.sigmoid(y_test_pred_logits) > 0.5).to(torch.int)

        test_loss = loss_form(y_test_pred_logits, y_train[test_mask].to(torch.float))
        fold_results["loss"].append(test_loss.item())

        global_explanations, local_explanations = entropy.explain_classes(
            model,
            x_train,
            y_train,
            train_mask=train_mask,
            test_mask=test_mask,
            c_threshold=0.1,
            y_threshold=0.1,
            concept_names=concept_names,
            class_names=class_names,
            max_minterm_complexity=10,
            simplify=True
        )

        model_explanations.append({
            "global": global_explanations,
        })

        precision = precision_score(y_train[test_mask].numpy(), y_test_pred.numpy(), average="micro")
        accuracy = accuracy_score(y_train[test_mask].numpy(), y_test_pred.numpy())
        fold_results["precision"].append(precision)
        fold_results["accuracy"].append(accuracy)
        fold_results["history Accuracy"].append(global_explanations['0']['explanation_accuracy'])
        fold_results["history Complexity"].append(global_explanations['0']['explanation_complexity'])
        fold_results["landscape Accuracy"].append(global_explanations['1']['explanation_accuracy'])
        fold_results["landscape Complexity"].append(global_explanations['1']['explanation_complexity'])
        fold_results["portrait Accuracy"].append(global_explanations['2']['explanation_accuracy'])
        fold_results["portrait Complexity"].append(global_explanations['2']['explanation_complexity'])
        fold_results["genre Accuracy"].append(global_explanations['3']['explanation_accuracy'])
        fold_results["genre Complexity"].append(global_explanations['3']['explanation_complexity'])
        fold_results["stilllife Accuracy"].append(global_explanations['4']['explanation_accuracy'])
        fold_results["stilllife Complexity"].append(global_explanations['4']['explanation_complexity'])



        print(f"Fold {fold_idx + 1} Test Loss: {test_loss.item():.4f}")
        print(f"Fold {fold_idx + 1} Precision: {precision:.4f}")
        print(f"Fold {fold_idx + 1} Accuracy: {accuracy:.4f}")
        print(f"Fold {fold_idx + 1} history Accuracy: {global_explanations['0']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} history Complexity: {global_explanations['0']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} landscape Accuracy: {global_explanations['1']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} landscape Complexity: {global_explanations['1']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} portrait Accuracy: {global_explanations['2']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} portrait Complexity: {global_explanations['2']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} genre Accuracy: {global_explanations['3']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} genre Complexity: {global_explanations['3']['explanation_complexity']:.4f}")
        print(f"Fold {fold_idx + 1} stilllife Accuracy: {global_explanations['4']['explanation_accuracy']:.4f}")
        print(f"Fold {fold_idx + 1} stilllife Complexity: {global_explanations['4']['explanation_complexity']:.4f}")

mean_loss = np.mean(fold_results["loss"])
std_loss = np.std(fold_results["loss"])
mean_precision = np.mean(fold_results["precision"])
mean_accuracy = np.mean(fold_results["accuracy"])
mean_his_accuracy = np.mean(fold_results["history Accuracy"])
mean_his_complexity = np.mean(fold_results["history Complexity"])
mean_land_accuracy = np.mean(fold_results["landscape Accuracy"])
mean_land_complexity = np.mean(fold_results["landscape Complexity"])
mean_por_accuracy = np.mean(fold_results["portrait Accuracy"])
mean_por_complexity = np.mean(fold_results["portrait Complexity"])
mean_gen_accuracy = np.mean(fold_results["genre Accuracy"])
mean_gen_complexity = np.mean(fold_results["genre Complexity"])
mean_still_accuracy = np.mean(fold_results["stilllife Accuracy"])
mean_still_complexity = np.mean(fold_results["stilllife Complexity"])

Fold 1
Fold 1 Test Loss: 0.1520
Fold 1 Precision: 0.8750
Fold 1 Accuracy: 0.6842
Fold 1 history Accuracy: 0.8421
Fold 1 history Complexity: 15.0000
Fold 1 landscape Accuracy: 0.9474
Fold 1 landscape Complexity: 4.0000
Fold 1 portrait Accuracy: 0.8947
Fold 1 portrait Complexity: 7.0000
Fold 1 genre Accuracy: 0.7368
Fold 1 genre Complexity: 5.0000
Fold 1 stilllife Accuracy: 1.0000
Fold 1 stilllife Complexity: 4.0000
Fold 2
Fold 2 Test Loss: 0.4362
Fold 2 Precision: 0.7059
Fold 2 Accuracy: 0.6667
Fold 2 history Accuracy: 0.7778
Fold 2 history Complexity: 6.0000
Fold 2 landscape Accuracy: 0.7778
Fold 2 landscape Complexity: 5.0000
Fold 2 portrait Accuracy: 0.9444
Fold 2 portrait Complexity: 6.0000
Fold 2 genre Accuracy: 0.7778
Fold 2 genre Complexity: 13.0000
Fold 2 stilllife Accuracy: 1.0000
Fold 2 stilllife Complexity: 2.0000
Fold 3
Fold 3 Test Loss: 0.2903
Fold 3 Precision: 0.7273
Fold 3 Accuracy: 0.4444
Fold 3 history Accuracy: 0.8333
Fold 3 history Complexity: 9.0000
Fold 3 landscape 

In [6]:
print("\nFinal Results:")
print(f"Average Loss: {mean_loss:.4f} ± {std_loss:.4f}")
print(f"Average Precision: {mean_precision:.4f}")
print(f"Average Accuracy: {mean_accuracy:.4f}")
print(f"Average history Accuracy: {mean_his_accuracy:.4f}")
print(f"Average history Complexity: {mean_his_complexity:.4f}")
print(f"Average landscape Accuracy: {mean_land_accuracy:.4f}")
print(f"Average landscape Complexity: {mean_land_complexity:.4f}")
print(f"Average portrait Accuracy: {mean_por_accuracy:.4f}")
print(f"Average portrait Complexity: {mean_por_complexity:.4f}")
print(f"Average genre Accuracy: {mean_gen_accuracy:.4f}")
print(f"Average genre Complexity: {mean_gen_complexity:.4f}")
print(f"Average stilllife Accuracy: {mean_still_accuracy:.4f}")
print(f"Average stilllife Complexity: {mean_still_complexity:.4f}")


Final Results:
Average Loss: 0.2770 ± 0.0693
Average Precision: 0.8128
Average Accuracy: 0.6591
Average history Accuracy: 0.8751
Average history Complexity: 10.5000
Average landscape Accuracy: 0.8427
Average landscape Complexity: 3.8000
Average portrait Accuracy: 0.8980
Average portrait Complexity: 7.4000
Average genre Accuracy: 0.7208
Average genre Complexity: 8.9000
Average stilllife Accuracy: 0.9731
Average stilllife Complexity: 3.6000


In [7]:
print("\nExplicaciones de cada modelo:")
for i, explanations in enumerate(model_explanations, start=1):
    print(f"\nModelo Fold {i}:")
    print("Explicaciones Globales:")
    print(explanations["global"])


Explicaciones de cada modelo:

Modelo Fold 1:
Explicaciones Globales:
{'0': {'explanation': '(weapons & people_lot) | (jewelry & people_lot & trees_lot) | (people_lot & ~water & ~table & ~jewelry & ~trees_lot) | (~table & ~buildings & ~person & ~people_no & ~trees_lot)', 'name': 'historypainting', 'explanation_accuracy': 0.8421052631578947, 'explanation_complexity': 15}, '1': {'explanation': '(buildings & ~glasses) | (trees_lot & ~people_lot)', 'name': 'landscape', 'explanation_accuracy': 0.9473684210526315, 'explanation_complexity': 4}, '2': {'explanation': '(mountains & jewelry & person) | (person & ~glasses & ~buildings & ~trees_lot)', 'name': 'portrait', 'explanation_accuracy': 0.8947368421052632, 'explanation_complexity': 7}, '3': {'explanation': '(water & people_lot) | (glasses & ~mountains & ~people_no)', 'name': 'genre_painting', 'explanation_accuracy': 0.7368421052631579, 'explanation_complexity': 5}, '4': {'explanation': 'glasses & ~people_few & ~people_group & ~people_lot',