In [1]:
import os
import sys
from pathlib import Path
# Make the parent of the current working directory importable so that the
# `information_noise_reduction` package imported in the next cell resolves
# (assumes the notebook is launched from a sub-folder of the project — TODO confirm).
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path.
project_root = str(Path.cwd().resolve().parent)
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
from information_noise_reduction.subset_generator import reverse_all_subsets_generator
from information_noise_reduction.evaluate_model import evaluate_subsets
from information_noise_reduction.interpretation import compute_variable_contributions

import pandas as pd
from sklearn.datasets import load_wine
import tensorflow as tf

2024-11-09 15:28:12.675058: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load Dataset

In [3]:
def get_wine_dataset() -> pd.DataFrame:
    """Load scikit-learn's wine dataset as a DataFrame of its feature columns.

    Returns:
        A frame built from the loaded bunch's ``data`` array, with one column
        per entry in ``feature_names``. The bunch's target labels are not
        included.
    """
    wine = load_wine()
    features = pd.DataFrame(wine.data, columns=wine.feature_names)
    return features

# Load the feature table once; the analysis cell further down reads it as `df`.
df = get_wine_dataset()
# Bare last expression so the notebook renders the frame as this cell's output.
df

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [4]:
# Column passed as `target_col` to `evaluate_subsets` (the value to predict).
prediction_column = "color_intensity"

# Candidate input columns handed to `reverse_all_subsets_generator`.
feature_columns = [
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "magnesium",
    "total_phenols",
    "flavanoids",
    "nonflavanoid_phenols",
    "proanthocyanins",
]

## Class of models

In [5]:
def model_generator(input_dim: int) -> tf.keras.Sequential:
    """Build and compile a fresh feed-forward model for ``input_dim`` features.

    Architecture: two sigmoid hidden layers (64 and 32 units) followed by a
    single-unit output layer with no activation. The model is compiled with
    the Adam optimizer and mean-squared-error loss.

    Args:
        input_dim: Number of input features, i.e. the length of each input
            vector.

    Returns:
        A newly constructed and compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input object instead of the
        # `input_shape=` keyword on the first Dense layer: Keras 3 deprecates
        # that keyword and warns about it, while Input works on both versions.
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(64, activation='sigmoid'),
        tf.keras.layers.Dense(32, activation='sigmoid'),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

## Analysis

In [6]:
# Seed the Python, NumPy and TensorFlow random generators before any model is
# built, so that weight initialisation (and anything else drawing from these
# generators) repeats across Restart & Run All. Other sources of
# nondeterminism inside `evaluate_subsets`, if any, are not covered.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Budget knobs, named so they can be tuned in one place.
MAX_SUBSETS = 10  # forwarded as `max_subsets` to evaluate_subsets
EPOCHS = 10       # forwarded as `epochs` to evaluate_subsets

model_gen = model_generator
subset_gen = reverse_all_subsets_generator(feature_columns)
result = evaluate_subsets(
    df,
    target_col=prediction_column,
    subset_gen=subset_gen,
    model_generator=model_gen,
    max_subsets=MAX_SUBSETS,
    epochs=EPOCHS,
)

2024-11-09 15:28:16.394544: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Evaluated subset ('alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins') with loss: 8.0909
Evaluated subset ('alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols') with loss: 7.9051
Evaluated subset ('alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'proanthocyanins') with loss: 5.5033
Evaluated subset ('alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'nonflavanoid_phenols', 'proanthocyanins') with loss: 8.3972
Evaluated subset ('alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins') with loss: 13.6315
Evaluated subset ('alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins') with loss: 6.6191
Evaluated subset ('alcohol', '

In [7]:
# Turn the subset-evaluation `result` into per-feature figures. As the cell
# output shows, the returned dict has two entries, 'average_losses' and
# 'normalized_scores', each keyed by feature name.
# NOTE(review): how the normalized score is derived from the losses is defined
# in `compute_variable_contributions`, not here — confirm before interpreting.
variable_contributions = compute_variable_contributions(result)
# Bare last expression so the notebook displays the dict.
variable_contributions

{'average_losses': {'alcohol': 8.050102180904812,
  'malic_acid': 8.592569563123915,
  'ash': 8.671500894758436,
  'alcalinity_of_ash': 8.556833585103353,
  'magnesium': 8.697577158610025,
  'total_phenols': 7.918421692318386,
  'flavanoids': 8.500012026892769,
  'nonflavanoid_phenols': 8.821561707390678,
  'proanthocyanins': 8.554694599575466},
 'normalized_scores': {'alcohol': 0.6866689450503893,
  'malic_acid': 0.6199305074061714,
  'ash': 0.6102197783796325,
  'alcalinity_of_ash': 0.6243270175355438,
  'magnesium': 0.6070116793010587,
  'total_phenols': 0.7028692740739246,
  'flavanoids': 0.6313176351169576,
  'nonflavanoid_phenols': 0.5917581629628136,
  'proanthocyanins': 0.6245901717028797}}