In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell executes, so changes to the local `hics` sources are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from hics.incremental_correlation import IncrementalCorrelation
from hics.result_storage import DefaultResultStorage
from hics.scored_slices import ScoredSlices

In [3]:
# Read the synthetic dataset; the path is relative to the notebook's directory.
data = pd.read_csv(filepath_or_buffer='../data/mysynthetic.csv')

In [4]:
# Column '0' is the target; every other column of `data` is an input feature.
target = '0'
input_features = [column for column in data.columns.values if column != target]

# `storage` collects the results that the update_* calls below write into it
# (it is inspected later via storage.relevancies / storage.redundancies).
storage = DefaultResultStorage(input_features)

# NOTE(review): the meaning of iterations / alpha / drop_discrete is defined
# by hics.incremental_correlation and is not visible in this notebook.
correlation = IncrementalCorrelation(
    data,
    target,
    storage,
    iterations=10,
    alpha=0.1,
    drop_discrete=False,
)

In [5]:
# One run of the bivariate relevancy update; results are written to `storage`.
# NOTE(review): presumably scores each single feature against the target -- confirm in hics.
correlation.update_bivariate_relevancies(runs=1)

In [6]:
# Ten runs of the redundancy update with k=5.
# NOTE(review): `k` presumably controls the sampled subset size -- confirm in hics.
correlation.update_redundancies(k=5, runs=10)

In [7]:
# Fifty runs of the multivariate relevancy update with k=3.
# NOTE(review): feature subsets appear to be sampled randomly and no seed is
# set in this notebook, so results may differ between runs -- confirm in hics.
correlation.update_multivariate_relevancies(k=3, runs=50)

In [8]:
# Same update as the previous cell, but with feature '1' passed as fixed.
# NOTE(review): presumably every evaluated subset then contains '1' -- confirm in hics.
correlation.update_multivariate_relevancies(k=3, runs=50, fixed_features=['1'])

In [9]:
# Dump the accumulated relevancy table as plain text: one row per feature
# subset (tuple index) with its `relevancy` score and `iteration` count.
# The whole table is printed, so the output grows with every update_* run.
print(storage.relevancies)

              relevancy  iteration
(3,)           0.108662        3.0
(5,)           0.000119        1.0
(4,)           0.000189        2.0
(8,)           0.000125        3.0
(6,)           0.000171        5.0
(1,)           0.512926        2.0
(7,)           0.000121        2.0
(9,)           0.000161        3.0
(2,)           0.276917        2.0
(1, 3, 4, 6)   0.162505        2.0
(4, 8)         0.000149        1.0
(5, 6, 8, 9)   0.000254        1.0
(2, 7, 8, 9)   0.072843        1.0
(1, 6)         0.309141        2.0
(3, 6, 7, 8)   0.024337        1.0
(3, 5, 8)      0.046804        1.0
(3, 6, 7)      0.049139        1.0
(6, 7, 9)      0.000225        1.0
(1, 2, 7, 8)   0.193907        1.0
(3, 4, 6, 7)   0.033060        1.0
(1, 4)         0.319616        6.0
(2, 3, 4, 8)   0.082288        1.0
(3, 4)         0.077388        1.0
(2, 3, 5, 6)   0.082271        1.0
(1, 5)         0.323983        2.0
(5, 6, 8)      0.000176        1.0
(2, 6, 8)      0.131255        1.0
(1, 2, 3, 6)   0.193

In [10]:
# Show the pairwise redundancy matrix followed by its weight matrix, each on
# its own line block (equivalent to two consecutive print calls).
print(
    storage.redundancies.redundancy,
    storage.redundancies.weight,
    sep='\n',
)

          1    2        3         4    5    6        7    8    9
1  0.000000  0.0  0.04994  0.000012  0.0  0.0  0.00000  0.0  0.0
2  0.000000  0.0  0.00000  0.000000  0.0  0.0  0.00000  0.0  0.0
3  0.049940  0.0  0.00000  0.000000  0.0  0.0  0.02497  0.0  0.0
4  0.000012  0.0  0.00000  0.000000  0.0  0.0  0.00000  0.0  0.0
5  0.000000  0.0  0.00000  0.000000  0.0  0.0  0.00000  0.0  0.0
6  0.000000  0.0  0.00000  0.000000  0.0  0.0  0.00000  0.0  0.0
7  0.000000  0.0  0.02497  0.000000  0.0  0.0  0.00000  0.0  0.0
8  0.000000  0.0  0.00000  0.000000  0.0  0.0  0.00000  0.0  0.0
9  0.000000  0.0  0.00000  0.000000  0.0  0.0  0.00000  0.0  0.0
   1  2  3  4  5  6  7  8  9
1  0  0  1  2  0  0  0  0  2
2  0  0  0  2  1  1  0  2  2
3  1  0  0  0  0  0  2  0  2
4  2  2  0  0  2  0  1  1  1
5  0  1  0  2  0  0  0  0  1
6  0  1  0  0  0  0  0  1  2
7  0  0  2  1  0  0  0  0  1
8  0  2  0  1  0  1  0  0  1
9  2  2  2  1  1  2  1  1  0


In [13]:
# Print the dict form of every stored slice collection, one per feature set.
# `feature_set` (the mapping key) is not used in the loop body, so the printed
# dicts are not labelled with the features they belong to.
for feature_set, slices in storage.get_slices().items():
    dict_version = slices.to_dict()
    # Passing '\n' as a second argument leaves a blank line after each dict.
    print(dict_version, '\n')

{'continuous': {'3': {'to_value': [1.4685489486989731, 2.7905554810404167, 6.2288893543681025, 5.02768572213066, 4.3570689215987], 'from_value': [-1.1187320237206304, 1.0325822072568402, 3.0434688076999312, 2.7005011250007582, 2.31432299582693]}, '1': {'to_value': [2.294323441911644, 2.2012351383229776, 5.473807241973522, 2.8100685251322237, 4.315884461434466], 'from_value': [0.4826991324656211, 0.37907526158235183, 3.685238932452728, 1.3594653316695158, 2.6027450120570625]}}, 'scores': [0.40620418078279985, 0.39792665286114626, 0.3927781842555878, 0.33650239360721496, 0.32840053059734703], 'threshold': 0.36, 'to_keep': 5, 'categorical': {}} 

{'continuous': {'1': {'to_value': [2.928128513379808, 5.515641287936272, 3.2986593765612535, 4.216304288739215, 4.48192385498541], 'from_value': [0.488924822937918, 3.0639409778730062, 0.8771307424967378, 1.7678142365877478, 2.2098428178647174]}, '2': {'to_value': [6.786506692119727, 4.203803564934795, 4.599593986099586, 4.20535590801027, 6.00075

In [14]:
# Print the to_output() form of every stored slice collection: per the output
# below, a list of records with a 'deviation' score and per-feature
# 'from_value' / 'to_value' bounds.
# `feature_set` (the mapping key) is not used in the loop body.
for feature_set, slices in storage.get_slices().items():
    output = slices.to_output()
    # Passing '\n' as a second argument leaves a blank line after each list.
    print(output, '\n')

[{'deviation': 0.40620418078279985, 'features': {'3': {'to_value': 1.4685489486989731, 'from_value': -1.1187320237206304}, '1': {'to_value': 2.2943234419116441, 'from_value': 0.48269913246562107}}}, {'deviation': 0.39792665286114626, 'features': {'3': {'to_value': 2.7905554810404167, 'from_value': 1.0325822072568402}, '1': {'to_value': 2.2012351383229776, 'from_value': 0.37907526158235183}}}, {'deviation': 0.3927781842555878, 'features': {'3': {'to_value': 6.2288893543681025, 'from_value': 3.0434688076999312}, '1': {'to_value': 5.473807241973522, 'from_value': 3.6852389324527279}}}, {'deviation': 0.33650239360721496, 'features': {'3': {'to_value': 5.0276857221306601, 'from_value': 2.7005011250007582}, '1': {'to_value': 2.8100685251322237, 'from_value': 1.3594653316695158}}}, {'deviation': 0.32840053059734703, 'features': {'3': {'to_value': 4.3570689215987004, 'from_value': 2.31432299582693}, '1': {'to_value': 4.3158844614344662, 'from_value': 2.6027450120570625}}}] 

[{'deviation': 0.3