# Import libraries

In [1]:
import os
import warnings

import numpy as np
import pandas as pd

import json

import re

from tqdm import tqdm

from itertools import combinations

In [2]:
warnings.filterwarnings("ignore")

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_colwidth', None)

In [3]:
# Output directory for this notebook's results (assumed from the variable name;
# it is not used in the visible part of the notebook — TODO confirm).
path_to_save = './../results/03a_features'

# Loading data

In [4]:
# Both extracted-feature tables live in the same processed-data directory.
processed_data_dir = './../data/processed_data'

path_to_ftir_data_3800_2500 = f'{processed_data_dir}/dataset_3800_2500_extracted.csv'
path_to_ftir_data_1900_800 = f'{processed_data_dir}/dataset_1900_800_extracted.csv'

## FTIR-features (3800-2500 cm$^{-1}$)

In [5]:
# Read the 3800-2500 feature table: the file is ';'-separated and its first
# column is used as the row index.
dataset_3800_2500 = pd.read_csv(path_to_ftir_data_3800_2500, sep=';', index_col=0)

# Sanity check: print the dimensions and display the first three rows.
print(dataset_3800_2500.shape)
dataset_3800_2500.head(3)

(74, 12)


Unnamed: 0,$I_{2928}$,$I_{3352}$,$I_{3726}$,$\overline{I}_{3800\text{–}2500}$,"$\Sigma I_{p,\ 3800\text{–}2500}$",$\overline{I}_{2928 \pm 50}$,$\overline{I}_{3352 \pm 50}$,$\overline{I}_{3726 \pm 50}$,Split,Fraction_hue,Fraction_grouped_hue,Class
division_1_size_bulk,0.49172,1.0,0.088051,0.526201,1.57977,0.494443,0.983311,0.075225,Train,$> 0$,$d > 5$,2
division_1_size_5,0.406937,1.0,0.028,0.477909,1.434937,0.396989,0.980473,0.034739,Train,$< 5$,$2 < d \leq 5$,1
division_1_size_3,0.618445,1.0,0.078392,0.535789,1.696837,0.581733,0.980981,0.066886,Train,$< 3$,$2 < d \leq 5$,1


## FTIR-features (1900-800 cm$^{-1}$)

In [6]:
# Read the 1900-800 feature table: the file is ';'-separated and its first
# column is used as the row index.
dataset_1900_800 = pd.read_csv(path_to_ftir_data_1900_800, sep=';', index_col=0)

# Sanity check: print the dimensions and display the first three rows.
print(dataset_1900_800.shape)
dataset_1900_800.head(3)

(74, 26)


Unnamed: 0,$I_{872}$,$I_{1085}$,$I_{1182}$,$I_{1241}$,$I_{1394}$,$I_{1612}$,$I_{1725}$,$\overline{I}_{1900\text{–}800}$,"$\Sigma I_{p,\ 1900\text{–}800}$",$\overline{I}_{1900\text{–}1500}$,"$\Sigma I_{p,\ 1900\text{–}1500}$",$\overline{I}_{1500\text{–}1300}$,"$\Sigma I_{p,\ 1500\text{–}1300}$",$\overline{I}_{1300\text{–}800}$,"$\Sigma I_{p,\ 1300\text{–}800}$",$\overline{I}_{872 \pm 25}$,$\overline{I}_{1085 \pm 25}$,$\overline{I}_{1182 \pm 25}$,$\overline{I}_{1241 \pm 25}$,$\overline{I}_{1394 \pm 25}$,$\overline{I}_{1612 \pm 25}$,$\overline{I}_{1725 \pm 25}$,Split,Fraction_hue,Fraction_grouped_hue,Class
division_1_size_bulk,0.0478,0.681294,0.388923,0.455066,0.760582,1.0,0.423273,0.424162,3.756938,0.426619,1.423273,0.571735,0.760582,0.363338,1.573083,0.047238,0.658256,0.399497,0.447733,0.73886,0.936425,0.41747,Train,$> 0$,$d > 5$,2
division_1_size_5,0.091438,0.606138,0.416593,0.427988,0.687458,1.0,0.384475,0.37873,3.61409,0.378004,1.384475,0.502368,0.687458,0.329999,1.542157,0.092933,0.571531,0.426105,0.424918,0.667947,0.926718,0.378463,Train,$< 5$,$2 < d \leq 5$,1
division_1_size_3,0.108127,0.836176,0.514367,0.40095,0.696553,1.0,0.368753,0.436266,3.924926,0.429362,1.368753,0.523513,0.696553,0.406988,1.85962,0.111725,0.776148,0.526741,0.401435,0.678774,0.951396,0.369746,Train,$< 3$,$2 < d \leq 5$,1


# Feature engineering

## Data wrangling

In [7]:
# Everything except the split/label metadata columns is treated as a feature.
metadata_columns = ['Split', 'Fraction_hue', 'Fraction_grouped_hue', 'Class']
features_3800_2500 = dataset_3800_2500.drop(columns=metadata_columns, errors='ignore')

print(features_3800_2500.shape)
features_3800_2500.head(3)

(74, 8)


Unnamed: 0,$I_{2928}$,$I_{3352}$,$I_{3726}$,$\overline{I}_{3800\text{–}2500}$,"$\Sigma I_{p,\ 3800\text{–}2500}$",$\overline{I}_{2928 \pm 50}$,$\overline{I}_{3352 \pm 50}$,$\overline{I}_{3726 \pm 50}$
division_1_size_bulk,0.49172,1.0,0.088051,0.526201,1.57977,0.494443,0.983311,0.075225
division_1_size_5,0.406937,1.0,0.028,0.477909,1.434937,0.396989,0.980473,0.034739
division_1_size_3,0.618445,1.0,0.078392,0.535789,1.696837,0.581733,0.980981,0.066886


In [8]:
# Everything except the split/label metadata columns is treated as a feature.
metadata_columns = ['Split', 'Fraction_hue', 'Fraction_grouped_hue', 'Class']
features_1900_800 = dataset_1900_800.drop(columns=metadata_columns, errors='ignore')

print(features_1900_800.shape)
features_1900_800.head(3)

(74, 22)


Unnamed: 0,$I_{872}$,$I_{1085}$,$I_{1182}$,$I_{1241}$,$I_{1394}$,$I_{1612}$,$I_{1725}$,$\overline{I}_{1900\text{–}800}$,"$\Sigma I_{p,\ 1900\text{–}800}$",$\overline{I}_{1900\text{–}1500}$,"$\Sigma I_{p,\ 1900\text{–}1500}$",$\overline{I}_{1500\text{–}1300}$,"$\Sigma I_{p,\ 1500\text{–}1300}$",$\overline{I}_{1300\text{–}800}$,"$\Sigma I_{p,\ 1300\text{–}800}$",$\overline{I}_{872 \pm 25}$,$\overline{I}_{1085 \pm 25}$,$\overline{I}_{1182 \pm 25}$,$\overline{I}_{1241 \pm 25}$,$\overline{I}_{1394 \pm 25}$,$\overline{I}_{1612 \pm 25}$,$\overline{I}_{1725 \pm 25}$
division_1_size_bulk,0.0478,0.681294,0.388923,0.455066,0.760582,1.0,0.423273,0.424162,3.756938,0.426619,1.423273,0.571735,0.760582,0.363338,1.573083,0.047238,0.658256,0.399497,0.447733,0.73886,0.936425,0.41747
division_1_size_5,0.091438,0.606138,0.416593,0.427988,0.687458,1.0,0.384475,0.37873,3.61409,0.378004,1.384475,0.502368,0.687458,0.329999,1.542157,0.092933,0.571531,0.426105,0.424918,0.667947,0.926718,0.378463
division_1_size_3,0.108127,0.836176,0.514367,0.40095,0.696553,1.0,0.368753,0.436266,3.924926,0.429362,1.368753,0.523513,0.696553,0.406988,1.85962,0.111725,0.776148,0.526741,0.401435,0.678774,0.951396,0.369746


In [9]:
# Put the two feature tables side by side (column-wise concat aligns rows on the index).
initial_features_df = pd.concat([features_3800_2500, features_1900_800], axis=1)

# Sanity check: print the dimensions and display the first three rows.
print(initial_features_df.shape)
initial_features_df.head(3)

(74, 30)


Unnamed: 0,$I_{2928}$,$I_{3352}$,$I_{3726}$,$\overline{I}_{3800\text{–}2500}$,"$\Sigma I_{p,\ 3800\text{–}2500}$",$\overline{I}_{2928 \pm 50}$,$\overline{I}_{3352 \pm 50}$,$\overline{I}_{3726 \pm 50}$,$I_{872}$,$I_{1085}$,$I_{1182}$,$I_{1241}$,$I_{1394}$,$I_{1612}$,$I_{1725}$,$\overline{I}_{1900\text{–}800}$,"$\Sigma I_{p,\ 1900\text{–}800}$",$\overline{I}_{1900\text{–}1500}$,"$\Sigma I_{p,\ 1900\text{–}1500}$",$\overline{I}_{1500\text{–}1300}$,"$\Sigma I_{p,\ 1500\text{–}1300}$",$\overline{I}_{1300\text{–}800}$,"$\Sigma I_{p,\ 1300\text{–}800}$",$\overline{I}_{872 \pm 25}$,$\overline{I}_{1085 \pm 25}$,$\overline{I}_{1182 \pm 25}$,$\overline{I}_{1241 \pm 25}$,$\overline{I}_{1394 \pm 25}$,$\overline{I}_{1612 \pm 25}$,$\overline{I}_{1725 \pm 25}$
division_1_size_bulk,0.49172,1.0,0.088051,0.526201,1.57977,0.494443,0.983311,0.075225,0.0478,0.681294,0.388923,0.455066,0.760582,1.0,0.423273,0.424162,3.756938,0.426619,1.423273,0.571735,0.760582,0.363338,1.573083,0.047238,0.658256,0.399497,0.447733,0.73886,0.936425,0.41747
division_1_size_5,0.406937,1.0,0.028,0.477909,1.434937,0.396989,0.980473,0.034739,0.091438,0.606138,0.416593,0.427988,0.687458,1.0,0.384475,0.37873,3.61409,0.378004,1.384475,0.502368,0.687458,0.329999,1.542157,0.092933,0.571531,0.426105,0.424918,0.667947,0.926718,0.378463
division_1_size_3,0.618445,1.0,0.078392,0.535789,1.696837,0.581733,0.980981,0.066886,0.108127,0.836176,0.514367,0.40095,0.696553,1.0,0.368753,0.436266,3.924926,0.429362,1.368753,0.523513,0.696553,0.406988,1.85962,0.111725,0.776148,0.526741,0.401435,0.678774,0.951396,0.369746


## Functions for processing

### Spectral region determination

In [10]:
def extract_center(name: str):
    """Return the first run of digits found in ``name`` as an ``int``.

    Returns ``None`` when ``name`` contains no digits at all.
    """
    first_number = re.search(r"\d+", name)
    if first_number is None:
        return None
    return int(first_number.group())

def get_interval_tag(center):
    """Map a number to the tag of the spectral region that contains it.

    ``"high"`` covers 2500..3800 and ``"low"`` covers 800..1900 (bounds
    inclusive).  ``None`` is returned for ``None`` input and for any value
    outside both ranges.
    """
    if center is not None:
        for tag, lower, upper in (("high", 2500, 3800), ("low", 800, 1900)):
            if lower <= center <= upper:
                return tag
    return None

### Feature construction

In [11]:
def generate_transformed_features(data, feature_list, tolerance=50, zero_substitute=1e-8, powers=(-1,)):
    """Build log, power and pairwise-ratio features from the given columns.

    For every column in ``feature_list`` the result contains ``log(x)`` and
    ``x ** p`` for each ``p`` in ``powers``.  For every unordered pair
    ``(f1, f2)`` (in ``feature_list`` order) that passes the filters below it
    also contains the ratio ``f1 / f2``.  Exact zeros are replaced by
    ``zero_substitute`` before any transform, and infinite results of the power
    and ratio transforms are stored as NaN.

    A pair is skipped when any of the following holds:

    * near-duplicate: the first integers found in the two names differ by at
      most ``tolerance`` and at least one name is an averaged feature
      (contains ``\\overline{I}`` or ``±``);
    * one name is an averaged feature and the other a ``\\Sigma I_{p,`` sum;
    * the two names get different region tags ("high" for 2500..3800, "low"
      for 800..1900, no tag otherwise) — two untagged names are not skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table; must contain every column named in ``feature_list``.
    feature_list : iterable of str
        Column names (LaTeX labels, optionally wrapped in ``$...$``).
    tolerance : int, default 50
        Maximum distance between the leading numbers of two names for the
        near-duplicate rule.
    zero_substitute : float, default 1e-8
        Value used in place of exact zeros.
    powers : iterable of numbers, default ``(-1,)``
        Exponents for the per-column power features.

    Returns
    -------
    pandas.DataFrame
        New frame indexed like ``data`` whose columns are LaTeX labels of the
        engineered features (per-column transforms first, then ratios).
    """

    def clean(col):
        # Drop the surrounding "$...$" so the name can be embedded in a new label.
        if col.startswith('$') and col.endswith('$'):
            col = col[1:-1]
        return col.replace("Σ", r"\Sigma")

    def wrap_log(col):
        return fr"$\log {clean(col)}$"

    def wrap_pow(col, power):
        return fr"$({clean(col)})^{{{power}}}$"

    def wrap_div(col1, col2):
        return fr"$\frac{{{clean(col1)}}}{{{clean(col2)}}}$"

    # Local copies of the region helpers keep this function self-contained.
    def extract_center(name):
        nums = re.findall(r'\d+', name)
        return int(nums[0]) if nums else None

    def get_interval_tag(center):
        if center is None:
            return None
        if 2500 <= center <= 3800:
            return "high"
        elif 800 <= center <= 1900:
            return "low"
        return None

    def is_averaged(name):
        return r'\overline{I}' in name or '±' in name

    def is_near_duplicate(f1, f2):
        c1, c2 = extract_center(f1), extract_center(f2)
        if c1 is None or c2 is None:
            return False
        return abs(c1 - c2) <= tolerance and (is_averaged(f1) or is_averaged(f2))

    def get_interval_kind(name):
        if is_averaged(name):
            return "average"
        if r"\Sigma I_{p," in name:
            return "sum"
        return None

    def to_safe(series):
        # Integer columns would make np.power(x, -1) raise
        # "Integers to negative integer powers are not allowed".
        if not pd.api.types.is_float_dtype(series):
            series = series.astype(float)
        return series.replace(0, zero_substitute)

    feature_list = list(feature_list)
    # Zero substitution is done once per column instead of once per pair.
    safe = {col: to_safe(data[col]) for col in feature_list}

    # Columns are collected first and assembled once at the end; inserting them
    # one by one into a DataFrame fragments it.
    new_columns = {}
    infinities = [np.inf, -np.inf]

    # Scoped replacement for the former global (and unreachable) np.seterr call:
    # log/power/division warnings are silenced only inside this block.
    with np.errstate(divide='ignore', invalid='ignore'):
        for col in feature_list:
            base_safe = safe[col]
            new_columns[wrap_log(col)] = np.log(base_safe)
            for power in powers:
                transformed = np.power(base_safe, power)
                new_columns[wrap_pow(col, power)] = transformed.replace(infinities, np.nan)

        for f1, f2 in combinations(feature_list, 2):
            if is_near_duplicate(f1, f2):
                continue

            if {get_interval_kind(f1), get_interval_kind(f2)} == {"average", "sum"}:
                continue

            if get_interval_tag(extract_center(f1)) != get_interval_tag(extract_center(f2)):
                continue

            ratio = safe[f1] / safe[f2]
            new_columns[wrap_div(f1, f2)] = ratio.replace(infinities, np.nan)

    if not new_columns:
        return pd.DataFrame(index=data.index)
    return pd.DataFrame(new_columns, index=data.index)


### Feature classification

In [12]:
def classify_feature_type(base_columns, columns):
    """Group feature names by origin, source type and spectral region.

    Names containing ``"Unnamed"`` are dropped from both inputs first.  Each
    remaining name is assigned a key ``"<origin>_features_<source>_<region>"``:

    * origin: ``"base"`` for names in ``base_columns``, ``"engineered"`` for
      names in ``columns`` that are not in ``base_columns``;
    * source: ``"intervals"`` if the name contains ``\\overline{I}_{`` or
      ``\\Sigma I_{p,``, otherwise ``"peaks"``;
    * region: ``"3800_2500"`` if any 3-4 digit number in the name lies in
      2500..3800, else ``"1900_800"`` if any lies in 800..1900, else
      ``"unknown"``.

    Returns a dict mapping each key to its list of names, plus the two
    aggregate keys ``"base_features"`` and ``"engineered_features"``.
    """
    # Checked in this order: the first region containing any number wins.
    region_bounds = (("3800_2500", 2500, 3800), ("1900_800", 800, 1900))

    def strip_math(label):
        inner = label[1:-1] if label.startswith('$') and label.endswith('$') else label
        return inner.replace("Σ", r"\Sigma")

    def group_key(label, origin):
        text = strip_math(label)

        if r"\overline{I}_{" in text or r"\Sigma I_{p," in text:
            source = "intervals"
        else:
            source = "peaks"

        numbers = [int(token) for token in re.findall(r"\d{3,4}", text)]
        region = "unknown"
        for region_name, lower, upper in region_bounds:
            if any(lower <= number <= upper for number in numbers):
                region = region_name
                break

        return f"{origin}_features_{source}_{region}"

    base_only = [name for name in base_columns if "Unnamed" not in name]
    candidates = [name for name in columns if "Unnamed" not in name]
    derived = [name for name in candidates if name not in base_only]

    grouped = {}
    for origin, names in (("base", base_only), ("engineered", derived)):
        for name in names:
            grouped.setdefault(group_key(name, origin), []).append(name)

    grouped["base_features"] = base_only
    grouped["engineered_features"] = derived

    return grouped

## Calculation process

In [13]:
all_features = initial_features_df.columns.tolist()

# Tag every column once by the first number in its name, then split by tag.
region_by_feature = {name: get_interval_tag(extract_center(name)) for name in all_features}
features_1900_800_list = [name for name in all_features if region_by_feature[name] == "low"]
features_3800_2500_list = [name for name in all_features if region_by_feature[name] == "high"]

print("\n".join([
    "Число фичей:",
    f"(*) 1900–800: {len(features_1900_800_list)}",
    f"(*) 3800–2500: {len(features_3800_2500_list)}",
]))

Число фичей:
(*) 1900–800: 22
(*) 3800–2500: 8


### 3800—2500 cm$^{-1}$

In [14]:
# Engineer log / inverse / ratio features for the 3800-2500 columns.
# tolerance=0: a pair is treated as a near-duplicate only when both names start
# with the same number and at least one of them is an averaged feature.
engineered_3800_2500 = generate_transformed_features(
    data=features_3800_2500, 
    feature_list=features_3800_2500_list, 
    tolerance=0
)

In [15]:
# Missing-value count per engineered column; keep only affected columns, worst first.
nan_df = engineered_3800_2500.isna().sum()
nan_summary = nan_df.loc[nan_df.gt(0)].sort_values(ascending=False)

print(f"Столбцов с NaN-values: {len(nan_summary)}")
display(nan_summary.head(10))

Столбцов с NaN-values: 0


Series([], dtype: int64)

In [16]:
# Attach the engineered columns to the base features, matching rows on the index.
all_features_3800_2500 = features_3800_2500.merge(
    engineered_3800_2500,
    how='inner',
    left_index=True,
    right_index=True,
)

print(all_features_3800_2500.shape)
all_features_3800_2500.head(3)

(74, 45)


Unnamed: 0,$I_{2928}$,$I_{3352}$,$I_{3726}$,$\overline{I}_{3800\text{–}2500}$,"$\Sigma I_{p,\ 3800\text{–}2500}$",$\overline{I}_{2928 \pm 50}$,$\overline{I}_{3352 \pm 50}$,$\overline{I}_{3726 \pm 50}$,$\log I_{2928}$,$(I_{2928})^{-1}$,$\log I_{3352}$,$(I_{3352})^{-1}$,$\log I_{3726}$,$(I_{3726})^{-1}$,$\log \overline{I}_{3800\text{–}2500}$,$(\overline{I}_{3800\text{–}2500})^{-1}$,"$\log \Sigma I_{p,\ 3800\text{–}2500}$","$(\Sigma I_{p,\ 3800\text{–}2500})^{-1}$",$\log \overline{I}_{2928 \pm 50}$,$(\overline{I}_{2928 \pm 50})^{-1}$,$\log \overline{I}_{3352 \pm 50}$,$(\overline{I}_{3352 \pm 50})^{-1}$,$\log \overline{I}_{3726 \pm 50}$,$(\overline{I}_{3726 \pm 50})^{-1}$,$\frac{I_{2928}}{I_{3352}}$,$\frac{I_{2928}}{I_{3726}}$,$\frac{I_{2928}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{2928}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{2928}}{\overline{I}_{3352 \pm 50}}$,$\frac{I_{2928}}{\overline{I}_{3726 \pm 50}}$,$\frac{I_{3352}}{I_{3726}}$,$\frac{I_{3352}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{3352}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{3352}}{\overline{I}_{2928 \pm 50}}$,$\frac{I_{3352}}{\overline{I}_{3726 \pm 50}}$,$\frac{I_{3726}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{3726}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{3726}}{\overline{I}_{2928 \pm 50}}$,$\frac{I_{3726}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{2928 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{3726 \pm 50}}$,$\frac{\overline{I}_{2928 \pm 50}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{2928 \pm 50}}{\overline{I}_{3726 \pm 50}}$,$\frac{\overline{I}_{3352 \pm 50}}{\overline{I}_{3726 \pm 50}}$
division_1_size_bulk,0.49172,1.0,0.088051,0.526201,1.57977,0.494443,0.983311,0.075225,-0.709847,2.033679,0.0,1.0,-2.429844,11.357112,-0.642072,1.900414,0.457279,0.633003,-0.704324,2.022478,-0.01683,1.016972,-2.587267,13.29339,0.49172,5.584514,0.934471,0.31126,0.500065,6.53662,11.357112,1.900414,0.633003,2.022478,13.29339,0.167333,0.055736,0.17808,0.089545,1.06423,0.535132,6.994997,0.502835,6.572822,13.07154
division_1_size_5,0.406937,1.0,0.028,0.477909,1.434937,0.396989,0.980473,0.034739,-0.899098,2.457385,0.0,1.0,-3.575539,35.713875,-0.738335,2.092448,0.361121,0.696895,-0.923846,2.51896,-0.01972,1.019916,-3.359889,28.786,0.406937,14.533284,0.851494,0.283592,0.415041,11.714078,35.713875,2.092448,0.696895,2.51896,28.786,0.058589,0.019513,0.070532,0.028558,1.203834,0.487427,13.757094,0.404896,11.427734,28.223893
division_1_size_3,0.618445,1.0,0.078392,0.535789,1.696837,0.581733,0.980981,0.066886,-0.480547,1.616959,0.0,1.0,-2.546033,12.756398,-0.624015,1.866407,0.528766,0.589332,-0.541743,1.719,-0.019202,1.019388,-2.704765,14.950801,0.618445,7.88913,1.15427,0.364469,0.630435,9.246247,12.756398,1.866407,0.589332,1.719,14.950801,0.146311,0.046199,0.134756,0.079912,0.921021,0.546177,8.010474,0.593012,8.697381,14.666449


In [17]:
# Group the 3800-2500 column names: base vs. engineered, peaks vs. intervals, by region.
base_columns = features_3800_2500.columns.tolist()
columns = all_features_3800_2500.columns.tolist()
features_description_dict_3800_2500 = classify_feature_type(base_columns=base_columns, columns=columns)

# Report how many features fell into each group.
for feature_type, feature_lst in features_description_dict_3800_2500.items():
    print(f'(*) {feature_type}: {len(feature_lst)}')

(*) base_features_peaks_3800_2500: 3
(*) base_features_intervals_3800_2500: 5
(*) engineered_features_peaks_3800_2500: 9
(*) engineered_features_intervals_3800_2500: 28
(*) base_features: 8
(*) engineered_features: 37


### 1900—800 cm$^{-1}$

In [18]:
# Engineer log / inverse / ratio features for the 1900-800 columns.
# tolerance=0: a pair is treated as a near-duplicate only when both names start
# with the same number and at least one of them is an averaged feature.
engineered_1900_800 = generate_transformed_features(
    data=features_1900_800, 
    feature_list=features_1900_800_list, 
    tolerance=0
)

In [19]:
# Missing-value count per engineered column; keep only affected columns, worst first.
nan_df = engineered_1900_800.isna().sum()
nan_summary = nan_df.loc[nan_df.gt(0)].sort_values(ascending=False)

print(f"Столбцов с NaN-values: {len(nan_summary)}")
display(nan_summary.head(10))

Столбцов с NaN-values: 0


Series([], dtype: int64)

In [20]:
# Attach the engineered columns to the base features, matching rows on the index.
all_features_1900_800 = features_1900_800.merge(
    engineered_1900_800,
    how='inner',
    left_index=True,
    right_index=True,
)

print(all_features_1900_800.shape)
all_features_1900_800.head(3)

(74, 245)


Unnamed: 0,$I_{872}$,$I_{1085}$,$I_{1182}$,$I_{1241}$,$I_{1394}$,$I_{1612}$,$I_{1725}$,$\overline{I}_{1900\text{–}800}$,"$\Sigma I_{p,\ 1900\text{–}800}$",$\overline{I}_{1900\text{–}1500}$,"$\Sigma I_{p,\ 1900\text{–}1500}$",$\overline{I}_{1500\text{–}1300}$,"$\Sigma I_{p,\ 1500\text{–}1300}$",$\overline{I}_{1300\text{–}800}$,"$\Sigma I_{p,\ 1300\text{–}800}$",$\overline{I}_{872 \pm 25}$,$\overline{I}_{1085 \pm 25}$,$\overline{I}_{1182 \pm 25}$,$\overline{I}_{1241 \pm 25}$,$\overline{I}_{1394 \pm 25}$,$\overline{I}_{1612 \pm 25}$,$\overline{I}_{1725 \pm 25}$,$\log I_{872}$,$(I_{872})^{-1}$,$\log I_{1085}$,$(I_{1085})^{-1}$,$\log I_{1182}$,$(I_{1182})^{-1}$,$\log I_{1241}$,$(I_{1241})^{-1}$,$\log I_{1394}$,$(I_{1394})^{-1}$,$\log I_{1612}$,$(I_{1612})^{-1}$,$\log I_{1725}$,$(I_{1725})^{-1}$,$\log \overline{I}_{1900\text{–}800}$,$(\overline{I}_{1900\text{–}800})^{-1}$,"$\log \Sigma I_{p,\ 1900\text{–}800}$","$(\Sigma I_{p,\ 1900\text{–}800})^{-1}$",$\log \overline{I}_{1900\text{–}1500}$,$(\overline{I}_{1900\text{–}1500})^{-1}$,"$\log \Sigma I_{p,\ 1900\text{–}1500}$","$(\Sigma I_{p,\ 1900\text{–}1500})^{-1}$",$\log \overline{I}_{1500\text{–}1300}$,$(\overline{I}_{1500\text{–}1300})^{-1}$,"$\log \Sigma I_{p,\ 1500\text{–}1300}$","$(\Sigma I_{p,\ 1500\text{–}1300})^{-1}$",$\log \overline{I}_{1300\text{–}800}$,$(\overline{I}_{1300\text{–}800})^{-1}$,"$\log \Sigma I_{p,\ 1300\text{–}800}$","$(\Sigma I_{p,\ 1300\text{–}800})^{-1}$",$\log \overline{I}_{872 \pm 25}$,$(\overline{I}_{872 \pm 25})^{-1}$,$\log \overline{I}_{1085 \pm 25}$,$(\overline{I}_{1085 \pm 25})^{-1}$,$\log \overline{I}_{1182 \pm 25}$,$(\overline{I}_{1182 \pm 25})^{-1}$,$\log \overline{I}_{1241 \pm 25}$,$(\overline{I}_{1241 \pm 25})^{-1}$,$\log \overline{I}_{1394 \pm 25}$,$(\overline{I}_{1394 \pm 25})^{-1}$,$\log \overline{I}_{1612 \pm 25}$,$(\overline{I}_{1612 \pm 25})^{-1}$,$\log \overline{I}_{1725 \pm 25}$,$(\overline{I}_{1725 \pm 
25})^{-1}$,$\frac{I_{872}}{I_{1085}}$,$\frac{I_{872}}{I_{1182}}$,$\frac{I_{872}}{I_{1241}}$,$\frac{I_{872}}{I_{1394}}$,$\frac{I_{872}}{I_{1612}}$,$\frac{I_{872}}{I_{1725}}$,$\frac{I_{872}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{872}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{872}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{872}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{872}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1085}}{I_{1182}}$,$\frac{I_{1085}}{I_{1241}}$,$\frac{I_{1085}}{I_{1394}}$,$\frac{I_{1085}}{I_{1612}}$,$\frac{I_{1085}}{I_{1725}}$,$\frac{I_{1085}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1085}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1085}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1085}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1085}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1182}}{I_{1241}}$,$\frac{I_{1182}}{I_{1394}}$,$\frac{I_{1182}}{I_{1612}}$,$\frac{I_{1182}}{I_{1725}}$,$\frac{I_{1182}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1182}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 
1900\text{–}1500}}$",$\frac{I_{1182}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1182}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1182}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1241}}{I_{1394}}$,$\frac{I_{1241}}{I_{1612}}$,$\frac{I_{1241}}{I_{1725}}$,$\frac{I_{1241}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1241}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1241}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1241}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1241}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1394}}{I_{1612}}$,$\frac{I_{1394}}{I_{1725}}$,$\frac{I_{1394}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1394}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1394}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1394}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1394}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1612 \pm 
25}}$,$\frac{I_{1394}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1612}}{I_{1725}}$,$\frac{I_{1612}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1612}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1612}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1612}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1612}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1725}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1725}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1725}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1725}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1500\text{–}1300}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1612 \pm 
25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1900\text{–}1500}}$","$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1500\text{–}1300}}$","$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1500\text{–}1300}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1900\text{–}1500}}{\Sigma I_{p,\ 1500\text{–}1300}}$","$\frac{\Sigma I_{p,\ 1900\text{–}1500}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1500\text{–}1300}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1241 \pm 
25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1394 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1394 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1612 \pm 25}}{\overline{I}_{1725 \pm 25}}$
division_1_size_bulk,0.0478,0.681294,0.388923,0.455066,0.760582,1.0,0.423273,0.424162,3.756938,0.426619,1.423273,0.571735,0.760582,0.363338,1.573083,0.047238,0.658256,0.399497,0.447733,0.73886,0.936425,0.41747,-3.040727,20.920439,-0.383761,1.467794,-0.944375,2.571206,-0.787312,2.197482,-0.273672,1.314783,0.0,1.0,-0.859739,2.362543,-0.857641,2.357591,1.323604,0.266174,-0.851863,2.34401,0.352959,0.702606,-0.55908,1.749062,-0.273672,1.314783,-1.012422,2.752259,0.453038,0.635694,-3.052551,21.169287,-0.418161,1.519165,-0.917549,2.503146,-0.803558,2.233473,-0.302647,1.353436,-0.065686,1.067891,-0.873542,2.395381,0.070161,0.122904,0.10504,0.062847,0.0478,0.11293,0.112693,0.012723,0.112044,0.033585,0.083605,0.062847,0.131558,0.030386,0.072616,0.119651,0.10676,0.064694,0.051045,0.1145,1.751748,1.497132,0.895754,0.681294,1.609587,1.606214,0.181343,1.596961,0.478681,1.191626,0.895754,1.875098,0.433095,14.422514,1.705379,1.521652,0.922089,0.727548,1.631959,0.85465,0.511349,0.388923,0.918847,0.916921,0.103521,0.911639,0.273259,0.68025,0.511349,1.070416,0.247236,8.233214,0.590838,0.868648,0.526382,0.415327,0.931618,0.598314,0.455066,1.075114,1.072861,0.121127,1.06668,0.319732,0.795939,0.598314,1.25246,0.289283,9.633431,0.691321,1.139098,0.615903,0.485961,1.090057,0.760582,1.796907,1.793141,0.202447,1.782811,0.534389,1.330305,1.0,2.093317,0.483497,16.100969,1.155449,1.903847,1.698738,0.812218,1.821882,2.362543,2.357591,0.266174,2.34401,0.702606,1.749062,1.314783,2.752259,0.635694,21.169287,1.519165,2.503146,2.233473,1.353436,2.395381,0.997904,0.112664,0.992156,0.297394,0.74033,0.556512,1.164956,0.269072,8.960381,0.643021,1.059513,0.945368,0.572873,0.452009,0.741885,1.167403,8.979201,0.644372,1.061739,0.947354,0.574076,0.452959,1.016029,2.639647,4.939559,2.388263,0.746184,1.174167,9.031226,0.648105,1.06789,0.952843,0.577402,0.455583,1.021916,1.871295,0.904766,1.573562,12.103221,0.86856,1.431136,1.276955,0.773807,0.610551,1.369523,0.483497,7.691605,0.55197,0.909488,0.811505,0.491755
,0.388005,0.870333,0.071763,0.118244,0.105505,0.063934,0.050445,0.113154,1.647712,1.470198,0.890908,0.702946,1.576774,0.892266,0.540694,0.42662,0.956948,0.605978,0.47813,1.072491,0.789022,1.769851,2.243094
division_1_size_5,0.091438,0.606138,0.416593,0.427988,0.687458,1.0,0.384475,0.37873,3.61409,0.378004,1.384475,0.502368,0.687458,0.329999,1.542157,0.092933,0.571531,0.426105,0.424918,0.667947,0.926718,0.378463,-2.392093,10.936364,-0.500648,1.64979,-0.875645,2.400424,-0.84866,2.336514,-0.374754,1.454634,0.0,1.0,-0.955877,2.600952,-0.970931,2.6404,1.28484,0.276695,-0.972851,2.645477,0.325321,0.722296,-0.688422,1.990572,-0.374754,1.454634,-1.108665,3.030309,0.433182,0.648442,-2.375877,10.760443,-0.559437,1.749686,-0.853069,2.346839,-0.855859,2.353396,-0.403547,1.497126,-0.076106,1.079077,-0.971638,2.642268,0.150854,0.21949,0.213646,0.133009,0.091438,0.237826,0.241433,0.0253,0.241897,0.066045,0.182014,0.133009,0.277086,0.059292,0.159988,0.21459,0.21519,0.136894,0.098669,0.241604,1.454988,1.41625,0.881708,0.606138,1.576535,1.600446,0.167715,1.603523,0.437811,1.206561,0.881708,1.836785,0.393045,6.522311,1.422508,1.426482,0.907465,0.654069,1.601578,0.973376,0.60599,0.416593,1.083538,1.099973,0.115269,1.102087,0.300903,0.829258,0.60599,1.262406,0.270137,4.482726,0.728907,0.980408,0.623692,0.449536,1.100751,0.622566,0.427988,1.113176,1.13006,0.118422,1.132232,0.309134,0.851941,0.622566,1.296936,0.277526,4.60534,0.748845,1.004419,0.640752,0.461832,1.130859,0.687458,1.788046,1.815165,0.190216,1.818655,0.496548,1.368435,1.0,2.083211,0.445777,7.397356,1.202836,1.613354,1.617862,0.74182,1.816449,2.600952,2.6404,0.276695,2.645477,0.722296,1.990572,1.454634,3.030309,0.648442,10.760443,1.749686,2.346839,2.353396,1.497126,2.642268,1.015167,0.106382,1.017119,0.277704,0.765324,0.55927,1.165077,0.24931,4.137118,0.67271,0.9023,0.904821,0.575607,0.414878,0.75389,1.14767,4.075307,0.662659,0.888819,0.891303,0.567007,0.408679,1.000707,2.610441,5.257177,2.343529,0.752443,1.145468,4.067487,0.661388,0.887114,0.889592,0.565919,0.407895,0.998787,2.013903,0.897752,1.522331,5.405705,0.878987,1.178978,1.182272,0.752109,0.542094,1.327392,0.445777,3.550939,0.577395,0.774455,0.776619,0.494051,0.356095,0
.871947,0.162604,0.218099,0.218708,0.139132,0.100282,0.245554,1.341291,1.345039,0.855654,0.616726,1.510138,1.002794,0.637933,0.4598,1.125884,0.636156,0.458519,1.122747,0.720766,1.764894,2.448637
division_1_size_3,0.108127,0.836176,0.514367,0.40095,0.696553,1.0,0.368753,0.436266,3.924926,0.429362,1.368753,0.523513,0.696553,0.406988,1.85962,0.111725,0.776148,0.526741,0.401435,0.678774,0.951396,0.369746,-2.224448,9.248376,-0.178917,1.195921,-0.664819,1.944138,-0.913918,2.494076,-0.361611,1.43564,0.0,1.0,-0.997629,2.711845,-0.829504,2.292181,1.367347,0.254782,-0.845455,2.329038,0.3139,0.730592,-0.647194,1.910173,-0.361611,1.43564,-0.89897,2.457072,0.620372,0.537744,-2.191715,8.950554,-0.253412,1.288414,-0.641046,1.898465,-0.91271,2.491063,-0.387466,1.473243,-0.049825,1.051087,-0.994938,2.704557,0.129311,0.210214,0.269677,0.155232,0.108127,0.293224,0.247847,0.027549,0.251832,0.078997,0.206541,0.155232,0.265676,0.058145,0.139312,0.205276,0.269351,0.159298,0.113651,0.292436,1.625641,2.085485,1.200447,0.836176,2.267579,1.916666,0.213042,1.947485,0.610903,1.59724,1.200447,2.054544,0.449649,7.484235,1.58745,2.082966,1.23189,0.878893,2.261485,1.282869,0.738445,0.514367,1.394883,1.179021,0.131051,1.19798,0.375792,0.98253,0.738445,1.263836,0.276598,4.603867,0.662717,1.28132,0.757787,0.540644,1.391134,0.57562,0.40095,1.087315,0.91905,0.102155,0.933828,0.292931,0.765884,0.57562,0.985163,0.215609,3.588726,0.51659,0.76119,0.590697,0.421433,1.084393,0.696553,1.888945,1.596626,0.177469,1.6223,0.508897,1.330538,1.0,1.711482,0.374568,6.234539,0.897449,1.322383,1.735159,0.732138,1.883869,2.711845,2.292181,0.254782,2.329038,0.730592,1.910173,1.43564,2.457072,0.537744,8.950554,1.288414,1.898465,2.491063,1.473243,2.704557,0.845248,0.093951,0.858839,0.269408,0.704381,0.529396,0.906052,0.198295,3.30054,0.475106,0.700064,0.918586,0.543262,0.387591,0.833343,1.071937,3.904821,0.562091,0.828236,1.086766,0.642726,0.458553,1.179906,2.86752,5.63478,2.110607,0.820155,1.054973,3.843026,0.553196,0.815128,1.069567,0.632554,0.451297,1.161234,1.965036,0.736039,1.286309,4.685729,0.674501,0.993871,1.304103,0.771262,0.550257,1.41587,0.374568,3.642772,0.52437,0.772653,1.013834,0.599593,0.42778,1.1007
24,0.143948,0.212106,0.278314,0.164598,0.117433,0.302166,1.47349,1.933434,1.143455,0.815799,2.099137,1.312146,0.776018,0.553651,1.424602,0.591411,0.421943,1.085704,0.713451,1.835784,2.573105


In [21]:
# Group the 1900-800 cm^-1 column names by feature type: the columns of
# `features_1900_800` are passed as the base set and the columns of
# `all_features_1900_800` as the full set to classify.
base_columns = list(features_1900_800.columns)
columns = list(all_features_1900_800.columns)
features_description_dict_1900_800 = classify_feature_type(
    base_columns=base_columns,
    columns=columns,
)

# Report how many column names ended up in each feature-type group.
for feature_type, feature_lst in features_description_dict_1900_800.items():
    print(f'(*) {feature_type}: {len(feature_lst)}')

(*) base_features_peaks_1900_800: 7
(*) base_features_intervals_1900_800: 15
(*) engineered_features_peaks_1900_800: 35
(*) engineered_features_intervals_1900_800: 188
(*) base_features: 22
(*) engineered_features: 223


## Construction of datasets

### 3800–2500 cm$^{-1}$

In [22]:
# Keep only the four non-feature columns (split and fraction/class labels);
# the next cell joins them back onto the feature table by sample index.
dataset_3800_2500_part = dataset_3800_2500.loc[
    :, ['Split', 'Fraction_hue', 'Fraction_grouped_hue', 'Class']
]

print(dataset_3800_2500_part.shape)
dataset_3800_2500_part.head(3)

(74, 4)


Unnamed: 0,Split,Fraction_hue,Fraction_grouped_hue,Class
division_1_size_bulk,Train,$> 0$,$d > 5$,2
division_1_size_5,Train,$< 5$,$2 < d \leq 5$,1
division_1_size_3,Train,$< 3$,$2 < d \leq 5$,1


In [23]:
# Join the label columns onto the feature table `all_features_3800_2500` by
# sample index. how='right' keeps every row of the label frame;
# validate='one_to_one' makes pandas raise a MergeError if either index holds
# duplicated sample names, instead of silently multiplying rows.
dataset_3800_2500 = pd.merge(
    all_features_3800_2500,
    dataset_3800_2500_part,
    left_index=True,
    right_index=True,
    how='right',
    validate='one_to_one',
)

print(dataset_3800_2500.shape)
display(dataset_3800_2500.head(3))
# One-row table of missing-value counts per column; with a right join, a
# non-zero count in a feature column can mean a label row had no matching
# row in the feature table.
display(pd.DataFrame(dataset_3800_2500.isna().sum()).T)

(74, 49)


Unnamed: 0,$I_{2928}$,$I_{3352}$,$I_{3726}$,$\overline{I}_{3800\text{–}2500}$,"$\Sigma I_{p,\ 3800\text{–}2500}$",$\overline{I}_{2928 \pm 50}$,$\overline{I}_{3352 \pm 50}$,$\overline{I}_{3726 \pm 50}$,$\log I_{2928}$,$(I_{2928})^{-1}$,$\log I_{3352}$,$(I_{3352})^{-1}$,$\log I_{3726}$,$(I_{3726})^{-1}$,$\log \overline{I}_{3800\text{–}2500}$,$(\overline{I}_{3800\text{–}2500})^{-1}$,"$\log \Sigma I_{p,\ 3800\text{–}2500}$","$(\Sigma I_{p,\ 3800\text{–}2500})^{-1}$",$\log \overline{I}_{2928 \pm 50}$,$(\overline{I}_{2928 \pm 50})^{-1}$,$\log \overline{I}_{3352 \pm 50}$,$(\overline{I}_{3352 \pm 50})^{-1}$,$\log \overline{I}_{3726 \pm 50}$,$(\overline{I}_{3726 \pm 50})^{-1}$,$\frac{I_{2928}}{I_{3352}}$,$\frac{I_{2928}}{I_{3726}}$,$\frac{I_{2928}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{2928}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{2928}}{\overline{I}_{3352 \pm 50}}$,$\frac{I_{2928}}{\overline{I}_{3726 \pm 50}}$,$\frac{I_{3352}}{I_{3726}}$,$\frac{I_{3352}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{3352}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{3352}}{\overline{I}_{2928 \pm 50}}$,$\frac{I_{3352}}{\overline{I}_{3726 \pm 50}}$,$\frac{I_{3726}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{3726}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{3726}}{\overline{I}_{2928 \pm 50}}$,$\frac{I_{3726}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{2928 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{3726 \pm 50}}$,$\frac{\overline{I}_{2928 \pm 50}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{2928 \pm 50}}{\overline{I}_{3726 \pm 50}}$,$\frac{\overline{I}_{3352 \pm 50}}{\overline{I}_{3726 \pm 50}}$,Split,Fraction_hue,Fraction_grouped_hue,Class
division_1_size_bulk,0.49172,1.0,0.088051,0.526201,1.57977,0.494443,0.983311,0.075225,-0.709847,2.033679,0.0,1.0,-2.429844,11.357112,-0.642072,1.900414,0.457279,0.633003,-0.704324,2.022478,-0.01683,1.016972,-2.587267,13.29339,0.49172,5.584514,0.934471,0.31126,0.500065,6.53662,11.357112,1.900414,0.633003,2.022478,13.29339,0.167333,0.055736,0.17808,0.089545,1.06423,0.535132,6.994997,0.502835,6.572822,13.07154,Train,$> 0$,$d > 5$,2
division_1_size_5,0.406937,1.0,0.028,0.477909,1.434937,0.396989,0.980473,0.034739,-0.899098,2.457385,0.0,1.0,-3.575539,35.713875,-0.738335,2.092448,0.361121,0.696895,-0.923846,2.51896,-0.01972,1.019916,-3.359889,28.786,0.406937,14.533284,0.851494,0.283592,0.415041,11.714078,35.713875,2.092448,0.696895,2.51896,28.786,0.058589,0.019513,0.070532,0.028558,1.203834,0.487427,13.757094,0.404896,11.427734,28.223893,Train,$< 5$,$2 < d \leq 5$,1
division_1_size_3,0.618445,1.0,0.078392,0.535789,1.696837,0.581733,0.980981,0.066886,-0.480547,1.616959,0.0,1.0,-2.546033,12.756398,-0.624015,1.866407,0.528766,0.589332,-0.541743,1.719,-0.019202,1.019388,-2.704765,14.950801,0.618445,7.88913,1.15427,0.364469,0.630435,9.246247,12.756398,1.866407,0.589332,1.719,14.950801,0.146311,0.046199,0.134756,0.079912,0.921021,0.546177,8.010474,0.593012,8.697381,14.666449,Train,$< 3$,$2 < d \leq 5$,1


Unnamed: 0,$I_{2928}$,$I_{3352}$,$I_{3726}$,$\overline{I}_{3800\text{–}2500}$,"$\Sigma I_{p,\ 3800\text{–}2500}$",$\overline{I}_{2928 \pm 50}$,$\overline{I}_{3352 \pm 50}$,$\overline{I}_{3726 \pm 50}$,$\log I_{2928}$,$(I_{2928})^{-1}$,$\log I_{3352}$,$(I_{3352})^{-1}$,$\log I_{3726}$,$(I_{3726})^{-1}$,$\log \overline{I}_{3800\text{–}2500}$,$(\overline{I}_{3800\text{–}2500})^{-1}$,"$\log \Sigma I_{p,\ 3800\text{–}2500}$","$(\Sigma I_{p,\ 3800\text{–}2500})^{-1}$",$\log \overline{I}_{2928 \pm 50}$,$(\overline{I}_{2928 \pm 50})^{-1}$,$\log \overline{I}_{3352 \pm 50}$,$(\overline{I}_{3352 \pm 50})^{-1}$,$\log \overline{I}_{3726 \pm 50}$,$(\overline{I}_{3726 \pm 50})^{-1}$,$\frac{I_{2928}}{I_{3352}}$,$\frac{I_{2928}}{I_{3726}}$,$\frac{I_{2928}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{2928}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{2928}}{\overline{I}_{3352 \pm 50}}$,$\frac{I_{2928}}{\overline{I}_{3726 \pm 50}}$,$\frac{I_{3352}}{I_{3726}}$,$\frac{I_{3352}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{3352}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{3352}}{\overline{I}_{2928 \pm 50}}$,$\frac{I_{3352}}{\overline{I}_{3726 \pm 50}}$,$\frac{I_{3726}}{\overline{I}_{3800\text{–}2500}}$,"$\frac{I_{3726}}{\Sigma I_{p,\ 3800\text{–}2500}}$",$\frac{I_{3726}}{\overline{I}_{2928 \pm 50}}$,$\frac{I_{3726}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{2928 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{3800\text{–}2500}}{\overline{I}_{3726 \pm 50}}$,$\frac{\overline{I}_{2928 \pm 50}}{\overline{I}_{3352 \pm 50}}$,$\frac{\overline{I}_{2928 \pm 50}}{\overline{I}_{3726 \pm 50}}$,$\frac{\overline{I}_{3352 \pm 50}}{\overline{I}_{3726 \pm 50}}$,Split,Fraction_hue,Fraction_grouped_hue,Class
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### 1900–800 cm$^{-1}$

In [24]:
# Keep only the four non-feature columns (split and fraction/class labels);
# the next cell joins them back onto the feature table by sample index.
dataset_1900_800_part = dataset_1900_800.loc[
    :, ['Split', 'Fraction_hue', 'Fraction_grouped_hue', 'Class']
]

print(dataset_1900_800_part.shape)
dataset_1900_800_part.head(3)

(74, 4)


Unnamed: 0,Split,Fraction_hue,Fraction_grouped_hue,Class
division_1_size_bulk,Train,$> 0$,$d > 5$,2
division_1_size_5,Train,$< 5$,$2 < d \leq 5$,1
division_1_size_3,Train,$< 3$,$2 < d \leq 5$,1


In [25]:
# Join the label columns onto the feature table `all_features_1900_800` by
# sample index. how='right' keeps every row of the label frame;
# validate='one_to_one' makes pandas raise a MergeError if either index holds
# duplicated sample names, instead of silently multiplying rows.
dataset_1900_800 = pd.merge(
    all_features_1900_800,
    dataset_1900_800_part,
    left_index=True,
    right_index=True,
    how='right',
    validate='one_to_one',
)

print(dataset_1900_800.shape)
display(dataset_1900_800.head(3))
# One-row table of missing-value counts per column; with a right join, a
# non-zero count in a feature column can mean a label row had no matching
# row in the feature table.
display(pd.DataFrame(dataset_1900_800.isna().sum()).T)

(74, 249)


Unnamed: 0,$I_{872}$,$I_{1085}$,$I_{1182}$,$I_{1241}$,$I_{1394}$,$I_{1612}$,$I_{1725}$,$\overline{I}_{1900\text{–}800}$,"$\Sigma I_{p,\ 1900\text{–}800}$",$\overline{I}_{1900\text{–}1500}$,"$\Sigma I_{p,\ 1900\text{–}1500}$",$\overline{I}_{1500\text{–}1300}$,"$\Sigma I_{p,\ 1500\text{–}1300}$",$\overline{I}_{1300\text{–}800}$,"$\Sigma I_{p,\ 1300\text{–}800}$",$\overline{I}_{872 \pm 25}$,$\overline{I}_{1085 \pm 25}$,$\overline{I}_{1182 \pm 25}$,$\overline{I}_{1241 \pm 25}$,$\overline{I}_{1394 \pm 25}$,$\overline{I}_{1612 \pm 25}$,$\overline{I}_{1725 \pm 25}$,$\log I_{872}$,$(I_{872})^{-1}$,$\log I_{1085}$,$(I_{1085})^{-1}$,$\log I_{1182}$,$(I_{1182})^{-1}$,$\log I_{1241}$,$(I_{1241})^{-1}$,$\log I_{1394}$,$(I_{1394})^{-1}$,$\log I_{1612}$,$(I_{1612})^{-1}$,$\log I_{1725}$,$(I_{1725})^{-1}$,$\log \overline{I}_{1900\text{–}800}$,$(\overline{I}_{1900\text{–}800})^{-1}$,"$\log \Sigma I_{p,\ 1900\text{–}800}$","$(\Sigma I_{p,\ 1900\text{–}800})^{-1}$",$\log \overline{I}_{1900\text{–}1500}$,$(\overline{I}_{1900\text{–}1500})^{-1}$,"$\log \Sigma I_{p,\ 1900\text{–}1500}$","$(\Sigma I_{p,\ 1900\text{–}1500})^{-1}$",$\log \overline{I}_{1500\text{–}1300}$,$(\overline{I}_{1500\text{–}1300})^{-1}$,"$\log \Sigma I_{p,\ 1500\text{–}1300}$","$(\Sigma I_{p,\ 1500\text{–}1300})^{-1}$",$\log \overline{I}_{1300\text{–}800}$,$(\overline{I}_{1300\text{–}800})^{-1}$,"$\log \Sigma I_{p,\ 1300\text{–}800}$","$(\Sigma I_{p,\ 1300\text{–}800})^{-1}$",$\log \overline{I}_{872 \pm 25}$,$(\overline{I}_{872 \pm 25})^{-1}$,$\log \overline{I}_{1085 \pm 25}$,$(\overline{I}_{1085 \pm 25})^{-1}$,$\log \overline{I}_{1182 \pm 25}$,$(\overline{I}_{1182 \pm 25})^{-1}$,$\log \overline{I}_{1241 \pm 25}$,$(\overline{I}_{1241 \pm 25})^{-1}$,$\log \overline{I}_{1394 \pm 25}$,$(\overline{I}_{1394 \pm 25})^{-1}$,$\log \overline{I}_{1612 \pm 25}$,$(\overline{I}_{1612 \pm 25})^{-1}$,$\log \overline{I}_{1725 \pm 25}$,$(\overline{I}_{1725 \pm 
25})^{-1}$,$\frac{I_{872}}{I_{1085}}$,$\frac{I_{872}}{I_{1182}}$,$\frac{I_{872}}{I_{1241}}$,$\frac{I_{872}}{I_{1394}}$,$\frac{I_{872}}{I_{1612}}$,$\frac{I_{872}}{I_{1725}}$,$\frac{I_{872}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{872}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{872}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{872}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{872}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1085}}{I_{1182}}$,$\frac{I_{1085}}{I_{1241}}$,$\frac{I_{1085}}{I_{1394}}$,$\frac{I_{1085}}{I_{1612}}$,$\frac{I_{1085}}{I_{1725}}$,$\frac{I_{1085}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1085}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1085}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1085}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1085}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1182}}{I_{1241}}$,$\frac{I_{1182}}{I_{1394}}$,$\frac{I_{1182}}{I_{1612}}$,$\frac{I_{1182}}{I_{1725}}$,$\frac{I_{1182}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1182}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 
1900\text{–}1500}}$",$\frac{I_{1182}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1182}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1182}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1241}}{I_{1394}}$,$\frac{I_{1241}}{I_{1612}}$,$\frac{I_{1241}}{I_{1725}}$,$\frac{I_{1241}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1241}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1241}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1241}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1241}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1394}}{I_{1612}}$,$\frac{I_{1394}}{I_{1725}}$,$\frac{I_{1394}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1394}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1394}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1394}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1394}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1612 \pm 
25}}$,$\frac{I_{1394}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1612}}{I_{1725}}$,$\frac{I_{1612}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1612}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1612}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1612}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1612}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1725}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1725}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1725}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1725}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1500\text{–}1300}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1612 \pm 
25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1900\text{–}1500}}$","$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1500\text{–}1300}}$","$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1500\text{–}1300}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1900\text{–}1500}}{\Sigma I_{p,\ 1500\text{–}1300}}$","$\frac{\Sigma I_{p,\ 1900\text{–}1500}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1500\text{–}1300}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1241 \pm 
25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1394 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1394 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1612 \pm 25}}{\overline{I}_{1725 \pm 25}}$,Split,Fraction_hue,Fraction_grouped_hue,Class
division_1_size_bulk,0.0478,0.681294,0.388923,0.455066,0.760582,1.0,0.423273,0.424162,3.756938,0.426619,1.423273,0.571735,0.760582,0.363338,1.573083,0.047238,0.658256,0.399497,0.447733,0.73886,0.936425,0.41747,-3.040727,20.920439,-0.383761,1.467794,-0.944375,2.571206,-0.787312,2.197482,-0.273672,1.314783,0.0,1.0,-0.859739,2.362543,-0.857641,2.357591,1.323604,0.266174,-0.851863,2.34401,0.352959,0.702606,-0.55908,1.749062,-0.273672,1.314783,-1.012422,2.752259,0.453038,0.635694,-3.052551,21.169287,-0.418161,1.519165,-0.917549,2.503146,-0.803558,2.233473,-0.302647,1.353436,-0.065686,1.067891,-0.873542,2.395381,0.070161,0.122904,0.10504,0.062847,0.0478,0.11293,0.112693,0.012723,0.112044,0.033585,0.083605,0.062847,0.131558,0.030386,0.072616,0.119651,0.10676,0.064694,0.051045,0.1145,1.751748,1.497132,0.895754,0.681294,1.609587,1.606214,0.181343,1.596961,0.478681,1.191626,0.895754,1.875098,0.433095,14.422514,1.705379,1.521652,0.922089,0.727548,1.631959,0.85465,0.511349,0.388923,0.918847,0.916921,0.103521,0.911639,0.273259,0.68025,0.511349,1.070416,0.247236,8.233214,0.590838,0.868648,0.526382,0.415327,0.931618,0.598314,0.455066,1.075114,1.072861,0.121127,1.06668,0.319732,0.795939,0.598314,1.25246,0.289283,9.633431,0.691321,1.139098,0.615903,0.485961,1.090057,0.760582,1.796907,1.793141,0.202447,1.782811,0.534389,1.330305,1.0,2.093317,0.483497,16.100969,1.155449,1.903847,1.698738,0.812218,1.821882,2.362543,2.357591,0.266174,2.34401,0.702606,1.749062,1.314783,2.752259,0.635694,21.169287,1.519165,2.503146,2.233473,1.353436,2.395381,0.997904,0.112664,0.992156,0.297394,0.74033,0.556512,1.164956,0.269072,8.960381,0.643021,1.059513,0.945368,0.572873,0.452009,0.741885,1.167403,8.979201,0.644372,1.061739,0.947354,0.574076,0.452959,1.016029,2.639647,4.939559,2.388263,0.746184,1.174167,9.031226,0.648105,1.06789,0.952843,0.577402,0.455583,1.021916,1.871295,0.904766,1.573562,12.103221,0.86856,1.431136,1.276955,0.773807,0.610551,1.369523,0.483497,7.691605,0.55197,0.909488,0.811505,0.491755
,0.388005,0.870333,0.071763,0.118244,0.105505,0.063934,0.050445,0.113154,1.647712,1.470198,0.890908,0.702946,1.576774,0.892266,0.540694,0.42662,0.956948,0.605978,0.47813,1.072491,0.789022,1.769851,2.243094,Train,$> 0$,$d > 5$,2
division_1_size_5,0.091438,0.606138,0.416593,0.427988,0.687458,1.0,0.384475,0.37873,3.61409,0.378004,1.384475,0.502368,0.687458,0.329999,1.542157,0.092933,0.571531,0.426105,0.424918,0.667947,0.926718,0.378463,-2.392093,10.936364,-0.500648,1.64979,-0.875645,2.400424,-0.84866,2.336514,-0.374754,1.454634,0.0,1.0,-0.955877,2.600952,-0.970931,2.6404,1.28484,0.276695,-0.972851,2.645477,0.325321,0.722296,-0.688422,1.990572,-0.374754,1.454634,-1.108665,3.030309,0.433182,0.648442,-2.375877,10.760443,-0.559437,1.749686,-0.853069,2.346839,-0.855859,2.353396,-0.403547,1.497126,-0.076106,1.079077,-0.971638,2.642268,0.150854,0.21949,0.213646,0.133009,0.091438,0.237826,0.241433,0.0253,0.241897,0.066045,0.182014,0.133009,0.277086,0.059292,0.159988,0.21459,0.21519,0.136894,0.098669,0.241604,1.454988,1.41625,0.881708,0.606138,1.576535,1.600446,0.167715,1.603523,0.437811,1.206561,0.881708,1.836785,0.393045,6.522311,1.422508,1.426482,0.907465,0.654069,1.601578,0.973376,0.60599,0.416593,1.083538,1.099973,0.115269,1.102087,0.300903,0.829258,0.60599,1.262406,0.270137,4.482726,0.728907,0.980408,0.623692,0.449536,1.100751,0.622566,0.427988,1.113176,1.13006,0.118422,1.132232,0.309134,0.851941,0.622566,1.296936,0.277526,4.60534,0.748845,1.004419,0.640752,0.461832,1.130859,0.687458,1.788046,1.815165,0.190216,1.818655,0.496548,1.368435,1.0,2.083211,0.445777,7.397356,1.202836,1.613354,1.617862,0.74182,1.816449,2.600952,2.6404,0.276695,2.645477,0.722296,1.990572,1.454634,3.030309,0.648442,10.760443,1.749686,2.346839,2.353396,1.497126,2.642268,1.015167,0.106382,1.017119,0.277704,0.765324,0.55927,1.165077,0.24931,4.137118,0.67271,0.9023,0.904821,0.575607,0.414878,0.75389,1.14767,4.075307,0.662659,0.888819,0.891303,0.567007,0.408679,1.000707,2.610441,5.257177,2.343529,0.752443,1.145468,4.067487,0.661388,0.887114,0.889592,0.565919,0.407895,0.998787,2.013903,0.897752,1.522331,5.405705,0.878987,1.178978,1.182272,0.752109,0.542094,1.327392,0.445777,3.550939,0.577395,0.774455,0.776619,0.494051,0.356095,0
.871947,0.162604,0.218099,0.218708,0.139132,0.100282,0.245554,1.341291,1.345039,0.855654,0.616726,1.510138,1.002794,0.637933,0.4598,1.125884,0.636156,0.458519,1.122747,0.720766,1.764894,2.448637,Train,$< 5$,$2 < d \leq 5$,1
division_1_size_3,0.108127,0.836176,0.514367,0.40095,0.696553,1.0,0.368753,0.436266,3.924926,0.429362,1.368753,0.523513,0.696553,0.406988,1.85962,0.111725,0.776148,0.526741,0.401435,0.678774,0.951396,0.369746,-2.224448,9.248376,-0.178917,1.195921,-0.664819,1.944138,-0.913918,2.494076,-0.361611,1.43564,0.0,1.0,-0.997629,2.711845,-0.829504,2.292181,1.367347,0.254782,-0.845455,2.329038,0.3139,0.730592,-0.647194,1.910173,-0.361611,1.43564,-0.89897,2.457072,0.620372,0.537744,-2.191715,8.950554,-0.253412,1.288414,-0.641046,1.898465,-0.91271,2.491063,-0.387466,1.473243,-0.049825,1.051087,-0.994938,2.704557,0.129311,0.210214,0.269677,0.155232,0.108127,0.293224,0.247847,0.027549,0.251832,0.078997,0.206541,0.155232,0.265676,0.058145,0.139312,0.205276,0.269351,0.159298,0.113651,0.292436,1.625641,2.085485,1.200447,0.836176,2.267579,1.916666,0.213042,1.947485,0.610903,1.59724,1.200447,2.054544,0.449649,7.484235,1.58745,2.082966,1.23189,0.878893,2.261485,1.282869,0.738445,0.514367,1.394883,1.179021,0.131051,1.19798,0.375792,0.98253,0.738445,1.263836,0.276598,4.603867,0.662717,1.28132,0.757787,0.540644,1.391134,0.57562,0.40095,1.087315,0.91905,0.102155,0.933828,0.292931,0.765884,0.57562,0.985163,0.215609,3.588726,0.51659,0.76119,0.590697,0.421433,1.084393,0.696553,1.888945,1.596626,0.177469,1.6223,0.508897,1.330538,1.0,1.711482,0.374568,6.234539,0.897449,1.322383,1.735159,0.732138,1.883869,2.711845,2.292181,0.254782,2.329038,0.730592,1.910173,1.43564,2.457072,0.537744,8.950554,1.288414,1.898465,2.491063,1.473243,2.704557,0.845248,0.093951,0.858839,0.269408,0.704381,0.529396,0.906052,0.198295,3.30054,0.475106,0.700064,0.918586,0.543262,0.387591,0.833343,1.071937,3.904821,0.562091,0.828236,1.086766,0.642726,0.458553,1.179906,2.86752,5.63478,2.110607,0.820155,1.054973,3.843026,0.553196,0.815128,1.069567,0.632554,0.451297,1.161234,1.965036,0.736039,1.286309,4.685729,0.674501,0.993871,1.304103,0.771262,0.550257,1.41587,0.374568,3.642772,0.52437,0.772653,1.013834,0.599593,0.42778,1.1007
24,0.143948,0.212106,0.278314,0.164598,0.117433,0.302166,1.47349,1.933434,1.143455,0.815799,2.099137,1.312146,0.776018,0.553651,1.424602,0.591411,0.421943,1.085704,0.713451,1.835784,2.573105,Train,$< 3$,$2 < d \leq 5$,1


Unnamed: 0,$I_{872}$,$I_{1085}$,$I_{1182}$,$I_{1241}$,$I_{1394}$,$I_{1612}$,$I_{1725}$,$\overline{I}_{1900\text{–}800}$,"$\Sigma I_{p,\ 1900\text{–}800}$",$\overline{I}_{1900\text{–}1500}$,"$\Sigma I_{p,\ 1900\text{–}1500}$",$\overline{I}_{1500\text{–}1300}$,"$\Sigma I_{p,\ 1500\text{–}1300}$",$\overline{I}_{1300\text{–}800}$,"$\Sigma I_{p,\ 1300\text{–}800}$",$\overline{I}_{872 \pm 25}$,$\overline{I}_{1085 \pm 25}$,$\overline{I}_{1182 \pm 25}$,$\overline{I}_{1241 \pm 25}$,$\overline{I}_{1394 \pm 25}$,$\overline{I}_{1612 \pm 25}$,$\overline{I}_{1725 \pm 25}$,$\log I_{872}$,$(I_{872})^{-1}$,$\log I_{1085}$,$(I_{1085})^{-1}$,$\log I_{1182}$,$(I_{1182})^{-1}$,$\log I_{1241}$,$(I_{1241})^{-1}$,$\log I_{1394}$,$(I_{1394})^{-1}$,$\log I_{1612}$,$(I_{1612})^{-1}$,$\log I_{1725}$,$(I_{1725})^{-1}$,$\log \overline{I}_{1900\text{–}800}$,$(\overline{I}_{1900\text{–}800})^{-1}$,"$\log \Sigma I_{p,\ 1900\text{–}800}$","$(\Sigma I_{p,\ 1900\text{–}800})^{-1}$",$\log \overline{I}_{1900\text{–}1500}$,$(\overline{I}_{1900\text{–}1500})^{-1}$,"$\log \Sigma I_{p,\ 1900\text{–}1500}$","$(\Sigma I_{p,\ 1900\text{–}1500})^{-1}$",$\log \overline{I}_{1500\text{–}1300}$,$(\overline{I}_{1500\text{–}1300})^{-1}$,"$\log \Sigma I_{p,\ 1500\text{–}1300}$","$(\Sigma I_{p,\ 1500\text{–}1300})^{-1}$",$\log \overline{I}_{1300\text{–}800}$,$(\overline{I}_{1300\text{–}800})^{-1}$,"$\log \Sigma I_{p,\ 1300\text{–}800}$","$(\Sigma I_{p,\ 1300\text{–}800})^{-1}$",$\log \overline{I}_{872 \pm 25}$,$(\overline{I}_{872 \pm 25})^{-1}$,$\log \overline{I}_{1085 \pm 25}$,$(\overline{I}_{1085 \pm 25})^{-1}$,$\log \overline{I}_{1182 \pm 25}$,$(\overline{I}_{1182 \pm 25})^{-1}$,$\log \overline{I}_{1241 \pm 25}$,$(\overline{I}_{1241 \pm 25})^{-1}$,$\log \overline{I}_{1394 \pm 25}$,$(\overline{I}_{1394 \pm 25})^{-1}$,$\log \overline{I}_{1612 \pm 25}$,$(\overline{I}_{1612 \pm 25})^{-1}$,$\log \overline{I}_{1725 \pm 25}$,$(\overline{I}_{1725 \pm 
25})^{-1}$,$\frac{I_{872}}{I_{1085}}$,$\frac{I_{872}}{I_{1182}}$,$\frac{I_{872}}{I_{1241}}$,$\frac{I_{872}}{I_{1394}}$,$\frac{I_{872}}{I_{1612}}$,$\frac{I_{872}}{I_{1725}}$,$\frac{I_{872}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{872}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{872}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{872}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{872}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{872}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{872}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1085}}{I_{1182}}$,$\frac{I_{1085}}{I_{1241}}$,$\frac{I_{1085}}{I_{1394}}$,$\frac{I_{1085}}{I_{1612}}$,$\frac{I_{1085}}{I_{1725}}$,$\frac{I_{1085}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1085}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1085}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1085}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1085}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1085}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1085}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1182}}{I_{1241}}$,$\frac{I_{1182}}{I_{1394}}$,$\frac{I_{1182}}{I_{1612}}$,$\frac{I_{1182}}{I_{1725}}$,$\frac{I_{1182}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1182}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 
1900\text{–}1500}}$",$\frac{I_{1182}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1182}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1182}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1182}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1182}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1241}}{I_{1394}}$,$\frac{I_{1241}}{I_{1612}}$,$\frac{I_{1241}}{I_{1725}}$,$\frac{I_{1241}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1241}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1241}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1241}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1241}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1241}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1612 \pm 25}}$,$\frac{I_{1241}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1394}}{I_{1612}}$,$\frac{I_{1394}}{I_{1725}}$,$\frac{I_{1394}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1394}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1394}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1394}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1394}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1394}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1394}}{\overline{I}_{1612 \pm 
25}}$,$\frac{I_{1394}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1612}}{I_{1725}}$,$\frac{I_{1612}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1612}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1612}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1612}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1612}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1612}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1612}}{\overline{I}_{1725 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1900\text{–}800}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1900\text{–}800}}$",$\frac{I_{1725}}{\overline{I}_{1900\text{–}1500}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1900\text{–}1500}}$",$\frac{I_{1725}}{\overline{I}_{1500\text{–}1300}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1500\text{–}1300}}$",$\frac{I_{1725}}{\overline{I}_{1300\text{–}800}}$,"$\frac{I_{1725}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{I_{1725}}{\overline{I}_{872 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1085 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1182 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1241 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1394 \pm 25}}$,$\frac{I_{1725}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1500\text{–}1300}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1612 \pm 
25}}$,$\frac{\overline{I}_{1900\text{–}800}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1900\text{–}1500}}$","$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1500\text{–}1300}}$","$\frac{\Sigma I_{p,\ 1900\text{–}800}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1500\text{–}1300}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1900\text{–}1500}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1900\text{–}1500}}{\Sigma I_{p,\ 1500\text{–}1300}}$","$\frac{\Sigma I_{p,\ 1900\text{–}1500}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1300\text{–}800}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1500\text{–}1300}}{\overline{I}_{1725 \pm 25}}$,"$\frac{\Sigma I_{p,\ 1500\text{–}1300}}{\Sigma I_{p,\ 1300\text{–}800}}$",$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{872 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1241 \pm 
25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1300\text{–}800}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1085 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{872 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1182 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1085 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1241 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1182 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1394 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1241 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1394 \pm 25}}{\overline{I}_{1612 \pm 25}}$,$\frac{\overline{I}_{1394 \pm 25}}{\overline{I}_{1725 \pm 25}}$,$\frac{\overline{I}_{1612 \pm 25}}{\overline{I}_{1725 \pm 25}}$,Split,Fraction_hue,Fraction_grouped_hue,Class
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Saving data

In [26]:
# Write both engineered feature tables to the processed-data folder.
# ';' is the same separator the loading cells pass to pd.read_csv, and the
# row index is written out as the first column (to_csv default).
for engineered_table, csv_target in [
    (dataset_3800_2500, './../data/processed_data/dataset_3800_2500_engineered.csv'),
    (dataset_1900_800, './../data/processed_data/dataset_1900_800_engineered.csv'),
]:
    engineered_table.to_csv(csv_target, sep=';')

In [27]:
# Store features_description_dict_3800_2500 as pretty-printed UTF-8 JSON:
# non-ASCII characters are written verbatim and nesting is indented by 2 spaces.
description_target = './../data/processed_data/features_description_3800_2500.json'
with open(description_target, mode='w', encoding='utf-8') as json_out:
    json.dump(
        features_description_dict_3800_2500,
        json_out,
        ensure_ascii=False,
        indent=2,
    )

In [28]:
# Store features_description_dict_1900_800 as pretty-printed UTF-8 JSON:
# non-ASCII characters are written verbatim and nesting is indented by 2 spaces.
description_target = './../data/processed_data/features_description_1900_800.json'
with open(description_target, mode='w', encoding='utf-8') as json_out:
    json.dump(
        features_description_dict_1900_800,
        json_out,
        ensure_ascii=False,
        indent=2,
    )