# Min-max cutoffs for continuous (=spatial) features

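Define the min-max cutoffs used for the min-max normalization of continuous features, such as the distance and moment features. For each feature, the lower and upper cutoffs are set to the 1st and 99th percentiles of its values across all fingerprints, rounded to two decimals.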

In [1]:
import json
from pathlib import Path

import pandas as pd

from kissim.encoding import FingerprintGenerator



In [2]:
# Last entry of IPython's directory history `_dh`, i.e. the most recent working directory.
HERE = Path(_dh[-1])
# Results folder, addressed relative to HERE (two levels up).
RESULTS = HERE.joinpath("../../results/")

## Load fingerprints

In [3]:
# Load the fingerprints stored as JSON in the results folder.
fingerprint_generator = FingerprintGenerator.from_json(
    RESULTS.joinpath("fingerprints.json")
)
print(f"Number of fingerprints: {len(fingerprint_generator.data)}")

Number of fingerprints: 4918


## Distances features

In [4]:
# Distances features of all fingerprints as a single table: per the displayed output,
# rows are indexed by (structure_klifs_id, residue_ix) and there is one column per
# distance feature (hinge_region, dfg_region, front_pocket, center).
features_d = fingerprint_generator.distances_exploded
features_d

Unnamed: 0_level_0,Unnamed: 1_level_0,hinge_region,dfg_region,front_pocket,center
structure_klifs_id,residue_ix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3835,1,12.798095,19.079805,14.099948,17.262426
3835,2,11.640075,16.481350,11.632327,15.175342
3835,3,9.240445,15.125156,8.279718,12.447825
3835,4,10.344351,13.665039,7.234169,11.853809
3835,5,12.414604,12.788149,9.114764,12.903294
...,...,...,...,...,...
7219,81,8.892165,7.109349,6.723064,3.793342
7219,82,11.366709,6.088001,10.422721,6.788561
7219,83,13.247789,5.919116,11.564625,9.319808
7219,84,16.129894,9.708284,13.088131,11.473628


In [5]:
# Summary statistics per distance feature; the 1st and 99th percentiles are requested
# because they serve as the lower/upper normalization cutoffs below.
features_d_stats = features_d.describe(percentiles=[0.01, 0.99])
features_d_stats

Unnamed: 0,hinge_region,dfg_region,front_pocket,center
count,410834.0,406621.0,410851.0,411460.0
mean,12.874321,13.579542,13.035534,12.006524
std,4.570431,4.86716,4.282973,3.489024
min,2.68981,0.873535,1.355564,0.890513
1%,4.112916,4.659289,5.452664,3.050446
50%,12.555706,13.298841,12.458632,12.020822
99%,23.066009,26.731355,23.41785,21.188289
max,48.509457,50.127373,40.766365,47.974854


In [6]:
# Per distance feature: (1st percentile, 99th percentile), rounded to two decimals.
# `to_dict()` yields {feature: {"1%": low, "99%": high}}; each inner mapping is
# collapsed into a (low, high) tuple.
distance_cutoff = {
    feature: tuple(bounds.values())
    for feature, bounds in (
        features_d_stats.loc[["1%", "99%"], :].round(2).to_dict().items()
    )
}
distance_cutoff

{'hinge_region': (4.11, 23.07),
 'dfg_region': (4.66, 26.73),
 'front_pocket': (5.45, 23.42),
 'center': (3.05, 21.19)}

## Moments features

In [7]:
# Reshape the moments features so that each moment becomes one column:
# stack() moves the original columns into the innermost index level, unstack(level=1)
# pivots index level 1 into columns, and the remaining index is discarded.
# NOTE(review): assumes index level 1 of `moments_exploded` is the moment number —
# consistent with the "moment" column axis (1, 2, 3) in the summary table; confirm.
features_m = (
    fingerprint_generator.moments_exploded
    .stack()
    .unstack(level=1)
    .reset_index(drop=True)
)

In [8]:
# Summary statistics per moment; the 1st and 99th percentiles are requested
# because they serve as the lower/upper normalization cutoffs below.
features_m_stats = features_m.describe(percentiles=[0.01, 0.99])
features_m_stats

moment,1,2,3
count,19593.0,19593.0,19593.0
mean,12.872311,4.283828,2.7806
std,0.634072,0.56851,1.078366
min,11.311548,2.838044,-2.546785
1%,11.681335,3.260458,-1.694082
50%,12.921171,4.419284,2.882389
99%,14.194312,5.290567,4.721426
max,23.591284,11.646229,12.970084


In [9]:
# Min-max cutoffs for the moments features: (1st percentile, 99th percentile) per
# moment, rounded to two decimals.
# Bound to its own name: this cell previously assigned to `distance_cutoff`, which
# overwrote the distance cutoffs computed in the "Distances features" section.
moment_cutoff = features_m_stats.loc[["1%", "99%"], :].apply(lambda x: round(x, 2)).to_dict()
# Collapse each {"1%": low, "99%": high} mapping into a (low, high) tuple.
moment_cutoff = {key: tuple(value.values()) for key, value in moment_cutoff.items()}
moment_cutoff

{1: (11.68, 14.19), 2: (3.26, 5.29), 3: (-1.69, 4.72)}