In [None]:
import numpy as np

def determine_scale(y, percentile=99, target_asinh_value=5.0):
    """
    Determines a scale factor so that the 'percentile' of |y|
    ends up around arcsinh = ±target_asinh_value.
    
    - percentile: which percentile of |y| to consider (e.g., 95, 99).
    - target_asinh_value: arcsinh value we want that percentile mapped to.
      arcsinh(large_number) grows ~ log, so pick 5~10 for moderate spread.

    Returns: float (scale factor)
    """
    # Exclude zero or extremely tiny data from 'abs_y' just in case
    abs_y = np.abs(y[y != 0])  
    if len(abs_y) == 0:
        # all zeros? fallback
        return 1.0
    
    # get the chosen percentile of the absolute values
    val = np.percentile(abs_y, percentile)
    if val == 0:
        # if even the percentile is 0, fallback
        return 1.0
    
    # We want arcsinh(scale * val) ~ target_asinh_value
    # => scale * val ~ sinh(target_asinh_value)
    # => scale = sinh(target_asinh_value) / val
    desired = np.sinh(target_asinh_value)
    scale = desired / val
    return scale

def forward_transform(y, scale):
    """
    Forward transform for GP:
      arcsinh(scale * y)
    """
    return np.arcsinh(scale * y)

def inverse_transform(y_trans, scale):
    """
    Inverse transform:
      sinh(y_trans) / scale
    """
    return np.sinh(y_trans) / scale

scale_factor = determine_scale(f1_out, percentile=67, target_asinh_value=10)
print("Chosen scale factor =", scale_factor)

# 2) Transform the data
f1_out_transformed = forward_transform(f1_out, scale=scale_factor)
f1_out

In [49]:
  ( -3.60606264e-003  +  2.15924904e-054) #DISTNACE FOR THE FIRST ONE  

-0.00360606264

In [36]:
  ( - 2.15924904e-054 - 2.08909327e-091) 

-2.15924904e-54

In [None]:
0 + -0.00360606264 # SECOND NUMBER 

In [None]:
(0 + -0.00360606264 )-2.15924904e-54

In [37]:
from sklearn.preprocessing import PowerTransformer, MinMaxScaler
import numpy as np

# Input: scaled data
scaled_log_data = np.array([
    [0.618109],
    [0.21650371],
    [0.5536672],
    [0.87012798],
    [0.0],
    [1.0],
    [0.47494582],
    [0.09553347],
    [0.61916578],
    [0.20046852],
    [0.54116528]
])
# Step 1: Fit the Yeo-Johnson transformer
yeo_transformer = PowerTransformer(method='yeo-johnson')
yeo_transformed_data = yeo_transformer.fit_transform(scaled_log_data)
# Step 2: Reverse the Yeo-Johnson transformation RENOVE THIS BECASE THAT IS THE WHOLE POINT
###reversed_yeo_data = yeo_transformer.inverse_transform(yeo_transformed_data)
# 2. Reverse log transformation with mpmath applied element-wise
recovered_abs_data = np.array(
    [[power(10, float(x)) - 1e-100] for x in yeo_transformed_data]
)
# 3. Restore the signs to the recovered data
ORIGINAL_DATA_YEO_JOHNSON = recovered_abs_data * signs
ORIGINAL_DATA_YEO_JOHNSON

ValueError: operands could not be broadcast together with shapes (11,1) (10,1) 

In [33]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from mpmath import mp, log10, power
from scipy.stats import skew



# Set precision for mpmath
mp.dps = 6  # Set decimal precision

# Input array
data = np.array([
    [1.32267704e-79],
    [1.03307824e-46],
    [7.71087511e-16],
    [3.34177101e-124],
     [-3.60606264e-3], 
    [-2.15924904e-54],
    [-2.08909327e-91],
    [2.53500115e-40],
    [3.60677119e-81],
    [6.22985647e-48]
])

# Preserve the signs of the original data
signs = np.sign(data)

# Apply absolute to the data for log transformation
abs_data = np.abs(data)

# Log transformation with mpmath applied element-wise
log_transformed_data = np.array(
    [[log10(float(x) + 1e-100)] for x in abs_data]
)

# Convert to float for MinMaxScaler
log_transformed_data_float = np.array(log_transformed_data, dtype=np.float64)

# Scale the log-transformed data
scaler = MinMaxScaler()
scaled_log_data = scaler.fit_transform(log_transformed_data_float)

# Reverse procedure
# 1. Reverse the scaling
reversed_scaled_data = scaler.inverse_transform(scaled_log_data)

# 2. Reverse log transformation with mpmath applied element-wise
recovered_abs_data = np.array(
    [[power(10, float(x)) - 1e-100] for x in reversed_scaled_data]
)

# 3. Restore the signs to the recovered data
original_data_recovered = recovered_abs_data * signs

# Print results
print("Original Data:\n", data)
print("Info Data:\n", skew(data), np.var(data))
print("")
print("Scaled Log-Transformed Data:\n", scaled_log_data)
print("Info:\n", skew(scaled_log_data), np.var(scaled_log_data))




Result
array([-3.60606264e-003, -2.15924904e-054, -2.08909327e-091,
        3.34177101e-124,  3.60677119e-081,  1.32267704e-079,
        6.22985647e-048,  1.03307824e-046,  2.53500115e-040,
        7.71087511e-016])

Original Data:
 [[ 1.32267704e-079]
 [ 1.03307824e-046]
 [ 7.71087511e-016]
 [ 3.34177101e-124]
 [-3.60606264e-003]
 [-2.15924904e-054]
 [-2.08909327e-091]
 [ 2.53500115e-040]
 [ 3.60677119e-081]
 [ 6.22985647e-048]]
Info Data:
 [-2.66666667] 1.170331898724395e-06

Scaled Log-Transformed Data:
 [[0.21650371]
 [0.5536672 ]
 [0.87012798]
 [0.        ]
 [1.        ]
 [0.47494582]
 [0.09553347]
 [0.61916578]
 [0.20046852]
 [0.54116528]]
Info:
 [0.206196] 0.09717255344978551
Recovered Original Data:
 [[mpf('1.32269581e-79')]
 [mpf('1.03306971e-46')]
 [mpf('7.71088185e-16')]
 [mpf('5.84303487e-108')]
 [mpf('-0.00360606099')]
 [mpf('-2.15925935e-54')]
 [mpf('-2.08911117e-91')]
 [mpf('2.53499096e-40')]
 [mpf('3.60673023e-81')]
 [mpf('6.22987522e-48')]]


array([[ -78.87854004],
       [ -45.98587036],
       [ -15.11289597],
       [-100.        ],
       [  -2.44296694],
       [ -53.66569519],
       [ -90.68003845],
       [ -39.59602356],
       [ -80.44288635],
       [ -47.20552063]])

array([ 6.43304664e-80,  1.03307824e-46,  7.71087511e-16, -6.79372376e-80,
       -3.60606264e-03, -2.15924904e-54, -6.79372376e-80,  2.53500115e-40,
       -6.43304664e-80,  6.22985647e-48])

In [14]:
# Input array
data = np.array([
    [1.32267704e-79],
    [1.03307824e-46],
    [7.71087511e-16],
    [3.34177101e-124],
    [-2.15924904e-54],
    [-2.08909327e-91],
    [2.53500115e-40],
    [3.60677119e-81],
    [6.22985647e-48]
])
data
data.

array([[ 1.32267704e-079],
       [ 1.03307824e-046],
       [ 7.71087511e-016],
       [ 3.34177101e-124],
       [-2.15924904e-054],
       [-2.08909327e-091],
       [ 2.53500115e-040],
       [ 3.60677119e-081],
       [ 6.22985647e-048]])

In [21]:

lambda_factor = 1e50  # Amplify small values
amplified_data = data * lambda_factor
amplified_data

from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()
scaled_data = scaler.fit_transform(amplified_data.reshape(-1, 1)).flatten()
scaled_data

array([ 0.00000000e+00,  1.00000000e+00,  7.46397979e+30, -1.28032611e-33,
       -2.09011182e-08, -1.28032611e-33,  2.45383268e+06, -1.24541325e-33,
        6.03038205e-02])

In [22]:
from mpmath import mp

mp.dps = 50  # Set decimal precision to 50 digits
very_small_number = 3.34177101e-124
log_result = mp.log(very_small_number)
print("Logarithm with high precision:", log_result)


Logarithm with high precision: -284.31405062228697403645741280559599135039773529365


In [23]:
from sklearn.preprocessing import RobustScaler
import numpy as np

# Example data
data = np.array([-1e-124, -3e-123, 0, 2e-124, 5e-123])

# Convert to mpmath array
from mpmath import mp
mp.dps = 50
data_mp = [mp.mpf(x) for x in data]

# RobustScaler operates on numpy, so convert to numpy for scaling
scaler = RobustScaler()
scaled_data = scaler.fit_transform(np.array(data_mp, dtype=float).reshape(-1, 1)).flatten()

print("Scaled data (RobustScaler):", scaled_data)


Scaled data (RobustScaler): [-1.e-124 -3.e-123  0.e+000  2.e-124  5.e-123]


In [28]:
from scipy.stats import skew
variance_before = np.var(data)
skewness_before = skew(data, nan_policy='omit')


import numpy as np
from mpmath import mp

mp.dps = 50  # Set precision
epsilon = mp.mpf(1e-124)  # Small constant for stability
data_mp = [mp.mpf(x) for x in data]

log_transformed = [
    mp.sign(x) * mp.log(mp.fabs(x) + epsilon) if x != 0 else 0 for x in data_mp
]
log_transformed = np.array([float(x) for x in log_transformed])


variance_after = np.var(log_transformed)
skewness_after = skew(log_transformed, nan_policy='omit')
print("Variance Before:", variance_before)
print("Variance After:", variance_after)
print("Skewness Before:", skewness_before)
print("Skewness After:", skewness_after)


Variance Before: 6.6336e-246
Variance After: 64177.473385413236
Skewness Before: nan
Skewness After: 0.0005321609579595606


Scaled Data (TorchMinMaxScaler):
 tensor([[1.0000],
        [1.0000],
        [1.0000],
        [1.0000],
        [0.0000],
        [1.0000],
        [1.0000],
        [1.0000],
        [1.0000],
        [1.0000]])
