# Import

In [1]:
import numpy as np

# Define

In [113]:
def Normalizer(sample, std_threshold=10**-6):
    """Standardize ``sample`` to zero mean and unit standard deviation.

    Parameters
    ----------
    sample : array-like exposing ``.mean()`` and ``.std()`` (e.g. numpy.ndarray)
        Data to standardize.
    std_threshold : float
        Lower bound for the divisor. When the measured standard deviation is
        at or below this value, the threshold is used instead and a warning
        is printed.

    Returns
    -------
    tuple
        ``(mean, std, standardized)`` where ``std`` is the divisor actually
        used (possibly the threshold).
    """
    center, spread = sample.mean(), sample.std()

    # Guard against dividing by (nearly) zero for almost-constant samples.
    too_flat = spread <= std_threshold
    if too_flat:
        spread = std_threshold
        print("WARNING : sample standard deviation smaller than threshold, set to threshold : ", std_threshold)

    standardized = (sample - center) / spread
    return center, spread, standardized

def Skew(sample, decimals=6):
    """Return the skewness of ``sample``, rounded to ``decimals`` places.

    Skewness is taken as the mean of the cubed standardized values produced
    by ``Normalizer`` (called with its default threshold).
    """
    _, _, standardized = Normalizer(sample)
    third_moment = (standardized ** 3).mean()
    return np.round(third_moment, decimals)

def Kurtosis(sample, decimals=6):
    """Return the kurtosis of ``sample``, rounded to ``decimals`` places.

    Kurtosis is taken as the mean of the fourth power of the standardized
    values produced by ``Normalizer``; nothing is subtracted, so this is the
    non-excess form.
    """
    _, _, standardized = Normalizer(sample)
    fourth_moment = (standardized ** 4).mean()
    return np.round(fourth_moment, decimals)

def SkewlessNormalizer(sample, iterate = 1000, skew_threshold=10**-3):
    """Log-transform ``sample`` with a shift tuned to bring its skewness near zero.

    The transform is ``np.log(sample - sample.min() + bias)``. ``bias`` starts
    at 1 and is nudged up or down by a step that halves every time the search
    direction flips, until the absolute skewness of the transformed data (as
    reported by ``Skew``) falls below ``skew_threshold`` or ``iterate`` steps
    have been taken.

    Parameters
    ----------
    sample : numpy.ndarray
        Data to transform. The array is never modified.
    iterate : int
        Maximum number of bias adjustments.
    skew_threshold : float
        Bound on the absolute skewness at which the search stops.

    Returns
    -------
    numpy.ndarray
        ``sample`` itself (untransformed) when its skewness is already within
        the threshold; otherwise the log-transformed data. For negatively
        skewed input the transform is applied to the mirrored data
        (``sample * -1``), so the ordering of values is reversed in the
        output.
    """
    initial_skew = Skew(sample)
    if np.abs(initial_skew) < skew_threshold:
        return sample
    if initial_skew < 0:
        # Mirror into a new array: an in-place ``sample *= -1`` would silently
        # flip the caller's data as a side effect.
        sample = sample * -1

    # Shift so the minimum maps to 0; adding a positive bias keeps log defined.
    shifted = sample - sample.min()
    bias = 1.
    move = 1.
    sign = 1
    for _ in range(iterate):
        pre_sign = sign
        trans_skew = Skew(np.log(shifted + bias))
        if np.abs(trans_skew) < skew_threshold:
            break
        # Still right-skewed -> shrink the bias; left-skewed -> grow it.
        sign = -1 if trans_skew > 0 else 1

        # Direction flipped: we overshot, so halve the step size.
        if sign != pre_sign:
            move *= 0.5

        candidate = bias + sign*move
        if candidate > 0:
            bias = candidate
        else:
            # Never let the bias reach zero or below; halve it instead.
            bias = bias/2

    return np.log(shifted + bias)

In [73]:
# Smoke check: standardize 10000 draws from a normal with mean 2 and std 5.
Normalizer(np.random.normal(loc=2, scale=5, size=10000))

(1.954290831135894,
 4.9994317225792395,
 array([ 0.5630576 ,  0.93481561,  0.84229805, ...,  0.55520729,
        -0.68756425, -1.79996283]))

In [117]:
# Constant sample (-0.5) with a single low and a single high value at the ends.
foo = np.full(10000, -0.5)
foo[0], foo[-1] = -1, 1
print(Skew(foo), Kurtosis(foo))
SkewlessNormalizer(foo)

82.205178 8198.576113


array([-1.38629436, -0.28768207, -0.28768207, ..., -0.28768207,
       -0.28768207,  0.81093022])

0.001436 2.995401


1 3
