# Import

In [1]:
import numpy as np

# Define

In [12]:
def Normalizer(sample, std_threshold=10**-6):
    """Centre `sample` on its mean and scale it by its standard deviation.

    Parameters
    ----------
    sample : array-like exposing ``.mean()`` and ``.std()``
        Values to standardize (presumably a NumPy array -- confirm at callers).
    std_threshold : float
        Floor for the divisor. When the measured standard deviation is less
        than or equal to this value, the threshold is used instead and a
        warning is printed.

    Returns
    -------
    tuple
        ``(mean, divisor, standardized)`` where ``divisor`` is the standard
        deviation actually used (the measured one, or the threshold) and
        ``standardized`` is ``(sample - mean) / divisor``.
    """
    centre = sample.mean()
    spread = sample.std()

    # A (near-)constant sample would divide by ~0; fall back to the floor.
    degenerate = spread <= std_threshold
    if degenerate:
        print("WARNING : sample standard deviation smaller than threshold, set to threshold : ", std_threshold)
    scale = std_threshold if degenerate else spread

    return centre, scale, (sample - centre) / scale

def Skew(sample, decimals=6):
    """Return the skewness of `sample`, rounded to `decimals` places.

    The value is the mean of the cubed standardized sample, where the
    standardization is delegated to ``Normalizer`` with its default
    threshold (so a near-constant sample triggers its printed warning).
    """
    _, _, standardized = Normalizer(sample)
    third_moment = (standardized ** 3).mean()
    return np.round(third_moment, decimals)

def Kurtosis(sample, decimals=6):
    """Return the kurtosis of `sample`, rounded to `decimals` places.

    The value is the mean of the fourth power of the standardized sample
    (as produced by ``Normalizer`` with its default threshold). Nothing is
    subtracted, so this is the plain fourth standardized moment rather than
    an excess kurtosis.
    """
    _, _, standardized = Normalizer(sample)
    fourth_moment = (standardized ** 4).mean()
    return np.round(fourth_moment, decimals)

def SkewlessNormalizer(sample, iterate = 1000, skew_threshold=10**-3):
    """Log-transform `sample` with a shift chosen to drive its skewness to ~0.

    The transform is ``log(sample - sample.min() + bias)``; ``bias`` is
    searched iteratively until ``|Skew(transformed)| < skew_threshold`` or
    `iterate` steps have been taken.

    Parameters
    ----------
    sample : array-like exposing ``.min()`` and accepted by ``Skew``
        Input values. The caller's array is never modified.
    iterate : int
        Maximum number of search steps for ``bias``.
    skew_threshold : float
        Absolute skewness below which a sample is considered skew-free.

    Returns
    -------
    array-like
        * `sample` itself (same object) when its skewness is already below
          the threshold;
        * otherwise the log-transformed values for the last ``bias`` tried.

    Notes
    -----
    A negatively skewed input is negated before the search, and the returned
    values are the transform of that negated sample -- they are not flipped
    back to the original orientation.
    NOTE(review): confirm whether callers expect the orientation restored.
    """
    initial_skew = Skew(sample)
    if np.abs(initial_skew) < skew_threshold:
        return sample
    if initial_skew < 0:
        # Negate into a NEW array: an in-place `sample *= -1` would silently
        # flip the caller's data as a side effect.
        sample = sample * -1

    # Shift so the smallest value maps to `bias` (> 0), keeping log() defined.
    shifted = sample - sample.min()
    bias = 1.
    move = 1.
    sign = 1
    for _ in range(iterate):
        pre_sign = sign
        trans_skew = Skew(np.log(shifted + bias))
        if np.abs(trans_skew) < skew_threshold:
            break
        # Positive skew after the transform -> decrease bias; negative -> increase.
        sign = -1 if trans_skew > 0 else 1

        # The search direction flipped: halve the step size.
        if sign*pre_sign < 1:
            move *= 0.5

        # bias must stay strictly positive; if the step would break that,
        # halve bias instead of stepping.
        if bias + sign*move > 0:
            bias += sign*move
        else:
            bias = bias/2

    return np.log(shifted + bias)

def RandomBinarySkew(mean, std, skewness, size):
    """Draw random values from a two-point distribution with a given skewness.

    Each draw is either ``v`` (with probability ``1 / (v**2 + 1)``) or
    ``-1 / v``, where ``v = (skewness + sqrt(skewness**2 + 4)) / 2``. The
    two-point variable is then scaled by `std` and shifted by `mean`.

    Parameters
    ----------
    mean, std : scalar
        Shift and scale applied to the two-point variable.
    skewness : scalar
        Skewness parameter that determines ``v``.
    size : int or tuple of int
        Shape passed to ``np.random.random``.

    Returns
    -------
    numpy.ndarray
        Array of shape `size` holding the scaled and shifted draws.
    """
    upper = (skewness + np.sqrt(skewness**2+4))/2
    upper_prob = 1/((upper**2)+1)

    draws = np.random.random(size)
    picks_upper = draws < upper_prob

    # Boolean masks act as 0/1 selectors between the two support points.
    standardized = upper*picks_upper - (1/upper)*(~picks_upper)
    return std*standardized + mean

def RandomBernoulliSkew(mean, std, skewness, order, size):
    """Draw random values built from a sum of `order` two-point variables.

    ``order`` independent arrays are drawn with
    ``RandomBinarySkew(0, 1, skewness * sqrt(order), size)``, summed, and
    divided by ``sqrt(order)``; the result is then scaled by `std` and
    shifted by `mean`. Each component is requested with skewness
    ``skewness * sqrt(order)`` because the skewness of a normalized sum of
    ``order`` independent, identically distributed terms is the component
    skewness divided by ``sqrt(order)``.

    Parameters
    ----------
    mean, std : scalar
        Shift and scale applied to the normalized sum.
    skewness : scalar
        Skewness parameter for the normalized sum.
    order : int
        Number of two-point variables summed; values below 1 are treated as 1.
    size : int or tuple of int
        Shape of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of shape `size`.
    """
    order = max(1, order)
    order_sqrt = np.sqrt(order)
    component_skew = skewness*order_sqrt

    # Components are drawn standardized (mean 0, std 1); the requested
    # mean/std are applied once, after the sum has been normalized.
    output = RandomBinarySkew(0, 1, component_skew, size)
    for _ in range(1, order):
        output += RandomBinarySkew(0, 1, component_skew, size)

    output /= order_sqrt
    return std*output + mean

In [73]:
# Sanity check: standardize 10000 normal draws (loc=2, scale=5); the returned
# mean and std should land close to those values.
Normalizer(np.random.normal(loc=2, scale=5, size=10000))

(1.954290831135894,
 4.9994317225792395,
 array([ 0.5630576 ,  0.93481561,  0.84229805, ...,  0.55520729,
        -0.68756425, -1.79996283]))

In [117]:
# Constant sample (-0.5) with one low (-1) and one high (+1) outlier.
foo = np.full(10000, -0.5)
foo[0], foo[-1] = -1, 1
print(Skew(foo), Kurtosis(foo))
SkewlessNormalizer(foo)

82.205178 8198.576113


array([-1.38629436, -0.28768207, -0.28768207, ..., -0.28768207,
       -0.28768207,  0.81093022])

In [11]:
# Scratch cell: a 10x10 array of uniform draws from [0, 1).
np.random.random(size=(10, 10))

array([[0.5349071 , 0.84482905, 0.66142902, 0.46638823, 0.62477758,
        0.31586671, 0.34994047, 0.44400072, 0.17521926, 0.091235  ],
       [0.86639377, 0.64495378, 0.54553912, 0.3107752 , 0.96631722,
        0.91896664, 0.99104516, 0.34451205, 0.23700286, 0.31354764],
       [0.84922486, 0.073862  , 0.04530918, 0.21064867, 0.67359792,
        0.72411072, 0.25085029, 0.91192464, 0.52586246, 0.26866351],
       [0.30566416, 0.85890723, 0.52574904, 0.1008218 , 0.41429895,
        0.53877428, 0.2876384 , 0.83431644, 0.58154477, 0.08083534],
       [0.13486072, 0.59335059, 0.68874091, 0.39641636, 0.79782794,
        0.85730318, 0.37172342, 0.99955705, 0.02568799, 0.81421497],
       [0.97801969, 0.11692506, 0.81878713, 0.29161895, 0.75326591,
        0.22142731, 0.10117083, 0.05682239, 0.78710243, 0.51488978],
       [0.34758949, 0.63919616, 0.39111931, 0.05394447, 0.43311921,
        0.39740134, 0.30644947, 0.10263876, 0.82492296, 0.79957354],
       [0.79249121, 0.2945641 , 0.8280379