# Skew-Confidence-Interval

An algorithm to estimate confidence intervals (CI) for highly skewed distributions.

Fit a shifted log-normal distribution $X \sim \exp{(N(\mu, \sigma^2))}+b$ whose unique parameters $\mu, \sigma, b$ match the given $m, v, s$ (mean, variance and skewness).

### ps:

Only works when $s > 0$.

Use a normal distribution when $s = 0$.

When $s < 0$, pass $-s$ and $-m$ as inputs and use $-X$ as the final result.

In [1]:
import numpy as np
import pandas as pd
import scipy.stats
import scipy

In [2]:
def magic_cubic(x, b):
    """Evaluate the cubic ``(x - 1) * (x + 2)**2 - b``.

    This is the cubic solved to obtain a log-normal parameter from the
    skewness: ``x`` is the unknown and ``b`` the constant term.
    """
    squared_shift = (x + 2) ** 2
    return (x - 1) * squared_shift - b

def magic_cubic_deri(x):
    """Return ``3 * x * (x + 2)``, the derivative in ``x`` of ``(x - 1) * (x + 2)**2``."""
    triple_x = 3 * x
    return triple_x * (x + 2)

class LogNormal():
    """Shifted log-normal distribution matched to a mean, variance and skewness.

    The fitted variable is ``X = exp(Z) + bias`` with ``Z ~ N(mu, sigma**2)``.
    Only strictly positive skewness is supported.
    """
    # Instance attributes set by __init__:
    #   mu    -- mean of the underlying normal variable Z
    #   sigma -- standard deviation of the underlying normal variable Z
    #   bias  -- constant shift added to exp(Z)

    def __init__(self, mean, var, skew, newton_threshold=0.000001):
        """Fit ``mu``, ``sigma`` and ``bias`` to the requested moments.

        Args:
            mean: target mean of the distribution.
            var: target variance; must be positive.
            skew: target skewness; must be positive.
            newton_threshold: absolute tolerance on the residual of the
                cubic ``(w - 1) * (w + 2)**2 - skew**2`` at which the Newton
                iteration stops.

        Raises:
            ValueError: if ``skew`` or ``var`` is not positive.
        """
        if skew <= 0:
            raise ValueError("skewness error, expect positive value, got {}".format(skew))
        if var <= 0:
            raise ValueError("variance error, expect positive value, got {}".format(var))

        # w = exp(sigma**2) is the root of (w - 1) * (w + 2)**2 = skew**2.
        skew_sq = skew**2
        pow_sigma2 = 1 + skew/9
        for _ in range(100):
            residual = (pow_sigma2 - 1) * (pow_sigma2 + 2) ** 2 - skew_sq
            # Compare the magnitude: when the starting point lies left of the
            # root the residual is negative, and a signed comparison would
            # stop the iteration before it has taken a single step.
            if abs(residual) < newton_threshold:
                break

            # Newton step; the slope of the cubic is 3 * w * (w + 2).
            pow_sigma2 -= residual / (3 * pow_sigma2 * (pow_sigma2 + 2))

        # var = exp(2*mu) * w * (w - 1)  =>  exp(mu)
        pow_mu = (var/(pow_sigma2*(pow_sigma2 - 1)))**0.5
        # mean = exp(mu) * sqrt(w) + bias  =>  bias
        self.bias = mean - pow_mu*(pow_sigma2**0.5)
        self.mu = np.log(pow_mu)
        self.sigma = np.sqrt(np.log(pow_sigma2))

    def ppf(self, percent):
        """Percent point function (inverse CDF).

        Args:
            percent: probability in [0, 1]; a scalar or an array-like.

        Returns:
            The quantile(s) of the fitted distribution at ``percent``.

        Raises:
            ValueError: if any value of ``percent`` is outside [0, 1].
        """
        levels = np.asarray(percent)
        # np.any keeps the range check valid for array input as well as scalars.
        if np.any(levels > 1) or np.any(levels < 0):
            raise ValueError("percent error, expect 0~1, got {}".format(percent))

        return np.exp(self.mu + self.sigma*scipy.stats.norm.ppf(percent))+self.bias

## Test

Consider the following scenario: toss an unfair coin $n$ times and count the hits (a binomial distribution).

Compare the tail-probability error of the log-normal estimate with:
- the error of the estimate from a normal distribution
- the significance level itself

In [3]:
# Experiment: n tosses of a coin that hits with probability p.
n = 100
p = 0.2

hits = np.arange(n+1) # possible hit counts 0,1,...,n

# Binomial coefficients C(n, k) built with Pascal's rule: start from the
# row [1,1,0,...,0] and add the row shifted by one to itself n-1 times.
combine = np.zeros(n+1, dtype=np.float64)
combine[:2] = 1.0
for t in range(n-1):
    combine[1:] += combine[:-1]

# Probability of each hit count.
prob = combine * np.power(p, hits) * np.power(1-p, n - hits)

# Mean, variance and skewness of the hit count.
mean = (hits * prob).sum()
deviation = hits - mean
var = ((deviation**2)*prob).sum()
skew = ((deviation**3)*prob).sum()/(var**1.5)

log_normal = LogNormal(mean, var, skew)

for SL in [0.3, 0.2, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001]: #Significance Level
    # Upper bound from the normal approximation, and how far the true
    # tail probability above it is from SL.
    n_CI = mean + np.sqrt(var)*scipy.stats.norm.ppf(1-SL)
    n_tail = (prob*(hits>n_CI)).sum()
    n_error = np.abs(n_tail - SL)

    # Same measurement for the fitted log-normal.
    l_CI = log_normal.ppf(1-SL)
    l_tail = (prob*(hits>l_CI)).sum()
    l_error = np.abs(l_tail - SL)

    report = (SL, l_error/SL, l_error/n_error)
    print("sl: %4f, error to sl: %4f, error to normal: %4f"%report)

sl: 0.300000, error to sl: 0.129776, error to normal: 1.000000
sl: 0.200000, error to sl: 0.054564, error to normal: 1.000000
sl: 0.100000, error to sl: 0.125246, error to normal: 1.000000
sl: 0.050000, error to sl: 0.116654, error to normal: 1.000000
sl: 0.010000, error to sl: 0.124898, error to normal: 1.000000
sl: 0.001000, error to sl: 0.263116, error to normal: 0.478011
sl: 0.000100, error to sl: 0.381483, error to normal: 0.161585
sl: 0.000010, error to sl: 0.031621, error to normal: 0.021122


## Table

Table of upper quantiles (at $1 - \text{significance level}$) by skewness, for a standardized distribution (mean 0, variance 1).

In [20]:
# Columns: significance levels. Rows: skewness 0.05, 0.10, ..., 1.00.
sl = [0.3, 0.2, 0.1, 0.05, 1e-02, 1e-03, 1e-04, 1e-05, 1e-06, 1e-07, 1e-08]
skew = np.arange(1, 21)/20

# First column holds the skewness; the rest hold the quantile at 1 - sl.
table = np.zeros((len(skew), 1+len(sl)))
table[:,0] = skew
for i, skew_i in enumerate(skew):
    # Standardized fit: mean 0, variance 1.
    log_normal = LogNormal(0., 1., skew_i)
    for j, level in enumerate(sl):
        table[i,j+1] = log_normal.ppf(1-level)

pd.DataFrame(table, columns=["skew"]+sl)

Unnamed: 0,skew,0.3,0.2,0.1,0.05,0.01,0.001,0.0001,1e-05,1e-06,1e-07,1e-08
0,0.05,0.518258,0.839043,1.286733,1.658925,2.363203,3.162191,3.827545,4.410847,4.937396,5.421764,5.873233
1,0.1,0.511921,0.836173,1.291564,1.672698,2.400203,3.235535,3.939261,4.562262,5.129509,5.65539,6.149067
2,0.15,0.505401,0.833016,1.296035,1.686147,2.437294,3.310198,4.054126,4.719173,5.329927,5.900552,6.440068
3,0.2,0.49871,0.829577,1.300138,1.699248,2.474423,3.386109,4.172092,4.8816,5.538789,6.157568,6.746787
4,0.25,0.491861,0.825863,1.303865,1.71198,2.511536,3.46319,4.293097,5.049546,5.756216,6.426733,7.069755
5,0.3,0.484868,0.821884,1.307213,1.724323,2.548578,3.54136,4.417071,5.222998,5.982307,6.708317,7.40948
6,0.35,0.477744,0.817648,1.310179,1.736259,2.585496,3.620529,4.543927,5.401917,6.217129,7.002555,7.766435
7,0.4,0.470505,0.813167,1.312761,1.747773,2.622239,3.700614,4.673578,5.586265,6.460746,7.30968,8.141093
8,0.45,0.463165,0.808452,1.31496,1.758849,2.658756,3.781519,4.805919,5.775971,6.713177,7.62987,8.533866
9,0.5,0.455737,0.803514,1.316779,1.769478,2.694997,3.863149,4.940839,5.97095,6.974424,7.963277,8.945138
