# Risk Bound Engine

---

## Our Risk Bound
This function computes the risk-bound in our main Theorem.

In [None]:
def get_risk_bound__scalar(N,k,F,distortion_known=distortion_given,Risk_or_Concentration=True,distortion_type=3,Lip=Lip_prob):
    """Evaluate the risk bound for a single scalar triple (N, k, F).

    The bound is assembled from a rate in N, a constant C_F depending on F,
    and a distortion term `dist`; each is defined case-by-case in F.

    Parameters
    ----------
    N : number
        Enters through the rate (e.g. 1/sqrt(N)); presumably the sample
        size -- TODO confirm.
    k : number
        Enters through the worst-case distortion; presumably the number of
        points being embedded -- TODO confirm.
    F : number
        Dimension of the Euclidean space embedded into. F == 1, F == 2 and
        F > 2 are the handled cases.
    distortion_known : bool or number
        If it compares equal to False the worst-case distortion for the
        given (k, F) is used; otherwise the value itself is used as `dist`.
    Risk_or_Concentration : bool
        If it compares equal to True the risk bound is computed; otherwise
        'TBD' is printed and the placeholder value 1 is returned.
    distortion_type :
        Forwarded to `dist_function` when the module-level flag
        `distortion_given` is True.
    Lip :
        Not read by the body. NOTE(review): the bound uses the module-level
        `L` instead -- confirm whether `Lip` was meant here.

    Returns
    -------
    number
        The value of the bound (or 1 on the concentration branch).

    Notes
    -----
    Reads the module-level names `L`, `diam`, `delta`, `d`,
    `distortion_given` and `dist_function` at call time.
    Inputs are used as given: the rounding/clamping failsafe for N, k and F
    is not applied.
    """
    #-----------------------------------------#
    ## Case-By-Case Definition of Quantities ##
    #-----------------------------------------#
    ## -------------------- ##
    ## Low Dimensional Case ##
    ## -------------------- ##
    if F<=2:
        # Kept as an equality test: the argument may be a flag or a numeric
        # distortion value (it is assigned to `dist` in the else branch).
        if distortion_known == False:
            # Worst-case distortion for embedding into at-most
            # two-dimensional Euclidean space
            dist = 12*k
        else:
            # Custom distortion
            dist = distortion_known

        # Remaining constants for the concentration theorem.
        # NOTE(review): for any other F <= 2 (e.g. 0 or a fractional value)
        # `rate` and `C_F` stay unset and the risk branch below fails.
        if F == 1:
            rate = 1/np.sqrt(N)
            C_F = 1/(np.sqrt(8)-2)
        elif F == 2:
            rate = (32+np.log2(N))/np.sqrt(N)
            C_F = np.sqrt(2)/4

    ## ------------------------ ##
    ## Non-Low Dimensional Case ##
    ## ------------------------ ##
    if F > 2:
        rate = 1/(N**(1/F))
        # C_F is the product of four factors
        C_F__1 = 2
        C_F__2 = ((F/2)-1)/(2*(1-2**(1-(F/2))))
        C_F__2 = C_F__2**(2/F)
        C_F__3 = (1+1/(2*((F/2)-1)))
        C_F__4 = np.sqrt(F)
        C_F = C_F__1*C_F__2*C_F__3*C_F__4

        if distortion_known == False:
            # Worst-case distortion for embedding into F-dimensional
            # Euclidean space.

            # Shared by every sub-case with F > 2
            eps_mk = (2**(3/2))*(math.log(k)**.5)/F
            tilde_eps_mk_1 = (np.abs(1+eps_mk))**.5
            tilde_eps_mk_2 = (np.abs(1-eps_mk))**.5
            tilde_eps_mk = tilde_eps_mk_1/tilde_eps_mk_2

            ## ------------------------- ##
            ## Moderate-Dimensional Case ##
            ## ------------------------- ##
            if F <= 8*np.round(math.log(k),0):
                dist__1 = 720*tilde_eps_mk
                dist__2 = (math.log(k)/k)**.5
                dist__3 = k**(2/F)
                dist__4 = (k/F)**.5
                dist__5 = math.floor(2*math.log2(k)+1)

                # NOTE(review): tilde_eps_mk appears here and inside
                # dist__1, i.e. squared overall -- confirm this is intended.
                dist = tilde_eps_mk*dist__1*dist__2*dist__3*dist__4*dist__5

            else:
                # Shared by the two remaining sub-cases
                dist__1 = 48*math.floor(2*math.log2(k)+1)

                # 2**k raises OverflowError for a Python float k >= 1024.
                # Such a 2**k exceeds every finite float, so F lies below it.
                try:
                    F_below_2_pow_k = F < 2**k
                except OverflowError:
                    F_below_2_pow_k = True

                if F_below_2_pow_k:
                    ## High-Dimensional Case ##
                    dist = dist__1*tilde_eps_mk
                else:
                    ## Very High-Dimensional Case ##
                    dist = dist__1

        else:
            # Custom distortion
            dist = distortion_known

    # Custom distortion supplied by dist_function.
    # NOTE(review): this tests the module-level flag `distortion_given`, not
    # the `distortion_known` argument, and overrides whatever `dist` was
    # chosen above -- confirm this is intended.
    if distortion_given == True:
        dist = dist_function(F=F,
                             distortion_type = distortion_type,
                             k=k,
                             emb_dist_rate=1,
                             C=1,
                             d=d,
                             verbose=False)

    #-----------------#
    ## Compute Bound ##
    #-----------------#

    if Risk_or_Concentration == True: ### Compute Risk-Bound:
        #### Get dependencies
        bound__1 = L*diam
        bound__2 = C_F*(2*dist-1)*rate
        # NOTE(review): `dist` is used as an exponent here -- confirm
        # against the statement of the theorem.
        bound__3 = (math.log(2/delta)**.5)**dist
        bound__4 = 1/np.sqrt(N)
        #### Compute bound
        bound = bound__1*bound__2*(bound__3+bound__4)
    else: ### Compute Concentration Inequality:
        print('TBD')
        bound = 1 # TEMP placeholder

    return bound

# Element-wise version of the scalar routine: np.vectorize applies
# get_risk_bound__scalar over array-valued arguments.
get_risk_bound = np.vectorize(get_risk_bound__scalar)

## Benchmarks 

### Trivial VC/Occam Bound (Binary Classification)

From the (Quantitative) Fundamental Theorem of Statistical Learning ([Theorem 6.8 in "Understanding Machine Learning" (with extra details in Section 28.1)](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=kezPqwoAAAAJ&citation_for_view=kezPqwoAAAAJ:XiVPGOgt02cC)):
$$
\begin{aligned}
    \mathcal{R}(f) - \hat{\mathcal{R}}(f)
\le &
    n^{-1/2}
    (
        128 \operatorname{VC-dim}\log(n+1) + \log(8/\delta)
    )^{1/2}
\\ 
\le & 
    n^{-1/2}
    (
        128 \#\mathcal{X}\log(n+1) + \log(8/\delta)
    )^{1/2}
\end{aligned}
$$
where we use the fact that $\operatorname{VC-dim}(2^{\mathcal{X}})\le \log_2(\#2^{\mathcal{X}})=\#\mathcal{X}$.

In [None]:
def trivial_VCBound(N_in=N,k_in=k,delta = delta):
    """Trivial VC/Occam generalization bound for binary classification.

    Evaluates  N_in^{-1/2} * (128 * k_in * log(N_in + 1) + log(8/delta))^{1/2},
    i.e. the second line of the bound stated in the accompanying text, with
    k_in in the role of #X and N_in in the role of n.

    Parameters
    ----------
    N_in : number or array
        Plays the role of n in the formula.
    k_in : number or array
        Plays the role of #X (the upper bound on the VC dimension).
    delta : number
        Plays the role of delta in the log(8/delta) term.

    Returns
    -------
    number or array
        The value of the bound.
    """
    scale = 1/np.sqrt(N_in)
    complexity_term = 128 * k_in *np.log(N_in+1)
    confidence_term = np.log(8/delta)
    # The two terms are added under the square root, as in the stated bound
    # (they were previously multiplied).
    return scale*np.sqrt(complexity_term+confidence_term)

## Hoeffding Bound


From [Corollary 4.6 of "Understanding Machine Learning"](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=kezPqwoAAAAJ&citation_for_view=kezPqwoAAAAJ:XiVPGOgt02cC) we have the following Hoeffding-type bound
$$
\begin{aligned}
    (2n)^{-1/2}
    \log\big(
    2 \times 2^k/\delta
    \big)^{1/2}
    = 
    (2n)^{-1/2}
     \big(
     \log(2/\delta) + k\log(2)
     \big)^{1/2}
\end{aligned}
$$


In [1]:
def Hoeffding_Bound(N_in=N,k_in=k,delta = delta):
    """Hoeffding-type bound (2*N_in)^{-1/2} * (log(2/delta) + k_in*log(2))^{1/2}.

    Parameters
    ----------
    N_in : number or array
        Plays the role of n in the formula.
    k_in : number or array
        Plays the role of k in the formula (the k*log(2) term).
    delta : number
        Plays the role of delta in the log(2/delta) term.

    Returns
    -------
    number or array
        The value of the bound.
    """
    prefactor = 1/np.sqrt(2*N_in)
    # log(2 * 2^k / delta), split into its two summands
    log_term = np.log(2/delta) + k_in*np.log(2)
    return prefactor*np.sqrt(log_term)

NameError: name 'N' is not defined

In [None]:
# Status message emitted when this cell runs.
print('Risk Bounds Loaded')

---

# Fin

---