In [1]:
import numpy as np

Here are the formulas for various selection criteria.

**TODO:** Add the formulas for SSE, SSTO, and MSE.

Coefficient of multiple determination, `R2`:
$$
R^{2}_p = 1 - \frac{SSE_p}{SSTO}
$$

Adjusted coefficient of multiple determination, `adj_R2`:  

$$
R^2_{a,p} = 1 - \left(\frac{n-1}{n-p}\right)\frac{SSE_p}{SSTO} = 1 - \frac{MSE_p}{\frac{SSTO}{n-1}}
$$

Mallows's $C_p$, `Cp`:
$$
C_p = \frac{SSE_p}{MSE(X_{1},...,X_{P-1})} - (n-2p)
$$

The following functions will calculate different statistical values.

In [2]:
def SSTO(y):

    '''Total sum of squares: the summed squared deviations of y from its mean.

    y is passed straight to np.mean and then has that mean subtracted from
    it, so any array-like numpy can average and broadcast against a scalar
    is accepted. Returns the result of np.sum over the squared deviations.
    '''

    # Centre the observations on their own mean before squaring.
    centered = y - np.mean(y)

    return np.sum(centered**2)

In [3]:
def SSE(y, predictions):

    '''Calculates sum of squared errors between predictions and actual values.

    Parameters
    ----------
    y : array-like
        Actual (observed) values.
    predictions : array-like
        Predicted values; must be broadcastable against y.

    Returns
    -------
    The sum, over all elements, of (y - predictions)**2.
    '''

    # Convert up front so plain Python lists/tuples are accepted too
    # (list - list is not defined); the subtraction is element-wise by position.
    residuals = np.asarray(y) - np.asarray(predictions)

    # The original stored this under a misspelled name ("squared_erros") and
    # then returned the undefined "squared_errors", raising NameError.
    squared_errors = residuals**2

    return np.sum(squared_errors)

In [4]:
def adj_R2(_sse, _ssto, n, p):

    '''Adjusted R^2: 1 - ((n-1)/(n-p)) * SSE_p / SSTO.

    _sse and _ssto are the pre-computed SSE_p and SSTO terms of the formula;
    n and p are the remaining terms (presumably the number of observations
    and the number of model parameters -- confirm against the caller).
    '''

    # Degrees-of-freedom correction that penalises larger p.
    dof_ratio = (n - 1) / (n - p)
    penalised_fraction = dof_ratio * _sse / _ssto

    return 1 - penalised_fraction

In [None]:
def Cp(sse_p, sse_P, n, p, P):

    '''Mallows's C value: sse_p / (sse_P / (n - P)) - (n - 2*p).

    Both sums of squared errors must be computed beforehand: sse_p is the
    numerator term, and sse_P is divided by (n - P) to form the mean squared
    error used as the denominator.
    '''

    # Mean squared error built from sse_P and its (n - P) divisor.
    mse_P = sse_P / (n - P)
    offset = n - 2*p

    return sse_p / mse_P - offset

In [None]:
def AIC(_sse, n, p):

    '''Akaike information criterion: n*ln(SSE) - n*ln(n) + 2*p.

    _sse is the pre-computed sum of squared errors; n and p are the other
    terms of the formula (presumably observation count and parameter
    count -- confirm against the caller).
    '''

    # Fit term first, then the penalty that grows linearly with p.
    fit_term = n * np.log(_sse) - n * np.log(n)
    penalty = 2*p

    return fit_term + penalty

In [None]:
def SBC(_sse, n, p):

    '''Schwarz Bayesian criterion: n*ln(SSE) - n*ln(n) + ln(n)*p.

    _sse is the pre-computed sum of squared errors; n and p are the other
    terms of the formula (presumably observation count and parameter
    count -- confirm against the caller).
    '''

    # Same fit term as AIC; the penalty per parameter is ln(n) instead of 2.
    fit_term = n * np.log(_sse) - n * np.log(n)
    penalty = np.log(n) * p

    return fit_term + penalty