In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

Here are the formulas for various selection criteria for the best subset in a linear regression model.

Total sum of squares, `SSTO`:
$$
SSTO = \sum (Y_i - \bar{Y})^2
$$

Error sum of squares, `SSE`:
$$
SSE = \sum (Y_i - \hat{Y_i})^2
$$

Regression sum of squares, `SSR` (not used in calculations, but included for reference):  
$$
SSR = \sum (\hat{Y_i} - \bar{Y})^2
$$

Relationship between `SSTO`, `SSE`, and `SSR`:
$$
SSTO = SSE + SSR
$$

-----

#TODO: explain subscript p and invariance of `SSTO` with p.

#TODO: Add MSE

-----

Coefficient of multiple determination, `R2`:
$$
R^{2}_p = 1 - \frac{SSE_p}{SSTO}
$$

Adjusted coefficient of multiple determination, `adj_R2`:  

$$
R^2_{a,p} = 1 - \left(\frac{n-1}{n-p}\right)\frac{SSE_p}{SSTO} = 1 - \frac{MSE_p}{\frac{SSTO}{n-1}}
$$

Mallows's `Cp`:
$$
C_p = \frac{SSE_p}{MSE(X_{1},...,X_{P-1})} - (n-2p)
$$

The following functions will calculate different statistical values.

In [None]:
def SSTO(y):

    '''Returns the total sum of squares of y.

    This is the sum of squared deviations of every value in y from the
    overall mean of y.
    '''

    deviations = y - np.mean(y)

    return np.sum(deviations**2)

In [None]:
def SSE(y, predictions):

    '''Calculates sum of squared errors between predictions and actual values.

    Parameters
    ----------
    y : observed values.
    predictions : predicted values; must be compatible with y for
        element-wise subtraction.

    Returns
    -------
    The sum over all elements of (y - predictions)**2.
    '''

    # The original assigned to a misspelled name (`squared_erros`) and then
    # returned `squared_errors`, which raised NameError on every call.
    squared_errors = (y - predictions)**2

    return np.sum(squared_errors)

In [None]:
def adj_R2(_sse, _ssto, n, p):

    '''Returns the adjusted coefficient of multiple determination.

    Computed as 1 - ((n-1)/(n-p)) * _sse / _ssto, where _sse is the error
    sum of squares, _ssto the total sum of squares, and n and p are the
    counts used in the degrees-of-freedom ratio.
    '''

    dof_ratio = (n - 1) / (n - p)
    penalised_sse = dof_ratio * _sse

    return 1 - penalised_sse / _ssto

In [None]:
def Cp(sse_p, sse_P, n, p, P):

    '''Returns Mallows's Cp for a subset model.

    Both sums of squares must be computed beforehand: sse_p for the subset
    model (used with p) and sse_P for the reference model (used with P).
    The result is sse_p / (sse_P / (n - P)) - (n - 2*p).
    '''

    # Mean squared error of the reference model: sse_P over (n - P).
    mse_reference = sse_P / (n - P)
    offset = n - 2*p

    return sse_p / mse_reference - offset

In [None]:
def AIC(_sse, n, p):

    '''Returns the Akaike information criterion.

    Computed as n*ln(_sse) - n*ln(n) + 2*p.
    '''

    fit_term = n * np.log(_sse)
    size_term = n * np.log(n)
    penalty = 2*p

    return fit_term - size_term + penalty

In [None]:
def SBC(_sse, n, p):

    '''Returns the Schwarz Bayesian criterion.

    Computed as n*ln(_sse) - n*ln(n) + ln(n)*p.
    '''

    fit_term = n * np.log(_sse)
    size_term = n * np.log(n)
    penalty = np.log(n) * p

    return fit_term - size_term + penalty

In [None]:
def PRESS(y, X):

    '''Calculates PRESS criterion.

    For each row i, a LinearRegression is fitted on the data with row i
    removed and then used to predict row i. The result is the SSE between
    y and these leave-one-out predictions.

    Parameters
    ----------
    y : array-like of responses; its first axis is indexed per observation.
    X : array-like of predictors with one row per observation
        (assumed 2-D, as required by LinearRegression.fit).

    Returns
    -------
    SSE(y, pred), where pred[i] is the prediction for row i from the model
    fitted without row i.
    '''

    # Coerce to ndarrays so that X[i] below always selects row i.
    y = np.asarray(y)
    X = np.asarray(X)

    lr = LinearRegression()
    pred = np.zeros(y.shape)

    for i in range(X.shape[0]):
        # Refit on every observation except the i-th.
        lr.fit(np.delete(X, i, 0), np.delete(y, i, 0))
        # predict() returns a batch containing a single prediction; take its
        # only entry rather than assigning a size-1 array into pred[i], which
        # NumPy >= 1.25 deprecates when pred[i] is a scalar slot.
        pred[i] = lr.predict(X[i].reshape(1, -1))[0]

    return SSE(y, pred)