# How to compute evaluation metrics in Python
This template shows two ways to compute the metrics used for performance evaluation and feature selection: by hand with NumPy and scikit-learn, and with the statsmodels library.

## Computation by hand

In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [3]:
# SST, SSE and SSR have to be calculated manually.
# This cell fits a linear regression on a tiny example data set and derives
# the sums of squares, R-squared and adjusted R-squared from the fitted values.

# generate some example data: after the transpose, each row of X is one
# observation and each column is one feature
X = np.array([
    [1,3,5,6,7],
    [4.6, 3.7, 3.4, 3.0, 3.1]
]).transpose()
Y = np.array([2.1, 3.5, 4.4, 5.6, 5.9])

model = LinearRegression() # create an instance of the model

model.fit(X,Y) # fit the model

# compute the fitted values and the mean of Y once; every metric below reuses them
Y_pred = model.predict(X)
Y_mean = np.mean(Y)

# calculate evaluation metrics
SST = np.sum(np.square(Y - Y_mean))  # total variation of Y around its mean
print("Sum of Square Total {}".format(SST))

SSE = np.sum(np.square(Y_pred - Y_mean))  # variation explained by the model
print("Sum of Square Explained {}".format(SSE))

SSR = np.sum(np.square(Y - Y_pred))  # variation left in the residuals
print("Sum of Square Residual {}".format(SSR))
print("\n")

# sanity check: for least squares with an intercept (the LinearRegression
# default), the total sum of squares splits into explained + residual
assert np.isclose(SST, SSE + SSR), "SST should equal SSE + SSR"

# calculate R square and adjusted R-square
R_2 = 1 - SSR/SST
print("R square {}".format(R_2))
R_2_alt = model.score(X,Y) # alternative method to calculate R square
print("R square {}".format(R_2_alt))
n = X.shape[0]  # number of observations (rows of X)
p = X.shape[1]  # number of features (columns of X)
# the adjustment penalises extra features; it is only defined when n > p + 1
R_2_adj = 1 - (n-1)/(n-p-1)*(1-R_2)
print("R square adjusted {}".format(R_2_adj))


Sum of Square Total 9.74
Sum of Square Explained 9.61235365853658
Sum of Square Residual 0.1276463414634147


R square 0.9868946261331196
R square 0.9868946261331196
R square adjusted 0.9737892522662392


## Computation with statsmodels

In [4]:
# Same regression, this time through the statsmodels library
# (useful mainly for linear models).
import statsmodels.api as sm

# statsmodels does not include the intercept (coefficient beta_0) automatically,
# so a constant variable equal to one has to be added to X by hand.
X2 = sm.add_constant(X)
est = sm.OLS(Y, X2)
est2 = est.fit()

# Banner framed by blank lines, then the full regression report.
print(
    "\n",
    "-----------------------------------------------------------------------------------------",
    "------------------------Results from statsmodels-----------------------------------------",
    "-----------------------------------------------------------------------------------------",
    "\n",
    sep="\n",
)
print(est2.summary())



-----------------------------------------------------------------------------------------
------------------------Results from statsmodels-----------------------------------------
-----------------------------------------------------------------------------------------


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.987
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     75.30
Date:                Fri, 05 Mar 2021   Prob (F-statistic):             0.0131
Time:                        16:23:32   Log-Likelihood:                 2.0751
No. Observations:                   5   AIC:                             1.850
Df Residuals:                       2   BIC:                            0.6781
Df Model:                           2                                         
Covariance Type

  warn("omni_normtest is not valid with less than 8 observations; %i "
