# Regression evaluation metrics
This notebook explores different evaluation metrics that can be used with regression models. It uses the boston house price dataset built into Sklearn.

## Imports

In [1]:
# Core libraries
import pandas as pd

# Sklearn processing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Sklearn regression algorithms
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

# Sklearn regression model evaluation functions
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.metrics import r2_score

## Load data

In [2]:
# Load built-in sample data set
from sklearn.datasets import load_boston
boston = load_boston()

## Inspect data

In [3]:
# View the input features
pd.DataFrame(boston.data, columns=boston.feature_names).head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [4]:
# View the target feature (will be the house price)
pd.DataFrame(boston.target, columns=["PRICE"]).head()

Unnamed: 0,PRICE
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


## Split into X and y

In [5]:
# Define the X (input) and y (target) features
X = boston.data
y = boston.target

## Scale features to same range

In [6]:
# Rescale the input features
scaler = MinMaxScaler(feature_range=(0,1))
X = scaler.fit_transform(boston.data)

## Split into train and test sets

In [7]:
# Split into train (2/3) and test (1/3) sets
test_size = 0.33
seed = 7
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)

## Build 2 models and explore evaluation metrics on training set

In [8]:
# Build some models and check them against training data using MAE, RMSE and R2
models = [LinearRegression(), KNeighborsRegressor()]
for model in models:
    model.fit(X_train, y_train)
    predictions = model.predict(X_train)
    print(type(model).__name__)
    print("    MAE", mean_absolute_error(y_train, predictions))
    print("    RMSE", sqrt(mean_squared_error(y_train, predictions)))
    print("    R2", r2_score(y_train, predictions))

LinearRegression
    MAE 3.2401472701093033
    RMSE 4.61302063554548
    R2 0.7597841312061022
KNeighborsRegressor
    MAE 2.5415339233038345
    RMSE 3.9173708900569375
    R2 0.8267711259528043


## Explore evaluation metrics on test set

In [9]:
# Evaluation the models against test data using MAE, RMSE and R2
for model in models:
    predictions = model.predict(X_test)
    print(type(model).__name__)
    print("    MAE", mean_absolute_error(y_test, predictions))
    print("    RMSE", sqrt(mean_squared_error(y_test, predictions)))
    print("    R2", r2_score(y_test, predictions))

LinearRegression
    MAE 3.32869485458507
    RMSE 5.032127524575088
    R2 0.6663089606572572
KNeighborsRegressor
    MAE 2.9223952095808383
    RMSE 4.458105180052079
    R2 0.738096146289857
