In [13]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import pandas as pd
# print(sklearn.__version__)

In [14]:
X, y = datasets.load_boston(return_X_y=True)

In [15]:
# What we do here is splitting our data up into two parts. Train will be used to train our model and find the patterns.
# Test will be used to then see if we can predict future outcomes from the train model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# What we do here is splitting the test result further into test and validation. Validation can then be used to compare different models.
# and see which gave the best results.
X_test, X_validation, y_test, y_validation = train_test_split(X_test, y_test, test_size=0.3)
print("Number of samples in:")
print(f"    Training: {len(y_train)}")
print(f"    Validation: {len(y_validation)}")
print(f"    Testing: {len(y_test)}")

Number of samples in:
    Training: 354
    Validation: 46
    Testing: 106


In [16]:
np.random.seed(2)

models = [
    DecisionTreeRegressor(splitter="random"),
    SVR(),
    LinearRegression()
        ]

for model in models:
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    y_validation_pred = model.predict(X_validation)
    y_test_pred = model.predict(X_test)

    train_loss = mean_squared_error(y_train, y_train_pred)
    validation_loss = mean_squared_error(y_validation, y_validation_pred)
    test_loss = mean_squared_error(y_test, y_test_pred)

    print(
        f"{model.__class__.__name__}: "
        f"Train Loss: {train_loss} | Validation Loss: {validation_loss} | "
        f"Test Loss: {test_loss}"
        )

DecisionTreeRegressor: Train Loss: 0.0 | Validation Loss: 20.535217391304347 | Test Loss: 17.776698113207544
SVR: Train Loss: 69.0863353517138 | Validation Loss: 58.67914381067936 | Test Loss: 59.247172293557
LinearRegression: Train Loss: 22.272769849522344 | Validation Loss: 21.39434674081142 | Test Loss: 22.249596425706063
