In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error

In [2]:
def train_test_split_kf(data: np.array, target: np.array, train: np.array, test: np.array) -> np.array:
    x_train = data[train]
    x_test = data[test]
    y_train = target[train]
    y_test = target[test]
    return x_train, x_test, y_train, y_test


def evaluate_model(model, metric, x_train, x_test, y_train, y_test):
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    score = metric(y_pred, y_test)
    return score

In [3]:
dataset = pd.read_csv("../inputs/felicidad.csv")
data = dataset.drop(["country", "score", "rank"], axis=1)
target = dataset["score"]

models = {"DecisionTreeRegressor": DecisionTreeRegressor()}
print('---- Easy Implementation ----')
for name, model in models.items():
    score = cross_val_score(model, data, target, cv=10, scoring='neg_mean_squared_error')
    print("Scores:", score)
    print("Mean score: ", np.abs(np.mean(score)))
print("=" * 64)

---- Easy Implementation ----
Scores: [-0.18994313 -0.03374152 -0.01686481 -0.03322507 -0.01842594 -0.0079535
 -0.02255674 -0.02900939 -0.20707588 -0.23440295]
Mean score:  0.0793198949591584


In [4]:
kf = KFold(n_splits=10, shuffle=True, random_state=42)
scores = []
print('---- Full Implementation ----')
for name, model in models.items():
    print(f"I'm evaluating: {name}")
    for n_fold, (train, test) in enumerate(kf.split(data)):
        print(f"\t-I'm running fold {n_fold + 1}")
        x_train, x_test, y_train, y_test = train_test_split_kf(data.values, target.values, train, test)
        score = evaluate_model(model, mean_squared_error, x_train, x_test, y_train, y_test)
        print("\t\t-score:", score)
        scores.append(score)
    print("="*64)
    print("Scores:", scores)
    print("Mean score: ", np.mean(scores))

---- Full Implementation ----
I'm evaluating: DecisionTreeRegressor
	-I'm running fold 1
		-score: 0.0070418713211488375
	-I'm running fold 2
		-score: 0.0016953167941594197
	-I'm running fold 3
		-score: 0.003756071682912264
	-I'm running fold 4
		-score: 0.004785132517031086
	-I'm running fold 5
		-score: 0.002910249300917606
	-I'm running fold 6
		-score: 0.004251211268422317
	-I'm running fold 7
		-score: 0.00910051990402171
	-I'm running fold 8
		-score: 0.002800343457380497
	-I'm running fold 9
		-score: 0.0034541374997825286
	-I'm running fold 10
		-score: 0.0044419952459016756
Scores: [0.0070418713211488375, 0.0016953167941594197, 0.003756071682912264, 0.004785132517031086, 0.002910249300917606, 0.004251211268422317, 0.00910051990402171, 0.002800343457380497, 0.0034541374997825286, 0.0044419952459016756]
Mean score:  0.004423684899167795
