In [1]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from pprint import pprint

# Make the project root (the parent of the notebook's working directory)
# importable, so that `from src... import ...` below resolves.
import os,sys,inspect
# Inside a kernel, inspect.getfile(inspect.currentframe()) does not point at the
# notebook: older IPython yields a pseudo-name like "<ipython-input-...>" and
# newer ipykernel yields a temporary file path. The working directory is what
# the original code effectively relied on, so use it explicitly.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Guard the insert so that re-running this cell does not keep growing sys.path.
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

from src.tree_boosting import TreeBoosting
from src import utils 

# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the local `src` package) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Candidate regressors, keyed by the label used in the result tables below.
# Insertion order matters: it is the order in which results are printed.
models = {
    **{f"hybrid-{depth}": TreeBoosting(depth) for depth in (1, 2)},
    **{f"tree-{depth}": DecisionTreeRegressor(max_depth=depth) for depth in (1, 2)},
    "linear": LinearRegression(fit_intercept=True),
}

# load_others() yields two datasets, unpacked here as Prokop'ev then Volkov.
prokopbev, volkov = utils.load_others()

# One dataset per person, keyed by display name.
spacemen = dict(
    zip(
        ("Artem'ev", "Prokop'ev", "Volkov"),
        (utils.load_artembev(), prokopbev, volkov),
    )
)

# The datasets are reachable through `spacemen` only; drop the temporary names.
del volkov, prokopbev

In [6]:
# Repeated hold-out evaluation.
# For every person and every model, utils.train_and_test is called ITERATIONS
# times with random_state = 0 .. ITERATIONS-1 and a TEST_SIZE test fraction.
# Result layout: r2_scores[person][model_name] -> list of ITERATIONS values
# (presumably held-out R² scores, judging by how they are reported below;
# verify against utils.train_and_test).
ITERATIONS = 100
TEST_SIZE = 0.2
r2_scores = {}

for man, measurements in spacemen.items():
    r2_scores[man] = {}
    X, y = utils.convert_to_3day_mean_format(measurements)
    for model_name, model in models.items():
        # NOTE(review): the same estimator instance is reused for every
        # repetition; this assumes train_and_test refits it from scratch on
        # each call. Confirm for TreeBoosting.
        r2_scores[man][model_name] = [
            utils.train_and_test(
                # TEST_SIZE is used here instead of a duplicated literal, so
                # the constant above actually controls the experiment.
                model, X, y, test_size=TEST_SIZE, random_state=seed, draw=False)
            for seed in range(ITERATIONS)
        ]

In [7]:
# Summarise the repeated hold-out runs: one line each for mean and median per
# (person, model). Both are printed because the two can differ a lot when a few
# repetitions score very badly.
for man, scores_by_model in r2_scores.items():
    print(man)
    for model, scores in scores_by_model.items():
        mean_r2 = np.mean(scores)
        median_r2 = np.median(scores)
        print(f'\t{model}, mean R²: {mean_r2}')
        print(f'\t{model}, median R²: {median_r2}')

Artem'ev
	hybrid-1, mean R²: 0.4312508779596116
	hybrid-1, median R²: 0.4634969994613857
	hybrid-2, mean R²: -64.33949207998296
	hybrid-2, median R²: 0.1882860371283558
	tree-1, mean R²: 0.3873911446890223
	tree-1, median R²: 0.4447447993627317
	tree-2, mean R²: 0.34048471296543875
	tree-2, median R²: 0.3725570537557276
	linear, mean R²: 0.30871940261046854
	linear, median R²: 0.35828392495495454
Prokop'ev
	hybrid-1, mean R²: -4.146005973561607
	hybrid-1, median R²: -0.45281953330908153
	hybrid-2, mean R²: -13.559059108261662
	hybrid-2, median R²: -0.8004112508730982
	tree-1, mean R²: -0.2758867846071075
	tree-1, median R²: -0.18987317509712154
	tree-2, mean R²: -0.48897805061262695
	tree-2, median R²: -0.315138208229243
	linear, mean R²: -0.09023833455781341
	linear, median R²: -0.06583388599459461
Volkov
	hybrid-1, mean R²: 0.013341414021308728
	hybrid-1, median R²: 0.07016961608556987
	hybrid-2, mean R²: -273.3350580321083
	hybrid-2, median R²: -0.2571510060216823
	tree-1, mean R²: 

In [8]:
# Evaluation without a train/test split: each model is passed to
# utils.train_and_test_nosplit once per person.
# Result layout: r2_scores_full[person][model_name] -> single value.
# NOTE(review): ITERATIONS and TEST_SIZE are re-assigned here but are not
# referenced anywhere in this cell.
ITERATIONS = 100
TEST_SIZE = 0.2
r2_scores_full = {}

for man, measurements in spacemen.items():
    r2_scores_full[man] = {}
    X, y = utils.convert_to_3day_mean_format(measurements)
    for model, estimator in models.items():
        r2_scores_full[man][model] = utils.train_and_test_nosplit(
            estimator, X, y, draw=False)

In [9]:
# Print the no-split result for every (person, model) pair.
for man, score_by_model in r2_scores_full.items():
    print(man)
    for model, score in score_by_model.items():
        print(f'\t{model}, full dataset R²: {score}')

Artem'ev
	hybrid-1, full dataset R²: 0.5939718234582889
	hybrid-2, full dataset R²: 0.73420692109285
	tree-1, full dataset R²: 0.4849412315933995
	tree-2, full dataset R²: 0.5641018259868231
	linear, full dataset R²: 0.43314979962526834
Prokop'ev
	hybrid-1, full dataset R²: 0.3050793438602635
	hybrid-2, full dataset R²: 0.45245318008099866
	tree-1, full dataset R²: 0.1406997597694174
	tree-2, full dataset R²: 0.2893713124948889
	linear, full dataset R²: 0.15250963531658135
Volkov
	hybrid-1, full dataset R²: 0.38449794492821143
	hybrid-2, full dataset R²: 0.48228666592234715
	tree-1, full dataset R²: 0.2018263369941642
	tree-2, full dataset R²: 0.3833255655932254
	linear, full dataset R²: 0.2765216064736745
