# Импорт

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.tree import DecisionTreeRegressor 
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

from joblib import dump


# Загружаем датасет и разбиваем её на выборку.

In [12]:
data = pd.read_csv('../../Data/DataLaba1/energy_task_moded.csv')
X_train, X_test, y_train, y_test = train_test_split(data.drop('Appliances', axis=1), data['Appliances'], test_size=0.25, random_state=42)
X_train.shape


(14801, 29)

In [13]:
data

Unnamed: 0,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,...,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,day,month,year,time
0,60.0,30.0,19.890000,47.596667,19.200000,44.790000,19.790000,44.730000,19.000000,45.566667,...,6.60,733.5,92.000000,7.000000,63.000000,5.3,11.0,1.0,2016.0,1020.0
1,60.0,30.0,19.890000,46.693333,19.200000,44.722500,19.790000,44.790000,19.000000,45.992500,...,6.48,733.6,92.000000,6.666667,59.166667,5.2,11.0,1.0,2016.0,1030.0
2,50.0,30.0,19.890000,46.300000,19.200000,44.626667,19.790000,44.933333,18.926667,45.890000,...,6.37,733.7,92.000000,6.333333,55.333333,5.1,11.0,1.0,2016.0,1040.0
3,50.0,40.0,19.890000,46.066667,19.200000,44.590000,19.790000,45.000000,18.890000,45.723333,...,6.25,733.8,92.000000,6.000000,51.500000,5.0,11.0,1.0,2016.0,1050.0
4,60.0,40.0,19.890000,46.333333,19.200000,44.530000,19.790000,45.000000,18.890000,45.530000,...,6.13,733.9,92.000000,5.666667,47.666667,4.9,11.0,1.0,2016.0,1060.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19730,100.0,0.0,25.566667,46.560000,25.890000,42.025714,27.200000,41.163333,24.700000,45.590000,...,22.70,755.2,55.666667,3.333333,23.666667,13.3,27.0,5.0,2016.0,1040.0
19731,90.0,0.0,25.500000,46.500000,25.754000,42.080000,27.133333,41.223333,24.700000,45.590000,...,22.60,755.2,56.000000,3.500000,24.500000,13.3,27.0,5.0,2016.0,1050.0
19732,270.0,10.0,25.500000,46.596667,25.628571,42.768571,27.050000,41.690000,24.700000,45.730000,...,22.50,755.2,56.333333,3.666667,25.333333,13.3,27.0,5.0,2016.0,1060.0
19733,420.0,10.0,25.500000,46.990000,25.414000,43.036000,26.890000,41.290000,24.700000,45.790000,...,22.30,755.2,56.666667,3.833333,26.166667,13.2,27.0,5.0,2016.0,1070.0


# Подбираем гиппер-параметры.

In [14]:
decision_tree_regressor_optimal_params = GridSearchCV(DecisionTreeRegressor(), 
                                                       {
                                                        'criterion': ["friedman_mse", "squared_error", "absolute_error", "poisson"],
                                                        'max_depth': np.array(range(1, 2*X_train.shape[1]+1)),
                                                        # 'min_samples_split': np.array(range(2, X_train.shape[0]//2))
                                                        },
                                                        cv=2,
                                                        n_jobs=3,
                                                      scoring='r2'
                                            ).fit(X_train[0:1000], y_train[0:1000])
decision_tree_regressor_optimal_params.best_params_

{'criterion': 'friedman_mse', 'max_depth': 3}

# Обучение

In [15]:
decision_tree_regressor = DecisionTreeRegressor(
    criterion=decision_tree_regressor_optimal_params.best_params_['criterion'] if 'criterion' in decision_tree_regressor_optimal_params.best_params_ else 'squared_error',
    max_depth=decision_tree_regressor_optimal_params.best_params_['max_depth'] if 'max_depth' in decision_tree_regressor_optimal_params.best_params_ else None,
    )
decision_tree_regressor.fit(X_train, y_train)

In [16]:
print(tree.export_text(decision_tree_regressor))

|--- feature_28 <= 465.00
|   |--- feature_28 <= 405.00
|   |   |--- feature_17 <= 19.78
|   |   |   |--- value: [48.50]
|   |   |--- feature_17 >  19.78
|   |   |   |--- value: [57.80]
|   |--- feature_28 >  405.00
|   |   |--- feature_7 <= 20.29
|   |   |   |--- value: [62.35]
|   |   |--- feature_7 >  20.29
|   |   |   |--- value: [87.86]
|--- feature_28 >  465.00
|   |--- feature_28 <= 1265.00
|   |   |--- feature_0 <= 5.00
|   |   |   |--- value: [116.76]
|   |   |--- feature_0 >  5.00
|   |   |   |--- value: [163.80]
|   |--- feature_28 >  1265.00
|   |   |--- feature_28 <= 1335.00
|   |   |   |--- value: [88.39]
|   |   |--- feature_28 >  1335.00
|   |   |   |--- value: [59.36]



# Оценка

In [17]:
y_predicted = decision_tree_regressor.predict(X_test)
decision_tree_regressor_errors = {'MAE': mean_absolute_error(y_predicted, y_test),
                                  'RMSE': mean_squared_error(y_predicted, y_test),
                                  'MSE': mean_squared_error(y_predicted, y_test)**0.5,
                                  'MAPE': mean_absolute_percentage_error(y_predicted, y_test),
                                  'R^2': decision_tree_regressor.score(X_test, y_test)}

decision_tree_regressor_errors

{'MAE': 52.36406039968407,
 'RMSE': 8605.221091571617,
 'MSE': 92.76433092289092,
 'MAPE': 0.4653547875720123,
 'R^2': 0.13374855433018196}