# Workspace for the Sonntagsfrage Use Case

## Definitions

General imports

In [25]:
from datetime import datetime as dt
from datetime import timedelta
import sklearn.metrics as met
import pandas as pd
import os
import sys
import json

# Directory that holds the pickled dataframes. The original developer-machine
# location stays the default; set the SONNTAGSFRAGE_DATAFRAMES environment
# variable to point the notebook at a different directory.
PATH_DATAFRAMES = os.environ.get(
    'SONNTAGSFRAGE_DATAFRAMES',
    '/Users/andreasditte/Desktop/Private_Projekte/Sonntagsfrage/src/dataframes',
)

# Make the working directory and the project sources importable. Entries that
# are already present are skipped so re-running this cell does not grow sys.path.
for _extra_path in (os.getcwd(), '../src/'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

In [26]:
# import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, ensemble
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [37]:
from azureml.core import Run, Workspace, Datastore, Dataset
from environs import Env
from azureml.core.authentication import ServicePrincipalAuthentication

Function to load dataframes from files

In [28]:
def load_df_from_file(filename, path=PATH_DATAFRAMES):
    """
        Read a pickled dataframe from the local file system.

        The file is looked up as ``<path>/<filename>.pkl``. Only use this with
        trusted files, since unpickling can execute arbitrary code.

        :param filename: Base name of the pickle file, without the ".pkl" extension.
        :param path: Directory that contains the file.
        :return: Whatever ``pandas.read_pickle`` yields for that file.
    """
    pickle_path = f"{path}/{filename}.pkl"
    return pd.read_pickle(pickle_path)

Azure ML specific stuff

In [38]:
# --- read the service principal credentials from the local JSON auth file
with open('../src//Azure_ML/service_principals/sonntagsfrage-ml-auth-file.json') as f:
    svcpr_data = json.load(f)
# --- build the service principal authentication object from those credentials
svc_pr = ServicePrincipalAuthentication(
    tenant_id=svcpr_data['tenantId'],
    service_principal_id=svcpr_data['clientId'],
    service_principal_password=svcpr_data['clientSecret'])

In [39]:
# --- read the Azure settings from the env file
env = Env()
env.read_env("../src/Azure_ML/foundation.env")

# --- connect to the workspace, authenticating with the service principal
ws = Workspace(
    subscription_id=env("AZURE_SUBSCRIPTION_ID"),
    resource_group=env("RESOURCE_GROUP"),
    workspace_name=env("WORKSPACE_NAME"),
    auth=svc_pr,
)

# --- default datastore of that workspace
datastore = Datastore.get_default(ws)

## Load dataset from Azure ML workspace

In [19]:
# --- fetch the dataset stored under this name in the workspace
dataset = ws.datasets['survey_data_with_all_features']
# --- convert it to a pandas dataframe for local inspection
df_survey_data_with_all_features = dataset.to_pandas_dataframe()

In [20]:
df_survey_data_with_all_features.head()

Unnamed: 0,day_in_month_sin,calendar_week_sin,weekday_sin,dayofyear_sin,month_sin,day_in_month_cos,calendar_week_cos,weekday_cos,dayofyear_cos,month_cos,...,SPD,GRUENE,FDP,LINKE,PIRATEN,AfD,Linke_PDS,PDS,REP_DVU,Sonstige
0,0.743145,0.992709,-0.974928,0.99888,0.866025,0.669131,-0.120537,-0.222521,-0.047321,-0.5,...,46.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,3.0
1,0.743145,0.970942,-0.974928,0.985948,0.866025,-0.669131,-0.239316,-0.222521,-0.167052,-0.5,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,3.0
2,-0.587785,0.935016,-0.974928,0.958718,0.866025,-0.809017,-0.354605,-0.222521,-0.284359,-0.5,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0
3,-0.866025,0.885456,-0.974928,0.917584,0.866025,0.5,-0.464723,-0.222521,-0.397543,-0.5,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0
4,0.406737,0.822984,-0.974928,0.863142,0.5,0.913545,-0.568065,-0.222521,-0.504961,-0.866025,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,4.0,1.0


In [22]:
person = '{"name": "Bob", "languages": ["English", "Fench"]}'
person_dict = json.loads(person)

In [23]:
person_dict['name']

'Bob'

## Make plots to compare different algorithms 

In [16]:
df_generate_predictions_finish = load_df_from_file('generate_predictions_finish_preds')

In [6]:
df_generate_predictions_finish.columns

Index(['day_in_month_sin', 'calendar_week_sin', 'weekday_sin', 'dayofyear_sin',
       'month_sin', 'day_in_month_cos', 'calendar_week_cos', 'weekday_cos',
       'dayofyear_cos', 'month_cos', 'nb_days_since_last_survey', 'CDU_CSU',
       'SPD', 'GRUENE', 'FDP', 'LINKE', 'PIRATEN', 'AfD', 'Linke_PDS', 'PDS',
       'REP_DVU', 'Sonstige', 'CDU_CSU_pred', 'SPD_pred', 'GRUENE_pred',
       'FDP_pred', 'LINKE_pred', 'PIRATEN_pred', 'AfD_pred', 'Linke_PDS_pred',
       'PDS_pred', 'REP_DVU_pred', 'Sonstige_pred', 'estimator'],
      dtype='object')

In [20]:
# Load the prediction, metric and feature dataframes from their pickle files.
df_generate_predictions_finish_preds = load_df_from_file('generate_predictions_finish_preds')
df_generate_predictions_finish_metrics = load_df_from_file('generate_predictions_finish_metrics')
df_generate_features_all_features = load_df_from_file('generate_features_all_features')

In [21]:
df_generate_features_all_features.head(10)

Unnamed: 0,day_in_month_sin,calendar_week_sin,weekday_sin,dayofyear_sin,month_sin,day_in_month_cos,calendar_week_cos,weekday_cos,dayofyear_cos,month_cos,...,SPD,GRUENE,FDP,LINKE,PIRATEN,AfD,Linke_PDS,PDS,REP_DVU,Sonstige
0,0.994522,0.239316,0.433884,0.137279,0.5,-0.104528,0.970942,-0.900969,0.990532,0.866025,...,43.0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,3.0,2.0
1,0.866025,0.663123,0.433884,0.5808,0.866025,0.5,0.748511,-0.900969,0.814046,0.5,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,4.0,1.0
2,0.866025,0.748511,0.781831,0.64863,0.866025,-0.5,0.663123,0.62349,0.761104,0.5,...,41.0,7.0,6.0,0.0,0.0,0.0,0.0,5.0,3.0,2.0
3,0.743145,0.992709,-0.974928,0.99888,0.866025,0.669131,-0.120537,-0.222521,-0.047321,-0.5,...,46.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,3.0
4,0.994522,0.970942,0.781831,0.995105,0.866025,0.104528,-0.239316,0.62349,-0.09882,-0.5,...,44.0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,3.0,2.0
5,-0.587785,0.935016,-0.974928,0.958718,0.866025,-0.809017,-0.354605,-0.222521,-0.284359,-0.5,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0
6,-0.866025,0.885456,-0.974928,0.917584,0.866025,0.5,-0.464723,-0.222521,-0.397543,-0.5,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0
7,0.951057,0.748511,-0.974928,0.796183,0.5,-0.309017,-0.663123,-0.222521,-0.605056,-0.866025,...,43.0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,3.0,1.0
8,0.587785,0.663123,0.781831,0.763889,0.5,-0.809017,-0.748511,0.62349,-0.645348,-0.866025,...,39.0,7.0,5.0,0.0,0.0,0.0,0.0,5.0,3.0,2.0
9,-0.207912,0.663123,-0.974928,0.717677,0.5,-0.978148,-0.748511,-0.222521,-0.696376,-0.866025,...,44.0,6.0,5.0,0.0,0.0,0.0,0.0,5.0,4.0,1.0


In [22]:
df_generate_features_all_features.dtypes

day_in_month_sin                    float64
calendar_week_sin                   float64
weekday_sin                         float64
dayofyear_sin                       float64
month_sin                           float64
day_in_month_cos                    float64
calendar_week_cos                   float64
weekday_cos                         float64
dayofyear_cos                       float64
month_cos                           float64
nb_days_since_last_survey             int64
Datum_dt                     datetime64[ns]
CDU_CSU                             float64
SPD                                 float64
GRUENE                              float64
FDP                                 float64
LINKE                               float64
PIRATEN                             float64
AfD                                 float64
Linke_PDS                           float64
PDS                                 float64
REP_DVU                             float64
Sonstige                        

In [13]:
df_generate_features_all_features.describe()

Unnamed: 0,day_in_month_sin,calendar_week_sin,weekday_sin,dayofyear_sin,month_sin,day_in_month_cos,calendar_week_cos,weekday_cos,dayofyear_cos,month_cos,...,SPD,GRUENE,FDP,LINKE,PIRATEN,AfD,Linke_PDS,PDS,REP_DVU,Sonstige
count,820.0,820.0,820.0,820.0,820.0,820.0,820.0,820.0,820.0,820.0,...,820.0,820.0,820.0,820.0,820.0,820.0,820.0,820.0,820.0,820.0
mean,0.056479,0.062672,-0.135579,0.065639,0.04575202,-0.044754,-0.040655,-0.437329,-0.03992,-0.06315196,...,29.408537,10.908537,7.189024,4.085366,0.362195,2.34878,0.839024,2.20122,0.34878,3.939024
std,0.719727,0.706414,0.611544,0.708732,0.7043243,0.691386,0.704707,0.646019,0.702147,0.7064414,...,7.307826,4.403093,2.682576,4.522097,1.468193,4.657271,2.502929,2.674393,0.925984,1.471318
min,-0.994522,-1.0,-0.974928,-0.999991,-1.0,-1.0,-1.0,-0.900969,-0.999963,-1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,-0.743145,-0.663123,-0.433884,-0.663242,-0.5,-0.669131,-0.748511,-0.900969,-0.720667,-0.8660254,...,25.0,7.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
50%,0.207912,0.120537,-0.433884,0.154309,1.224647e-16,-0.104528,-0.120537,-0.900969,-0.081676,-1.83697e-16,...,29.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
75%,0.743145,0.748511,0.433884,0.763889,0.8660254,0.669131,0.663123,-0.222521,0.664855,0.5,...,34.0,13.0,9.0,8.25,0.0,0.0,0.0,5.0,0.0,5.0
max,0.994522,1.0,0.974928,0.999991,1.0,1.0,1.0,1.0,0.999407,1.0,...,46.0,26.0,17.0,14.0,11.0,18.0,12.0,8.0,4.0,9.0


In [14]:
df_generate_features_all_features.isnull().sum()

day_in_month_sin             0
calendar_week_sin            0
weekday_sin                  0
dayofyear_sin                0
month_sin                    0
day_in_month_cos             0
calendar_week_cos            0
weekday_cos                  0
dayofyear_cos                0
month_cos                    0
nb_days_since_last_survey    0
Datum_dt                     0
CDU_CSU                      0
SPD                          0
GRUENE                       0
FDP                          0
LINKE                        0
PIRATEN                      0
AfD                          0
Linke_PDS                    0
PDS                          0
REP_DVU                      0
Sonstige                     0
dtype: int64

In [15]:
# NOTE: to_parquet needs an optional parquet engine (pyarrow or fastparquet);
# without one installed this call raises ImportError.
df_generate_features_all_features.to_parquet()

ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

## Disable Scientific Notation

Disabling is no longer needed since the numbers seem normal.

In [6]:
# Drop incomplete rows once and take both series from the same filtered frame
# instead of running dropna() twice.
# NOTE(review): the "_spd" names are kept because the following cell reads
# them, but these are the CDU_CSU columns -- confirm which party is intended.
df_predictions_complete = df_generate_predictions_finish.dropna()
y_true_spd = df_predictions_complete['CDU_CSU']
y_pred_spd = df_predictions_complete['CDU_CSU_pred']

In [7]:
mae = met.mean_absolute_error(y_true_spd, y_pred_spd)
print(mae)

5.305043050430505


In [26]:
# Load the prediction, metric and feature dataframes from their pickle files.
df_generate_predictions_finish_preds = load_df_from_file('generate_predictions_finish_preds')
df_generate_predictions_finish_metrics = load_df_from_file('generate_predictions_finish_metrics')
df_generate_features_all_features = load_df_from_file('generate_features_all_features')

In [27]:
df_generate_predictions_finish_metrics.head(10)

Unnamed: 0,mae,mse,rmse,r2,party,estimator
0,4.4945,37.0366,6.0858,-0.2751,CDU_CSU,XGBRegressor
1,5.696,55.3346,7.4387,-0.0411,SPD,XGBRegressor
2,2.8462,17.4908,4.1822,0.092,GRUENE,XGBRegressor
3,2.1709,8.0366,2.8349,-0.1168,FDP,XGBRegressor
4,2.3272,16.7839,4.0968,0.1791,LINKE,XGBRegressor
5,0.4542,2.3223,1.5239,-0.0774,PIRATEN,XGBRegressor
6,1.9487,19.1966,4.3814,0.112,AfD,XGBRegressor
7,1.381,7.1709,2.6779,-0.1448,Linke_PDS,XGBRegressor
8,1.851,5.9902,2.4475,0.1618,PDS,XGBRegressor
9,0.5629,0.9145,0.9563,-0.0668,REP_DVU,XGBRegressor


In [21]:
df_generate_predictions_finish_preds.head()

Unnamed: 0_level_0,day_in_month_sin,calendar_week_sin,weekday_sin,dayofyear_sin,month_sin,day_in_month_cos,calendar_week_cos,weekday_cos,dayofyear_cos,month_cos,...,GRUENE_pred,FDP_pred,LINKE_pred,PIRATEN_pred,AfD_pred,Linke_PDS_pred,PDS_pred,REP_DVU_pred,Sonstige_pred,estimator
Datum_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1998-01-08,0.994522,0.239316,0.433884,0.137279,0.5,-0.104528,0.970942,-0.900969,0.990532,0.866025,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DecisionTreeRegressor
1998-02-05,0.866025,0.663123,0.433884,0.5808,0.866025,0.5,0.748511,-0.900969,0.814046,0.5,...,7.0,5.0,0.0,0.0,0.0,0.0,4.0,3.0,2.0,DecisionTreeRegressor
1998-02-10,0.866025,0.748511,0.781831,0.64863,0.866025,-0.5,0.663123,0.62349,0.761104,0.5,...,6.0,5.0,0.0,0.0,0.0,0.0,4.0,4.0,1.0,DecisionTreeRegressor
1998-04-04,0.743145,0.992709,-0.974928,0.99888,0.866025,0.669131,-0.120537,-0.222521,-0.047321,-0.5,...,6.0,5.0,0.0,0.0,0.0,0.0,4.0,4.0,1.0,DecisionTreeRegressor
1998-04-07,0.994522,0.970942,0.781831,0.995105,0.866025,0.104528,-0.239316,0.62349,-0.09882,-0.5,...,7.0,6.0,0.0,0.0,0.0,0.0,5.0,3.0,2.0,DecisionTreeRegressor


In [22]:
pd.Series(df_generate_predictions_finish_preds.columns.values)

0              day_in_month_sin
1             calendar_week_sin
2                   weekday_sin
3                 dayofyear_sin
4                     month_sin
5              day_in_month_cos
6             calendar_week_cos
7                   weekday_cos
8                 dayofyear_cos
9                     month_cos
10    nb_days_since_last_survey
11                      CDU_CSU
12                          SPD
13                       GRUENE
14                          FDP
15                        LINKE
16                      PIRATEN
17                          AfD
18                    Linke_PDS
19                          PDS
20                      REP_DVU
21                     Sonstige
22                 CDU_CSU_pred
23                     SPD_pred
24                  GRUENE_pred
25                     FDP_pred
26                   LINKE_pred
27                 PIRATEN_pred
28                     AfD_pred
29               Linke_PDS_pred
30                     PDS_pred
31      

In [24]:
df_generate_features_all_features.dtypes

day_in_month_sin                    float64
calendar_week_sin                   Float64
weekday_sin                         float64
dayofyear_sin                       float64
month_sin                           float64
day_in_month_cos                    float64
calendar_week_cos                   Float64
weekday_cos                         float64
dayofyear_cos                       float64
month_cos                           float64
nb_days_since_last_survey             int64
Datum_dt                     datetime64[ns]
CDU_CSU                             float64
SPD                                 float64
GRUENE                              float64
FDP                                 float64
LINKE                               float64
PIRATEN                             float64
AfD                                 float64
Linke_PDS                           float64
PDS                                 float64
REP_DVU                             float64
Sonstige                        

In [25]:
df_generate_features_all_features.head()

Unnamed: 0,day_in_month_sin,calendar_week_sin,weekday_sin,dayofyear_sin,month_sin,day_in_month_cos,calendar_week_cos,weekday_cos,dayofyear_cos,month_cos,...,SPD,GRUENE,FDP,LINKE,PIRATEN,AfD,Linke_PDS,PDS,REP_DVU,Sonstige
0,0.994522,0.239316,0.433884,0.137279,0.5,-0.104528,0.970942,-0.900969,0.990532,0.866025,...,43.0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,3.0,2.0
1,0.866025,0.663123,0.433884,0.5808,0.866025,0.5,0.748511,-0.900969,0.814046,0.5,...,45.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,4.0,1.0
2,0.866025,0.748511,0.781831,0.64863,0.866025,-0.5,0.663123,0.62349,0.761104,0.5,...,41.0,7.0,6.0,0.0,0.0,0.0,0.0,5.0,3.0,2.0
3,0.743145,0.992709,-0.974928,0.99888,0.866025,0.669131,-0.120537,-0.222521,-0.047321,-0.5,...,46.0,6.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,3.0
4,0.994522,0.970942,0.781831,0.995105,0.866025,0.104528,-0.239316,0.62349,-0.09882,-0.5,...,44.0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,3.0,2.0


## Get algos to work

In [4]:
# Toy regression problem used to check that the gradient boosting setup runs.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Hold out 10% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=13)

# Hyper-parameters for GradientBoostingRegressor. The loss is left at the
# estimator's default (least squares): the old alias 'ls' was deprecated in
# scikit-learn 1.0 and removed in 1.2, while its replacement 'squared_error'
# is not accepted by older releases, so naming either one breaks some version.
params = {'n_estimators': 500,
          'max_depth': 4,
          'min_samples_split': 5,
          'learning_rate': 0.01}


In [7]:
y_train

array([172.,  91.,  48., 116., 185., 170., 129., 104.,  85.,  97.,  84.,
       170.,  87., 186., 273., 230., 141., 134.,  96.,  94.,  71., 214.,
       235., 252.,  72., 276., 197.,  91., 277., 145.,  52., 131.,  91.,
        65., 102.,  93., 191.,  83., 201.,  96., 118., 168.,  71., 258.,
        84., 108., 185., 198., 262.,  78., 172.,  72.,  40., 243.,  93.,
       279., 306., 111., 217.,  67., 197., 144.,  42.,  43., 246., 113.,
        59., 128., 225., 156., 219.,  64., 163.,  99.,  52., 103.,  90.,
        96., 275., 129.,  48., 264.,  77., 182., 212., 268.,  81.,  85.,
       242., 121., 195., 293.,  83.,  91., 190., 109., 146., 177., 185.,
       233.,  65., 113., 310.,  74., 139.,  68., 109., 158., 150., 158.,
       139.,  52., 210., 143., 160., 104.,  49., 163.,  90.,  55., 101.,
       109., 200., 310., 288., 192., 263., 164., 220.,  96., 202., 143.,
        55., 216., 173., 131., 245., 110., 109.,  59.,  83.,  72.,  48.,
       341.,  92., 232.,  65., 101.,  88., 122., 14

In [5]:
# Fit a gradient boosting regressor configured by the `params` dict.
reg = ensemble.GradientBoostingRegressor(**params)
reg.fit(X_train, y_train)

# Score the predictions for the held-out split and report the MSE with four decimals.
mse = mean_squared_error(y_test, reg.predict(X_test))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))

The mean squared error (MSE) on test set: 3006.5641


## Get time to work properly

In [10]:
dt.now()

datetime.datetime(2021, 1, 21, 18, 18, 37, 727878)

In [11]:
# Current week as a (start, end) pair. weekday() counts from Monday = 0, so
# stepping back that many days lands on Monday; the time of day is kept as is.
dt_now = dt.now()
days_since_monday = timedelta(days=dt_now.weekday())
start = dt_now - days_since_monday
end = start + timedelta(days=6)
print(start, end, sep="\n")

2021-01-18 18:18:37.977252
2021-01-24 18:18:37.977252


In [12]:
dt.strftime(start, '%d.%m.%Y')

'18.01.2021'

In [15]:
dt.strftime(start, '%d.%m.%Y %H:%M:%S')

'18.01.2021 18:18:37'

## Create dataframe with metrics

In [14]:
y_true = [1,3,5,4,3,2]
y_pred = [6,5,1,2,5,2]
estimator = 'DecisionTreeRegressor'

In [15]:
# Regression metrics for the toy y_true / y_pred lists.
mae = met.mean_absolute_error(y_true, y_pred)
mse = met.mean_squared_error(y_true, y_pred)
# RMSE is the square root of the MSE. Derive it directly instead of passing
# `squared=False`, which newer scikit-learn releases deprecated and then removed.
rmse = np.sqrt(mse)
mape = met.mean_absolute_percentage_error(y_true, y_pred)
r2 = met.r2_score(y_true, y_pred)

# Column names, in the same order as the values in the flat record below.
metrics_colnames = ['mae', 'mse', 'rmse', 'mape', 'r2', 'estimator']
# Flat record of the metric values plus the estimator name.
metrics_series = [mae, mse, rmse, mape, r2, estimator]
# The same record wrapped as a one-row table; copied so the two lists stay independent.
metrics_array = [list(metrics_series)]

In [16]:
# One-row dataframe: the metric values under their column names.
df_metrics = pd.DataFrame(data=metrics_array, columns=metrics_colnames)

In [17]:
df_metrics.head()

Unnamed: 0,mae,mse,rmse,mape,r2,estimator
0,2.5,8.833333,2.972092,1.272222,-4.3,DecisionTreeRegressor


In [18]:
print(df_generate_predictions_finish.dropna().count()) # 813
print(df_generate_predictions_finish.count()) # 814

day_in_month_sin             813
calendar_week_sin            813
weekday_sin                  813
dayofyear_sin                813
month_sin                    813
day_in_month_cos             813
calendar_week_cos            813
weekday_cos                  813
dayofyear_cos                813
month_cos                    813
nb_days_since_last_survey    813
CDU_CSU                      813
SPD                          813
GRUENE                       813
FDP                          813
LINKE                        813
PIRATEN                      813
AfD                          813
Linke_PDS                    813
PDS                          813
REP_DVU                      813
Sonstige                     813
CDU_CSU_pred                 813
SPD_pred                     813
GRUENE_pred                  813
FDP_pred                     813
LINKE_pred                   813
PIRATEN_pred                 813
AfD_pred                     813
Linke_PDS_pred               813
PDS_pred  

In [19]:
# Drop incomplete rows once and take both SPD series from the same filtered
# frame instead of running dropna() twice.
df_predictions_complete = df_generate_predictions_finish.dropna()
y_true_spd = df_predictions_complete['SPD']
y_pred_spd = df_predictions_complete['SPD_pred']

In [20]:
mae = met.mean_absolute_error(y_true_spd, y_pred_spd)
print(mae)

6.204797047970479


In [21]:
y_pred_spd

Datum_dt
1998-02-05    43.0
1998-02-10    45.0
1998-04-04    45.0
1998-04-07    41.0
1998-04-18    41.0
              ... 
2020-08-20    14.0
2020-09-01    24.0
2020-09-17    26.0
2020-10-15    14.0
2020-11-15    26.0
Name: SPD_pred, Length: 813, dtype: float64

In [22]:
# Build a two-row list by referencing the same metrics record twice.
test = [metrics_series] * 2
print(test)

[[2.5, 8.833333333333334, 2.972092416687835, 1.2722222222222224, -4.3, 'DecisionTreeRegressor'], [2.5, 8.833333333333334, 2.972092416687835, 1.2722222222222224, -4.3, 'DecisionTreeRegressor']]


In [27]:
# NOTE(review): per the printed output, `mae` here is the SPD value from the
# cell above, while the other metrics still come from the toy lists -- confirm
# that mixing them is intended.
metrics_series_test = [mae, mse, rmse, mape, r2]
# Despite the name, this rounds every value to 4 decimal places, not to tenths.
round_to_tenths = [round(num, 4) for num in metrics_series_test]
print(metrics_series_test)
print(round_to_tenths)

[6.204797047970479, 8.833333333333334, 2.972092416687835, 1.2722222222222224, -4.3]
[6.2048, 8.8333, 2.9721, 1.2722, -4.3]


In [28]:
metrics_series_test.append('estimator')

In [29]:
print(metrics_series_test)

[6.204797047970479, 8.833333333333334, 2.972092416687835, 1.2722222222222224, -4.3, 'estimator']


In [59]:
df_test = pd.DataFrame(test, columns =metrics_colnames) 
df_test.head()

Unnamed: 0,mae,mse,rmse,mape,r2,estimator
0,2.5,8.833333,2.972092,1.272222,-4.3,DecisionTreeRegressor
1,2.5,8.833333,2.972092,1.272222,-4.3,DecisionTreeRegressor
