# Model evaluation: hourly mean, median and zero baselines scored with RMSLE

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from sklearn.model_selection import train_test_split

In [None]:
# Fetch the gzip-compressed readings CSV, keep only rows with a strictly
# positive meter reading, parse the timestamps and derive the hour of day.
# The chain builds the final frame in one expression, so no intermediate
# state is left behind between steps.
energy_0 = (
    pd.read_csv("http://video.ittensive.com/machine-learning/ashrae/train.0.0.csv.gz")
    .loc[lambda readings: readings['meter_reading'] > 0]
    .assign(
        timestamp=lambda readings: pd.to_datetime(readings['timestamp']),
        # Evaluated after `timestamp` above, so `.dt` sees parsed datetimes.
        hour=lambda readings: readings['timestamp'].dt.hour,
    )
)
energy_0

Unnamed: 0,building_id,meter,timestamp,meter_reading,hour
704,0,0,2016-01-30 08:00:00,43.6839,8
725,0,0,2016-01-31 05:00:00,37.5408,5
737,0,0,2016-01-31 17:00:00,52.5571,17
2366,0,0,2016-04-08 14:00:00,59.3827,14
2923,0,0,2016-05-01 19:00:00,448.0000,19
...,...,...,...,...,...
8779,0,0,2016-12-31 19:00:00,232.0710,19
8780,0,0,2016-12-31 20:00:00,189.0690,20
8781,0,0,2016-12-31 21:00:00,169.9580,21
8782,0,0,2016-12-31 22:00:00,169.9580,22


In [None]:
# Hold out 20% of the rows for testing. A fixed random_state makes the shuffle
# deterministic, so the split -- and every metric computed from it further
# down -- is identical on each Restart & Run All.
energy_0_train, energy_0_test = train_test_split(
    energy_0, test_size=0.2, random_state=42
)
energy_0_train.head()

Unnamed: 0,building_id,meter,timestamp,meter_reading,hour
3832,0,0,2016-06-08 16:00:00,255.278,16
6323,0,0,2016-09-20 11:00:00,242.991,11
5310,0,0,2016-08-09 06:00:00,243.674,6
6805,0,0,2016-10-10 13:00:00,267.564,13
7351,0,0,2016-11-02 07:00:00,211.594,7


In [None]:
# Fit the per-hour baselines on the TRAINING split only. Grouping the full
# data set here would leak the held-out rows into the very averages they are
# later scored against, making the reported error optimistic.
energy_0_train_hours = energy_0_train.groupby("hour")
energy_0_train_averages = pd.DataFrame(
    {"Середнє": energy_0_train_hours["meter_reading"].agg("mean"),    # "Середнє" = mean
     "Медіана": energy_0_train_hours["meter_reading"].agg("median")}  # "Медіана" = median
)
energy_0_train_averages

Unnamed: 0_level_0,Середнє,Медіана
hour,Unnamed: 1_level_1,Unnamed: 2_level_1
0,238.683751,240.261
1,239.211608,240.944
2,239.651475,242.309
3,240.203588,244.357
4,239.263176,245.039
5,238.050422,245.039
6,237.482464,245.039
7,240.904364,246.404
8,242.704538,239.579
9,235.522704,234.801


In [None]:
def calculate_model(x):
  """Return a copy of row `x` extended with squared log-errors of three baselines.

  The baselines are looked up in the module-level `energy_0_train_averages`
  table by the row's `hour` label:

  * ``meter_reading_mean_q``   - prediction is the hourly mean ("Середнє")
  * ``meter_reading_median_q`` - prediction is the hourly median ("Медіана")
  * ``meter_reading_zero_q``   - prediction is a constant 0, whose log(1 + 0)
    is 0, so the error is just the squared log of the actual reading

  Parameters
  ----------
  x : row-like object exposing `meter_reading` and `hour`
      (as passed by ``DataFrame.apply(..., axis=1)``).

  Returns
  -------
  A copy of `x` with the three columns above added; `x` itself is left
  untouched, because mutating the object handed over by ``apply`` is not
  supported by pandas.

  Raises
  ------
  KeyError
      If `x.hour` has no entry in `energy_0_train_averages`.
  """
  scored = x.copy()
  # log1p(v) == log(1 + v); the "+ 1" keeps the log defined for zero readings.
  actual_log = np.log1p(x.meter_reading)
  mean_log = np.log1p(energy_0_train_averages['Середнє'][x.hour])
  median_log = np.log1p(energy_0_train_averages['Медіана'][x.hour])
  scored["meter_reading_mean_q"] = (actual_log - mean_log) ** 2
  scored["meter_reading_median_q"] = (actual_log - median_log) ** 2
  scored["meter_reading_zero_q"] = actual_log ** 2
  return scored

# Run calculate_model over every held-out row (axis=1) and rebind
# energy_0_test to the resulting frame, which carries the extra
# squared-log-error columns that function adds.
energy_0_test = energy_0_test.apply(
    func=calculate_model,
    axis=1,
    result_type="expand",
)
energy_0_test

Unnamed: 0,building_id,meter,timestamp,meter_reading,hour,meter_reading_mean_q,meter_reading_median_q,meter_reading_zero_q
7164,0,0,2016-10-25 12:00:00,170.640,12,0.102534,0.123715,26.475134
8487,0,0,2016-12-19 15:00:00,189.069,15,0.050619,0.062372,27.535072
5392,0,0,2016-08-12 16:00:00,309.882,16,0.071304,0.058701,32.940866
8191,0,0,2016-12-07 07:00:00,242.309,7,0.000034,0.000279,30.187687
3801,0,0,2016-06-07 09:00:00,242.991,9,0.000966,0.001166,30.218453
...,...,...,...,...,...,...,...,...
7908,0,0,2016-11-25 12:00:00,178.148,12,0.076948,0.095431,26.917547
6768,0,0,2016-10-09 00:00:00,243.674,0,0.000425,0.000197,30.249194
5529,0,0,2016-08-18 09:00:00,309.882,9,0.074731,0.076411,32.940866
6074,0,0,2016-09-10 02:00:00,240.944,2,0.000029,0.000032,30.125897


In [None]:
# RMSLE per baseline: square root of (sum of squared log-errors / number of
# test rows). The row count is shared by all three, so compute it once.
test_row_count = len(energy_0_test)
energy_0_test_median_rmsle = np.sqrt(
    energy_0_test['meter_reading_median_q'].sum() / test_row_count
)
energy_0_test_mean_rmsle = np.sqrt(
    energy_0_test['meter_reading_mean_q'].sum() / test_row_count
)
energy_0_test_zero_rmsle = np.sqrt(
    energy_0_test['meter_reading_zero_q'].sum() / test_row_count
)
# Labels are Ukrainian: "Якість" = quality, for median / mean / zero models.
print(f"Якість медіани: {energy_0_test_median_rmsle}")
print(f"Якість середнього: {energy_0_test_mean_rmsle}")
print(f"Якість нуля: {energy_0_test_zero_rmsle}")

Якість медіани: 0.2594841927456961
Якість середнього: 0.2566473005164491
Якість нуля: 5.446681261737153
