In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, date, time

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics

In [2]:
# Load the raw sensor log. The CSV has no header row, so column names are supplied here.
header_list = ['Date Time', 'Voltage', 'Current']
df = pd.read_csv('sensors_data.csv', names=header_list)

# Power columns: voltage x current, and the same value divided by 1000.
df['Power (W)'] = df['Voltage'] * df['Current']
df['Power (KW)'] = df['Power (W)'] / 1000

# Parse the timestamp once, then derive every calendar field from the parsed column.
df['Date Time'] = pd.to_datetime(df['Date Time'])
timestamps = df['Date Time']
df['Date'] = pd.to_datetime(timestamps.dt.date)  # calendar day as a datetime64 column
df['Time'] = timestamps.dt.time.astype(str)      # time of day kept as text
df['Hour'] = timestamps.dt.hour

In [3]:
# Preview the first five rows of the prepared sensor frame (head() defaults to 5).
df.head()

Unnamed: 0,Date Time,Voltage,Current,Power (W),Power (KW),Date,Time,Hour
0,2022-06-24 23:31:26,0.26855,0.0,0.0,0.0,2022-06-24,23:31:26,23
1,2022-06-25 00:31:26,0.26855,0.0,0.0,0.0,2022-06-25,00:31:26,0
2,2022-06-25 01:31:26,0.26855,0.0,0.0,0.0,2022-06-25,01:31:26,1
3,2022-06-25 01:32:26,0.26855,0.0,0.0,0.0,2022-06-25,01:32:26,1
4,2022-06-25 01:33:26,0.31738,0.0,0.0,0.0,2022-06-25,01:33:26,1


In [4]:
# Put the timestamp-derived columns first, followed by the electrical measurements.
rearrange_columns = [
    'Date Time', 'Date', 'Time', 'Hour',
    'Voltage', 'Current', 'Power (W)', 'Power (KW)',
]
df = df.loc[:, rearrange_columns]
df.tail()

Unnamed: 0,Date Time,Date,Time,Hour,Voltage,Current,Power (W),Power (KW)
23811,2022-07-13 20:56:11,2022-07-13,20:56:11,20,13.42773,0.0,0.0,0.0
23812,2022-07-13 20:57:11,2022-07-13,20:57:11,20,13.40332,0.0,0.0,0.0
23813,2022-07-13 21:55:11,2022-07-13,21:55:11,21,13.40332,0.0,0.0,0.0
23814,2022-07-13 22:55:11,2022-07-13,22:55:11,22,13.40332,0.0,0.0,0.0
23815,2022-07-13 23:55:11,2022-07-13,23:55:11,23,13.40332,0.0,0.0,0.0


In [5]:
# Distinct calendar days present in the log, in order of first appearance.
unique_date = df['Date'].drop_duplicates().to_numpy()
unique_date

array(['2022-06-24T00:00:00.000000000', '2022-06-25T00:00:00.000000000',
       '2022-06-26T00:00:00.000000000', '2022-06-27T00:00:00.000000000',
       '2022-06-28T00:00:00.000000000', '2022-06-29T00:00:00.000000000',
       '2022-06-30T00:00:00.000000000', '2022-07-01T00:00:00.000000000',
       '2022-07-02T00:00:00.000000000', '2022-07-03T00:00:00.000000000',
       '2022-07-04T00:00:00.000000000', '2022-07-05T00:00:00.000000000',
       '2022-07-06T00:00:00.000000000', '2022-07-07T00:00:00.000000000',
       '2022-07-08T00:00:00.000000000', '2022-07-09T00:00:00.000000000',
       '2022-07-10T00:00:00.000000000', '2022-07-11T00:00:00.000000000',
       '2022-07-12T00:00:00.000000000', '2022-07-13T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [6]:
# Hourly totals for the most recent day in the log (the last entry of unique_date).
is_latest_day = df['Date'] == unique_date[-1]
filter_today_values = df.loc[is_latest_day, ['Date', 'Hour', 'Power (KW)']]

latest_day_groups = filter_today_values.groupby(['Date', 'Hour'])
today_hourly_values = latest_day_groups['Power (KW)'].sum().reset_index()

# Number of distinct hours that actually have readings for that day.
length_today_hourly_values = today_hourly_values.shape[0]
length_today_hourly_values

24

In [7]:
# Training window: every day strictly after the first logged day, up to and
# including the day before the most recent one.
# The lower bound used to be the literal '2022-06-24'; deriving it from
# unique_date keeps the window correct when the CSV is refreshed with a
# different start date.
# NOTE(review): the first day is presumably excluded because it is only
# partially recorded — confirm.
first_logged_day = unique_date[0]
day_before_latest = unique_date[-2]
in_training_window = (df['Date'] > first_logged_day) & (df['Date'] <= day_before_latest)

filter_daily_values = df.loc[in_training_window, ['Date', 'Hour', 'Power (KW)']]
filter_daily_values

Unnamed: 0,Date,Hour,Power (KW)
1,2022-06-25,0,0.0
2,2022-06-25,1,0.0
3,2022-06-25,1,0.0
4,2022-06-25,1,0.0
5,2022-06-25,1,0.0
...,...,...,...
22556,2022-07-12,22,0.0
22557,2022-07-12,23,0.0
22558,2022-07-12,23,0.0
22559,2022-07-12,23,0.0


In [8]:
# Collapse the per-reading rows into one summed value per (Date, Hour) pair.
training_hour_groups = filter_daily_values.groupby(['Date', 'Hour'])
daily_hourly_values = training_hour_groups['Power (KW)'].sum().reset_index()
daily_hourly_values

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-25,0,0.000000
1,2022-06-25,1,0.000000
2,2022-06-25,2,0.000000
3,2022-06-25,3,0.000000
4,2022-06-25,4,0.000000
...,...,...,...
427,2022-07-12,19,0.344380
428,2022-07-12,20,0.309929
429,2022-07-12,21,0.010904
430,2022-07-12,22,0.000000


In [9]:
# Column names for the headerless hourly weather-forecast CSV.
header_list = [
    'Date', 'Time', 'SolarIrradiance (W/m2)', 'weather status',
    'Temp (°C)', 'RealFeelTemp (°C)', 'DewPoint (°C)', 'Wind (km/h)',
    'Direction', 'Hum (%)', 'Visibility (km)', 'UVIndex',
    'UVIndexText', 'PreProbability (%)', 'RainProbability (%)',
    'CloudCover (%)',
]
weather_data = pd.read_csv(
    'hourly_weather_forecasted_data.csv',
    names=header_list,
    encoding='unicode_escape',
)

# Columns not used as model inputs. What remains is SolarIrradiance,
# RealFeelTemp, Wind and UVIndex.
# Earlier experiments (previously left here as commented-out code) kept
# 'CloudCover (%)' in place of 'UVIndex' or of 'RealFeelTemp (°C)', and tried
# a factorized 'weather status' code column.
unused_weather_columns = [
    'Date', 'Time', 'DewPoint (°C)', 'Direction', 'Visibility (km)',
    'UVIndexText', 'PreProbability (%)', 'RainProbability (%)',
    'weather status', 'Hum (%)', 'CloudCover (%)', 'Temp (°C)',
]
weather_data = weather_data.drop(columns=unused_weather_columns)
weather_data.tail(14)

Unnamed: 0,SolarIrradiance (W/m2),RealFeelTemp (°C),Wind (km/h),UVIndex
442,868.4,21.7,14.8,4
443,947.1,23.5,14.8,5
444,1206.7,26.1,11.1,7
445,1269.5,27.0,13.0,7
446,1142.8,27.3,13.0,7
447,987.5,26.5,13.0,5
448,813.8,25.6,14.8,4
449,600.3,24.0,14.8,3
450,399.4,22.4,14.8,1
451,220.0,20.5,16.7,1


In [10]:
# Put the hourly power totals side by side with the weather rows, then discard
# the Date/Hour labels that came with the power frame.
# NOTE(review): concat(axis=1) pairs rows by index label only — Date/Hour are
# not used as join keys, so this relies on both frames being in the same
# hourly order starting from the same hour. Confirm against the weather CSV.
side_by_side = pd.concat([daily_hourly_values, weather_data], axis=1)
df1 = side_by_side.drop(columns=['Date', 'Hour'])

# Where irradiance is zero, zero the remaining weather inputs as well.
no_irradiance = df1['SolarIrradiance (W/m2)'] == 0
df1.loc[no_irradiance, ['RealFeelTemp (°C)', 'Wind (km/h)', 'UVIndex']] = 0
df1.tail(25)

Unnamed: 0,Power (KW),SolarIrradiance (W/m2),RealFeelTemp (°C),Wind (km/h),UVIndex
431,0.0,0.0,0.0,0.0,0
432,,0.0,0.0,0.0,0
433,,0.0,0.0,0.0,0
434,,0.0,0.0,0.0,0
435,,0.0,0.0,0.0,0
436,,0.0,0.0,0.0,0
437,,15.2,13.7,11.1,0
438,,189.7,14.6,14.8,1
439,,341.1,15.9,16.7,1
440,,620.1,18.1,14.8,2


In [11]:
# Number of leading rows of df1 used for training: everything except the last
# 24 rows, which are the ones the model is asked to forecast.
forecast_row_count = 24
count_total_rows = df1.shape[0] - forecast_row_count

In [12]:
# Model inputs: the four weather features, restricted to the training rows.
training_feature_names = ['SolarIrradiance (W/m2)', 'RealFeelTemp (°C)', 'Wind (km/h)', 'UVIndex']
independent_columns = df1.loc[:, training_feature_names].iloc[:count_total_rows]

In [13]:
# Model target: the hourly power value for the same training rows.
dependent_column = df1['Power (KW)'].iloc[:count_total_rows]

In [14]:
# Fit a regression tree on the training rows.
# random_state is fixed so that re-running the notebook yields the same tree:
# scikit-learn permutes candidate features at every split, so ties between
# equally good splits can otherwise be broken differently from run to run.
dtr = DecisionTreeRegressor(random_state=42)
dtr.fit(independent_columns, dependent_column)

DecisionTreeRegressor()

#### Is the model well fit or not?

In [15]:
# R^2 of the fitted tree on the very rows it was trained on.
# Because no data is held out here, this says how closely the tree reproduces
# its training data, not how well it forecasts; the metrics computed against
# today's observed values further down are the out-of-sample check.
training_predictions = dtr.predict(independent_columns)
r_sq = metrics.r2_score(dependent_column, training_predictions)
r_sq

1.0

In [16]:
# Weather features for the final 24 rows of df1 — the rows to be forecast.
forecast_feature_names = ['SolarIrradiance (W/m2)', 'RealFeelTemp (°C)', 'Wind (km/h)', 'UVIndex']
forcasted_data = df1.loc[:, forecast_feature_names].iloc[-24:]

In [17]:
# Predict one power value per forecast row and keep the results as a list.
predicted_power = dtr.predict(forcasted_data)
return_array = list(predicted_power)

In [18]:
# predicted_data = pd.DataFrame(return_array, columns = ['Power (KW)'])
# predicted_data

In [19]:
# Date label attached to every forecast row: the day on which the notebook is run.
now = datetime.now()

# NOTE: this rebinds `date`, which the import cell also imports as a class from
# `datetime`. The name is kept so any later cell that reads `date` still works,
# but the class is no longer reachable under this name after this cell runs.
date = now.strftime('%Y-%m-%d')

# One copy of the label per forecast hour (list repetition instead of
# hand-typed 24- and 12-element literals).
current_date_24 = [date] * 24
current_date_12 = [date] * 12

In [20]:
# Hour-of-day labels for the forecast rows: 0..23 for a full day, 0..11 for half.
# These were previously copied from the first 24 (or 12) rows of the observed
# hourly data, which only yields 0..23 when the first training day has a
# reading in every hour. Generating them directly removes that dependency.
hours_24 = list(range(24))
hours_12 = list(range(12))

In [21]:
# Column-oriented record of the forecast: date label, hour of day, predicted power.
data_dict = {
    'Date': current_date_24,
    'Hour': hours_24,
    'Power (KW)': return_array,
}

In [22]:
# Tabular view of the forecast, one row per hour.
data_dataframe = pd.DataFrame(data=data_dict)
data_dataframe

Unnamed: 0,Date,Hour,Power (KW)
0,2022-07-13,0,0.0
1,2022-07-13,1,0.0
2,2022-07-13,2,0.0
3,2022-07-13,3,0.0
4,2022-07-13,4,0.0
5,2022-07-13,5,0.0
6,2022-07-13,6,0.233251
7,2022-07-13,7,0.309929
8,2022-07-13,8,2.044928
9,2022-07-13,9,1.161204


In [23]:
# Sum of the predicted 'Power (KW)' values over all forecast hours.
data_dataframe.loc[:, 'Power (KW)'].sum()

18.803184714064

## Evaluating the model

#### MSE = mean_squared_error(actual value, predicted value)

In [24]:
# Pair each observed hourly total with the forecast for the same hour.
# Joining on 'Hour' (instead of taking the first N forecast rows) keeps the
# pairs aligned even when some hours of the day have no sensor readings.
evaluation_pairs = today_hourly_values.merge(
    data_dataframe[['Hour', 'Power (KW)']],
    on='Hour',
    suffixes=('_actual', '_predicted'),
)

# Mean squared error between observed and predicted hourly power.
mean_sq_error = metrics.mean_squared_error(
    evaluation_pairs['Power (KW)_actual'],
    evaluation_pairs['Power (KW)_predicted'],
)
mean_sq_error

0.14621143244416107

In [25]:
# RMSE: square root of the MSE computed in the previous cell, which puts the
# error back on the same scale as the 'Power (KW)' values being compared.
root_mean_sq_error = np.sqrt(mean_sq_error)
root_mean_sq_error

0.3823760353946898

In [26]:
# Pair each observed hourly total with the forecast for the same hour.
# Joining on 'Hour' (instead of taking the first N forecast rows) keeps the
# pairs aligned even when some hours of the day have no sensor readings.
evaluation_pairs = today_hourly_values.merge(
    data_dataframe[['Hour', 'Power (KW)']],
    on='Hour',
    suffixes=('_actual', '_predicted'),
)

# Mean absolute error between observed and predicted hourly power.
mean_ab_error = metrics.mean_absolute_error(
    evaluation_pairs['Power (KW)_actual'],
    evaluation_pairs['Power (KW)_predicted'],
)
mean_ab_error

0.2066651048603

#### R squared

In [27]:
# Pair each observed hourly total with the forecast for the same hour.
# Joining on 'Hour' (instead of taking the first N forecast rows) keeps the
# pairs aligned even when some hours of the day have no sensor readings.
evaluation_pairs = today_hourly_values.merge(
    data_dataframe[['Hour', 'Power (KW)']],
    on='Hour',
    suffixes=('_actual', '_predicted'),
)

# Out-of-sample R^2: observed values for the latest day vs. the forecast.
r_squared = metrics.r2_score(
    evaluation_pairs['Power (KW)_actual'],
    evaluation_pairs['Power (KW)_predicted'],
)
r_squared

0.827739443310705