In [1]:
import pandas as pd
import numpy as np
from numpy import math
from datetime import datetime, date, time
from datetime import timedelta

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

In [2]:
# Load the raw sensor log. The CSV has no header row, so the column names are
# supplied explicitly.
header_list = ['Date Time', 'Voltage', 'Current']

# Derive every extra column in a single pass. Later entries may read columns
# created by earlier entries of the same `assign` call, so the order matters.
df = pd.read_csv('sensors_data.csv', names=header_list).assign(**{
    # Power from the two measured channels, in watts and in kilowatts.
    'Power (W)': lambda frame: frame['Voltage'] * frame['Current'],
    'Power (KW)': lambda frame: frame['Power (W)'] / 1000,
    # Parse the timestamp once; the calendar fields below are read from it.
    'Date Time': lambda frame: pd.to_datetime(frame['Date Time']),
    # Calendar day as a datetime64 value at midnight, so it can be compared with date strings.
    'Date': lambda frame: pd.to_datetime(frame['Date Time'].dt.date),
    # Time of day stored as plain text.
    'Time': lambda frame: frame['Date Time'].dt.time.astype(str),
    # Hour of day stays numeric.
    'Hour': lambda frame: frame['Date Time'].dt.hour,
})

In [3]:
# Preview the first five rows (the default for `head`) to check the derived columns.
df.head()

Unnamed: 0,Date Time,Voltage,Current,Power (W),Power (KW),Date,Time,Hour
0,2022-06-11 23:53:00,0.43945,0.0,0.0,0.0,2022-06-11,23:53:00,23
1,2022-06-11 23:54:00,0.48828,0.0,0.0,0.0,2022-06-11,23:54:00,23
2,2022-06-11 23:55:00,0.46387,0.0,0.0,0.0,2022-06-11,23:55:00,23
3,2022-06-11 23:56:00,0.46387,0.0,0.0,0.0,2022-06-11,23:56:00,23
4,2022-06-11 23:57:00,0.46387,0.0,0.0,0.0,2022-06-11,23:57:00,23


In [4]:
# Put the timestamp-derived columns first, followed by the electrical measurements.
rearrange_columns = [
    'Date Time', 'Date', 'Time', 'Hour',
    'Voltage', 'Current', 'Power (W)', 'Power (KW)',
]
df = df.loc[:, rearrange_columns]

# Show the last five rows (the default for `tail`) to see where the log ends.
df.tail()

Unnamed: 0,Date Time,Date,Time,Hour,Voltage,Current,Power (W),Power (KW)
17780,2022-06-25 00:50:18,2022-06-25,00:50:18,0,0.26855,0.0,0.0,0.0
17781,2022-06-25 00:51:18,2022-06-25,00:51:18,0,0.24414,0.0,0.0,0.0
17782,2022-06-25 00:52:18,2022-06-25,00:52:18,0,0.29297,0.0,0.0,0.0
17783,2022-06-25 00:53:18,2022-06-25,00:53:18,0,0.36621,0.0,0.0,0.0
17784,2022-06-25 00:54:19,2022-06-25,00:54:19,0,0.3418,0.0,0.0,0.0


In [5]:
# Distinct calendar days present in the log, in order of first appearance.
# Later cells index this from the end ([-1], [-2]), so they rely on the log
# being in chronological order.
unique_date = df.loc[:, 'Date'].unique()
unique_date

array(['2022-06-11T00:00:00.000000000', '2022-06-12T00:00:00.000000000',
       '2022-06-13T00:00:00.000000000', '2022-06-14T00:00:00.000000000',
       '2022-06-15T00:00:00.000000000', '2022-06-16T00:00:00.000000000',
       '2022-06-17T00:00:00.000000000', '2022-06-18T00:00:00.000000000',
       '2022-06-19T00:00:00.000000000', '2022-06-20T00:00:00.000000000',
       '2022-06-21T00:00:00.000000000', '2022-06-22T00:00:00.000000000',
       '2022-06-23T00:00:00.000000000', '2022-06-24T00:00:00.000000000',
       '2022-06-25T00:00:00.000000000'], dtype='datetime64[ns]')

In [6]:
# Readings from the last day listed in `unique_date`, reduced to the columns used below.
filter_today_values = df.loc[
    df['Date'].eq(unique_date[-1]),
    ['Date', 'Hour', 'Power (KW)'],
]

# One row per (day, hour): the sum of all kW samples logged within that hour.
# NOTE(review): this is a sum of kW readings, not an energy figure in kWh.
# Confirm that is the intended quantity.
today_hourly_values = (
    filter_today_values
    .groupby(['Date', 'Hour'], as_index=False)['Power (KW)']
    .sum()
)

In [7]:
# History window: from 2022-06-18 up to and including the second-to-last day in
# `unique_date`. Both ends are inclusive.
filter_daily_values = df.loc[
    df['Date'].between('2022-06-18', unique_date[-2]),
    ['Date', 'Hour', 'Power (KW)'],
]
filter_daily_values

Unnamed: 0,Date,Hour,Power (KW)
8169,2022-06-18,0,0.0
8170,2022-06-18,0,0.0
8171,2022-06-18,0,0.0
8172,2022-06-18,0,0.0
8173,2022-06-18,0,0.0
...,...,...,...
17725,2022-06-24,21,0.0
17726,2022-06-24,22,0.0
17727,2022-06-24,23,0.0
17728,2022-06-24,23,0.0


In [8]:
# Collapse the history window to one row per (day, hour) by summing the kW samples
# logged in that hour. The group keys come back as ordinary columns.
daily_hourly_values = (
    filter_daily_values
    .groupby(['Date', 'Hour'], as_index=False)['Power (KW)']
    .sum()
)
daily_hourly_values

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-18,0,0.000000
1,2022-06-18,1,0.000000
2,2022-06-18,2,0.000000
3,2022-06-18,3,0.000000
4,2022-06-18,4,0.000000
...,...,...,...
163,2022-06-24,19,0.034753
164,2022-06-24,20,0.000000
165,2022-06-24,21,0.000000
166,2022-06-24,22,0.000000


In [9]:
# daily_values = daily_values[daily_values['Date'] > '2022-06-11']

In [10]:
# Column names for the header-less hourly weather file.
# This rebinds `header_list`, which was previously used for the sensor CSV.
header_list = [
    'SolarIrradiance (W/m2)', 'weather status', 'Temp (°C)', 'RealFeelTemp (°C)',
    'DewPoint (°C)', 'Wind (km/h)', 'Direction', 'Hum (%)', 'Visibility (km)',
    'UVIndex', 'UVIndexText', 'PreProbability (%)', 'RainProbability (%)',
    'CloudCover (%)',
]

# Read the file, then discard every column that is not used as a model feature.
# What remains is irradiance, temperature, humidity and cloud cover.
weather_data = pd.read_csv(
    'hourly_weather_forecasted_data.csv',
    names=header_list,
    encoding='unicode_escape',
).drop(columns=[
    'RealFeelTemp (°C)', 'DewPoint (°C)', 'Wind (km/h)', 'Direction',
    'Visibility (km)', 'UVIndex', 'UVIndexText', 'PreProbability (%)',
    'RainProbability (%)', 'weather status',
])
weather_data.tail(14)

Unnamed: 0,SolarIrradiance (W/m2),Temp (°C),Hum (%),CloudCover (%)
178,114.0,15.8,60,70
179,123.5,16.5,55,70
180,174.2,17.7,54,44
181,183.9,18.4,55,40
182,171.2,18.6,57,46
183,124.2,17.4,66,70
184,115.0,16.7,70,70
185,134.7,17.3,60,46
186,127.5,17.6,51,33
187,109.1,17.1,51,19


In [11]:
# Place hourly power next to the weather features. The two frames are matched on
# their row index only (no date/hour key), so row i of one is paired with row i of
# the other. Weather rows with no matching power row get NaN in 'Power (KW)'.
df1 = (
    pd.concat([daily_hourly_values, weather_data], axis=1)
    .drop(columns=['Date', 'Hour'])
)

# Where irradiance is zero, zero out the remaining weather features as well.
df1.loc[
    df1['SolarIrradiance (W/m2)'].eq(0),
    ['Temp (°C)', 'Hum (%)', 'CloudCover (%)'],
] = 0

df1.tail(14)

Unnamed: 0,Power (KW),SolarIrradiance (W/m2),Temp (°C),Hum (%),CloudCover (%)
178,,114.0,15.8,60,70
179,,123.5,16.5,55,70
180,,174.2,17.7,54,44
181,,183.9,18.4,55,40
182,,171.2,18.6,57,46
183,,124.2,17.4,66,70
184,,115.0,16.7,70,70
185,,134.7,17.3,60,46
186,,127.5,17.6,51,33
187,,109.1,17.1,51,19


In [12]:
# Number of rows used for training: everything except the final 24 rows,
# which are held back as the rows to predict.
count_total_rows = df1.shape[0] - 24

In [13]:
# Feature matrix: the four weather columns for the first `count_total_rows` rows.
independent_columns = df1[
    ['SolarIrradiance (W/m2)', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']
].head(count_total_rows)

In [14]:
# Target vector: hourly power for the same leading rows as the feature matrix.
dependent_column = df1['Power (KW)'].head(count_total_rows)

In [15]:
# Random forest of 100 trees. The fixed seed makes repeated runs give the same model.
rfr = RandomForestRegressor(
    n_estimators=100,
    random_state=0,
)
# Fit weather features -> hourly power. `fit` returns the estimator, which the
# notebook displays as the cell output.
rfr.fit(X=independent_columns, y=dependent_column)

RandomForestRegressor(random_state=0)

#### Is the model well fit or not?

In [16]:
# R^2 measured on the very rows the forest was fitted on. A random forest reproduces
# its training rows closely, so this value is optimistic and cannot by itself show
# that the model generalises. It is kept under its original name `r_sq`.
r_sq = rfr.score(independent_columns, dependent_column)

# Hold-out estimate: fit an identically configured forest on the first 80% of the
# rows and score it on the last 20%, which it never saw during fitting.
# shuffle=False keeps the split in row order, so later rows cannot leak into the
# training part.
X_fit, X_holdout, y_fit, y_holdout = train_test_split(
    independent_columns,
    dependent_column,
    test_size=0.2,
    shuffle=False,
)
r_sq_holdout = (
    RandomForestRegressor(n_estimators=100, random_state=0)
    .fit(X_fit, y_fit)
    .score(X_holdout, y_holdout)
)
print(f'hold-out R^2: {r_sq_holdout:.4f}')

# The cell still displays the training-set R^2, as before.
r_sq

0.9506491210817467

In [17]:
# Weather features for the final 24 rows of `df1`. These are the rows to predict.
forcasted_data = df1[
    ['SolarIrradiance (W/m2)', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']
].iloc[-24:]

In [19]:
# Predicted power for each forecast row, unpacked from the NumPy result into a plain list.
return_array = [*rfr.predict(forcasted_data)]

In [20]:
# predicted_data = pd.DataFrame(return_array, columns = ['Power (KW)'])
# predicted_data

In [21]:
# Label for the forecast rows: the day on which this cell is executed (local clock),
# formatted as YYYY-MM-DD.
now = datetime.now()

# NOTE: this rebinds `date`, hiding the `date` class imported from `datetime` in the
# import cell. The name is kept so any code that reads this string keeps working.
date = now.strftime('%Y-%m-%d')

# One copy of the date string per forecast row (24-row and 12-row variants).
# List repetition replaces the hand-typed literals, so the lengths are explicit.
current_date_24 = [date] * 24
current_date_12 = [date] * 12

In [22]:
# Hour-of-day labels for the forecast rows: 0..23 for a full day, 0..11 for half a day.
# They are generated directly rather than sliced from the first rows of the historical
# data, so a missing hour in the first recorded day cannot shift or shorten the labels.
hours_24 = list(range(24))
hours_12 = list(range(12))

In [23]:
# Column-oriented record of the forecast: one date label, one hour label and one
# predicted power value per row.
data_dict = {
    'Date': current_date_24,
    'Hour': hours_24,
    'Power (KW)': return_array,
}

In [24]:
# Turn the column dictionary into a table and display it.
data_dataframe = pd.DataFrame(data=data_dict)
data_dataframe

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-25,0,0.0
1,2022-06-25,1,0.0
2,2022-06-25,2,0.0
3,2022-06-25,3,0.0
4,2022-06-25,4,0.0
5,2022-06-25,5,0.246984
6,2022-06-25,6,0.266741
7,2022-06-25,7,0.561554
8,2022-06-25,8,0.605697
9,2022-06-25,9,0.715395
