In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from numpy import math
from datetime import datetime, date, time
from datetime import timedelta

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [2]:
header_list = ['Date Time', 'Voltage', 'Current']
df = pd.read_csv('sensors_data.csv', names = header_list)
df['Power (W)'] = df['Voltage'] * df['Current']
df['Power (KW)'] = df['Power (W)'] / 1000
df['Date Time'] = pd.to_datetime(df['Date Time'])
df['Date'] = df['Date Time'].dt.date
df['Date'] = pd.to_datetime(df['Date'])
df['Time'] = pd.to_datetime(df['Date Time']).dt.time
df['Hour'] = pd.to_datetime(df['Date Time']).dt.hour
df['Time'] = df['Time'].astype(str)
# df['Hour'] = df['Hour'].astype(str)

In [3]:
df.head(5)

Unnamed: 0,Date Time,Voltage,Current,Power (W),Power (KW),Date,Time,Hour
0,2022-06-24 23:31:26,0.26855,0.0,0.0,0.0,2022-06-24,23:31:26,23
1,2022-06-25 00:31:26,0.26855,0.0,0.0,0.0,2022-06-25,00:31:26,0
2,2022-06-25 01:31:26,0.26855,0.0,0.0,0.0,2022-06-25,01:31:26,1
3,2022-06-25 01:32:26,0.26855,0.0,0.0,0.0,2022-06-25,01:32:26,1
4,2022-06-25 01:33:26,0.31738,0.0,0.0,0.0,2022-06-25,01:33:26,1


In [4]:
rearrange_columns = ['Date Time', 'Date', 'Time', 'Hour', 'Voltage', 'Current', 'Power (W)', 'Power (KW)']
df = df[rearrange_columns]
df.tail(5)

Unnamed: 0,Date Time,Date,Time,Hour,Voltage,Current,Power (W),Power (KW)
8972,2022-07-01 19:43:59,2022-07-01,19:43:59,19,13.81836,0.06251,0.863786,0.000864
8973,2022-07-01 19:44:59,2022-07-01,19:44:59,19,13.81836,0.06591,0.910768,0.000911
8974,2022-07-01 19:45:59,2022-07-01,19:45:59,19,13.81836,0.06458,0.89239,0.000892
8975,2022-07-01 19:46:59,2022-07-01,19:46:59,19,13.79395,0.05888,0.812188,0.000812
8976,2022-07-01 19:47:59,2022-07-01,19:47:59,19,13.79395,0.05096,0.70294,0.000703


In [5]:
unique_date = df['Date'].unique()
unique_date

array(['2022-06-24T00:00:00.000000000', '2022-06-25T00:00:00.000000000',
       '2022-06-26T00:00:00.000000000', '2022-06-27T00:00:00.000000000',
       '2022-06-28T00:00:00.000000000', '2022-06-29T00:00:00.000000000',
       '2022-06-30T00:00:00.000000000', '2022-07-01T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [6]:
filter_today_values = df[df['Date'] == unique_date[-1]][['Date', 'Hour', 'Power (KW)']]
today_hourly_values = filter_today_values.groupby(['Date', 'Hour'])['Power (KW)'].sum().reset_index()
length_today_hourly_values = len(today_hourly_values)
length_today_hourly_values
# today_hourly_values

20

In [7]:
filter_daily_values = df[(df['Date'] > '2022-06-24') & (df['Date'] <= unique_date[-2])][['Date', 'Hour', 'Power (KW)']]
filter_daily_values

Unnamed: 0,Date,Hour,Power (KW)
1,2022-06-25,0,0.0
2,2022-06-25,1,0.0
3,2022-06-25,1,0.0
4,2022-06-25,1,0.0
5,2022-06-25,1,0.0
...,...,...,...
7934,2022-06-30,23,0.0
7935,2022-06-30,23,0.0
7936,2022-06-30,23,0.0
7937,2022-06-30,23,0.0


In [8]:
daily_hourly_values = filter_daily_values.groupby(['Date', 'Hour'])['Power (KW)'].sum().reset_index()
daily_hourly_values

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-25,0,0.0
1,2022-06-25,1,0.0
2,2022-06-25,2,0.0
3,2022-06-25,3,0.0
4,2022-06-25,4,0.0
...,...,...,...
139,2022-06-30,19,0.0
140,2022-06-30,20,0.0
141,2022-06-30,21,0.0
142,2022-06-30,22,0.0


In [9]:
# daily_values = daily_values[daily_values['Date'] > '2022-06-11']

In [10]:
data_selection = {'Mostly sunny': 3,
                  'Partly sunny': 2,
                  'Partly cloudy': 2,
                  'Intermittent clouds': 2,
                  'Partly sunny w/ showers': 1.5,
                  'Partly cloudy w/ showers': 1.5,
                  'Mostly clear': 1,
                  'Clear': 1,
                  'Mostly cloudy w/ t-storms': 0.5,
                  'Mostly cloudy': 0.5,
                  'Showers': 0.5,
                  'Cloudy': 0.5,
                  'Thunderstorms': 0.5,
                  'Mostly cloudy w/ showers': 0.5,
                  'Rain': 0}

In [11]:
header_list = ['Date', 'Time', 'SolarIrradiance (W/m2)', 'weather status', 'Temp (°C)', 'RealFeelTemp (°C)', 'DewPoint (°C)', 'Wind (km/h)',
              'Direction', 'Hum (%)', 'Visibility (km)', 'UVIndex', 'UVIndexText', 'PreProbability (%)', 'RainProbability (%)',
              'CloudCover (%)']
weather_data = pd.read_csv('hourly_weather_forecasted_data.csv', names = header_list, encoding= 'unicode_escape')
weather_data['modified_weather_status'] = weather_data['weather status'].map(data_selection)
# weather_data['code'] = pd.factorize(weather_data['weather status'])[0]
weather_data.loc[weather_data['SolarIrradiance (W/m2)'] == 0, ['modified_weather_status', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']] = 0
weather_data.drop(['Date', 'Time', 'SolarIrradiance (W/m2)', 'weather status', 'RealFeelTemp (°C)', 'DewPoint (°C)', 'Wind (km/h)', 'Direction', 'Visibility (km)', 'UVIndex', 
                   'UVIndexText', 'PreProbability (%)', 'RainProbability (%)', 'Hum (%)'], axis = 1, inplace = True)
# weather_data['weather status'].unique()
# weather_data.tail(14)
# ce = weather_data[weather_data['code'] == 0 ][['code', 'weather status']]
# weather_data['CloudCover (%)'].unique()

In [12]:
df1 = pd.concat([daily_hourly_values, weather_data], axis = 1)
df1.drop(['Date', 'Hour'], axis = 1, inplace = True)
# df1.loc[df1['SolarIrradiance (W/m2)'] == 0, ['Temp (°C)', 'Hum (%)', 'CloudCover (%)']] = 0
# df1.to_csv('solar_energy_predictions.csv')
df1.tail(25)

Unnamed: 0,Power (KW),Temp (°C),CloudCover (%),modified_weather_status
143,0.0,0.0,0,0.0
144,,0.0,0,0.0
145,,0.0,0,0.0
146,,0.0,0,0.0
147,,0.0,0,0.0
148,,0.0,0,0.0
149,,9.2,46,2.0
150,,10.0,68,0.5
151,,11.1,70,2.0
152,,12.8,70,2.0


In [13]:
count_total_rows = len(df1) - 24

In [14]:
independent_columns = df1[['Temp (°C)', 'CloudCover (%)', 'modified_weather_status']][0:count_total_rows]

In [15]:
dependent_column = df1['Power (KW)'][0:count_total_rows]

In [16]:
# scaler = StandardScaler()
# x_transform = scaler.fit_transform(independent_columns)
# y_transform = scaler.fit_transform(dependent_column.values.reshape(-1, 1))

# Normaliation
min_max_scaler = MinMaxScaler()
x_transform = min_max_scaler.fit_transform(independent_columns)
y_transform = min_max_scaler.fit_transform(dependent_column.values.reshape(-1, 1))

In [17]:
rfr = RandomForestRegressor(n_estimators = 100, random_state = 0)
rfr.fit(x_transform, y_transform)

  rfr.fit(x_transform, y_transform)


RandomForestRegressor(random_state=0)

In [18]:
forcasted_data = df1[['Temp (°C)', 'CloudCover (%)', 'modified_weather_status']].tail(24)
# forcast_transform = scaler.fit_transform(forcasted_data)
forcast_transform = min_max_scaler.fit_transform(forcasted_data)

In [20]:
return_array = rfr.predict(forcast_transform)
min_max_scaler.inverse_transform(return_array)

ValueError: Expected 2D array, got 1D array instead:
array=[0.         0.         0.         0.         0.         0.00893098
 0.02326769 0.02774567 0.26889539 0.279184   0.57201571 0.47594781
 0.42290531 0.40755069 0.56477595 0.62889482 0.81231883 0.67406303
 0.50742967 0.51436724 0.43741462 0.33341492 0.         0.        ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.