In [1]:
import pandas as pd
import numpy as np
from numpy import math
from datetime import datetime, date, time
from datetime import timedelta

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics

In [2]:
# Load the raw sensor log. Column names are supplied explicitly, so the
# file's first row is read as data rather than as a header.
header_list = ['Date Time', 'Voltage', 'Current']
df = pd.read_csv('sensors_data.csv', names=header_list)

# Power per sample, in watts and in kilowatts.
df['Power (W)'] = df['Voltage'] * df['Current']
df['Power (KW)'] = df['Power (W)'] / 1000

# Parse the timestamp once and derive the calendar parts from it:
#   Date -> midnight-normalised datetime64 (used for day filtering/grouping)
#   Time -> wall-clock time rendered as text
#   Hour -> integer hour of day (used for hourly grouping)
df['Date Time'] = pd.to_datetime(df['Date Time'])
df['Date'] = pd.to_datetime(df['Date Time'].dt.date)
df['Time'] = df['Date Time'].dt.time.astype(str)
df['Hour'] = df['Date Time'].dt.hour
# df['Hour'] = df['Hour'].astype(str)

In [3]:
# Preview the first five rows to sanity-check the derived columns.
df.head(5)

Unnamed: 0,Date Time,Voltage,Current,Power (W),Power (KW),Date,Time,Hour
0,2022-06-24 23:31:26,0.26855,0.0,0.0,0.0,2022-06-24,23:31:26,23
1,2022-06-25 00:31:26,0.26855,0.0,0.0,0.0,2022-06-25,00:31:26,0
2,2022-06-25 01:31:26,0.26855,0.0,0.0,0.0,2022-06-25,01:31:26,1
3,2022-06-25 01:32:26,0.26855,0.0,0.0,0.0,2022-06-25,01:32:26,1
4,2022-06-25 01:33:26,0.31738,0.0,0.0,0.0,2022-06-25,01:33:26,1


In [4]:
# Put the time-related columns first and the electrical measurements last.
rearrange_columns = [
    'Date Time', 'Date', 'Time', 'Hour',
    'Voltage', 'Current', 'Power (W)', 'Power (KW)',
]
df = df.loc[:, rearrange_columns]
df.tail(5)

Unnamed: 0,Date Time,Date,Time,Hour,Voltage,Current,Power (W),Power (KW)
3910,2022-06-27 22:04:35,2022-06-27,22:04:35,22,3.07617,0.0,0.0,0.0
3911,2022-06-27 22:05:35,2022-06-27,22:05:35,22,2.7832,0.0,0.0,0.0
3912,2022-06-27 22:06:35,2022-06-27,22:06:35,22,2.51465,0.0,0.0,0.0
3913,2022-06-27 23:52:32,2022-06-27,23:52:32,23,0.21973,0.0,0.0,0.0
3914,2022-06-27 23:53:32,2022-06-27,23:53:32,23,0.26855,0.0,0.0,0.0


In [5]:
# Distinct calendar dates present in the sensor data. Later cells index this
# array from the end (unique_date[-1], unique_date[-2], unique_date[-3]) to
# pick individual days, so the rows are presumably in chronological order.
unique_date = df['Date'].unique()
unique_date

array(['2022-06-24T00:00:00.000000000', '2022-06-25T00:00:00.000000000',
       '2022-06-26T00:00:00.000000000', '2022-06-27T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [6]:
# Rows belonging to the last date listed in `unique_date`.
filter_today_values = df.loc[
    df['Date'] == unique_date[-1],
    ['Date', 'Hour', 'Power (KW)'],
]

# Sum the power samples within each (Date, Hour) bucket.
today_hourly_values = (
    filter_today_values
    .groupby(by=['Date', 'Hour'])['Power (KW)']
    .sum()
    .reset_index()
)

# Number of hourly buckets found for that date.
length_today_hourly_values = today_hourly_values.shape[0]
length_today_hourly_values
# today_hourly_values

24

In [7]:
# Training window: every day strictly after the hard-coded start date and up
# to (and including) the second-to-last date in `unique_date`.
# NOTE(review): '2022-06-24' is a literal; it must be edited by hand when the
# sensor file covers a different period.
filter_daily_values = df.loc[
    (df['Date'] > '2022-06-24') & (df['Date'] <= unique_date[-2]),
    ['Date', 'Hour', 'Power (KW)'],
]
filter_daily_values

Unnamed: 0,Date,Hour,Power (KW)
1,2022-06-25,0,0.0
2,2022-06-25,1,0.0
3,2022-06-25,1,0.0
4,2022-06-25,1,0.0
5,2022-06-25,1,0.0
...,...,...,...
2584,2022-06-26,23,0.0
2585,2022-06-26,23,0.0
2586,2022-06-26,23,0.0
2587,2022-06-26,23,0.0


In [8]:
# Collapse the training window to one row per (Date, Hour) by summing the
# power samples in each bucket.
daily_hourly_values = (
    filter_daily_values
    .groupby(by=['Date', 'Hour'])['Power (KW)']
    .sum()
    .reset_index()
)
daily_hourly_values

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-25,0,0.0
1,2022-06-25,1,0.0
2,2022-06-25,2,0.0
3,2022-06-25,3,0.0
4,2022-06-25,4,0.0
5,2022-06-25,5,0.006974
6,2022-06-25,6,0.087022
7,2022-06-25,7,0.194788
8,2022-06-25,8,0.896399
9,2022-06-25,9,1.671361


In [9]:
# daily_values = daily_values[daily_values['Date'] > '2022-06-11']

In [10]:
# Column names for the hourly weather file (supplied explicitly, so the
# file's first row is read as data).
header_list = [
    'Date', 'Time', 'SolarIrradiance (W/m2)', 'weather status', 'Temp (°C)',
    'RealFeelTemp (°C)', 'DewPoint (°C)', 'Wind (km/h)', 'Direction', 'Hum (%)',
    'Visibility (km)', 'UVIndex', 'UVIndexText', 'PreProbability (%)',
    'RainProbability (%)', 'CloudCover (%)',
]
weather_data = pd.read_csv(
    'hourly_weather_forecasted_data.csv',
    names=header_list,
    encoding='unicode_escape',
)

# Keep only the four model features (irradiance, temperature, humidity and
# cloud cover) by discarding every other column.
weather_data = weather_data.drop(
    columns=[
        'Date', 'Time', 'RealFeelTemp (°C)', 'DewPoint (°C)', 'Wind (km/h)',
        'Direction', 'Visibility (km)', 'UVIndex', 'UVIndexText',
        'PreProbability (%)', 'RainProbability (%)', 'weather status',
    ]
)
weather_data.tail(25)

Unnamed: 0,SolarIrradiance (W/m2),Temp (°C),Hum (%),CloudCover (%)
47,0.0,13.1,67,45
48,0.0,12.4,76,44
49,0.0,12.0,76,33
50,0.0,11.5,81,32
51,0.0,11.2,85,31
52,0.0,10.7,83,30
53,20.5,11.1,78,45
54,80.5,11.4,75,45
55,132.5,12.1,71,45
56,93.5,12.6,73,92


In [11]:
# Place the hourly power totals next to the weather features. The concat is
# column-wise, so rows are paired by index label; weather rows with no
# matching power row end up with NaN in 'Power (KW)'.
df1 = (
    pd.concat([daily_hourly_values, weather_data], axis=1)
    .drop(columns=['Date', 'Hour'])
)

# Where irradiance is zero, zero out the remaining features as well.
df1.loc[
    df1['SolarIrradiance (W/m2)'] == 0,
    ['Temp (°C)', 'Hum (%)', 'CloudCover (%)'],
] = 0
# df1.to_csv('solar_energy_predictions.csv')
df1.tail(25)

Unnamed: 0,Power (KW),SolarIrradiance (W/m2),Temp (°C),Hum (%),CloudCover (%)
47,0.0,0.0,0.0,0,0
48,,0.0,0.0,0,0
49,,0.0,0.0,0,0
50,,0.0,0.0,0,0
51,,0.0,0.0,0,0
52,,0.0,0.0,0,0
53,,20.5,11.1,78,45
54,,80.5,11.4,75,45
55,,132.5,12.1,71,45
56,,93.5,12.6,73,92


In [12]:
# Rows used for fitting: everything except the final 24 rows of df1, which
# the forecast cell takes separately with .tail(24).
count_total_rows = df1.shape[0] - 24

In [13]:
# Feature matrix: the first `count_total_rows` rows of the four weather columns.
independent_columns = df1.iloc[:count_total_rows][
    ['SolarIrradiance (W/m2)', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']
]

In [14]:
# Target vector: hourly power for the same leading rows as the feature matrix.
dependent_column = df1['Power (KW)'].iloc[:count_total_rows]

In [15]:
# Ordinary least squares without an intercept term, fitted on the training
# rows prepared in the cells above.
reg = linear_model.LinearRegression(fit_intercept=False)
reg.fit(X=independent_columns, y=dependent_column)

LinearRegression(fit_intercept=False)

#### Is the model a good fit?

In [16]:
# R^2 computed on the same rows the model was fitted on, i.e. an in-sample
# score rather than a measure of forecast accuracy.
r_sq = reg.score(independent_columns, dependent_column)
r_sq

0.7207027383690898

In [17]:
# The model was created with fit_intercept=False, so the intercept is 0.0.
reg.intercept_

0.0

In [18]:
# One coefficient per training column, in the order used for fitting:
# SolarIrradiance (W/m2), Temp (°C), Hum (%), CloudCover (%).
reg.coef_

array([ 0.01202662, -0.05605291, -0.00351308,  0.01238466])

In [19]:
# Weather features for the final 24 rows of df1 (the rows held out of training).
forcasted_data = df1.iloc[-24:][
    ['SolarIrradiance (W/m2)', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']
]

In [20]:
# Predicted power for each held-out row, as a plain Python list.
return_array = [predicted_power for predicted_power in reg.predict(forcasted_data)]

In [21]:
# predicted_data = pd.DataFrame(return_array, columns = ['Power (KW)'])
# predicted_data

In [22]:
# Date label (YYYY-MM-DD, taken from the wall clock at run time) repeated once
# per forecast row: a 24-entry and a 12-entry version.
# NOTE: binding `date` here shadows the `date` class imported from `datetime`
# in the import cell for the rest of the notebook.
now = datetime.now()
date = now.strftime('%Y-%m-%d')
current_date_24 = [date] * 24
current_date_12 = [date] * 12

In [23]:
# Hour labels copied from the first 24 (and first 12) rows of the hourly
# training table.
hours_24 = daily_hourly_values['Hour'].iloc[:24].tolist()
hours_12 = daily_hourly_values['Hour'].iloc[:12].tolist()

In [24]:
# Column name -> values mapping for the 24-row forecast table.
data_dict = dict(
    zip(
        ('Date', 'Hour', 'Power (KW)'),
        (current_date_24, hours_24, return_array),
    )
)

In [25]:
# Forecast table: one row per hour with its date label and predicted power.
data_dataframe = pd.DataFrame(data=data_dict)
data_dataframe

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-28,0,0.0
1,2022-06-28,1,0.0
2,2022-06-28,2,0.0
3,2022-06-28,3,0.0
4,2022-06-28,4,0.0
5,2022-06-28,5,-0.092352
6,2022-06-28,6,0.622968
7,2022-06-28,7,1.223168
8,2022-06-28,8,1.301156
9,2022-06-28,9,1.604482


## Evaluating the model

#### MSE = mean_squared_error(actual value, predicted value)

In [26]:
# Mean squared error between the measured hourly totals for the latest date
# and the first `length_today_hourly_values` predicted values.
mean_sq_error = metrics.mean_squared_error(
    y_true=today_hourly_values['Power (KW)'],
    y_pred=data_dataframe['Power (KW)'].iloc[:length_today_hourly_values],
)
mean_sq_error

0.5277197388381697

In [27]:
# RMSE: square root of the MSE computed in the previous cell, which puts the
# error back in the same units as 'Power (KW)'.
root_mean_sq_error = np.sqrt(mean_sq_error)
root_mean_sq_error

0.7264432110207719

In [28]:
# Mean absolute error between the measured hourly totals for the latest date
# and the first `length_today_hourly_values` predicted values.
mean_ab_error = metrics.mean_absolute_error(
    y_true=today_hourly_values['Power (KW)'],
    y_pred=data_dataframe['Power (KW)'].iloc[:length_today_hourly_values],
)
mean_ab_error

0.5256221657227542

#### R squared

In [29]:
# Out-of-sample R^2: measured hourly totals for the latest date versus the
# first `length_today_hourly_values` predicted values.
r_squared = metrics.r2_score(
    y_true=today_hourly_values['Power (KW)'],
    y_pred=data_dataframe['Power (KW)'].iloc[:length_today_hourly_values],
)
r_squared

0.11390879974055246

#### Yesterday's results

In [42]:
# Back-test on the second-to-last measured day ("yesterday"): fit a model on
# the day(s) before it, predict it from that day's weather rows, and compare
# the prediction with the measured hourly totals.
#
# NOTE: `return_array` is also assigned by the earlier forecast cell; after
# this cell runs it holds the back-test predictions instead.

# The four model inputs, and the subset that is zeroed when irradiance is 0.
weather_feature_columns = ['SolarIrradiance (W/m2)', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']
non_irradiance_columns = ['Temp (°C)', 'Hum (%)', 'CloudCover (%)']

# Measured hourly totals for the day being predicted (evaluation target).
filter_last_day_values = df[df['Date'] == unique_date[-2]][['Date', 'Hour', 'Power (KW)']]
last_day_hourly_values = filter_last_day_values.groupby(['Date', 'Hour'])['Power (KW)'].sum().reset_index()

# Measured hourly totals for the training window (training target).
# NOTE(review): '2022-06-25' is a literal start date; edit it when the data
# covers a different period.
filter_yes_values = df[(df['Date'] >= '2022-06-25') & (df['Date'] <= unique_date[-3])][['Date', 'Hour', 'Power (KW)']]
yes_hourly_values = filter_yes_values.groupby(['Date', 'Hour'])['Power (KW)'].sum().reset_index()

header_list = ['Date', 'Time', 'SolarIrradiance (W/m2)', 'weather status', 'Temp (°C)', 'RealFeelTemp (°C)', 'DewPoint (°C)', 'Wind (km/h)',
              'Direction', 'Hum (%)', 'Visibility (km)', 'UVIndex', 'UVIndexText', 'PreProbability (%)', 'RainProbability (%)',
              'CloudCover (%)']
weather_data1 = pd.read_csv('hourly_weather_forecasted_data.csv', names = header_list, encoding= 'unicode_escape')
# NOTE(review): the weather 'Date' column is not passed through
# pd.to_datetime in this cell, so the comparisons below are presumably string
# comparisons -- confirm the file stores dates as YYYY-MM-DD.
weather_unique_date = weather_data1['Date'].unique()

# Weather rows for the training window. The index is reset because the
# column-wise concat below pairs rows by index label: `yes_hourly_values` is
# labelled 0..n-1, whereas this slice would otherwise keep the labels it had
# in the full weather file and only line up if it happened to start at row 0.
filter_weather_yes_values = weather_data1.loc[
    (weather_data1['Date'] >= '2022-06-25') & (weather_data1['Date'] <= weather_unique_date[-3]),
    weather_feature_columns,
].reset_index(drop=True)

# Training table: power target next to the weather features, with the
# non-irradiance features zeroed wherever irradiance is zero.
yes_df1 = pd.concat([yes_hourly_values, filter_weather_yes_values], axis = 1).drop(columns=['Date', 'Hour'])
yes_df1.loc[yes_df1['SolarIrradiance (W/m2)'] == 0, non_irradiance_columns] = 0
yes_count_total_rows = len(yes_df1)
yes_independent_columns = yes_df1[weather_feature_columns]
yes_dependent_column = yes_df1['Power (KW)']

# Same model form as the main forecast: least squares with no intercept.
yes_reg = linear_model.LinearRegression(fit_intercept = False)
yes_reg.fit(yes_independent_columns, yes_dependent_column)

# Weather rows for the day being predicted. An explicit copy is taken so the
# in-place zeroing below modifies an independent frame and does not trigger
# pandas' SettingWithCopyWarning.
forcasted_yes_values = weather_data1.loc[
    weather_data1['Date'] == weather_unique_date[-2],
    weather_feature_columns,
].copy()
forcasted_yes_values.loc[forcasted_yes_values['SolarIrradiance (W/m2)'] == 0, non_irradiance_columns] = 0

return_array = yes_reg.predict(forcasted_yes_values)
predicted_data = pd.DataFrame(return_array, columns = ['Power (KW)'])

# Summary figures: total predicted power over the day, then MSE / RMSE / MAE /
# R^2 of the prediction against the measured hourly totals.
mv_pe = predicted_data['Power (KW)'].sum()
mv_mse = metrics.mean_squared_error(last_day_hourly_values['Power (KW)'], predicted_data['Power (KW)'])
mv_rmse = np.sqrt(mv_mse)
mv_mae = metrics.mean_absolute_error(last_day_hourly_values['Power (KW)'], predicted_data['Power (KW)'])
mv_rs = metrics.r2_score(last_day_hourly_values['Power (KW)'], predicted_data['Power (KW)'])
mv_pe

29.288274961178672

In [31]:
# dependent_variable = 'Power (KW)'

In [32]:
# independent_variables = df1.columns.tolist()
# independent_variables.remove(dependent_variable)

In [33]:
# independent_variables

In [34]:
# X = df1[independent_variables].values

In [35]:
# y = df1[dependent_variable].values

In [36]:
# X_train, X_test, y_train, y_test, = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [37]:
# X_train

In [38]:
# regressor = LinearRegression()
# regressor.fit(X_train, y_train)

In [39]:
# y_pred = regressor.predict(X_test)
# y_pred

In [40]:
# math.sqrt(mean_squared_error(y_test, y_pred))

In [41]:
# r2_score(y_test, y_pred)