In [1]:
import pandas as pd
import numpy as np
from numpy import math
from datetime import datetime, date, time
from datetime import timedelta

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

In [2]:
# Load the sensor log. Column names are supplied via `names=`, so the first
# row of the file is read as data rather than as a header.
header_list = ['Date Time', 'Voltage', 'Current']
df = (
    pd.read_csv('sensors_data.csv', names=header_list)
    .assign(**{
        # Power from the two measured channels, in watts and kilowatts.
        'Power (W)': lambda frame: frame['Voltage'] * frame['Current'],
        'Power (KW)': lambda frame: frame['Power (W)'] / 1000,
        # Parse the timestamp once; the entries below reuse the parsed column.
        'Date Time': lambda frame: pd.to_datetime(frame['Date Time']),
        # Calendar day as a datetime64 value at midnight.
        'Date': lambda frame: pd.to_datetime(frame['Date Time'].dt.date),
        # Time of day kept as a string.
        'Time': lambda frame: frame['Date Time'].dt.time.astype(str),
        # Hour of day, left numeric.
        'Hour': lambda frame: frame['Date Time'].dt.hour,
    })
)

In [3]:
# Preview the first five rows (the default for head()).
df.head()

Unnamed: 0,Date Time,Voltage,Current,Power (W),Power (KW),Date,Time,Hour
0,2022-06-11 23:53:22,0.43945,0.0,0.0,0.0,2022-06-11,23:53:22,23
1,2022-06-11 23:54:22,0.48828,0.0,0.0,0.0,2022-06-11,23:54:22,23
2,2022-06-11 23:55:22,0.46387,0.0,0.0,0.0,2022-06-11,23:55:22,23
3,2022-06-11 23:56:22,0.46387,0.0,0.0,0.0,2022-06-11,23:56:22,23
4,2022-06-11 23:57:22,0.46387,0.0,0.0,0.0,2022-06-11,23:57:22,23


In [4]:
# Put the timestamp-derived columns first, followed by the measurements.
rearrange_columns = [
    'Date Time', 'Date', 'Time', 'Hour',
    'Voltage', 'Current', 'Power (W)', 'Power (KW)',
]
df = df.loc[:, rearrange_columns]
# Preview the last five rows (the default for tail()).
df.tail()

Unnamed: 0,Date Time,Date,Time,Hour,Voltage,Current,Power (W),Power (KW)
11693,2022-06-20 10:58:06,2022-06-20,10:58:06,10,22.24121,0.0,0.0,0.0
11694,2022-06-20 10:59:06,2022-06-20,10:59:06,10,14.25781,1.60124,22.830176,0.02283
11695,2022-06-20 11:00:06,2022-06-20,11:00:06,11,14.42871,1.88629,27.216731,0.027217
11696,2022-06-20 11:01:06,2022-06-20,11:01:06,11,14.08691,1.2965,18.263679,0.018264
11697,2022-06-20 11:02:06,2022-06-20,11:02:06,11,14.01367,1.3087,18.33969,0.01834


In [5]:
# Distinct calendar days, in order of first appearance in df.
unique_date = df.loc[:, 'Date'].unique()
unique_date

array(['2022-06-11T00:00:00.000000000', '2022-06-12T00:00:00.000000000',
       '2022-06-13T00:00:00.000000000', '2022-06-14T00:00:00.000000000',
       '2022-06-15T00:00:00.000000000', '2022-06-16T00:00:00.000000000',
       '2022-06-17T00:00:00.000000000', '2022-06-18T00:00:00.000000000',
       '2022-06-19T00:00:00.000000000', '2022-06-20T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [6]:
# Rows whose Date equals the last entry of unique_date, reduced to the three
# columns needed for the hourly aggregation.
filter_today_values = df.loc[
    df['Date'].eq(unique_date[-1]),
    ['Date', 'Hour', 'Power (KW)'],
]
# Sum of 'Power (KW)' per (Date, Hour), returned as a flat frame.
today_hourly_values = (
    filter_today_values
    .groupby(['Date', 'Hour'])['Power (KW)']
    .agg('sum')
    .reset_index()
)

In [7]:
# Rows dated between the third-last and second-last entries of unique_date
# (both bounds inclusive), reduced to the columns used downstream.
filter_daily_values = df.loc[
    df['Date'].between(unique_date[-3], unique_date[-2]),
    ['Date', 'Hour', 'Power (KW)'],
]
filter_daily_values

Unnamed: 0,Date,Hour,Power (KW)
8169,2022-06-18,0,0.0
8170,2022-06-18,0,0.0
8171,2022-06-18,0,0.0
8172,2022-06-18,0,0.0
8173,2022-06-18,0,0.0
...,...,...,...
11032,2022-06-19,23,0.0
11033,2022-06-19,23,0.0
11034,2022-06-19,23,0.0
11035,2022-06-19,23,0.0


In [31]:
# Sum of 'Power (KW)' per (Date, Hour) over the selected window, returned as a
# flat frame with a fresh integer index.
daily_hourly_values = (
    filter_daily_values
    .groupby(['Date', 'Hour'])['Power (KW)']
    .agg('sum')
    .reset_index()
)
daily_hourly_values.head()

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-18,0,0.0
1,2022-06-18,1,0.0
2,2022-06-18,2,0.0
3,2022-06-18,3,0.0
4,2022-06-18,4,0.0


In [9]:
# daily_values = daily_values[daily_values['Date'] > '2022-06-11']

In [33]:
# Column names for the hourly weather file. They are supplied via `names=`,
# so the first row of the file is read as data.
header_list = [
    'SolarIrradiance (W/m2)',
    'weather status',
    'Temp (°C)',
    'RealFeelTemp (°C)',
    'DewPoint (°C)',
    'Wind (km/h)',
    'Direction',
    'Hum (%)',
    'Visibility (km)',
    'UVIndex',
    'UVIndexText',
    'PreProbability (%)',
    'RainProbability (%)',
    'CloudCover (%)',
]
weather_data = pd.read_csv(
    'hourly_weather_forecasted_data.csv',
    names=header_list,
    encoding='unicode_escape',
)
weather_data.head()

Unnamed: 0,SolarIrradiance (W/m2),weather status,Temp (°C),RealFeelTemp (°C),DewPoint (°C),Wind (km/h),Direction,Hum (%),Visibility (km),UVIndex,UVIndexText,PreProbability (%),RainProbability (%),CloudCover (%)
0,0.0,Mostly cloudy,19.4,18.3,15.2,7.4,SW,77,16.1,0,Low,1,1,76
1,0.0,Mostly cloudy,18.7,17.5,14.9,7.4,WSW,78,16.1,0,Low,4,4,76
2,0.0,Mostly cloudy,18.0,17.5,14.7,9.3,W,81,16.1,0,Low,24,24,76
3,0.0,Mostly cloudy,17.4,16.5,14.9,11.1,NW,85,16.1,0,Low,24,24,76
4,0.0,Mostly cloudy,16.9,15.7,14.2,13.0,SW,84,16.1,0,Low,29,29,76


In [34]:
# Place the hourly power sums and the weather rows side by side. concat with
# axis='columns' pairs rows by index label, so rows present in only one input
# get missing values in the other input's columns.
# NOTE(review): this presumably relies on row i of both frames describing the
# same hour - confirm against how the weather file is produced.
df1 = pd.concat(objs=[daily_hourly_values, weather_data], axis='columns')
# Persist the combined table (the index is written as the first CSV column).
df1.to_csv('solar_energy_predictions.csv')
df1

Unnamed: 0,Date,Hour,Power (KW),SolarIrradiance (W/m2),weather status,Temp (°C),RealFeelTemp (°C),DewPoint (°C),Wind (km/h),Direction,Hum (%),Visibility (km),UVIndex,UVIndexText,PreProbability (%),RainProbability (%),CloudCover (%)
0,2022-06-18,0.0,0.0,0.0,Mostly cloudy,19.4,18.3,15.2,7.4,SW,77,16.1,0,Low,1,1,76
1,2022-06-18,1.0,0.0,0.0,Mostly cloudy,18.7,17.5,14.9,7.4,WSW,78,16.1,0,Low,4,4,76
2,2022-06-18,2.0,0.0,0.0,Mostly cloudy,18.0,17.5,14.7,9.3,W,81,16.1,0,Low,24,24,76
3,2022-06-18,3.0,0.0,0.0,Mostly cloudy,17.4,16.5,14.9,11.1,NW,85,16.1,0,Low,24,24,76
4,2022-06-18,4.0,0.0,0.0,Mostly cloudy,16.9,15.7,14.2,13.0,SW,84,16.1,0,Low,29,29,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,NaT,,,17.7,Mostly sunny,20.5,20.7,7.4,9.3,NNW,42,16.1,2,Low,0,0,25
68,NaT,,,11.0,Mostly sunny,19.2,18.7,7.3,9.3,NNW,46,16.1,1,Low,0,0,26
69,NaT,,,3.5,Mostly sunny,17.6,17.4,7.7,7.4,NNW,52,16.1,0,Low,0,0,26
70,NaT,,,0.0,Mostly clear,16.0,15.8,8.4,7.4,N,61,16.1,0,Low,0,0,26


In [12]:
# Number of leading rows used for fitting: everything except the last 24 rows.
count_total_rows = df1.shape[0] - 24

In [13]:
# Feature matrix: the four weather columns for the first count_total_rows rows.
independent_columns = df1.loc[
    :, ['SolarIrradiance (W/m2)', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']
].iloc[:count_total_rows]

In [14]:
# Target vector: 'Power (KW)' for the same leading rows as the feature matrix.
dependent_column = df1.loc[:, 'Power (KW)'].iloc[:count_total_rows]

In [15]:
# Linear regression of 'Power (KW)' on the selected weather columns.
reg = linear_model.LinearRegression()
reg.fit(X=independent_columns, y=dependent_column)

LinearRegression()

In [16]:
# Prediction inputs: the same four weather columns, taken from the last 24 rows.
forcasted_data = df1.loc[
    :, ['SolarIrradiance (W/m2)', 'Temp (°C)', 'Hum (%)', 'CloudCover (%)']
].iloc[-24:]

In [17]:
# Model predictions for the forecast rows, unpacked into a plain list.
return_array = [*reg.predict(forcasted_data)]

In [18]:
# predicted_data = pd.DataFrame(return_array, columns = ['Power (KW)'])
# predicted_data

In [19]:
# Build the 'Date' values for the 24 forecast rows: the current date as a
# 'YYYY-MM-DD' string, repeated once per row.
# The formatted string is kept under its own name so it does not overwrite the
# `date` class imported from the datetime module in the first cell.
now = datetime.now()
today_str = now.strftime('%Y-%m-%d')
current_date = [today_str] * 24

In [28]:
# Hour labels for the forecast rows: the first 24 'Hour' values of the
# aggregated history.
hours = daily_hourly_values['Hour'].iloc[:24].tolist()

In [29]:
# Column name -> values mapping for the forecast table.
data_dict = dict(
    zip(
        ('Date', 'Hour', 'Power (KW)'),
        (current_date, hours, return_array),
    )
)

In [30]:
# Forecast table with one row per hour: Date, Hour, predicted 'Power (KW)'.
data_dataframe = pd.DataFrame.from_dict(data_dict)
data_dataframe

Unnamed: 0,Date,Hour,Power (KW)
0,2022-06-20,0,0.004905
1,2022-06-20,1,0.004905
2,2022-06-20,2,0.004905
3,2022-06-20,3,0.004905
4,2022-06-20,4,0.004905
5,2022-06-20,5,-0.103581
6,2022-06-20,6,-0.072211
7,2022-06-20,7,-0.025368
8,2022-06-20,8,0.027428
9,2022-06-20,9,0.101927


In [None]:
# dependent_variable = 'Power (KW)'

In [None]:
# independent_variables = df1.columns.tolist()
# independent_variables.remove(dependent_variable)

In [None]:
# independent_variables

In [None]:
# X = df1[independent_variables].values

In [None]:
# y = df1[dependent_variable].values

In [None]:
# X_train, X_test, y_train, y_test, = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [None]:
# X_train

In [None]:
# regressor = LinearRegression()
# regressor.fit(X_train, y_train)

In [None]:
# y_pred = regressor.predict(X_test)
# y_pred

In [None]:
# math.sqrt(mean_squared_error(y_test, y_pred))

In [None]:
# r2_score(y_test, y_pred)