In [1]:
import pandas as pd

In [2]:
# Column names supplied explicitly via `names=`.
header_list = ['Date Time', 'Voltage', 'Current']

# Load the readings and derive every extra column in one chain:
#   Power (W)  = Voltage * Current
#   Power (KW) = Power (W) / 1000
#   Date Time  -> parsed timestamps
#   Date       -> calendar date of the timestamp, stored as a datetime
#   Time       -> time-of-day of the timestamp, stored as a string
# Later entries may refer to columns created by earlier ones.
df = pd.read_csv('sensors_data.csv', names=header_list).assign(**{
    'Power (W)': lambda d: d['Voltage'] * d['Current'],
    'Power (KW)': lambda d: d['Power (W)'] / 1000,
    'Date Time': lambda d: pd.to_datetime(d['Date Time']),
    'Date': lambda d: pd.to_datetime(d['Date Time'].dt.date),
    'Time': lambda d: d['Date Time'].dt.time.astype(str),
})

In [3]:
# Preview the first five rows (five is head()'s default).
df.head()

Unnamed: 0,Date Time,Voltage,Current,Power (W),Power (KW),Date,Time
0,2022-06-11 23:53:22,0.43945,0.0,0.0,0.0,2022-06-11,23:53:22
1,2022-06-11 23:54:22,0.48828,0.0,0.0,0.0,2022-06-11,23:54:22
2,2022-06-11 23:55:22,0.46387,0.0,0.0,0.0,2022-06-11,23:55:22
3,2022-06-11 23:56:22,0.46387,0.0,0.0,0.0,2022-06-11,23:56:22
4,2022-06-11 23:57:22,0.46387,0.0,0.0,0.0,2022-06-11,23:57:22


### Re-arrange columns

In [4]:
# Desired column order: timestamp parts first, then raw readings, then power.
rearrange_columns = [
    'Date Time', 'Date', 'Time',
    'Voltage', 'Current',
    'Power (W)', 'Power (KW)',
]
df = df.loc[:, rearrange_columns]

# Show the last five rows to confirm the new layout.
df.tail()

Unnamed: 0,Date Time,Date,Time,Voltage,Current,Power (W),Power (KW)
5957,2022-06-16 11:04:27,2022-06-16,11:04:27,14.0625,1.33364,18.754312,0.018754
5958,2022-06-16 11:05:27,2022-06-16,11:05:27,13.98926,1.24283,17.386272,0.017386
5959,2022-06-16 11:06:27,2022-06-16,11:06:27,14.0625,1.33299,18.745172,0.018745
5960,2022-06-16 11:07:27,2022-06-16,11:07:27,14.03809,1.30778,18.358733,0.018359
5961,2022-06-16 11:08:28,2022-06-16,11:08:28,14.11133,1.39261,19.651579,0.019652


### Calculate daily power generated

In [5]:
# Sum the 'Power (KW)' readings per calendar date, then keep only dates after
# 2022-06-11 (the head() preview shows that day's data starting at 23:53).
# NOTE(review): the tail() preview shows 2022-06-16 ending at 11:08, so the
# last day also looks partial but is kept -- confirm this is intended.
# NOTE(review): this is a plain sum of sampled kW values, not an integrated
# energy figure -- confirm that is the quantity wanted.
daily_values = (
    df.groupby('Date')['Power (KW)']
    .sum()
    .reset_index()
    .loc[lambda totals: totals['Date'] > '2022-06-11']
)
daily_values

Unnamed: 0,Date,Power (KW)
1,2022-06-12,4.411079
2,2022-06-13,3.648721
3,2022-06-14,13.15221
4,2022-06-15,11.044132
5,2022-06-16,1.992261


### Set date column as index

In [6]:
# Make 'Date' the (datetime) index of daily_values and keep the result.
# Previously the frame returned by set_index() was only displayed and then
# discarded, so daily_values still carried 'Date' both as its index name and
# as a regular column -- an ambiguous label for later groupby/sort calls, and
# the displayed table did not match the stored frame.
# The guard keeps the cell safe to re-run: once 'Date' has moved into the
# index there is nothing left to convert.
if 'Date' in daily_values.columns:
    daily_values = (
        daily_values
        .assign(Date=lambda d: pd.to_datetime(d['Date']))
        .set_index('Date')
    )
daily_values

Unnamed: 0_level_0,Power (KW)
Date,Unnamed: 1_level_1
2022-06-12,4.411079
2022-06-13,3.648721
2022-06-14,13.15221
2022-06-15,11.044132
2022-06-16,1.992261


In [7]:
from pmdarima import auto_arima

In [8]:
import warnings
warnings.filterwarnings("ignore")

In [9]:
# Let pmdarima search for an ARIMA order on the daily series.
# trace=True prints each candidate model that is tried.
stepwise_fit = auto_arima(
    daily_values['Power (KW)'],
    trace=True,
    suppress_warnings=True,
)
stepwise_fit.summary()

Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=36.020, Time=0.20 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=33.021, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=34.994, Time=0.04 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.17 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=37.163, Time=0.01 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.436 seconds


0,1,2,3
Dep. Variable:,y,No. Observations:,5.0
Model:,SARIMAX,Log Likelihood,-14.511
Date:,"Thu, 16 Jun 2022",AIC,33.021
Time:,12:12:01,BIC,32.24
Sample:,0,HQIC,30.925
,- 5,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,6.8497,2.519,2.719,0.007,1.912,11.787
sigma2,19.4205,36.480,0.532,0.594,-52.079,90.920

0,1,2,3
Ljung-Box (Q):,4.98,Jarque-Bera (JB):,0.67
Prob(Q):,0.29,Prob(JB):,0.71
Heteroskedasticity (H):,2.54,Skew:,0.38
Prob(H) (two-sided):,0.56,Kurtosis:,1.37


In [10]:
from statsmodels.tsa.arima_model import ARIMA

In [11]:
# Hold out the final two rows for evaluation; everything before them is
# used for fitting.
train, test = daily_values.iloc[:-2], daily_values.iloc[-2:]

In [20]:
# Fit an order-(1, 0, 0) model on the training rows only.
# NOTE(review): the stepwise search above reported (0, 0, 0) with intercept
# as the best model; (1, 0, 0) is used here instead -- confirm this is
# deliberate.
model = ARIMA(train['Power (KW)'], order=(1, 0, 0)).fit()
model.summary()

0,1,2,3
Dep. Variable:,Power (KW),No. Observations:,3.0
Model:,"ARMA(1, 0)",Log Likelihood,-8.485
Method:,css-mle,S.D. of innovations,3.939
Date:,"Thu, 16 Jun 2022",AIC,22.969
Time:,12:14:01,BIC,20.265
Sample:,06-12-2022,HQIC,17.533
,- 06-14-2022,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,6.6219,1.860,3.560,0.000,2.976,10.268
ar.L1.Power (KW),-0.4528,0.703,-0.644,0.520,-1.832,0.926

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,-2.2083,+0.0000j,2.2083,0.5000


In [27]:
# Predict the positions occupied by the held-out rows: the first position
# after the training data through the last test position (inclusive).
start = len(train)
end = start + len(test) - 1

# reset_index() turns the result into a two-column frame:
# 'index' (the dates) and 'ARIMA Predictions'.
pred = (
    model.predict(start=start, end=end, typ='levels')
    .rename('ARIMA Predictions')
    .reset_index()
)
pred

Unnamed: 0,index,ARIMA Predictions
0,2022-06-15,3.664741
1,2022-06-16,7.960981


In [22]:
# Mean of the held-out values, shown for scale next to the RMSE.
test.loc[:, 'Power (KW)'].mean()

6.518196585381149

In [23]:
from math import sqrt

from sklearn.metrics import mean_squared_error

# Root-mean-squared error of the hold-out forecast.
# `pred` is a DataFrame with an 'index' (dates) column and an
# 'ARIMA Predictions' column, so the forecast column must be selected
# explicitly; passing the whole frame does not compute the intended error.
# Arguments follow scikit-learn's (y_true, y_pred) order.
rmse = sqrt(mean_squared_error(test['Power (KW)'], pred['ARIMA Predictions']))
rmse

6.711223703683169

In [37]:
# Refit the same order-(1, 0, 0) model, this time on every row of
# daily_values, for forecasting beyond the observed dates.
model2 = ARIMA(daily_values['Power (KW)'], order=(1, 0, 0)).fit()

In [38]:
# Forecast four steps past the end of the observed series: positions
# len(daily_values) through len(daily_values) + 3, inclusive.
# Note that this rebinds `pred`, replacing the hold-out predictions.
pred = (
    model2.predict(
        start=len(daily_values),
        end=len(daily_values) + 3,
        typ='levels',
    )
    .rename('ARIMA Predictions')
    .reset_index()
)
pred

Unnamed: 0,index,ARIMA Predictions
0,2022-06-17,7.378811
1,2022-06-18,6.930869
2,2022-06-19,6.96812
3,2022-06-20,6.965022
