In [1]:
import pandas as pd

In [2]:
# Load the raw sensor log. Column names are supplied explicitly via `names=`,
# so pandas treats every row of the file (including the first) as data.
header_list = ['Date Time', 'Voltage', 'Current']
df = pd.read_csv('sensors_data.csv', names=header_list)

# Parse the timestamp column once; the date and time-of-day columns below are
# both derived from this single parsed series instead of re-parsing it.
df['Date Time'] = pd.to_datetime(df['Date Time'])

# Power per reading as Voltage * Current, plus the same value divided by 1000.
# NOTE(review): the W / KW labels assume Voltage is in volts and Current is in
# amperes -- confirm against the sensor documentation.
df['Power (W)'] = df['Voltage'] * df['Current']
df['Power (KW)'] = df['Power (W)'] / 1000

# Calendar day of each reading as a midnight timestamp (datetime64), computed
# directly rather than via a round trip through Python `date` objects.
df['Date'] = df['Date Time'].dt.normalize()

# Time of day rendered as text.
df['Time'] = df['Date Time'].dt.time.astype(str)

In [3]:
# Preview the first rows (head() defaults to 5) to sanity-check the derived columns.
df.head()

Unnamed: 0,Date Time,Voltage,Current,Power (W),Power (KW),Date,Time
0,2022-06-11 23:53:22,0.43945,0.0,0.0,0.0,2022-06-11,23:53:22
1,2022-06-11 23:54:22,0.48828,0.0,0.0,0.0,2022-06-11,23:54:22
2,2022-06-11 23:55:22,0.46387,0.0,0.0,0.0,2022-06-11,23:55:22
3,2022-06-11 23:56:22,0.46387,0.0,0.0,0.0,2022-06-11,23:56:22
4,2022-06-11 23:57:22,0.46387,0.0,0.0,0.0,2022-06-11,23:57:22


### Re-arrange columns

In [4]:
# Put the timestamp-derived columns first, followed by the measurements and
# the two power columns.
rearrange_columns = [
    'Date Time', 'Date', 'Time',
    'Voltage', 'Current',
    'Power (W)', 'Power (KW)',
]
df = df.loc[:, rearrange_columns]

# Show the last rows (tail() defaults to 5).
df.tail()

Unnamed: 0,Date Time,Date,Time,Voltage,Current,Power (W),Power (KW)
9095,2022-06-18 15:33:52,2022-06-18,15:33:52,13.74512,0.16931,2.327186,0.002327
9096,2022-06-18 15:34:52,2022-06-18,15:34:52,13.69629,0.16656,2.281254,0.002281
9097,2022-06-18 15:35:52,2022-06-18,15:35:52,13.76953,0.2867,3.947724,0.003948
9098,2022-06-18 15:36:52,2022-06-18,15:36:52,13.79395,0.32263,4.450342,0.00445
9099,2022-06-18 15:37:53,2022-06-18,15:37:53,13.74512,0.36912,5.073599,0.005074


### Calculate daily power generated

In [5]:
# Total the 'Power (KW)' readings for each calendar day, then keep only the
# days strictly after 2022-06-11.
# NOTE(review): this is a plain sum of per-reading kW values, not an energy
# figure. The previewed rows are about one minute apart, so dividing by 60
# would presumably give kWh -- confirm the intended quantity.
# NOTE(review): 2022-06-11 is presumably dropped because the log only starts
# late that evening; the last day in the log also looks partial and is kept.
daily_values = (
    df.groupby('Date')['Power (KW)']
      .sum()
      .reset_index()
      .loc[lambda daily: daily['Date'] > '2022-06-11']
)
daily_values

Unnamed: 0,Date,Power (KW)
1,2022-06-12,4.411079
2,2022-06-13,3.648721
3,2022-06-14,13.15221
4,2022-06-15,11.044132
5,2022-06-16,9.923652
6,2022-06-17,11.903512
7,2022-06-18,0.771489


### Set date column as index

In [6]:
# Index the daily totals by date so the series handed to the models carries a
# DatetimeIndex. The result of set_index() is assigned back: without that the
# frame would keep a 'Date' column next to an index that is also named 'Date',
# which pandas treats as an ambiguous label in groupby / sort_values / merge.
# The guard keeps the cell safe to re-run after 'Date' has already been moved
# into the index.
if 'Date' in daily_values.columns:
    daily_values = (
        daily_values
        .assign(Date=pd.to_datetime(daily_values['Date']))
        .set_index('Date')
    )
daily_values

Unnamed: 0_level_0,Power (KW)
Date,Unnamed: 1_level_1
2022-06-12,4.411079
2022-06-13,3.648721
2022-06-14,13.15221
2022-06-15,11.044132
2022-06-16,9.923652
2022-06-17,11.903512
2022-06-18,0.771489


In [7]:
from pmdarima import auto_arima

In [8]:
import warnings
warnings.filterwarnings("ignore")

In [9]:
# Let pmdarima run its stepwise, AIC-minimising order search on the daily
# series; trace=True prints every candidate model that is tried.
# NOTE(review): the series only has a handful of daily points, so the selected
# order and its statistics should be read with caution.
stepwise_fit = auto_arima(
    daily_values['Power (KW)'],
    trace=True,
    suppress_warnings=True,
)
stepwise_fit.summary()

Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.14 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=44.766, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=46.754, Time=0.03 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.15 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=52.645, Time=0.01 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.348 seconds


0,1,2,3
Dep. Variable:,y,No. Observations:,7.0
Model:,SARIMAX,Log Likelihood,-20.383
Date:,"Sat, 18 Jun 2022",AIC,44.766
Time:,16:07:34,BIC,44.658
Sample:,0,HQIC,43.429
,- 7,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,7.8364,1.907,4.110,0.000,4.099,11.574
sigma2,19.8040,23.869,0.830,0.407,-26.977,66.585

0,1,2,3
Ljung-Box (Q):,7.18,Jarque-Bera (JB):,0.78
Prob(Q):,0.3,Prob(JB):,0.68
Heteroskedasticity (H):,2.27,Skew:,-0.34
Prob(H) (two-sided):,0.61,Kurtosis:,1.51


In [10]:
from statsmodels.tsa.arima_model import ARIMA

In [11]:
# Chronological hold-out split: the final three days form the test set and
# everything before them is used for training.
train = daily_values.head(-3)
test = daily_values.tail(3)

In [12]:
# Fit an order-(1, 0, 0) model on the training days; `model` holds the fitted
# results object, not the unfitted specification.
# NOTE(review): the order search above reported (0, 0, 0) with intercept as
# the best model, so (1, 0, 0) here is a manual choice -- confirm it is
# intended.
# NOTE(review): ARIMA is imported from statsmodels.tsa.arima_model; newer
# statsmodels releases are understood to have removed that module in favour of
# statsmodels.tsa.arima.model -- confirm against the installed version.
model = ARIMA(train['Power (KW)'], order=(1, 0, 0)).fit()
model.summary()

0,1,2,3
Dep. Variable:,Power (KW),No. Observations:,4.0
Model:,"ARMA(1, 0)",Log Likelihood,-11.293
Method:,css-mle,S.D. of innovations,4.063
Date:,"Sat, 18 Jun 2022",AIC,28.586
Time:,16:07:35,BIC,26.745
Sample:,06-12-2022,HQIC,24.546
,- 06-15-2022,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,8.0383,2.277,3.529,0.000,3.575,12.502
ar.L1.Power (KW),0.1421,0.513,0.277,0.782,-0.864,1.148

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,7.0390,+0.0000j,7.0390,0.0000


In [16]:
# Forecast the hold-out window by integer position: from the first step after
# the training data through the last test row (both ends inclusive).
start = len(train)
end = start + len(test) - 1
pred = model.predict(start=start, end=end, typ='levels')
pred

2022-06-16    8.465334
2022-06-17    8.098976
2022-06-18    8.046929
Freq: D, dtype: float64

In [17]:
# Mean of the held-out daily totals, shown as a scale reference for the RMSE.
test.loc[:, 'Power (KW)'].mean()

7.532884558825066

In [18]:
from math import sqrt

from sklearn.metrics import mean_squared_error

# Root-mean-squared error of the hold-out forecast against the observed test
# values. The arguments are passed by keyword so the roles of the two series
# are explicit; squared error is symmetric, so the value is unchanged.
rmse = sqrt(mean_squared_error(y_true=test['Power (KW)'], y_pred=pred))
rmse

4.814326177564005

In [28]:
# Refit the same order-(1, 0, 0) specification on every available day so it
# can be used for forecasting beyond the observed data. `model2` holds the
# fitted results object.
model2 = ARIMA(daily_values['Power (KW)'], order=(1, 0, 0)).fit()

In [32]:
# index_future_dates = pd.date_range(start = '2022-06-15', end = '2022-06-18')
# Forecast beyond the observed data: positions n .. n + 3 inclusive, i.e. four
# steps after the last observed day. The forecast is named and its date index
# is moved into a regular 'index' column by reset_index().
# Note: this rebinds `pred`, which earlier held the hold-out forecast Series.
n_observed = len(daily_values)
pred = (
    model2.predict(start=n_observed, end=n_observed + 3, typ='levels')
    .rename('ARIMA Predictions')
    .reset_index()
)
pred

Unnamed: 0,index,ARIMA Predictions
0,2022-06-19,8.305885
1,2022-06-20,7.893904
2,2022-06-21,7.916431
3,2022-06-22,7.915199
