# Wind Power Prediction using Time Series


<h1><center><a href=https://colab.research.google.com/github/Nemczek/wind_power_prediction/blob/main/wind_power_prediction_time_series.ipynb>Open in Colab!</a></center></h1>

<center><img src='windmills.jpg'></center>

A continuation of the Wind Power Generation data set analysis, this time forecasting power with time-series models.

Note: You can run notebook directly in your browser by replacing .com with .dev in github link.
Example: [https://github.dev/Nemczek/wind_power_prediction_time_series](https://github.dev/Nemczek/wind_power_prediction_time_series)

## Packages 

In [None]:
# Run only once - installing packages. Needed to run notebook in browser
install.packages(c("forecast", "tempdisagg", "tidyverse"))

In [None]:
library(forecast)
library(tempdisagg)
library(tidyverse)

## Data loading and cleaning/preparing 

In [3]:
# Load the Location1 wind-power data set straight from the project repository.
dane <- read.csv(
  file = "https://raw.githubusercontent.com/Nemczek/wind_power_prediction/main/Location1.csv"
)

### Aggregate the data into monthly means (the split into training and test data follows later)

In [None]:
# Convert the Time column to Date and derive a "YYYY-MM" key (Miesiac) per row.
dane$Time <- as.Date(dane$Time)
dane$Miesiac <- format(dane$Time, "%Y-%m")

# Mean Power within each month key.
dane.fan <- summarise(
  group_by(dane, Miesiac),
  srednia_Power = mean(Power)
)

# Keep only the monthly means, as a one-column data frame.
dane.fan <- data.frame(dane.fan$srednia_Power)

### Cleaning data and looking for Trend and seasonality

In [None]:

# Monthly time series (12 observations per year) starting in January 2017.
fan <- ts(data = dane.fan, start = c(2017, 1), frequency = 12)

In [None]:
# Line plot of the full monthly series.
ts.plot(fan)

In [None]:
# One sub-series per calendar month, to compare months across years.
monthplot(x = fan)

In [None]:
# Seasonal plot: one labelled line per year, colours 1 to 5.
seasonplot(x = fan, col = 1:5, year.labels = TRUE)

In [None]:
# Scatter plots of the series against its lags 1 to 12.
# Author's reading: trend (T) and seasonality (S) are present.
lag.plot(x = fan, lags = 12, do.lines = FALSE)

In [None]:
# Autocorrelation function of the full series.
Acf(x = fan)

In [None]:
# Partial autocorrelation function of the full series.
Pacf(x = fan)

In [None]:
# List the outliers detected in the full series together with suggested replacements.
tsoutliers(fan)
# Overwrite observation 13 with a hard-coded replacement value
# (presumably the one suggested by tsoutliers() above; verify after a data refresh).
fan[13] <- 0.4186132 # 2018-01 (observation 13 of a monthly series starting 2017-01)

In [None]:
# Training window: everything up to and including December 2020.
fan.ts <- window(x = fan, end = c(2020, 12))

In [None]:
# Line plot of the training window.
ts.plot(fan.ts)

In [None]:
# One sub-series per calendar month for the training window.
monthplot(x = fan.ts)

In [None]:
# Seasonal plot of the training window: one labelled line per year, four rainbow colours.
seasonplot(x = fan.ts, col = rainbow(4), year.labels = TRUE)

In [None]:
# Scatter plots of the training window against its lags 1 to 12.
# Author's reading: trend (T) and seasonality (S) are present.
lag.plot(x = fan.ts, lags = 12, do.lines = FALSE)

In [None]:

# Autocorrelation function of the training window.
Acf(x = fan.ts)

In [None]:

# Partial autocorrelation function of the training window.
Pacf(x = fan.ts)

In [None]:
# Detect outliers in the training window and replace them with the values
# suggested by tsoutliers().
#
# The replacement used to be written only to `fan`, after `fan.ts` had already
# been extracted from it, so the training series never received the correction.
# The hard-coded index 28 also did not match its "2019-02" note: observation 28
# of a monthly series starting 2017-01 is 2019-04.
# Taking index and value from the tsoutliers() result removes both problems.
outliers.ts <- tsoutliers(fan.ts)
outliers.ts

# `fan.ts` shares its origin (2017-01) with `fan`, so the same positions apply
# to both. When no outlier is found the index is empty and nothing changes.
fan.ts[outliers.ts$index] <- outliers.ts$replacements
fan[outliers.ts$index] <- outliers.ts$replacements

In [None]:
# Test window: January 2021 onwards.
fan.test <- window(x = fan, start = c(2021, 1))

# Show any outliers detected in the test window.
tsoutliers(x = fan.test)

# Drop the raw data and the full series; only fan.ts and fan.test are used below.
rm(list = c("dane", "dane.fan", "fan"))

## Manual differencing 

In [None]:
# Seasonal (lag-12) difference of the training series.
fan.ts.diff <- diff(x = fan.ts, lag = 12)

# Diagnostics on the differenced series.
ts.plot(fan.ts.diff)
lag.plot(x = fan.ts.diff, lags = 12, do.lines = FALSE)
Acf(x = fan.ts.diff)  # author's reading: MA(3)
Pacf(x = fan.ts.diff) # author's reading: AR(3)

### MA(3)

In [None]:
# SARIMA(0,0,3)(0,1,0)[12] fitted to the training series.
model.MA3 <- Arima(y = fan.ts, order = c(0, 0, 3), seasonal = c(0, 1, 0))
summary(model.MA3)

# |coefficient| relative to 1.96 standard errors; a ratio >= 1 marks the
# coefficient as significant.
coefs <- model.MA3$coef
coefs.sd <- sqrt(diag(model.MA3$var.coef))
ind <- abs(coefs / (1.96 * coefs.sd))
signif <- which(ind >= 1)

# Refit with non-significant coefficients fixed at 0 (NA = estimate freely).
temp.fixed <- numeric(3)
temp.fixed[signif] <- NA
model.MA3.signif <- Arima(
  y = fan.ts,
  order = c(0, 0, 3),
  seasonal = c(0, 1, 0),
  fixed = temp.fixed
)
summary(model.MA3.signif) # No significant coefficients

Because none of the MA(3) coefficients is significant, the model reduces to SARIMA(0,0,0)(0,1,0)[12], which is fitted below.

In [None]:

# SARIMA(0,0,0)(0,1,0)[12]: seasonal differencing only, no ARMA terms.
model.0 <- Arima(y = fan.ts, order = c(0, 0, 0), seasonal = c(0, 1, 0))
summary(model.0)

# 12-month-ahead forecast.
prog.model.0 <- forecast(model.0, h = 12)
prog.model.0$mean
ts.plot(prog.model.0$mean)

# Forecast with the test data overlaid in red.
plot(prog.model.0)
lines(fan.test, col = "red")

# Accuracy measures (training and test rows), restricted to the compared columns.
Acc0 <- accuracy(prog.model.0, fan.test)[
  , c("ME", "MAE", "MPE", "MAPE", "RMSE", "Theil's U")
]

### AR(3)

In [None]:
# SARIMA(3,0,0)(0,1,0)[12] fitted to the training series.
model.AR3 <- Arima(y = fan.ts, order = c(3, 0, 0), seasonal = c(0, 1, 0))
summary(model.AR3)

# |coefficient| relative to 1.96 standard errors; a ratio >= 1 marks the
# coefficient as significant.
coefs <- model.AR3$coef
coefs.sd <- sqrt(diag(model.AR3$var.coef))
ind <- abs(coefs / (1.96 * coefs.sd))
signif <- which(ind >= 1)

# Refit with non-significant coefficients fixed at 0 (NA = estimate freely).
temp.fixed <- numeric(3)
temp.fixed[signif] <- NA
model.AR3.signif <- Arima(
  y = fan.ts,
  order = c(3, 0, 0),
  seasonal = c(0, 1, 0),
  fixed = temp.fixed
)
summary(model.AR3.signif) # significant coefficients: AR(3)

Forecast

In [None]:
# 12-month-ahead forecast from the restricted AR(3) model.
prog.AR3 <- forecast(model.AR3.signif, h = 12)
prog.AR3$mean
ts.plot(prog.AR3$mean)

# Forecast with the test data overlaid in red.
plot(prog.AR3)
lines(fan.test, col = "red")

# Accuracy measures (training and test rows), restricted to the compared columns.
AccAR3 <- accuracy(prog.AR3, fan.test)[
  , c("ME", "MAE", "MPE", "MAPE", "RMSE", "Theil's U")
]

## AutoArima

For comparison the auto.arima was used

In [None]:
# Automatic order selection on the training series.
fan.ts.auto <- auto.arima(y = fan.ts)
summary(fan.ts.auto) # author's result: ARIMA(0,0,0)(0,1,0)[12]


We got ARIMA(0,0,0)(0,1,0)[12] model

Forecast for auto.arima model 

In [None]:
# 12-month-ahead forecast from the auto.arima model.
fan.prog.auto <- forecast(fan.ts.auto, h = 12)
fan.prog.auto$mean
ts.plot(fan.prog.auto$mean)

# Forecast with the test data overlaid in red.
plot(fan.prog.auto)
lines(fan.test, col = "red")

# Accuracy measures (training and test rows), restricted to the compared columns.
accuracy(fan.prog.auto, fan.test)[
  , c("ME", "MAE", "MPE", "MAPE", "RMSE", "Theil's U")
]

## Classical decomposition 

In [None]:
# Classical decomposition of the training series.
fan.decom <- decompose(x = fan.ts)
plot(fan.decom)

# Correlation structure of the random (remainder) component.
fan.decom.res <- fan.decom$random
Acf(x = fan.decom.res)  # author's reading: MA(12)
Pacf(x = fan.decom.res) # author's reading: AR(2)


### Models for classical decomposition

In [None]:
# SARIMA(0,0,12)(0,1,0)[12] fitted to the training series.
model.decom.MA12 <- Arima(
  y = fan.ts,
  order = c(0, 0, 12),
  seasonal = c(0, 1, 0)
)
summary(model.decom.MA12)

In [None]:
# SARIMA(2,0,0)(0,1,0)[12] fitted to the training series.
model.decom.AR2 <- Arima(
  y = fan.ts,
  order = c(2, 0, 0),
  seasonal = c(0, 1, 0)
)
summary(model.decom.AR2)

#### MA(12)

In [None]:
# |coefficient| relative to 1.96 standard errors for the MA(12) model;
# a ratio >= 1 marks the coefficient as significant.
coefs <- model.decom.MA12$coef
coefs.sd <- sqrt(diag(model.decom.MA12$var.coef))
ind <- abs(coefs / (1.96 * coefs.sd))
signif <- which(ind >= 1)

# Refit with non-significant coefficients fixed at 0 (NA = estimate freely).
temp.fixed <- numeric(12)
temp.fixed[signif] <- NA
model.decom.1.signif <- Arima(
  y = fan.ts,
  order = c(0, 0, 12),
  seasonal = c(0, 1, 0),
  fixed = temp.fixed
)
summary(model.decom.1.signif) # No significant coefficients

With no significant MA coefficients, the model is identical to SARIMA(0,0,0)(0,1,0)[12].

#### AR(2)

In [None]:

# AR(2): |coefficient| relative to 1.96 standard errors; a ratio >= 1 marks
# the coefficient as significant.
coefs <- model.decom.AR2$coef
coefs.sd <- sqrt(diag(model.decom.AR2$var.coef))
ind <- abs(coefs / (1.96 * coefs.sd))
signif <- which(ind >= 1)

# Refit with non-significant coefficients fixed at 0 (NA = estimate freely).
temp.fixed <- numeric(2)
temp.fixed[signif] <- NA
model.decom.2.signif <- Arima(
  y = fan.ts,
  order = c(2, 0, 0),
  seasonal = c(0, 1, 0),
  fixed = temp.fixed
)
summary(model.decom.2.signif) # No significant coefficients

With no significant AR coefficients, the model is identical to SARIMA(0,0,0)(0,1,0)[12].

## TSLM

tslm can be done for comparison but after the classical decomposition you can see that the trend is not linear

In [None]:
# Linear model with a linear trend and monthly seasonal dummies.
fan.tslm <- tslm(formula = fan.ts ~ trend + season)

# Residual diagnostics.
fan.tslm.res <- fan.tslm$residuals
ts.plot(fan.tslm.res)
lag.plot(x = fan.tslm.res, lags = 12, do.lines = FALSE)
Acf(x = fan.tslm.res) # author's reading: MA(24)
Pacf(x = fan.tslm.res)

# 12-month-ahead forecast of the trend + season part.
prog.tslm <- forecast(fan.tslm, h = 12)
ts.plot(prog.tslm$mean)

The tslm residuals are differenced because the lag-1 panel of the lag plot leaves it unclear whether a trend remains.

In [None]:
# First difference of the tslm residuals.
fan.tslm.diff <- diff(x = fan.tslm.res)

# Diagnostics on the differenced residuals.
ts.plot(fan.tslm.diff)
lag.plot(x = fan.tslm.diff, lags = 12, do.lines = FALSE)
Acf(x = fan.tslm.diff, lag.max = 60) # author's reading: MA(1)
Pacf(x = fan.tslm.diff)              # author's reading: AR(2)

### MA(1)

In [None]:
# ARIMA(0,1,1) on the tslm residuals (no seasonal part).
model.res.MA1 <- Arima(
  y = fan.tslm.res,
  order = c(0, 1, 1),
  seasonal = c(0, 0, 0)
)
summary(model.res.MA1)

# |coefficient| relative to 1.96 standard errors; a ratio >= 1 marks the
# coefficient as significant.
coefs.tslm <- model.res.MA1$coef
coefs.sd.1 <- sqrt(diag(model.res.MA1$var.coef))
ind <- abs(coefs.tslm / (1.96 * coefs.sd.1))
signif.tsml <- which(ind >= 1)

# Refit with a non-significant coefficient fixed at 0 (NA = estimate freely).
temp.fixed <- numeric(1)
temp.fixed[signif.tsml] <- NA
model.res.MA1.signif <- Arima(
  y = fan.tslm.res,
  order = c(0, 1, 1),
  seasonal = c(0, 0, 0),
  fixed = temp.fixed
)
summary(model.res.MA1.signif)

# 12-month-ahead forecast of the residual component, MA(1) variant.
prognoza.tslm.MA1 <- forecast(model.res.MA1.signif, h = 12)

### Forecast T + S + MA(1)

In [None]:

# Combined forecast: trend + season from tslm plus the MA(1) residual forecast.
prognoza.tslm.arima.MA1 <- prog.tslm$mean + prognoza.tslm.MA1$mean

# tslm-only forecast (red) against the combined forecast (green).
ts.plot(
  prog.tslm$mean, prognoza.tslm.arima.MA1,
  col = c("red", "green")
)

# Accuracy of the combined forecast against the test data.
AcctslmMA1 <- accuracy(prognoza.tslm.arima.MA1, fan.test)[
  , c("ME", "MAE", "MPE", "MAPE", "RMSE", "Theil's U")
]

### AR(2)

In [None]:
# ARIMA(2,1,0) on the tslm residuals (no seasonal part).
model.res.AR2 <- Arima(
  y = fan.tslm.res,
  order = c(2, 1, 0),
  seasonal = c(0, 0, 0)
)
summary(model.res.AR2)

# |coefficient| relative to 1.96 standard errors; a ratio >= 1 marks the
# coefficient as significant.
coefs.tslm <- model.res.AR2$coef
coefs.sd.1 <- sqrt(diag(model.res.AR2$var.coef))
ind <- abs(coefs.tslm / (1.96 * coefs.sd.1))
signif.tsml <- which(ind >= 1)

# Refit with non-significant coefficients fixed at 0 (NA = estimate freely).
temp.fixed <- numeric(2)
temp.fixed[signif.tsml] <- NA
model.res.AR2.signif <- Arima(
  y = fan.tslm.res,
  order = c(2, 1, 0),
  seasonal = c(0, 0, 0),
  fixed = temp.fixed
)
summary(model.res.AR2.signif)

# 12-month-ahead forecast of the residual component, AR(2) variant.
prognoza.tslm.AR2 <- forecast(model.res.AR2.signif, h = 12)

### Forecast T + S + AR(2)

In [None]:
# Combined forecast: trend + season from tslm plus the AR(2) residual forecast.
prognoza.tslm.arima.AR2 <- prog.tslm$mean + prognoza.tslm.AR2$mean

# tslm-only forecast (red), combined forecast (green) and test data (blue).
ts.plot(
  prog.tslm$mean, prognoza.tslm.arima.AR2, fan.test,
  col = c("red", "green", "blue")
)

# Accuracy of the combined forecast against the test data.
AcctslmAR2 <- accuracy(prognoza.tslm.arima.AR2, fan.test)[
  , c("ME", "MAE", "MPE", "MAPE", "RMSE", "Theil's U")
]


## Holt-Winters Method 

In [None]:
# Holt-Winters forecast, 12 months ahead, fitted on the training series.
prog.hw <- hw(y = fan.ts, h = 12)
summary(prog.hw)

# Forecast with the test data overlaid in red.
plot(prog.hw)
lines(fan.test, col = "red")

# Accuracy measures (training and test rows), restricted to the compared columns.
AccHW <- accuracy(prog.hw, fan.test)[
  , c("ME", "MAE", "MPE", "MAPE", "RMSE", "Theil's U")
]

## Forecast Plot

In [None]:

# All point forecasts and the test data in one plot, five rainbow colours.
ts.plot(
  prog.hw$mean, prognoza.tslm.arima.AR2, prog.model.0$mean, prog.AR3$mean,
  fan.test,
  col = rainbow(5)
)

# Legend entries follow the order of the series above; the last entry
# ("Rzeczywista") labels the fan.test series.
legend(
  "bottomleft",
  legend = c(
    "H-W", "T+S+AR(2)", "SARIMA(0,0,0)(0,1,0)[12]",
    "SARIMA(3,0,0)(0,1,0)[12]", "Rzeczywista"
  ),
  col = rainbow(5),
  lwd = 2,
  text.width = 0.3,
  cex = 0.55
)

## AIC, AICc, BIC comparison

In [None]:
# Fitted models whose information criteria are compared.
# The Holt-Winters fit is taken from the forecast object (prog.hw$model), so
# its criteria are read from the model rather than typed in by hand.
# NOTE(review): tslmAR2 is fitted on tslm residuals and HW is a different model
# class, so their criteria are not strictly comparable with the SARIMA fits.
models <- list(
  Arima = model.0,
  AR3 = model.AR3.signif,
  tslmAR2 = model.res.AR2.signif,
  HW = prog.hw$model
)

# Extract AIC, AICc and BIC from a fitted model.
#
# model: a fitted model object exposing `$aic`, `$aicc` and `$bic`.
# Returns a numeric vector of length 3 named AIC, AICc, BIC.
get_ic_values <- function(model) {
  c(AIC = model$aic, AICc = model$aicc, BIC = model$bic)
}

# One row per model. vapply() guarantees three numeric values per model, so the
# criteria columns stay numeric. Appending the HW row as
# c("HW", -98.6, ...) used to coerce all three columns to character.
ic_matrix <- t(vapply(models, get_ic_values, numeric(3)))
colnames(ic_matrix) <- c("AIC", "AICc", "BIC")

results_df <- data.frame(
  Model = names(models),
  ic_matrix,
  row.names = NULL
)
print(results_df)

## Comparison of errors

In [None]:
# Stack the accuracy tables of the compared models into one data frame.
# Acc0, AccAR3 and AccHW each hold a training-set and a test-set row.
# AcctslmAR2 holds test-set measures only, because it was computed from a plain
# forecast series rather than a forecast object.
Acc0 <- as.data.frame(Acc0)
AccAR3 <- as.data.frame(AccAR3)
AccHW <- as.data.frame(AccHW)

acc_df <- rbind(Acc0, AccAR3, AccHW, AcctslmAR2)

# Model label for every row, in the stacking order used above.
acc_df <- cbind(
  acc_df,
  Modele = c("Arima", "Arima", "AR3", "AR3", "HW", "HW", "AR2")
)

# The last row is the T+S+AR(2) accuracy on the test data. It used to be
# labelled "Training Set3", which misreports which data it was measured on.
rownames(acc_df)[nrow(acc_df)] <- "Test set3"

# Display the comparison table; it was previously built but never shown.
acc_df

## Conclusions

The results of the models and estimation were not clear enough to determine whether any model emerges as the best fit for predicting the values of our data, but a few models can be identified that seemed to be close to the actual values. The SARIMA(0,0,0)(0,1,0)[12] model was confirmed by auto.arima and by classical decomposition, and had most of the smallest errors. The T+S+AR(2) model was the best fit according to the AIC, AICc and BIC criteria but had some of the larger prediction errors for learners. The SARIMA(3,0,0)(0,1,0)[12] model appeared to fall between the two models mentioned above. In terms of forecast performance, all three models did fairly well. The Holt-Winters method performed the worst.