# **Solar Power Generation Forecast**

CS 458

Juan Caridad

## **Reading in dataset**



In [2]:
import pandas as pd 
import numpy as np

dts = pd.read_csv('solar.csv')
dts.head(10)

Unnamed: 0,ZONEID,TIMESTAMP,VAR78,VAR79,VAR134,VAR157,VAR164,VAR165,VAR166,VAR167,VAR169,VAR175,VAR178,VAR228,POWER
0,1,20120401 01:00,0.001967,0.003609,94843.625,60.221909,0.244601,1.039334,-2.503039,294.448486,2577830.0,1202532.0,2861797.0,0.0,0.754103
1,1,20120401 02:00,0.005524,0.033575,94757.9375,54.678604,0.457138,2.482865,-2.99333,295.651367,5356093.0,2446757.0,5949378.0,0.0,0.555
2,1,20120401 03:00,0.030113,0.132009,94732.8125,61.294891,0.771429,3.339867,-1.982535,294.45459,7921788.0,3681336.0,8939176.0,0.001341,0.438397
3,1,20120401 04:00,0.057167,0.110645,94704.0625,67.775284,0.965866,3.106102,-1.446051,293.261475,9860520.0,4921504.0,11331679.0,0.002501,0.145449
4,1,20120401 05:00,0.051027,0.18956,94675.0,70.172989,0.944669,2.601146,-1.904493,292.73291,11143097.0,6254380.0,13105558.0,0.003331,0.111987
5,1,20120401 06:00,0.036996,0.099045,94676.9375,72.374039,0.641353,1.333368,-1.728431,292.077148,11815767.0,7558415.0,14198503.0,0.00396,0.057244
6,1,20120401 07:00,0.080911,0.121323,94708.0625,81.798737,0.753142,1.457923,-1.03462,291.069336,12274591.0,8798617.0,14925342.0,0.00497,0.088718
7,1,20120401 08:00,0.036159,0.139069,94748.8125,87.854065,0.788338,2.374826,-1.08904,289.073486,12351290.0,10041167.0,15112951.0,0.006477,0.030064
8,1,20120401 09:00,0.036372,0.072609,94785.8125,88.793488,0.502275,1.985531,-0.96301,288.03125,12351290.0,11257316.0,15112951.0,0.006725,0.000128
9,1,20120401 10:00,0.014353,0.035797,94817.75,90.450668,0.501918,1.999518,-0.93032,287.405762,12351290.0,12460132.0,15112951.0,0.006745,0.0


## **Splitting to training and test datasets**

In [46]:
import numpy as np
import pandas as pd 

trainingDTS = []
testDTS = []

training = dts['TIMESTAMP'].between("20120401 01:00", "20130701 00:00")
test = dts['TIMESTAMP'].between("20130701 00:10", "20140701 00:00")
for i in range(len(training)):
    if training.iloc[i] == True:
        trainingDTS.append(dts.iloc[i].values)

for i in range(len(test)):
    if test.iloc[i] == True:
        testDTS.append(dts.iloc[i].values)

nptrain = np.array(trainingDTS)
nptest = np.array(testDTS)

trainDF = pd.DataFrame(data = nptrain)
testDF = pd.DataFrame(data = nptest)

trainDF.to_csv('solar_training.csv', index = False, header = ['ZONEID', 'TIMESTAMP', 'VAR78', 'VAR79', 'VAR134', 'VAR157', 'VAR164', 'VAR165', 'VAR166', 'VAR167', 'VAR169', 'VAR175', 'VAR178', 'VAR228', 'POWER'])
testDF.to_csv('solar_test.csv', index = False, header = ['ZONEID', 'TIMESTAMP', 'VAR78', 'VAR79', 'VAR134', 'VAR157', 'VAR164', 'VAR165', 'VAR166', 'VAR167', 'VAR169', 'VAR175', 'VAR178', 'VAR228', 'POWER'])


## **Building 24 Hour Ahead Solar Power Generation forecast model**

In [5]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

trainDS = pd.read_csv("solar_training.csv")
testDS = pd.read_csv("solar_test.csv")
trainDF = pd.DataFrame(trainDS, columns=['TIMESTAMP','VAR78','VAR79','VAR134','VAR157','VAR164','VAR165','VAR166','VAR167','VAR169','VAR175','VAR178','VAR228','POWER'])
testDF = pd.DataFrame(testDS, columns=['ZONEID','TIMESTAMP', 'VAR78','VAR79','VAR134','VAR157','VAR164','VAR165','VAR166','VAR167','VAR169','VAR175','VAR178','VAR228','POWER'])

trainDF['TIME'] = trainDF['TIMESTAMP']
testDF['TIME'] = testDF['TIMESTAMP']
trainDF['TIMESTAMP'] = pd.to_datetime(trainDF['TIMESTAMP']).view(dtype = np.int64)
testDF['TIMESTAMP'] = pd.to_datetime(testDF['TIMESTAMP']).view(dtype = np.int64)

maxHour = testDF.TIMESTAMP.max()
minHour = testDF.TIMESTAMP.min()

minus = -1
plus = 1

testDF['TIMESTAMP'] = (testDF.TIMESTAMP-minHour) * (plus-minus) / (maxHour-minHour) + minus
trainDF['TIMESTAMP'] = (trainDF.TIMESTAMP-minHour) * (plus-minus) / (maxHour-minHour) + minus

zoneID = testDF.iloc[:, 0].values
zoneID2 = np.where(zoneID == 2)
zoneID3 = np.where(zoneID == 3)
testDF.drop('ZONEID', axis = 1, inplace = True)

Xtrain1 = []
Xtrain2 = []
Xtrain3 = []
yTrain1 = []
yTrain2 = []
yTrain3 = []

for index in range(trainDF.shape[0]):
  if(index < zoneID2[0][0]):
    Xtrain1 = trainDF.iloc[:zoneID2[0][0], :-2].values
    yTrain1 = trainDF.iloc[:zoneID2[0][0], 13].values
  else:
    if(index < zoneID3[0][0]):
      Xtrain2 = trainDF.iloc[zoneID2[0][0]:zoneID3[0][0], :-2].values
      yTrain2 = trainDF.iloc[zoneID2[0][0]:zoneID3[0][0], 13].values
    else:
      Xtrain3 = trainDF.iloc[zoneID3[0][0]:, :-2].values
      yTrain3 = trainDF.iloc[zoneID3[0][0]:, 13].values

Xtest1 = []
Xtest2 = []
Xtest3 = []
ytest1 = []
ytest2 = []
ytest3 = []

for index in range(testDF.shape[0]):
    if(index < zoneID2[0][0]):
        Xtest1 = testDF.iloc[:zoneID2[0][0], :-2].values
        ytest1 = testDF.iloc[:zoneID2[0][0], 13].values
    else:
        if(index < zoneID3[0][0]):
            Xtest2 = testDF.iloc[zoneID2[0][0]:zoneID3[0][0], :-2].values
            ytest2 = testDF.iloc[zoneID2[0][0]:zoneID3[0][0], 13].values
        else:
            Xtest3 = testDF.iloc[zoneID3[0][0]:, :-2].values
            ytest3 = testDF.iloc[zoneID3[0][0]:, 13].values

RanForReg1 = RandomForestRegressor(n_estimators = 300, max_depth = 15, random_state = 0,  min_samples_split = 8)
RanForReg1.fit(Xtrain1, yTrain1)

RanForReg2 = RandomForestRegressor(n_estimators = 300, max_depth = 15, random_state = 0,  min_samples_split = 8)
RanForReg2.fit(Xtrain2, yTrain2)

RanForReg3 = RandomForestRegressor(n_estimators = 300, max_depth = 15, random_state = 0,  min_samples_split = 8)
RanForReg3.fit(Xtrain3, yTrain3)

print("Zone1 accuracy: ", RanForReg1.score(Xtrain1, yTrain1))
print("Zone2 accuracy: ",RanForReg2.score(Xtrain2, yTrain2))
print("Zone3 accuracy: ",RanForReg3.score(Xtrain3, yTrain3))



Zone1 accuracy:  0.974563865857889
Zone2 accuracy:  0.9752844203870384
Zone3 accuracy:  0.9751745182450151


## **Checking the Mean Absolute Error and Root Mean Squared Error**

In [9]:
MeanError = []
Root2Error = []

yPred1 = RanForReg1.predict(Xtest1)
DFID1 = pd.DataFrame({'A': ytest1, 'P': yPred1})
MeanError.append(mean_absolute_error(ytest1, yPred1))
Root2Error.append(np.sqrt(mean_squared_error(ytest1, yPred1)))

yPred2 = RanForReg2.predict(Xtest2)
DFID2 = pd.DataFrame({'A': ytest2, 'P': yPred2})
MeanError.append(mean_absolute_error(ytest2, yPred2))
Root2Error.append(np.sqrt(mean_squared_error(ytest2, yPred2)))

yPred3 = RanForReg3.predict(Xtest3)
DFID3 = pd.DataFrame({'A': ytest3, 'P': yPred3})
MeanError.append(mean_absolute_error(ytest3, yPred3))
Root2Error.append(np.sqrt(mean_squared_error(ytest3, yPred3)))

MeanError.append(sum(MeanError)/len(MeanError))
Root2Error.append(sum(Root2Error)/len(Root2Error))

print('ZONE 1')
print('December 31, 2013 21:00 PM, Actual Power: ', DFID1.iat[4413-1,0], 'Predicted Power: ', DFID3.iat[4413-1, 1])
print('December 31, 2013 22:00 PM - January 01, 2014 21:00 PM, Forecast power:')
print(DFID1.iloc[4413:4413+24])

print('ZONE 2')
print('December 31, 2013 21:00 PM, Actual Power: ', DFID2.iat[4413-1,0], 'Predicted Power: ', DFID3.iat[4413-1, 1])
print('December 31, 2013 22:00 PM - January 01, 2014 21:00 PM, Forecast power')
print(DFID2.iloc[4413:4413+24])

print('ZONE 3')
print('December 31, 2013 21:00 PM, Actual Power: ', DFID3.iat[4413-1,0], 'Predicted Power: ', DFID3.iat[4413-1, 1])
print('December 31, 2013 22:00 PM - January 01, 2014 21:00 PM, Forecast power:')
print(DFID3.iloc[4413:4413+24])

print("\n")
print("ZONEID\t1\t\t2\t\t3\t\tOverall")
print("MAE\t{0:1.8f}\t{1:1.8f}\t{2:1.8f}\t{3:1.8f}".format(MeanError[0], MeanError[1], MeanError[2], MeanError[3]))
print("RMSE\t{0:1.8f}\t{1:1.8f}\t{2:1.8f}\t{3:1.8f}".format(Root2Error[0], Root2Error[1], Root2Error[2], Root2Error[3]))

ZONE 1
December 31, 2013 21:00 PM, Actual Power:  0.159935897 Predicted Power:  0.2547665703216161
December 31, 2013 22:00 PM - January 01, 2014 21:00 PM, Forecast power:
             A         P
4413  0.289487  0.482962
4414  0.643846  0.593774
4415  0.532500  0.642309
4416  0.706218  0.568053
4417  0.409615  0.460485
4418  0.173462  0.425319
4419  0.151923  0.336971
4420  0.106410  0.275665
4421  0.095769  0.231845
4422  0.066218  0.123139
4423  0.024808  0.062172
4424  0.013141  0.006542
4425  0.000385  0.001739
4426  0.000000  0.001273
4427  0.000000  0.006012
4428  0.000000  0.005940
4429  0.000000  0.006318
4430  0.000000  0.008367
4431  0.000000  0.047765
4432  0.000000  0.065816
4433  0.000000  0.069070
4434  0.000321  0.166265
4435  0.024551  0.173562
4436  0.137949  0.260128
ZONE 2
December 31, 2013 21:00 PM, Actual Power:  0.099676113 Predicted Power:  0.2547665703216161
December 31, 2013 22:00 PM - January 01, 2014 21:00 PM, Forecast power
             A         P
4413  0.2